├── .github
│   ├── dependabot.yml
│   └── workflows
│       ├── ci.yml
│       └── publish-pypi.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CHANGELOG.md
├── LICENSE
├── README.md
├── examples
│   ├── async.py
│   ├── cache.py
│   ├── httpbin.py
│   ├── retry.py
│   └── table.py
├── poetry.lock
├── pyproject.toml
├── src
│   └── robox
│       ├── __init__.py
│       ├── _client.py
│       ├── _controls.py
│       ├── _download.py
│       ├── _exceptions.py
│       ├── _form.py
│       ├── _history.py
│       ├── _link.py
│       ├── _options.py
│       ├── _page.py
│       ├── _retry.py
│       ├── _robots.py
│       ├── _table.py
│       └── py.typed
└── tests
    ├── __init__.py
    ├── conftest.py
    ├── test_client.py
    ├── test_controls.py
    ├── test_form.py
    ├── test_page.py
    └── test_table.py

/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 |   - package-ecosystem: "pip"
4 |     directory: "/"
5 |     schedule:
6 |       interval: "monthly"
7 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: Run tests
2 | on: [push, pull_request]
3 |
4 | jobs:
5 |   ci:
6 |     runs-on: ubuntu-latest
7 |     steps:
8 |       - name: Check out repository
9 |         uses: actions/checkout@v2
10 |
11 |       - name: Set up python 3.8
12 |         id: setup-python  # referenced by the venv cache key below
13 |         uses: actions/setup-python@v2
14 |         with:
15 |           python-version: 3.8
16 |
17 |       - name: Install poetry
18 |         uses: snok/install-poetry@v1
19 |         with:
20 |           virtualenvs-create: true
21 |           virtualenvs-in-project: true
22 |           installer-parallel: true
23 |
24 |       - name: Load cached venv
25 |         id: cached-poetry-dependencies
26 |         uses: actions/cache@v2
27 |         with:
28 |           path: .venv
29 |           key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}
30 |
31 |       - name: Install dependencies
32 |         if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
33 |         run: poetry install --no-interaction --no-root
34 |
35 |       - name: Install library
36 |         run: poetry install --no-interaction
37 |
38 |       - name: Code quality
39 |         run: .venv/bin/pre-commit run --all-files
40 |
41 |       - name: Run tests
42 |         run: |
43 |           source .venv/bin/activate
44 |           pytest -v --asyncio-mode=auto --cov=src/ --cov-report=xml tests/
45 |
46 |       - name: Upload coverage to Codecov
47 |         uses: codecov/codecov-action@v2
--------------------------------------------------------------------------------
/.github/workflows/publish-pypi.yml:
--------------------------------------------------------------------------------
1 | name: Publish to PyPI
2 | on:
3 |   push:
4 |     tags:
5 |       - "v*.*.*"
6 |
7 | jobs:
8 |   build-and-publish:
9 |     runs-on: ubuntu-latest
10 |     steps:
11 |       - name: Check out repository
12 |         uses: actions/checkout@v2
13 |
14 |       - name: Set up python 3.8
15 |         uses: actions/setup-python@v2
16 |         with:
17 |           python-version: 3.8
18 |
19 |       - name: Install poetry
20 |         uses: snok/install-poetry@v1
21 |         with:
22 |           virtualenvs-create: true
23 |           virtualenvs-in-project: true
24 |           installer-parallel: true
25 |
26 |       - name: Build package
27 |         run: poetry build
28 |
29 |       - name: Publish to PyPI
30 |         uses: pypa/gh-action-pypi-publish@release/v1
31 |         with:
32 |           password: ${{ secrets.PYPI_API_TOKEN }}
33 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | # Rest
132 | .DS_Store
133 | .vscode
134 | t.py
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 |   - repo: https://github.com/pre-commit/pre-commit-hooks
3 |     rev: v4.1.0
4 |     hooks:
5 |       - id: check-yaml
6 |       - id: end-of-file-fixer
7 |       - id: trailing-whitespace
8 |   - repo: local
9 |     hooks:
10 |       - id: black
11 |         name: black
12 |         entry: poetry run black
13 |         language: system
14 |         types: [python]
15 |       - id: flake8
16 |         name: flake8
17 |         entry: poetry run flake8 --max-line-length=88 --exclude t.py
18 |         language: system
19 |         types: [python]
20 |       - id: isort
21 |         name: isort
22 |         entry: poetry run isort --profile=black
23 |         language: system
24 |         types: [python]
25 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | All notable changes to this project will be documented in this file.
4 |
5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
6 |
7 | ## 0.2.3 (3rd March, 2022)
8 | * Save and load cookies from disk
9 |
10 | ## 0.2.2 (22nd February, 2022)
11 | * Ability to parse tables
12 |
13 | ## 0.2.1 (10th February, 2022)
14 | * Fix `get_forms` and `follow_link_by_text`
15 |
16 | ## 0.2.0 (8th February, 2022)
17 | * Add retry mechanism
18 |
19 | ## 0.1.0 (4th February, 2022)
20 | * Initial release
21 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2022, Dan Claudiu Pop
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 |    list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 |    this list of conditions and the following disclaimer in the documentation
14 |    and/or other materials provided with the distribution.
15 |
16 | 3. Neither the name of the copyright holder nor the names of its
17 |    contributors may be used to endorse or promote products derived from
18 |    this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![codecov](https://codecov.io/gh/danclaudiupop/robox/branch/main/graph/badge.svg?token=2DR9K7DR0V)](https://codecov.io/gh/danclaudiupop/robox)
2 | [![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/danclaudiupop/robox.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/danclaudiupop/robox/context:python)
3 | [![Run tests](https://github.com/danclaudiupop/robox/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/danclaudiupop/robox/actions/workflows/ci.yml)
4 | [![view examples](https://img.shields.io/badge/learn%20by-examples-0077b3.svg)](https://github.com/danclaudiupop/robox/tree/main/examples)
5 | [![PyPI version](https://badge.fury.io/py/robox.svg)](https://badge.fury.io/py/robox)
6 |
7 | ## Overview
8 | Robox is a simple library with a clean interface for exploring/scraping the web or testing a website you're developing. It can fetch a page, click on links and buttons, and fill out and submit forms. It is built on top of two excellent libraries: [httpx](https://www.python-httpx.org/) and [beautifulsoup4](https://www.crummy.com/software/BeautifulSoup/bs4/doc/).
9 |
10 | ---
11 | Robox has all the standard features of httpx, including async, plus:
12 | - clean API
13 | - caching
14 | - downloading files
15 | - history
16 | - retry
17 | - parsing tables
18 | - understands robots.txt (see the robots.txt example below)
19 |
20 |
21 | ## Examples
22 |
23 | ```python
24 | from robox import Robox
25 |
26 |
27 | with Robox() as robox:
28 |     page = robox.open("https://httpbin.org/forms/post")
29 |     form = page.get_form()
30 |     form.fill_in("custname", value="foo")
31 |     form.check("topping", values=["Onion"])
32 |     form.choose("size", option="Medium")
33 |     form.fill_in("comments", value="all good in the hood")
34 |     form.fill_in("delivery", value="13:37")
35 |     page = page.submit_form(form)
36 |     assert page.url == "https://httpbin.org/post"
37 | ```
38 |
39 | or use the async version:
40 |
41 | ```python
42 | import asyncio
43 | from pprint import pprint
44 |
45 | from robox import AsyncRobox
46 |
47 |
48 | async def main():
49 |     async with AsyncRobox(follow_redirects=True) as robox:
50 |         page = await robox.open("https://www.google.com")
51 |         form = page.get_form()
52 |         form.fill_in("q", value="python")
53 |         consent_page = await page.submit_form(form)
54 |         form = consent_page.get_form()
55 |         page = await consent_page.submit_form(form)
56 |         links = page.get_links()
57 |         pprint([link for link in links if "Python" in link.text])
58 |
59 |
60 | asyncio.run(main())
61 | ```
62 |
63 | Caching can be easily configured via [httpx-cache](https://obendidi.github.io/httpx-cache/):
64 |
65 | ```python
66 | from robox import DictCache, FileCache, Options, Robox
67 |
68 |
69 | with Robox(options=Options(cache=DictCache())) as robox:
70 |     p1 = robox.open("https://httpbin.org/get")
71 |     assert not p1.from_cache
72 |     p2 = robox.open("https://httpbin.org/get")
73 |     assert p2.from_cache
74 |
75 | # or persist cached responses on disk, as in examples/cache.py:
76 | with Robox(options=Options(cache=FileCache("./cache"))) as robox:
77 |     ...
78 | ```
79 |
80 | Failed requests caused by temporary problems, such as a connection timeout or an HTTP 500 response, can be retried:
81 |
82 | ```python
83 | with Robox(
84 |     options=Options(
85 |         retry=True,
86 |         retry_max_attempts=2,
87 |         raise_on_4xx_5xx=True,
88 |     )
89 | ) as robox:
90 |     page = robox.open("https://httpbin.org/status/503,200")
91 |     assert page.status_code == 200
92 | ```
93 |
94 | Parse tables with rowspan and colspan:
95 | ```python
96 | with Robox() as robox:
97 |     page = robox.open("https://html.com/tables/rowspan-colspan/")
98 |     tables = page.get_tables()
99 |     for table in tables:
100 |         pprint(table.get_rows())
101 | ```
102 | ```bash
103 | [['65', '65', '40', '40', '20', '20'],
104 |  ['Men', 'Women', 'Men', 'Women', 'Men', 'Women'],
105 |  ['82', '85', '78', '82', '77', '81']]
106 | ...
107 | ```
108 |
109 | An example of how to reuse authentication state with cookies:
110 | ```python
111 | import os
112 |
113 | with Robox() as robox:
114 |     page = robox.open("https://news.ycombinator.com/login")
115 |     form = page.get_forms()[0]
116 |     form.fill_in("acct", value=os.getenv("USERNAME"))
117 |     form.fill_in("pw", value=os.getenv("PASSWORD"))
118 |     page.submit_form(form)
119 |     robox.save_cookies("cookies.json")
120 |
121 |
122 | with Robox() as robox:
123 |     robox.load_cookies("cookies.json")
124 |     page = robox.open("https://news.ycombinator.com/")
125 |     assert page.parsed.find("a", attrs={"id": "logout"})
126 | ```
127 |
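128 | Robox also understands robots.txt. The sketch below mirrors examples/httpbin.py: with the `obey_robotstxt` option enabled, Robox consults the site's robots.txt before opening a URL:
129 | ```python
130 | from robox import Options, Robox
131 |
132 | with Robox(options=Options(obey_robotstxt=True)) as robox:
133 |     # the page is only fetched if the site's robots.txt allows it
134 |     robox.open("https://news.ycombinator.com/")
135 | ```
136 |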
137 | See the [examples](https://github.com/danclaudiupop/robox/tree/main/examples) folder for more detailed examples.
138 |
139 | ## Installation
140 |
141 | Using pip:
142 |
143 | ```sh
144 | pip install robox
145 | ```
146 |
147 | Robox requires Python 3.8+.
148 | See [Changelog](https://github.com/danclaudiupop/robox/blob/main/CHANGELOG.md) for changes.
149 |
--------------------------------------------------------------------------------
/examples/async.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pprint import pprint
3 |
4 | from robox import AsyncRobox
5 |
6 |
7 | async def main():
8 |     async with AsyncRobox(follow_redirects=True) as robox:
9 |         page = await robox.open("https://www.google.com")
10 |         form = page.get_form()
11 |         form.fill_in("q", value="python")
12 |         consent_page = await page.submit_form(form)
13 |         form = consent_page.get_form()
14 |         page = await consent_page.submit_form(form)
15 |         links = page.get_links()
16 |         pprint([link for link in links if "Python" in link.text])
17 |
18 |
19 | asyncio.run(main())
20 |
--------------------------------------------------------------------------------
/examples/cache.py:
--------------------------------------------------------------------------------
1 | from robox import DictCache, FileCache, Options, Robox
2 |
3 | with Robox(options=Options(cache=DictCache())) as robox:
4 |     p1 = robox.open("https://httpbin.org/get")
5 |     assert not p1.from_cache
6 |     p2 = robox.open("https://httpbin.org/get")
7 |     assert p2.from_cache
8 |
9 |
10 | with Robox(options=Options(cache=FileCache("./cache"))) as robox:
11 |     p1 = robox.open("https://httpbin.org/get")
12 |     assert not p1.from_cache
13 |     p2 = robox.open("https://httpbin.org/get")
14 |     assert p2.from_cache
15 |
--------------------------------------------------------------------------------
/examples/httpbin.py:
--------------------------------------------------------------------------------
1 | from robox import Options, Robox
2 |
3 | with Robox() as robox:
4 |     page = robox.open("https://httpbin.org/forms/post")
5 |     form = page.get_form()
6 |     form.fill_in("custname", value="foo")
7 |     form.check("topping", values=["Onion"])
8 |     form.choose("size", option="Medium")
9 |     form.fill_in("comments", value="all good in the hood")
10 |     form.fill_in("delivery", value="13:37")
11 |     page = page.submit_form(form)
12 |     assert page.url == "https://httpbin.org/post"
13 |
14 |
15 | with Robox(options=Options(obey_robotstxt=True)) as robox:
16 |     robox.open("https://news.ycombinator.com/")
17 |
--------------------------------------------------------------------------------
/examples/retry.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 |
3 | from robox import AsyncRobox, Options, Robox
4 |
5 |
6 | async def main():
7 |     async with AsyncRobox(
8 |         options=Options(
9 |             retry=True,
10 |             retry_max_attempts=2,
11 |         )
12 |     ) as robox:
13 |         await robox.open("https://httpbin.org/status/500")
14 |
15 |
16 | asyncio.run(main())
17 |
18 |
19 | with Robox(
20 |     options=Options(
21 |         retry=True,
22 |         retry_max_attempts=2,
23 |         raise_on_4xx_5xx=True,
24 |     )
25 | ) as robox:
26 |     robox.open("https://httpbin.org/status/503,200")
27 |
--------------------------------------------------------------------------------
/examples/table.py:
--------------------------------------------------------------------------------
1 | from pprint import pprint
2 |
3 | from robox import Robox
4 |
5 | with Robox() as robox:
6 |     page = robox.open("https://html.com/tables/rowspan-colspan/")
7 |     tables = page.get_tables()
8 |     for table in tables:
9 |         pprint(table.get_rows())
10 |
--------------------------------------------------------------------------------
/poetry.lock:
-------------------------------------------------------------------------------- 1 | [[package]] 2 | name = "aiofiles" 3 | version = "0.8.0" 4 | description = "File support for asyncio." 5 | category = "main" 6 | optional = false 7 | python-versions = ">=3.6,<4.0" 8 | 9 | [[package]] 10 | name = "anyio" 11 | version = "3.5.0" 12 | description = "High level compatibility layer for multiple asynchronous event loop implementations" 13 | category = "main" 14 | optional = false 15 | python-versions = ">=3.6.2" 16 | 17 | [package.dependencies] 18 | idna = ">=2.8" 19 | sniffio = ">=1.1" 20 | 21 | [package.extras] 22 | doc = ["packaging", "sphinx-rtd-theme", "sphinx-autodoc-typehints (>=1.2.0)"] 23 | test = ["coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "pytest (>=6.0)", "pytest-mock (>=3.6.1)", "trustme", "contextlib2", "uvloop (<0.15)", "mock (>=4)", "uvloop (>=0.15)"] 24 | trio = ["trio (>=0.16)"] 25 | 26 | [[package]] 27 | name = "atomicwrites" 28 | version = "1.4.0" 29 | description = "Atomic file writes." 30 | category = "dev" 31 | optional = false 32 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 33 | 34 | [[package]] 35 | name = "attrs" 36 | version = "21.4.0" 37 | description = "Classes Without Boilerplate" 38 | category = "main" 39 | optional = false 40 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 41 | 42 | [package.extras] 43 | dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"] 44 | docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"] 45 | tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "cloudpickle"] 46 | tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "cloudpickle"] 47 | 48 | [[package]] 49 | name = "beautifulsoup4" 50 | version = "4.10.0" 51 | description = "Screen-scraping library" 52 | category = "main" 53 | optional = false 54 | python-versions = ">3.0.0" 55 | 56 | [package.dependencies] 57 | soupsieve = ">1.2" 58 | 59 | [package.extras] 60 | html5lib = ["html5lib"] 61 | lxml = ["lxml"] 62 | 63 | [[package]] 64 | name = "black" 65 | version = "22.1.0" 66 | description = "The uncompromising code formatter." 67 | category = "dev" 68 | optional = false 69 | python-versions = ">=3.6.2" 70 | 71 | [package.dependencies] 72 | click = ">=8.0.0" 73 | mypy-extensions = ">=0.4.3" 74 | pathspec = ">=0.9.0" 75 | platformdirs = ">=2" 76 | tomli = ">=1.1.0" 77 | typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} 78 | 79 | [package.extras] 80 | colorama = ["colorama (>=0.4.3)"] 81 | d = ["aiohttp (>=3.7.4)"] 82 | jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] 83 | uvloop = ["uvloop (>=0.15.2)"] 84 | 85 | [[package]] 86 | name = "certifi" 87 | version = "2021.10.8" 88 | description = "Python package for providing Mozilla's CA Bundle." 89 | category = "main" 90 | optional = false 91 | python-versions = "*" 92 | 93 | [[package]] 94 | name = "cfgv" 95 | version = "3.3.1" 96 | description = "Validate configuration and produce human readable error messages." 97 | category = "dev" 98 | optional = false 99 | python-versions = ">=3.6.1" 100 | 101 | [[package]] 102 | name = "charset-normalizer" 103 | version = "2.0.11" 104 | description = "The Real First Universal Charset Detector. 
Open, modern and actively maintained alternative to Chardet." 105 | category = "main" 106 | optional = false 107 | python-versions = ">=3.5.0" 108 | 109 | [package.extras] 110 | unicode_backport = ["unicodedata2"] 111 | 112 | [[package]] 113 | name = "click" 114 | version = "8.0.3" 115 | description = "Composable command line interface toolkit" 116 | category = "dev" 117 | optional = false 118 | python-versions = ">=3.6" 119 | 120 | [package.dependencies] 121 | colorama = {version = "*", markers = "platform_system == \"Windows\""} 122 | 123 | [[package]] 124 | name = "colorama" 125 | version = "0.4.4" 126 | description = "Cross-platform colored terminal text." 127 | category = "dev" 128 | optional = false 129 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 130 | 131 | [[package]] 132 | name = "coverage" 133 | version = "6.3.2" 134 | description = "Code coverage measurement for Python" 135 | category = "dev" 136 | optional = false 137 | python-versions = ">=3.7" 138 | 139 | [package.dependencies] 140 | tomli = {version = "*", optional = true, markers = "extra == \"toml\""} 141 | 142 | [package.extras] 143 | toml = ["tomli"] 144 | 145 | [[package]] 146 | name = "distlib" 147 | version = "0.3.4" 148 | description = "Distribution utilities" 149 | category = "dev" 150 | optional = false 151 | python-versions = "*" 152 | 153 | [[package]] 154 | name = "fasteners" 155 | version = "0.17.3" 156 | description = "A python package that provides useful locks" 157 | category = "main" 158 | optional = false 159 | python-versions = ">=3.6" 160 | 161 | [[package]] 162 | name = "filelock" 163 | version = "3.4.2" 164 | description = "A platform independent file lock." 165 | category = "dev" 166 | optional = false 167 | python-versions = ">=3.7" 168 | 169 | [package.extras] 170 | docs = ["furo (>=2021.8.17b43)", "sphinx (>=4.1)", "sphinx-autodoc-typehints (>=1.12)"] 171 | testing = ["covdefaults (>=1.2.0)", "coverage (>=4)", "pytest (>=4)", "pytest-cov", "pytest-timeout (>=1.4.2)"] 172 | 173 | [[package]] 174 | name = "flake8" 175 | version = "4.0.1" 176 | description = "the modular source code checker: pep8 pyflakes and co" 177 | category = "dev" 178 | optional = false 179 | python-versions = ">=3.6" 180 | 181 | [package.dependencies] 182 | mccabe = ">=0.6.0,<0.7.0" 183 | pycodestyle = ">=2.8.0,<2.9.0" 184 | pyflakes = ">=2.4.0,<2.5.0" 185 | 186 | [[package]] 187 | name = "h11" 188 | version = "0.12.0" 189 | description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" 190 | category = "main" 191 | optional = false 192 | python-versions = ">=3.6" 193 | 194 | [[package]] 195 | name = "httpcore" 196 | version = "0.14.6" 197 | description = "A minimal low-level HTTP client." 198 | category = "main" 199 | optional = false 200 | python-versions = ">=3.6" 201 | 202 | [package.dependencies] 203 | anyio = ">=3.0.0,<4.0.0" 204 | certifi = "*" 205 | h11 = ">=0.11,<0.13" 206 | sniffio = ">=1.0.0,<2.0.0" 207 | 208 | [package.extras] 209 | http2 = ["h2 (>=3,<5)"] 210 | socks = ["socksio (>=1.0.0,<2.0.0)"] 211 | 212 | [[package]] 213 | name = "httpx" 214 | version = "0.21.3" 215 | description = "The next generation HTTP client." 
216 | category = "main" 217 | optional = false 218 | python-versions = ">=3.6" 219 | 220 | [package.dependencies] 221 | certifi = "*" 222 | charset-normalizer = "*" 223 | httpcore = ">=0.14.0,<0.15.0" 224 | rfc3986 = {version = ">=1.3,<2", extras = ["idna2008"]} 225 | sniffio = "*" 226 | 227 | [package.extras] 228 | brotli = ["brotlicffi", "brotli"] 229 | cli = ["click (>=8.0.0,<9.0.0)", "rich (>=10.0.0,<11.0.0)", "pygments (>=2.0.0,<3.0.0)"] 230 | http2 = ["h2 (>=3,<5)"] 231 | 232 | [[package]] 233 | name = "httpx-cache" 234 | version = "0.4.1" 235 | description = "Simple caching transport for httpx." 236 | category = "main" 237 | optional = false 238 | python-versions = ">=3.6.2,<4.0" 239 | 240 | [package.dependencies] 241 | anyio = ">=3.4.0,<4.0.0" 242 | attrs = ">=21.4.0,<22.0.0" 243 | fasteners = ">=0.16.3,<0.18.0" 244 | httpx = ">=0.21.1,<0.22.0" 245 | msgpack = ">=1.0.3,<2.0.0" 246 | 247 | [[package]] 248 | name = "identify" 249 | version = "2.4.7" 250 | description = "File identification library for Python" 251 | category = "dev" 252 | optional = false 253 | python-versions = ">=3.7" 254 | 255 | [package.extras] 256 | license = ["ukkonen"] 257 | 258 | [[package]] 259 | name = "idna" 260 | version = "3.3" 261 | description = "Internationalized Domain Names in Applications (IDNA)" 262 | category = "main" 263 | optional = false 264 | python-versions = ">=3.5" 265 | 266 | [[package]] 267 | name = "iniconfig" 268 | version = "1.1.1" 269 | description = "iniconfig: brain-dead simple config-ini parsing" 270 | category = "dev" 271 | optional = false 272 | python-versions = "*" 273 | 274 | [[package]] 275 | name = "isort" 276 | version = "5.10.1" 277 | description = "A Python utility / library to sort Python imports." 278 | category = "dev" 279 | optional = false 280 | python-versions = ">=3.6.1,<4.0" 281 | 282 | [package.extras] 283 | pipfile_deprecated_finder = ["pipreqs", "requirementslib"] 284 | requirements_deprecated_finder = ["pipreqs", "pip-api"] 285 | colors = ["colorama (>=0.4.3,<0.5.0)"] 286 | plugins = ["setuptools"] 287 | 288 | [[package]] 289 | name = "mccabe" 290 | version = "0.6.1" 291 | description = "McCabe checker, plugin for flake8" 292 | category = "dev" 293 | optional = false 294 | python-versions = "*" 295 | 296 | [[package]] 297 | name = "msgpack" 298 | version = "1.0.3" 299 | description = "MessagePack (de)serializer." 300 | category = "main" 301 | optional = false 302 | python-versions = "*" 303 | 304 | [[package]] 305 | name = "mypy-extensions" 306 | version = "0.4.3" 307 | description = "Experimental type system extensions for programs checked with the mypy typechecker." 308 | category = "dev" 309 | optional = false 310 | python-versions = "*" 311 | 312 | [[package]] 313 | name = "nodeenv" 314 | version = "1.6.0" 315 | description = "Node.js virtual environment builder" 316 | category = "dev" 317 | optional = false 318 | python-versions = "*" 319 | 320 | [[package]] 321 | name = "packaging" 322 | version = "21.3" 323 | description = "Core utilities for Python packages" 324 | category = "dev" 325 | optional = false 326 | python-versions = ">=3.6" 327 | 328 | [package.dependencies] 329 | pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" 330 | 331 | [[package]] 332 | name = "pathspec" 333 | version = "0.9.0" 334 | description = "Utility library for gitignore style pattern matching of file paths." 
335 | category = "dev" 336 | optional = false 337 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" 338 | 339 | [[package]] 340 | name = "platformdirs" 341 | version = "2.4.1" 342 | description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 343 | category = "dev" 344 | optional = false 345 | python-versions = ">=3.7" 346 | 347 | [package.extras] 348 | docs = ["Sphinx (>=4)", "furo (>=2021.7.5b38)", "proselint (>=0.10.2)", "sphinx-autodoc-typehints (>=1.12)"] 349 | test = ["appdirs (==1.4.4)", "pytest (>=6)", "pytest-cov (>=2.7)", "pytest-mock (>=3.6)"] 350 | 351 | [[package]] 352 | name = "pluggy" 353 | version = "1.0.0" 354 | description = "plugin and hook calling mechanisms for python" 355 | category = "dev" 356 | optional = false 357 | python-versions = ">=3.6" 358 | 359 | [package.extras] 360 | dev = ["pre-commit", "tox"] 361 | testing = ["pytest", "pytest-benchmark"] 362 | 363 | [[package]] 364 | name = "pre-commit" 365 | version = "2.17.0" 366 | description = "A framework for managing and maintaining multi-language pre-commit hooks." 367 | category = "dev" 368 | optional = false 369 | python-versions = ">=3.6.1" 370 | 371 | [package.dependencies] 372 | cfgv = ">=2.0.0" 373 | identify = ">=1.0.0" 374 | nodeenv = ">=0.11.1" 375 | pyyaml = ">=5.1" 376 | toml = "*" 377 | virtualenv = ">=20.0.8" 378 | 379 | [[package]] 380 | name = "py" 381 | version = "1.11.0" 382 | description = "library with cross-python path, ini-parsing, io, code, log facilities" 383 | category = "dev" 384 | optional = false 385 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 386 | 387 | [[package]] 388 | name = "pycodestyle" 389 | version = "2.8.0" 390 | description = "Python style guide checker" 391 | category = "dev" 392 | optional = false 393 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 394 | 395 | [[package]] 396 | name = "pyflakes" 397 | version = "2.4.0" 398 | description = "passive checker of Python programs" 399 | category = "dev" 400 | optional = false 401 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 402 | 403 | [[package]] 404 | name = "pyparsing" 405 | version = "3.0.7" 406 | description = "Python parsing module" 407 | category = "dev" 408 | optional = false 409 | python-versions = ">=3.6" 410 | 411 | [package.extras] 412 | diagrams = ["jinja2", "railroad-diagrams"] 413 | 414 | [[package]] 415 | name = "pytest" 416 | version = "7.0.1" 417 | description = "pytest: simple powerful testing with Python" 418 | category = "dev" 419 | optional = false 420 | python-versions = ">=3.6" 421 | 422 | [package.dependencies] 423 | atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} 424 | attrs = ">=19.2.0" 425 | colorama = {version = "*", markers = "sys_platform == \"win32\""} 426 | iniconfig = "*" 427 | packaging = "*" 428 | pluggy = ">=0.12,<2.0" 429 | py = ">=1.8.2" 430 | tomli = ">=1.0.0" 431 | 432 | [package.extras] 433 | testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] 434 | 435 | [[package]] 436 | name = "pytest-asyncio" 437 | version = "0.17.2" 438 | description = "Pytest support for asyncio" 439 | category = "dev" 440 | optional = false 441 | python-versions = ">=3.7" 442 | 443 | [package.dependencies] 444 | pytest = ">=6.1.0" 445 | 446 | [package.extras] 447 | testing = ["coverage (==6.2)", "hypothesis (>=5.7.1)", "flaky (>=3.5.0)", "mypy (==0.931)"] 448 | 449 | [[package]] 450 | 
name = "pytest-cov" 451 | version = "3.0.0" 452 | description = "Pytest plugin for measuring coverage." 453 | category = "dev" 454 | optional = false 455 | python-versions = ">=3.6" 456 | 457 | [package.dependencies] 458 | coverage = {version = ">=5.2.1", extras = ["toml"]} 459 | pytest = ">=4.6" 460 | 461 | [package.extras] 462 | testing = ["fields", "hunter", "process-tests", "six", "pytest-xdist", "virtualenv"] 463 | 464 | [[package]] 465 | name = "pyyaml" 466 | version = "6.0" 467 | description = "YAML parser and emitter for Python" 468 | category = "dev" 469 | optional = false 470 | python-versions = ">=3.6" 471 | 472 | [[package]] 473 | name = "respx" 474 | version = "0.19.2" 475 | description = "A utility for mocking out the Python HTTPX and HTTP Core libraries." 476 | category = "dev" 477 | optional = false 478 | python-versions = ">=3.6" 479 | 480 | [package.dependencies] 481 | httpx = ">=0.21.0" 482 | 483 | [[package]] 484 | name = "rfc3986" 485 | version = "1.5.0" 486 | description = "Validating URI References per RFC 3986" 487 | category = "main" 488 | optional = false 489 | python-versions = "*" 490 | 491 | [package.dependencies] 492 | idna = {version = "*", optional = true, markers = "extra == \"idna2008\""} 493 | 494 | [package.extras] 495 | idna2008 = ["idna"] 496 | 497 | [[package]] 498 | name = "six" 499 | version = "1.16.0" 500 | description = "Python 2 and 3 compatibility utilities" 501 | category = "dev" 502 | optional = false 503 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" 504 | 505 | [[package]] 506 | name = "sniffio" 507 | version = "1.2.0" 508 | description = "Sniff out which async library your code is running under" 509 | category = "main" 510 | optional = false 511 | python-versions = ">=3.5" 512 | 513 | [[package]] 514 | name = "soupsieve" 515 | version = "2.3.1" 516 | description = "A modern CSS selector implementation for Beautiful Soup." 
517 | category = "main" 518 | optional = false 519 | python-versions = ">=3.6" 520 | 521 | [[package]] 522 | name = "tenacity" 523 | version = "8.0.1" 524 | description = "Retry code until it succeeds" 525 | category = "main" 526 | optional = false 527 | python-versions = ">=3.6" 528 | 529 | [package.extras] 530 | doc = ["reno", "sphinx", "tornado (>=4.5)"] 531 | 532 | [[package]] 533 | name = "toml" 534 | version = "0.10.2" 535 | description = "Python Library for Tom's Obvious, Minimal Language" 536 | category = "dev" 537 | optional = false 538 | python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" 539 | 540 | [[package]] 541 | name = "tomli" 542 | version = "1.2.3" 543 | description = "A lil' TOML parser" 544 | category = "dev" 545 | optional = false 546 | python-versions = ">=3.6" 547 | 548 | [[package]] 549 | name = "typing-extensions" 550 | version = "4.0.1" 551 | description = "Backported and Experimental Type Hints for Python 3.6+" 552 | category = "dev" 553 | optional = false 554 | python-versions = ">=3.6" 555 | 556 | [[package]] 557 | name = "virtualenv" 558 | version = "20.13.0" 559 | description = "Virtual Python Environment builder" 560 | category = "dev" 561 | optional = false 562 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" 563 | 564 | [package.dependencies] 565 | distlib = ">=0.3.1,<1" 566 | filelock = ">=3.2,<4" 567 | platformdirs = ">=2,<3" 568 | six = ">=1.9.0,<2" 569 | 570 | [package.extras] 571 | docs = ["proselint (>=0.10.2)", "sphinx (>=3)", "sphinx-argparse (>=0.2.5)", "sphinx-rtd-theme (>=0.4.3)", "towncrier (>=21.3)"] 572 | testing = ["coverage (>=4)", "coverage-enable-subprocess (>=1)", "flaky (>=3)", "pytest (>=4)", "pytest-env (>=0.6.2)", "pytest-freezegun (>=0.4.1)", "pytest-mock (>=2)", "pytest-randomly (>=1)", "pytest-timeout (>=1)", "packaging (>=20.0)"] 573 | 574 | [metadata] 575 | lock-version = "1.1" 576 | python-versions = "^3.8" 577 | content-hash = "91ad8bfaa0ff8d61643c90c926554f51ea97d8758b5bd040c6a4d1a3ab52cfeb" 578 | 579 | [metadata.files] 580 | aiofiles = [ 581 | {file = "aiofiles-0.8.0-py3-none-any.whl", hash = "sha256:7a973fc22b29e9962d0897805ace5856e6a566ab1f0c8e5c91ff6c866519c937"}, 582 | {file = "aiofiles-0.8.0.tar.gz", hash = "sha256:8334f23235248a3b2e83b2c3a78a22674f39969b96397126cc93664d9a901e59"}, 583 | ] 584 | anyio = [ 585 | {file = "anyio-3.5.0-py3-none-any.whl", hash = "sha256:b5fa16c5ff93fa1046f2eeb5bbff2dad4d3514d6cda61d02816dba34fa8c3c2e"}, 586 | {file = "anyio-3.5.0.tar.gz", hash = "sha256:a0aeffe2fb1fdf374a8e4b471444f0f3ac4fb9f5a5b542b48824475e0042a5a6"}, 587 | ] 588 | atomicwrites = [ 589 | {file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"}, 590 | {file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"}, 591 | ] 592 | attrs = [ 593 | {file = "attrs-21.4.0-py2.py3-none-any.whl", hash = "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4"}, 594 | {file = "attrs-21.4.0.tar.gz", hash = "sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd"}, 595 | ] 596 | beautifulsoup4 = [ 597 | {file = "beautifulsoup4-4.10.0-py3-none-any.whl", hash = "sha256:9a315ce70049920ea4572a4055bc4bd700c940521d36fc858205ad4fcde149bf"}, 598 | {file = "beautifulsoup4-4.10.0.tar.gz", hash = "sha256:c23ad23c521d818955a4151a67d81580319d4bf548d3d49f4223ae041ff98891"}, 599 | ] 600 | black = [ 601 | {file = "black-22.1.0-cp310-cp310-macosx_10_9_universal2.whl", 
hash = "sha256:1297c63b9e1b96a3d0da2d85d11cd9bf8664251fd69ddac068b98dc4f34f73b6"}, 602 | {file = "black-22.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2ff96450d3ad9ea499fc4c60e425a1439c2120cbbc1ab959ff20f7c76ec7e866"}, 603 | {file = "black-22.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e21e1f1efa65a50e3960edd068b6ae6d64ad6235bd8bfea116a03b21836af71"}, 604 | {file = "black-22.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2f69158a7d120fd641d1fa9a921d898e20d52e44a74a6fbbcc570a62a6bc8ab"}, 605 | {file = "black-22.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:228b5ae2c8e3d6227e4bde5920d2fc66cc3400fde7bcc74f480cb07ef0b570d5"}, 606 | {file = "black-22.1.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b1a5ed73ab4c482208d20434f700d514f66ffe2840f63a6252ecc43a9bc77e8a"}, 607 | {file = "black-22.1.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:35944b7100af4a985abfcaa860b06af15590deb1f392f06c8683b4381e8eeaf0"}, 608 | {file = "black-22.1.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7835fee5238fc0a0baf6c9268fb816b5f5cd9b8793423a75e8cd663c48d073ba"}, 609 | {file = "black-22.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:dae63f2dbf82882fa3b2a3c49c32bffe144970a573cd68d247af6560fc493ae1"}, 610 | {file = "black-22.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fa1db02410b1924b6749c245ab38d30621564e658297484952f3d8a39fce7e8"}, 611 | {file = "black-22.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:c8226f50b8c34a14608b848dc23a46e5d08397d009446353dad45e04af0c8e28"}, 612 | {file = "black-22.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:2d6f331c02f0f40aa51a22e479c8209d37fcd520c77721c034517d44eecf5912"}, 613 | {file = "black-22.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:742ce9af3086e5bd07e58c8feb09dbb2b047b7f566eb5f5bc63fd455814979f3"}, 614 | {file = "black-22.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:fdb8754b453fb15fad3f72cd9cad3e16776f0964d67cf30ebcbf10327a3777a3"}, 615 | {file = "black-22.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5660feab44c2e3cb24b2419b998846cbb01c23c7fe645fee45087efa3da2d61"}, 616 | {file = "black-22.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:6f2f01381f91c1efb1451998bd65a129b3ed6f64f79663a55fe0e9b74a5f81fd"}, 617 | {file = "black-22.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:efbadd9b52c060a8fc3b9658744091cb33c31f830b3f074422ed27bad2b18e8f"}, 618 | {file = "black-22.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8871fcb4b447206904932b54b567923e5be802b9b19b744fdff092bd2f3118d0"}, 619 | {file = "black-22.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ccad888050f5393f0d6029deea2a33e5ae371fd182a697313bdbd835d3edaf9c"}, 620 | {file = "black-22.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07e5c049442d7ca1a2fc273c79d1aecbbf1bc858f62e8184abe1ad175c4f7cc2"}, 621 | {file = "black-22.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:373922fc66676133ddc3e754e4509196a8c392fec3f5ca4486673e685a421321"}, 622 | {file = "black-22.1.0-py3-none-any.whl", hash = "sha256:3524739d76b6b3ed1132422bf9d82123cd1705086723bc3e235ca39fd21c667d"}, 623 | {file = "black-22.1.0.tar.gz", hash = "sha256:a7c0192d35635f6fc1174be575cb7915e92e5dd629ee79fdaf0dcfa41a80afb5"}, 624 | ] 625 | certifi = [ 626 | {file = "certifi-2021.10.8-py2.py3-none-any.whl", hash = "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"}, 627 | {file = "certifi-2021.10.8.tar.gz", 
hash = "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872"}, 628 | ] 629 | cfgv = [ 630 | {file = "cfgv-3.3.1-py2.py3-none-any.whl", hash = "sha256:c6a0883f3917a037485059700b9e75da2464e6c27051014ad85ba6aaa5884426"}, 631 | {file = "cfgv-3.3.1.tar.gz", hash = "sha256:f5a830efb9ce7a445376bb66ec94c638a9787422f96264c98edc6bdeed8ab736"}, 632 | ] 633 | charset-normalizer = [ 634 | {file = "charset-normalizer-2.0.11.tar.gz", hash = "sha256:98398a9d69ee80548c762ba991a4728bfc3836768ed226b3945908d1a688371c"}, 635 | {file = "charset_normalizer-2.0.11-py3-none-any.whl", hash = "sha256:2842d8f5e82a1f6aa437380934d5e1cd4fcf2003b06fed6940769c164a480a45"}, 636 | ] 637 | click = [ 638 | {file = "click-8.0.3-py3-none-any.whl", hash = "sha256:353f466495adaeb40b6b5f592f9f91cb22372351c84caeb068132442a4518ef3"}, 639 | {file = "click-8.0.3.tar.gz", hash = "sha256:410e932b050f5eed773c4cda94de75971c89cdb3155a72a0831139a79e5ecb5b"}, 640 | ] 641 | colorama = [ 642 | {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, 643 | {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, 644 | ] 645 | coverage = [ 646 | {file = "coverage-6.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9b27d894748475fa858f9597c0ee1d4829f44683f3813633aaf94b19cb5453cf"}, 647 | {file = "coverage-6.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:37d1141ad6b2466a7b53a22e08fe76994c2d35a5b6b469590424a9953155afac"}, 648 | {file = "coverage-6.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f9987b0354b06d4df0f4d3e0ec1ae76d7ce7cbca9a2f98c25041eb79eec766f1"}, 649 | {file = "coverage-6.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:26e2deacd414fc2f97dd9f7676ee3eaecd299ca751412d89f40bc01557a6b1b4"}, 650 | {file = "coverage-6.3.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4dd8bafa458b5c7d061540f1ee9f18025a68e2d8471b3e858a9dad47c8d41903"}, 651 | {file = "coverage-6.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:46191097ebc381fbf89bdce207a6c107ac4ec0890d8d20f3360345ff5976155c"}, 652 | {file = "coverage-6.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6f89d05e028d274ce4fa1a86887b071ae1755082ef94a6740238cd7a8178804f"}, 653 | {file = "coverage-6.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:58303469e9a272b4abdb9e302a780072c0633cdcc0165db7eec0f9e32f901e05"}, 654 | {file = "coverage-6.3.2-cp310-cp310-win32.whl", hash = "sha256:2fea046bfb455510e05be95e879f0e768d45c10c11509e20e06d8fcaa31d9e39"}, 655 | {file = "coverage-6.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:a2a8b8bcc399edb4347a5ca8b9b87e7524c0967b335fbb08a83c8421489ddee1"}, 656 | {file = "coverage-6.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:f1555ea6d6da108e1999b2463ea1003fe03f29213e459145e70edbaf3e004aaa"}, 657 | {file = "coverage-6.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5f4e1edcf57ce94e5475fe09e5afa3e3145081318e5fd1a43a6b4539a97e518"}, 658 | {file = "coverage-6.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7a15dc0a14008f1da3d1ebd44bdda3e357dbabdf5a0b5034d38fcde0b5c234b7"}, 659 | {file = "coverage-6.3.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:21b7745788866028adeb1e0eca3bf1101109e2dc58456cb49d2d9b99a8c516e6"}, 660 | {file = "coverage-6.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:8ce257cac556cb03be4a248d92ed36904a59a4a5ff55a994e92214cde15c5bad"}, 661 | {file = "coverage-6.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b0be84e5a6209858a1d3e8d1806c46214e867ce1b0fd32e4ea03f4bd8b2e3359"}, 662 | {file = "coverage-6.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:acf53bc2cf7282ab9b8ba346746afe703474004d9e566ad164c91a7a59f188a4"}, 663 | {file = "coverage-6.3.2-cp37-cp37m-win32.whl", hash = "sha256:8bdde1177f2311ee552f47ae6e5aa7750c0e3291ca6b75f71f7ffe1f1dab3dca"}, 664 | {file = "coverage-6.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:b31651d018b23ec463e95cf10070d0b2c548aa950a03d0b559eaa11c7e5a6fa3"}, 665 | {file = "coverage-6.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:07e6db90cd9686c767dcc593dff16c8c09f9814f5e9c51034066cad3373b914d"}, 666 | {file = "coverage-6.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2c6dbb42f3ad25760010c45191e9757e7dce981cbfb90e42feef301d71540059"}, 667 | {file = "coverage-6.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c76aeef1b95aff3905fb2ae2d96e319caca5b76fa41d3470b19d4e4a3a313512"}, 668 | {file = "coverage-6.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8cf5cfcb1521dc3255d845d9dca3ff204b3229401994ef8d1984b32746bb45ca"}, 669 | {file = "coverage-6.3.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8fbbdc8d55990eac1b0919ca69eb5a988a802b854488c34b8f37f3e2025fa90d"}, 670 | {file = "coverage-6.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ec6bc7fe73a938933d4178c9b23c4e0568e43e220aef9472c4f6044bfc6dd0f0"}, 671 | {file = "coverage-6.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:9baff2a45ae1f17c8078452e9e5962e518eab705e50a0aa8083733ea7d45f3a6"}, 672 | {file = "coverage-6.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fd9e830e9d8d89b20ab1e5af09b32d33e1a08ef4c4e14411e559556fd788e6b2"}, 673 | {file = "coverage-6.3.2-cp38-cp38-win32.whl", hash = "sha256:f7331dbf301b7289013175087636bbaf5b2405e57259dd2c42fdcc9fcc47325e"}, 674 | {file = "coverage-6.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:68353fe7cdf91f109fc7d474461b46e7f1f14e533e911a2a2cbb8b0fc8613cf1"}, 675 | {file = "coverage-6.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b78e5afb39941572209f71866aa0b206c12f0109835aa0d601e41552f9b3e620"}, 676 | {file = "coverage-6.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4e21876082ed887baed0146fe222f861b5815455ada3b33b890f4105d806128d"}, 677 | {file = "coverage-6.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:34626a7eee2a3da12af0507780bb51eb52dca0e1751fd1471d0810539cefb536"}, 678 | {file = "coverage-6.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1ebf730d2381158ecf3dfd4453fbca0613e16eaa547b4170e2450c9707665ce7"}, 679 | {file = "coverage-6.3.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd6fe30bd519694b356cbfcaca9bd5c1737cddd20778c6a581ae20dc8c04def2"}, 680 | {file = "coverage-6.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:96f8a1cb43ca1422f36492bebe63312d396491a9165ed3b9231e778d43a7fca4"}, 681 | {file = "coverage-6.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = 
"sha256:dd035edafefee4d573140a76fdc785dc38829fe5a455c4bb12bac8c20cfc3d69"}, 682 | {file = "coverage-6.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5ca5aeb4344b30d0bec47481536b8ba1181d50dbe783b0e4ad03c95dc1296684"}, 683 | {file = "coverage-6.3.2-cp39-cp39-win32.whl", hash = "sha256:f5fa5803f47e095d7ad8443d28b01d48c0359484fec1b9d8606d0e3282084bc4"}, 684 | {file = "coverage-6.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:9548f10d8be799551eb3a9c74bbf2b4934ddb330e08a73320123c07f95cc2d92"}, 685 | {file = "coverage-6.3.2-pp36.pp37.pp38-none-any.whl", hash = "sha256:18d520c6860515a771708937d2f78f63cc47ab3b80cb78e86573b0a760161faf"}, 686 | {file = "coverage-6.3.2.tar.gz", hash = "sha256:03e2a7826086b91ef345ff18742ee9fc47a6839ccd517061ef8fa1976e652ce9"}, 687 | ] 688 | distlib = [ 689 | {file = "distlib-0.3.4-py2.py3-none-any.whl", hash = "sha256:6564fe0a8f51e734df6333d08b8b94d4ea8ee6b99b5ed50613f731fd4089f34b"}, 690 | {file = "distlib-0.3.4.zip", hash = "sha256:e4b58818180336dc9c529bfb9a0b58728ffc09ad92027a3f30b7cd91e3458579"}, 691 | ] 692 | fasteners = [ 693 | {file = "fasteners-0.17.3-py3-none-any.whl", hash = "sha256:cae0772df265923e71435cc5057840138f4e8b6302f888a567d06ed8e1cbca03"}, 694 | {file = "fasteners-0.17.3.tar.gz", hash = "sha256:a9a42a208573d4074c77d041447336cf4e3c1389a256fd3e113ef59cf29b7980"}, 695 | ] 696 | filelock = [ 697 | {file = "filelock-3.4.2-py3-none-any.whl", hash = "sha256:cf0fc6a2f8d26bd900f19bf33915ca70ba4dd8c56903eeb14e1e7a2fd7590146"}, 698 | {file = "filelock-3.4.2.tar.gz", hash = "sha256:38b4f4c989f9d06d44524df1b24bd19e167d851f19b50bf3e3559952dddc5b80"}, 699 | ] 700 | flake8 = [ 701 | {file = "flake8-4.0.1-py2.py3-none-any.whl", hash = "sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d"}, 702 | {file = "flake8-4.0.1.tar.gz", hash = "sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d"}, 703 | ] 704 | h11 = [ 705 | {file = "h11-0.12.0-py3-none-any.whl", hash = "sha256:36a3cb8c0a032f56e2da7084577878a035d3b61d104230d4bd49c0c6b555a9c6"}, 706 | {file = "h11-0.12.0.tar.gz", hash = "sha256:47222cb6067e4a307d535814917cd98fd0a57b6788ce715755fa2b6c28b56042"}, 707 | ] 708 | httpcore = [ 709 | {file = "httpcore-0.14.6-py3-none-any.whl", hash = "sha256:508401ab24060cfa1e959feda1c38eaa09ccf9074c928f9c3d2864f8921373ce"}, 710 | {file = "httpcore-0.14.6.tar.gz", hash = "sha256:d1ba9926a7b761bfb92d171df81ac9f87f549533cc8fcbcffca6943d3fccabf7"}, 711 | ] 712 | httpx = [ 713 | {file = "httpx-0.21.3-py3-none-any.whl", hash = "sha256:df9a0fd43fa79dbab411d83eb1ea6f7a525c96ad92e60c2d7f40388971b25777"}, 714 | {file = "httpx-0.21.3.tar.gz", hash = "sha256:7a3eb67ef0b8abbd6d9402248ef2f84a76080fa1c839f8662e6eb385640e445a"}, 715 | ] 716 | httpx-cache = [ 717 | {file = "httpx-cache-0.4.1.tar.gz", hash = "sha256:a9b43642fedffccb6725402373f364832596595f88083fe5a95c65c308563f72"}, 718 | {file = "httpx_cache-0.4.1-py3-none-any.whl", hash = "sha256:a89ca06641e3a2b3b6bf41d71ba9d54211f0cf1efc67b3f95b82b5b0464fe6b0"}, 719 | ] 720 | identify = [ 721 | {file = "identify-2.4.7-py2.py3-none-any.whl", hash = "sha256:e64210654dfbca6ced33230eb1b137591a0981425e1a60b4c6c36309f787bbd5"}, 722 | {file = "identify-2.4.7.tar.gz", hash = "sha256:8408f01e0be25492017346d7dffe7e7711b762b23375c775d24d3bc38618fabc"}, 723 | ] 724 | idna = [ 725 | {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"}, 726 | {file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"}, 
727 | ] 728 | iniconfig = [ 729 | {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, 730 | {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, 731 | ] 732 | isort = [ 733 | {file = "isort-5.10.1-py3-none-any.whl", hash = "sha256:6f62d78e2f89b4500b080fe3a81690850cd254227f27f75c3a0c491a1f351ba7"}, 734 | {file = "isort-5.10.1.tar.gz", hash = "sha256:e8443a5e7a020e9d7f97f1d7d9cd17c88bcb3bc7e218bf9cf5095fe550be2951"}, 735 | ] 736 | mccabe = [ 737 | {file = "mccabe-0.6.1-py2.py3-none-any.whl", hash = "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42"}, 738 | {file = "mccabe-0.6.1.tar.gz", hash = "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"}, 739 | ] 740 | msgpack = [ 741 | {file = "msgpack-1.0.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:96acc674bb9c9be63fa8b6dabc3248fdc575c4adc005c440ad02f87ca7edd079"}, 742 | {file = "msgpack-1.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2c3ca57c96c8e69c1a0d2926a6acf2d9a522b41dc4253a8945c4c6cd4981a4e3"}, 743 | {file = "msgpack-1.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0a792c091bac433dfe0a70ac17fc2087d4595ab835b47b89defc8bbabcf5c73"}, 744 | {file = "msgpack-1.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c58cdec1cb5fcea8c2f1771d7b5fec79307d056874f746690bd2bdd609ab147"}, 745 | {file = "msgpack-1.0.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2f97c0f35b3b096a330bb4a1a9247d0bd7e1f3a2eba7ab69795501504b1c2c39"}, 746 | {file = "msgpack-1.0.3-cp310-cp310-win32.whl", hash = "sha256:36a64a10b16c2ab31dcd5f32d9787ed41fe68ab23dd66957ca2826c7f10d0b85"}, 747 | {file = "msgpack-1.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:c1ba333b4024c17c7591f0f372e2daa3c31db495a9b2af3cf664aef3c14354f7"}, 748 | {file = "msgpack-1.0.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:c2140cf7a3ec475ef0938edb6eb363fa704159e0bf71dde15d953bacc1cf9d7d"}, 749 | {file = "msgpack-1.0.3-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f4c22717c74d44bcd7af353024ce71c6b55346dad5e2cc1ddc17ce8c4507c6b"}, 750 | {file = "msgpack-1.0.3-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d733a15ade190540c703de209ffbc42a3367600421b62ac0c09fde594da6ec"}, 751 | {file = "msgpack-1.0.3-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7e03b06f2982aa98d4ddd082a210c3db200471da523f9ac197f2828e80e7770"}, 752 | {file = "msgpack-1.0.3-cp36-cp36m-win32.whl", hash = "sha256:3d875631ecab42f65f9dce6f55ce6d736696ced240f2634633188de2f5f21af9"}, 753 | {file = "msgpack-1.0.3-cp36-cp36m-win_amd64.whl", hash = "sha256:40fb89b4625d12d6027a19f4df18a4de5c64f6f3314325049f219683e07e678a"}, 754 | {file = "msgpack-1.0.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6eef0cf8db3857b2b556213d97dd82de76e28a6524853a9beb3264983391dc1a"}, 755 | {file = "msgpack-1.0.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d8c332f53ffff01953ad25131272506500b14750c1d0ce8614b17d098252fbc"}, 756 | {file = "msgpack-1.0.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c0903bd93cbd34653dd63bbfcb99d7539c372795201f39d16fdfde4418de43a"}, 757 | {file = 
"msgpack-1.0.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bf1e6bfed4860d72106f4e0a1ab519546982b45689937b40257cfd820650b920"}, 758 | {file = "msgpack-1.0.3-cp37-cp37m-win32.whl", hash = "sha256:d02cea2252abc3756b2ac31f781f7a98e89ff9759b2e7450a1c7a0d13302ff50"}, 759 | {file = "msgpack-1.0.3-cp37-cp37m-win_amd64.whl", hash = "sha256:2f30dd0dc4dfe6231ad253b6f9f7128ac3202ae49edd3f10d311adc358772dba"}, 760 | {file = "msgpack-1.0.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f201d34dc89342fabb2a10ed7c9a9aaaed9b7af0f16a5923f1ae562b31258dea"}, 761 | {file = "msgpack-1.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bb87f23ae7d14b7b3c21009c4b1705ec107cb21ee71975992f6aca571fb4a42a"}, 762 | {file = "msgpack-1.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a3a5c4b16e9d0edb823fe54b59b5660cc8d4782d7bf2c214cb4b91a1940a8ef"}, 763 | {file = "msgpack-1.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f74da1e5fcf20ade12c6bf1baa17a2dc3604958922de8dc83cbe3eff22e8b611"}, 764 | {file = "msgpack-1.0.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:73a80bd6eb6bcb338c1ec0da273f87420829c266379c8c82fa14c23fb586cfa1"}, 765 | {file = "msgpack-1.0.3-cp38-cp38-win32.whl", hash = "sha256:9fce00156e79af37bb6db4e7587b30d11e7ac6a02cb5bac387f023808cd7d7f4"}, 766 | {file = "msgpack-1.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:9b6f2d714c506e79cbead331de9aae6837c8dd36190d02da74cb409b36162e8a"}, 767 | {file = "msgpack-1.0.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:89908aea5f46ee1474cc37fbc146677f8529ac99201bc2faf4ef8edc023c2bf3"}, 768 | {file = "msgpack-1.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:973ad69fd7e31159eae8f580f3f707b718b61141838321c6fa4d891c4a2cca52"}, 769 | {file = "msgpack-1.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da24375ab4c50e5b7486c115a3198d207954fe10aaa5708f7b65105df09109b2"}, 770 | {file = "msgpack-1.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a598d0685e4ae07a0672b59792d2cc767d09d7a7f39fd9bd37ff84e060b1a996"}, 771 | {file = "msgpack-1.0.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e4c309a68cb5d6bbd0c50d5c71a25ae81f268c2dc675c6f4ea8ab2feec2ac4e2"}, 772 | {file = "msgpack-1.0.3-cp39-cp39-win32.whl", hash = "sha256:494471d65b25a8751d19c83f1a482fd411d7ca7a3b9e17d25980a74075ba0e88"}, 773 | {file = "msgpack-1.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:f01b26c2290cbd74316990ba84a14ac3d599af9cebefc543d241a66e785cf17d"}, 774 | {file = "msgpack-1.0.3.tar.gz", hash = "sha256:51fdc7fb93615286428ee7758cecc2f374d5ff363bdd884c7ea622a7a327a81e"}, 775 | ] 776 | mypy-extensions = [ 777 | {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"}, 778 | {file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"}, 779 | ] 780 | nodeenv = [ 781 | {file = "nodeenv-1.6.0-py2.py3-none-any.whl", hash = "sha256:621e6b7076565ddcacd2db0294c0381e01fd28945ab36bcf00f41c5daf63bef7"}, 782 | {file = "nodeenv-1.6.0.tar.gz", hash = "sha256:3ef13ff90291ba2a4a7a4ff9a979b63ffdd00a464dbe04acf0ea6471517a4c2b"}, 783 | ] 784 | packaging = [ 785 | {file = "packaging-21.3-py3-none-any.whl", hash = 
"sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, 786 | {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, 787 | ] 788 | pathspec = [ 789 | {file = "pathspec-0.9.0-py2.py3-none-any.whl", hash = "sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a"}, 790 | {file = "pathspec-0.9.0.tar.gz", hash = "sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1"}, 791 | ] 792 | platformdirs = [ 793 | {file = "platformdirs-2.4.1-py3-none-any.whl", hash = "sha256:1d7385c7db91728b83efd0ca99a5afb296cab9d0ed8313a45ed8ba17967ecfca"}, 794 | {file = "platformdirs-2.4.1.tar.gz", hash = "sha256:440633ddfebcc36264232365d7840a970e75e1018d15b4327d11f91909045fda"}, 795 | ] 796 | pluggy = [ 797 | {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, 798 | {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, 799 | ] 800 | pre-commit = [ 801 | {file = "pre_commit-2.17.0-py2.py3-none-any.whl", hash = "sha256:725fa7459782d7bec5ead072810e47351de01709be838c2ce1726b9591dad616"}, 802 | {file = "pre_commit-2.17.0.tar.gz", hash = "sha256:c1a8040ff15ad3d648c70cc3e55b93e4d2d5b687320955505587fd79bbaed06a"}, 803 | ] 804 | py = [ 805 | {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, 806 | {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, 807 | ] 808 | pycodestyle = [ 809 | {file = "pycodestyle-2.8.0-py2.py3-none-any.whl", hash = "sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20"}, 810 | {file = "pycodestyle-2.8.0.tar.gz", hash = "sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f"}, 811 | ] 812 | pyflakes = [ 813 | {file = "pyflakes-2.4.0-py2.py3-none-any.whl", hash = "sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e"}, 814 | {file = "pyflakes-2.4.0.tar.gz", hash = "sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c"}, 815 | ] 816 | pyparsing = [ 817 | {file = "pyparsing-3.0.7-py3-none-any.whl", hash = "sha256:a6c06a88f252e6c322f65faf8f418b16213b51bdfaece0524c1c1bc30c63c484"}, 818 | {file = "pyparsing-3.0.7.tar.gz", hash = "sha256:18ee9022775d270c55187733956460083db60b37d0d0fb357445f3094eed3eea"}, 819 | ] 820 | pytest = [ 821 | {file = "pytest-7.0.1-py3-none-any.whl", hash = "sha256:9ce3ff477af913ecf6321fe337b93a2c0dcf2a0a1439c43f5452112c1e4280db"}, 822 | {file = "pytest-7.0.1.tar.gz", hash = "sha256:e30905a0c131d3d94b89624a1cc5afec3e0ba2fbdb151867d8e0ebd49850f171"}, 823 | ] 824 | pytest-asyncio = [ 825 | {file = "pytest-asyncio-0.17.2.tar.gz", hash = "sha256:6d895b02432c028e6957d25fc936494e78c6305736e785d9fee408b1efbc7ff4"}, 826 | {file = "pytest_asyncio-0.17.2-py3-none-any.whl", hash = "sha256:e0fe5dbea40516b661ef1bcfe0bd9461c2847c4ef4bb40012324f2454fb7d56d"}, 827 | ] 828 | pytest-cov = [ 829 | {file = "pytest-cov-3.0.0.tar.gz", hash = "sha256:e7f0f5b1617d2210a2cabc266dfe2f4c75a8d32fb89eafb7ad9d06f6d076d470"}, 830 | {file = "pytest_cov-3.0.0-py3-none-any.whl", hash = "sha256:578d5d15ac4a25e5f961c938b85a05b09fdaae9deef3bb6de9a6e766622ca7a6"}, 831 | ] 832 | pyyaml = [ 833 | {file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"}, 834 | {file = 
"PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"}, 835 | {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"}, 836 | {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"}, 837 | {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"}, 838 | {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"}, 839 | {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"}, 840 | {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"}, 841 | {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"}, 842 | {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"}, 843 | {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"}, 844 | {file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"}, 845 | {file = "PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"}, 846 | {file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"}, 847 | {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"}, 848 | {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"}, 849 | {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"}, 850 | {file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"}, 851 | {file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"}, 852 | {file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"}, 853 | {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"}, 854 | {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"}, 855 | {file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"}, 
856 | {file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"}, 857 | {file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"}, 858 | {file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"}, 859 | {file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"}, 860 | {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"}, 861 | {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"}, 862 | {file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"}, 863 | {file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"}, 864 | {file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"}, 865 | {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"}, 866 | ] 867 | respx = [ 868 | {file = "respx-0.19.2-py2.py3-none-any.whl", hash = "sha256:417f986fec599b9cc6531e93e494b7a75d1cb7bccff9dde5b53edc51f7954494"}, 869 | {file = "respx-0.19.2.tar.gz", hash = "sha256:f3d210bb4de0ccc4c5afabeb87c3c1b03b3765a9c1a73eb042a07bb18ac33705"}, 870 | ] 871 | rfc3986 = [ 872 | {file = "rfc3986-1.5.0-py2.py3-none-any.whl", hash = "sha256:a86d6e1f5b1dc238b218b012df0aa79409667bb209e58da56d0b94704e712a97"}, 873 | {file = "rfc3986-1.5.0.tar.gz", hash = "sha256:270aaf10d87d0d4e095063c65bf3ddbc6ee3d0b226328ce21e036f946e421835"}, 874 | ] 875 | six = [ 876 | {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, 877 | {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, 878 | ] 879 | sniffio = [ 880 | {file = "sniffio-1.2.0-py3-none-any.whl", hash = "sha256:471b71698eac1c2112a40ce2752bb2f4a4814c22a54a3eed3676bc0f5ca9f663"}, 881 | {file = "sniffio-1.2.0.tar.gz", hash = "sha256:c4666eecec1d3f50960c6bdf61ab7bc350648da6c126e3cf6898d8cd4ddcd3de"}, 882 | ] 883 | soupsieve = [ 884 | {file = "soupsieve-2.3.1-py3-none-any.whl", hash = "sha256:1a3cca2617c6b38c0343ed661b1fa5de5637f257d4fe22bd9f1338010a1efefb"}, 885 | {file = "soupsieve-2.3.1.tar.gz", hash = "sha256:b8d49b1cd4f037c7082a9683dfa1801aa2597fb11c3a1155b7a5b94829b4f1f9"}, 886 | ] 887 | tenacity = [ 888 | {file = "tenacity-8.0.1-py3-none-any.whl", hash = "sha256:f78f4ea81b0fabc06728c11dc2a8c01277bfc5181b321a4770471902e3eb844a"}, 889 | {file = "tenacity-8.0.1.tar.gz", hash = "sha256:43242a20e3e73291a28bcbcacfd6e000b02d3857a9a9fff56b297a27afdc932f"}, 890 | ] 891 | toml = [ 892 | {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, 893 | {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, 894 | ] 895 | tomli = [ 896 | {file = "tomli-1.2.3-py3-none-any.whl", hash = 
"sha256:e3069e4be3ead9668e21cb9b074cd948f7b3113fd9c8bba083f48247aab8b11c"}, 897 | {file = "tomli-1.2.3.tar.gz", hash = "sha256:05b6166bff487dc068d322585c7ea4ef78deed501cc124060e0f238e89a9231f"}, 898 | ] 899 | typing-extensions = [ 900 | {file = "typing_extensions-4.0.1-py3-none-any.whl", hash = "sha256:7f001e5ac290a0c0401508864c7ec868be4e701886d5b573a9528ed3973d9d3b"}, 901 | {file = "typing_extensions-4.0.1.tar.gz", hash = "sha256:4ca091dea149f945ec56afb48dae714f21e8692ef22a395223bcd328961b6a0e"}, 902 | ] 903 | virtualenv = [ 904 | {file = "virtualenv-20.13.0-py2.py3-none-any.whl", hash = "sha256:339f16c4a86b44240ba7223d0f93a7887c3ca04b5f9c8129da7958447d079b09"}, 905 | {file = "virtualenv-20.13.0.tar.gz", hash = "sha256:d8458cf8d59d0ea495ad9b34c2599487f8a7772d796f9910858376d1600dd2dd"}, 906 | ] 907 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "robox" 3 | version = "0.2.3" 4 | description = "Robox is a simple library for exploring/scraping the web or testing a website you’re developing." 5 | authors = ["Dan Claudiu Pop "] 6 | license = "BSD 3" 7 | readme = "README.md" 8 | repository = "https://github.com/danclaudiupop/robox" 9 | homepage = "https://github.com/danclaudiupop/robox" 10 | keywords = ["httpx", "scraping", "testing"] 11 | classifiers = [ 12 | "Development Status :: 3 - Alpha", 13 | "Environment :: Web Environment", 14 | "Intended Audience :: Developers", 15 | "License :: OSI Approved :: BSD License", 16 | "Operating System :: OS Independent", 17 | "Topic :: Internet :: WWW/HTTP", 18 | "Framework :: AsyncIO", 19 | "Framework :: Trio", 20 | "Programming Language :: Python :: 3", 21 | "Programming Language :: Python :: 3.8", 22 | "Programming Language :: Python :: 3.9", 23 | "Programming Language :: Python :: 3.10", 24 | "Programming Language :: Python :: 3 :: Only", 25 | ] 26 | include = ["LICENSE"] 27 | 28 | [tool.poetry.dependencies] 29 | python = "^3.8" 30 | httpx = "^0.21.1" 31 | beautifulsoup4 = "^4.9.3" 32 | aiofiles = "^0.8.0" 33 | httpx-cache = "^0.4.0" 34 | tenacity = "^8.0.1" 35 | 36 | [tool.poetry.dev-dependencies] 37 | pytest = "^7.0.1" 38 | pytest-cov = "^3.0.0" 39 | pytest-asyncio = "^0.17.2" 40 | respx = "^0.19.2" 41 | black = "^22.1" 42 | flake8 = "^4.0.1" 43 | isort = "^5.10.1" 44 | pre-commit = "^2.17.0" 45 | 46 | [build-system] 47 | requires = ["poetry-core>=1.0.0"] 48 | build-backend = "poetry.core.masonry.api" 49 | -------------------------------------------------------------------------------- /src/robox/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | 4 | LOG_DIR = Path("./log") 5 | if not LOG_DIR.exists(): 6 | LOG_DIR.mkdir(parents=True, exist_ok=True) 7 | 8 | 9 | LOG_FILE = LOG_DIR / f"{__name__.split('.')[-1]}.log" 10 | FILE_HANDLER = logging.FileHandler(LOG_FILE, mode="w", encoding="utf-8") 11 | FILEFORMAT = logging.Formatter( 12 | "%(asctime)s:[%(threadName)-12.12s]:%(levelname)s:%(name)s:%(message)s", 13 | datefmt="%Y-%m-%d %H:%M:%S", 14 | ) 15 | FILE_HANDLER.setFormatter(FILEFORMAT) 16 | 17 | STREAM_HANDLER = logging.StreamHandler() 18 | STREAM_HANDLER.setLevel(logging.DEBUG) 19 | STREAMFORMAT = logging.Formatter("%(asctime)s : %(levelname)s : %(message)s") 20 | STREAM_HANDLER.setFormatter(STREAMFORMAT) 21 | 22 | LOG = logging.getLogger(__name__) 23 | LOG.addHandler(FILE_HANDLER) 24 | 
LOG.addHandler(STREAM_HANDLER) 25 | LOG.setLevel(logging.DEBUG) 26 | 27 | from httpx_cache import DictCache, FileCache # noqa: E402 28 | 29 | from robox._client import AsyncRobox, Robox # noqa: E402 30 | from robox._options import Options # noqa: E402 31 | 32 | __all__ = ["Robox", "AsyncRobox", "Options", "FileCache", "DictCache"] 33 | -------------------------------------------------------------------------------- /src/robox/_client.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import itertools 3 | import json 4 | import random 5 | import time 6 | import typing as tp 7 | from pathlib import Path 8 | 9 | import httpx 10 | from httpx._client import USE_CLIENT_DEFAULT, UseClientDefault 11 | from httpx._config import ( 12 | DEFAULT_LIMITS, 13 | DEFAULT_MAX_REDIRECTS, 14 | DEFAULT_TIMEOUT_CONFIG, 15 | Limits, 16 | Proxy, 17 | ) 18 | from httpx._models import URL 19 | from httpx._types import ( 20 | AuthTypes, 21 | CertTypes, 22 | CookieTypes, 23 | HeaderTypes, 24 | ProxiesTypes, 25 | QueryParamTypes, 26 | RequestContent, 27 | RequestData, 28 | RequestFiles, 29 | TimeoutTypes, 30 | URLTypes, 31 | VerifyTypes, 32 | ) 33 | from httpx_cache import AsyncCacheControlTransport, CacheControlTransport 34 | 35 | from robox import LOG 36 | from robox._download import async_download_file, download_file 37 | from robox._exceptions import ForbiddenByRobots, RoboxError 38 | from robox._history import BrowserHistory 39 | from robox._options import Options 40 | from robox._page import AsyncPage, Page 41 | from robox._retry import call_with_retry 42 | from robox._robots import ask_robots, async_ask_robots 43 | 44 | 45 | class RoboxMixin: 46 | @property 47 | def user_agent(self) -> str: 48 | return self.headers["user-agent"]  # read the live header; no separate attribute is ever assigned 49 | 50 | @user_agent.setter 51 | def user_agent(self, user_agent: str) -> None: 52 | self.headers["user-agent"] = user_agent 53 | 54 | def get_history(self) -> tp.List[tp.Union[Page, AsyncPage]]: 55 | return self.history.get_locations() 56 | 57 | @property 58 | def current_url(self) -> URL: 59 | if latest_entry := self.history.latest_entry(): 60 | return latest_entry.url 61 | else: 62 | raise RoboxError("Not tracking history") 63 | 64 | def save_cookies(self, filename: str) -> None: 65 | cookies = {} 66 | for cookie in self.cookies.jar: 67 | cookies[cookie.name] = cookie.value 68 | with open(filename, "w") as f: 69 | json.dump(cookies, f) 70 | 71 | def load_cookies(self, filename: str) -> None: 72 | if not Path(filename).is_file(): 73 | return None 74 | with open(filename, "r") as f: 75 | cookies = httpx.Cookies(json.load(f)) 76 | self.cookies = cookies 77 | 78 | def _increment_request_counter(self) -> None: 79 | self.total_requests = next(self._request_counter) 80 | 81 | def _build_page_response( 82 | self, response: httpx.Response, page_cls: tp.Union[Page, AsyncPage] 83 | ) -> tp.Union[Page, AsyncPage]: 84 | self._format_response_log(response) 85 | if self.options.raise_on_4xx_5xx: 86 | try: 87 | response.raise_for_status() 88 | except httpx.HTTPStatusError as exc: 89 | LOG.error( 90 | f"Error response {exc.response.status_code} " 91 | f"while requesting {exc.request.url!r}."
92 | ) 93 | raise exc 94 | 95 | page = page_cls(response, robox=self) 96 | if self.options.history: 97 | self.history.location = page 98 | return page 99 | 100 | @staticmethod 101 | def _format_response_log(response: httpx.Response) -> None: 102 | def format_headers(d): 103 | return "\n".join(f"{k}: {v}" for k, v in d.items()) 104 | 105 | msg = ( 106 | f"\n----- REPORT START -----\n" 107 | f"Method: {response.request.method}\n" 108 | f"URL: {response.url}\n" 109 | f"Time: {response.elapsed.total_seconds():.3f}s\n" 110 | f"Status Code: {response.status_code}\n" 111 | f"---- request headers -----\n" 112 | f"{format_headers(response.request.headers)}\n" 113 | f"---- response headers -----\n" 114 | f"{format_headers(response.headers)}\n" 115 | f"----- REPORT END -----\n" 116 | ) 117 | LOG.debug(msg) 118 | 119 | def __repr__(self) -> str: 120 | try: 121 | return f"{self.__class__.__name__} - {self.current_url}" 122 | except RoboxError: 123 | return f"{self.__class__.__name__}" 124 | 125 | 126 | class Robox(httpx.Client, RoboxMixin): 127 | def __init__( 128 | self, 129 | *, 130 | auth: AuthTypes = None, 131 | params: QueryParamTypes = None, 132 | headers: HeaderTypes = None, 133 | cookies: CookieTypes = None, 134 | verify: VerifyTypes = True, 135 | cert: CertTypes = None, 136 | http1: bool = True, 137 | http2: bool = False, 138 | proxies: ProxiesTypes = None, 139 | mounts: tp.Mapping[str, httpx.BaseTransport] = None, 140 | timeout: TimeoutTypes = DEFAULT_TIMEOUT_CONFIG, 141 | follow_redirects: bool = True, 142 | limits: Limits = DEFAULT_LIMITS, 143 | max_redirects: int = DEFAULT_MAX_REDIRECTS, 144 | event_hooks: tp.Mapping[str, tp.List[tp.Callable]] = None, 145 | base_url: URLTypes = "", 146 | transport: httpx.BaseTransport = None, 147 | app: tp.Callable = None, 148 | trust_env: bool = True, 149 | options: Options = None, 150 | ) -> None: 151 | self.options = options or Options() 152 | self.history = BrowserHistory() 153 | self.total_requests = 0 154 | self._request_counter = itertools.count(start=1) 155 | super().__init__( 156 | auth=auth, 157 | params=params, 158 | headers=headers, 159 | cookies=cookies, 160 | timeout=timeout, 161 | follow_redirects=follow_redirects, 162 | max_redirects=max_redirects, 163 | event_hooks=event_hooks, 164 | base_url=base_url, 165 | trust_env=trust_env, 166 | verify=verify, 167 | cert=cert, 168 | http1=http1, 169 | http2=http2, 170 | proxies=proxies, 171 | mounts=mounts, 172 | limits=limits, 173 | transport=transport, 174 | app=app, 175 | ) 176 | 177 | def _init_transport( 178 | self, 179 | verify: VerifyTypes = True, 180 | cert: CertTypes = None, 181 | http1: bool = True, 182 | http2: bool = False, 183 | limits: Limits = DEFAULT_LIMITS, 184 | transport: httpx.BaseTransport = None, 185 | app: tp.Callable = None, 186 | trust_env: bool = True, 187 | ) -> CacheControlTransport: 188 | _transport = super()._init_transport( 189 | verify=verify, 190 | cert=cert, 191 | http1=http1, 192 | http2=http2, 193 | limits=limits, 194 | transport=transport, 195 | app=app, 196 | trust_env=trust_env, 197 | ) 198 | if self.options.cache: 199 | if isinstance(_transport, CacheControlTransport): 200 | return _transport 201 | return CacheControlTransport( 202 | transport=_transport, 203 | cache=self.options.cache, 204 | cacheable_status_codes=self.options.cacheable_status_codes, 205 | cacheable_methods=self.options.cacheable_methods, 206 | ) 207 | return _transport 208 | 209 | def _init_proxy_transport( 210 | self, 211 | proxy: Proxy, 212 | verify: VerifyTypes = True, 213 | cert: 
CertTypes = None, 214 | http1: bool = True, 215 | http2: bool = False, 216 | limits: Limits = DEFAULT_LIMITS, 217 | trust_env: bool = True, 218 | ) -> CacheControlTransport: 219 | _transport = super()._init_proxy_transport(  # httpx's _init_transport takes no proxy argument 220 | verify=verify, 221 | cert=cert, 222 | http1=http1, 223 | http2=http2, 224 | limits=limits, 225 | trust_env=trust_env, 226 | proxy=proxy, 227 | ) 228 | if self.options.cache: 229 | return CacheControlTransport( 230 | transport=_transport, 231 | cache=self.options.cache, 232 | cacheable_status_codes=self.options.cacheable_status_codes, 233 | cacheable_methods=self.options.cacheable_methods, 234 | ) 235 | return _transport 236 | 237 | def open( 238 | self, 239 | url: str, 240 | method="GET", 241 | *, 242 | content: RequestContent = None, 243 | data: RequestData = None, 244 | files: RequestFiles = None, 245 | json: tp.Any = None, 246 | params: QueryParamTypes = None, 247 | headers: HeaderTypes = None, 248 | cookies: CookieTypes = None, 249 | auth: tp.Union[AuthTypes, UseClientDefault] = USE_CLIENT_DEFAULT, 250 | follow_redirects: tp.Union[bool, UseClientDefault] = USE_CLIENT_DEFAULT, 251 | timeout: tp.Union[TimeoutTypes, UseClientDefault] = USE_CLIENT_DEFAULT, 252 | extensions: dict = None, 253 | ) -> Page: 254 | def _open(): 255 | LOG.debug("Making HTTP request. URL: %s, Method: %s", url, method) 256 | if self.options.obey_robotstxt: 257 | can_fetch, crawl_delay = ask_robots(url) 258 | if not can_fetch: 259 | msg = "Forbidden by robots.txt" 260 | LOG.debug(msg) 261 | raise ForbiddenByRobots(msg) 262 | 263 | if crawl_delay: 264 | LOG.debug( 265 | "Waiting %s seconds before next request b/c of crawl-delay", 266 | crawl_delay, 267 | ) 268 | time.sleep(crawl_delay) 269 | 270 | time.sleep(random.uniform(*self.options.delay_between_requests)) 271 | response = self.request( 272 | method=method, 273 | url=url, 274 | content=content, 275 | data=data, 276 | files=files, 277 | json=json, 278 | params=params, 279 | headers=headers, 280 | cookies=cookies, 281 | auth=auth, 282 | follow_redirects=follow_redirects, 283 | timeout=timeout, 284 | extensions=extensions, 285 | ) 286 | self._increment_request_counter() 287 | return self._build_page_response(response, Page) 288 | 289 | _open.method = method 290 | return call_with_retry(_open, self.options)() 291 | 292 | def download_file(self, *, url: str, destination_folder: str) -> str: 293 | return download_file(self, url, destination_folder) 294 | 295 | def refresh(self) -> Page: 296 | page = self.history.location 297 | return self.open(page.url) 298 | 299 | def forward(self, n: int = 1) -> Page: 300 | if not len(self.get_history()): 301 | raise ValueError("No history to forward") 302 | self.history.forward(n) 303 | page = self.history.location 304 | return self.open(page.url) 305 | 306 | def back(self, n: int = 1) -> Page: 307 | if not len(self.get_history()): 308 | raise ValueError("No history to back") 309 | self.history.back(n) 310 | page = self.history.location 311 | return self.open(page.url) 312 | 313 | 314 | class AsyncRobox(httpx.AsyncClient, RoboxMixin): 315 | def __init__( 316 | self, 317 | *, 318 | auth: AuthTypes = None, 319 | params: QueryParamTypes = None, 320 | headers: HeaderTypes = None, 321 | cookies: CookieTypes = None, 322 | verify: VerifyTypes = True, 323 | cert: CertTypes = None, 324 | http1: bool = True, 325 | http2: bool = False, 326 | proxies: ProxiesTypes = None, 327 | mounts: tp.Mapping[str, httpx.AsyncBaseTransport] = None, 328 | timeout: TimeoutTypes = DEFAULT_TIMEOUT_CONFIG, 329 | follow_redirects: bool =
True,  # match the sync client's browser-like default 330 | limits: Limits = DEFAULT_LIMITS, 331 | max_redirects: int = DEFAULT_MAX_REDIRECTS, 332 | event_hooks: tp.Mapping[str, tp.List[tp.Callable]] = None, 333 | base_url: URLTypes = "", 334 | transport: httpx.AsyncBaseTransport = None, 335 | app: tp.Callable = None, 336 | trust_env: bool = True, 337 | options: Options = None, 338 | ) -> None: 339 | self.options = options or Options() 340 | self.history = BrowserHistory() 341 | self.total_requests = 0 342 | self._request_counter = itertools.count(start=1) 343 | super().__init__( 344 | auth=auth, 345 | params=params, 346 | headers=headers, 347 | cookies=cookies, 348 | timeout=timeout, 349 | follow_redirects=follow_redirects, 350 | max_redirects=max_redirects, 351 | event_hooks=event_hooks, 352 | base_url=base_url, 353 | trust_env=trust_env, 354 | verify=verify, 355 | cert=cert, 356 | http1=http1, 357 | http2=http2, 358 | proxies=proxies, 359 | mounts=mounts, 360 | limits=limits, 361 | transport=transport, 362 | app=app, 363 | ) 364 | 365 | def _init_transport( 366 | self, 367 | verify: VerifyTypes = True, 368 | cert: CertTypes = None, 369 | http1: bool = True, 370 | http2: bool = False, 371 | limits: Limits = DEFAULT_LIMITS, 372 | transport: httpx.AsyncBaseTransport = None, 373 | app: tp.Callable = None, 374 | trust_env: bool = True, 375 | ) -> AsyncCacheControlTransport: 376 | _transport = super()._init_transport( 377 | verify=verify, 378 | cert=cert, 379 | http1=http1, 380 | http2=http2, 381 | limits=limits, 382 | transport=transport, 383 | app=app, 384 | trust_env=trust_env, 385 | ) 386 | if self.options.cache: 387 | if isinstance(_transport, AsyncCacheControlTransport): 388 | return _transport 389 | return AsyncCacheControlTransport( 390 | transport=_transport, 391 | cache=self.options.cache, 392 | cacheable_status_codes=self.options.cacheable_status_codes, 393 | cacheable_methods=self.options.cacheable_methods, 394 | ) 395 | return _transport 396 | 397 | def _init_proxy_transport( 398 | self, 399 | proxy: Proxy, 400 | verify: VerifyTypes = True, 401 | cert: CertTypes = None, 402 | http1: bool = True, 403 | http2: bool = False, 404 | limits: Limits = DEFAULT_LIMITS, 405 | trust_env: bool = True, 406 | ) -> AsyncCacheControlTransport: 407 | _transport = super()._init_proxy_transport(  # httpx's _init_transport takes no proxy argument 408 | verify=verify, 409 | cert=cert, 410 | http1=http1, 411 | http2=http2, 412 | limits=limits, 413 | trust_env=trust_env, 414 | proxy=proxy, 415 | ) 416 | if self.options.cache: 417 | return AsyncCacheControlTransport( 418 | transport=_transport, 419 | cache=self.options.cache, 420 | cacheable_status_codes=self.options.cacheable_status_codes, 421 | cacheable_methods=self.options.cacheable_methods, 422 | ) 423 | return _transport 424 | 425 | async def open( 426 | self, 427 | url: str, 428 | method="GET", 429 | *, 430 | content: RequestContent = None, 431 | data: RequestData = None, 432 | files: RequestFiles = None, 433 | json: tp.Any = None, 434 | params: QueryParamTypes = None, 435 | headers: HeaderTypes = None, 436 | cookies: CookieTypes = None, 437 | auth: tp.Union[AuthTypes, UseClientDefault] = USE_CLIENT_DEFAULT, 438 | follow_redirects: tp.Union[bool, UseClientDefault] = USE_CLIENT_DEFAULT, 439 | timeout: tp.Union[TimeoutTypes, UseClientDefault] = USE_CLIENT_DEFAULT, 440 | extensions: dict = None, 441 | ) -> AsyncPage: 442 | async def _open(): 443 | LOG.debug("Making HTTP request.
URL: %s, Method: %s", url, method) 444 | if self.options.obey_robotstxt: 445 | can_fetch, crawl_delay = await async_ask_robots(url) 446 | if not can_fetch: 447 | msg = "Forbidden by robots.txt" 448 | LOG.debug(msg) 449 | raise ForbiddenByRobots(msg) 450 | 451 | if crawl_delay: 452 | LOG.debug( 453 | "Waiting %s seconds before next request b/c of crawl-delay", 454 | crawl_delay, 455 | ) 456 | await asyncio.sleep(crawl_delay) 457 | 458 | await asyncio.sleep(random.uniform(*self.options.delay_between_requests)) 459 | response = await self.request( 460 | method=method, 461 | url=url, 462 | content=content, 463 | data=data, 464 | files=files, 465 | json=json, 466 | params=params, 467 | headers=headers, 468 | cookies=cookies, 469 | auth=auth, 470 | follow_redirects=follow_redirects, 471 | timeout=timeout, 472 | extensions=extensions, 473 | ) 474 | self._increment_request_counter() 475 | return self._build_page_response(response, AsyncPage) 476 | 477 | _open.method = method 478 | return await call_with_retry(_open, self.options)() 479 | 480 | async def download_file(self, *, url: str, destination_folder: str) -> str: 481 | return await async_download_file(self, url, destination_folder) 482 | 483 | async def refresh(self) -> AsyncPage: 484 | page = self.history.location 485 | return await self.open(page.url) 486 | 487 | async def forward(self, n: int = 1) -> AsyncPage: 488 | if not len(self.get_history()): 489 | raise ValueError("No history to forward") 490 | self.history.forward(n) 491 | page = self.history.location 492 | return await self.open(page.url) 493 | 494 | async def back(self, n: int = 1) -> AsyncPage: 495 | if not len(self.get_history()): 496 | raise ValueError("No history to back") 497 | self.history.back(n) 498 | page = self.history.location 499 | return await self.open(page.url) 500 | -------------------------------------------------------------------------------- /src/robox/_controls.py: -------------------------------------------------------------------------------- 1 | from typing import Generator, Iterator, List, Optional, Sequence, TypeVar, overload 2 | 3 | import bs4 4 | 5 | from robox._exceptions import MultipleFieldsReturned 6 | 7 | 8 | class Field: 9 | def __init__(self, tag: bs4.element.Tag) -> None: 10 | self.tag = tag 11 | self._value = self.tag.get("value") 12 | 13 | @property 14 | def disabled(self) -> bool: 15 | return "disabled" in self.tag.attrs 16 | 17 | @property 18 | def readonly(self) -> bool: 19 | return "readonly" in self.tag.attrs 20 | 21 | @property 22 | def name(self) -> str: 23 | return self.tag.get("name") 24 | 25 | @property 26 | def id(self) -> str: 27 | return self.tag.get("id") 28 | 29 | @property 30 | def label(self) -> Optional[str]: 31 | if label := self.tag.find_previous("label"): 32 | return label.text.strip() 33 | 34 | @property 35 | def placeholder(self) -> str: 36 | return self.tag.get("placeholder") 37 | 38 | def has_multiple(self) -> bool: 39 | return self.tag.has_attr("multiple") 40 | 41 | @property 42 | def value(self) -> Optional[str]: 43 | return self._value or "" 44 | 45 | @value.setter 46 | def value(self, value: str) -> None: 47 | self._value = value 48 | 49 | def __repr__(self) -> str: 50 | return f"<{type(self).__name__} name={self.name!r}>" 51 | 52 | 53 | class Input(Field): 54 | ... 
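# A minimal usage sketch of the control wrappers above (illustrative only; the
# HTML snippet and names below are invented for the example and are not part of
# the library):
#
#   from bs4 import BeautifulSoup
#   soup = BeautifulSoup(
#       '<label>Email</label><input id="em" name="email" value="a@b.c">',
#       "html.parser",
#   )
#   field = Input(soup.find("input"))
#   assert field.name == "email" and field.label == "Email"
#   field.value = "new@b.c"  # the setter updates the in-memory field, not the tag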
55 | 56 | 57 | class Submit(Field): 58 | def __init__(self, tag: bs4.element.Tag) -> None: 59 | super().__init__(tag) 60 | self.is_default = False 61 | 62 | 63 | class Textarea(Field): 64 | def __init__(self, tag: bs4.element.Tag) -> None: 65 | super().__init__(tag) 66 | self.value = self.tag.text.rstrip("\r\n")  # strip a trailing CRLF/LF in one pass 67 | 68 | 69 | class Checkable: 70 | def is_checked(self) -> bool: 71 | return self.tag.has_attr("checked") 72 | 73 | def check(self) -> None: 74 | self.tag["checked"] = "checked" 75 | 76 | def values(self) -> List[str]: 77 | return list(filter(None, [self.value, self.label, self.id])) 78 | 79 | 80 | class Checkbox(Input, Checkable): 81 | ... 82 | 83 | 84 | class Radio(Input, Checkable): 85 | ... 86 | 87 | 88 | class Option: 89 | def __init__(self, tag: bs4.element.Tag) -> None: 90 | self.tag = tag 91 | 92 | @property 93 | def text(self) -> str: 94 | return self.tag.text.strip() 95 | 96 | @property 97 | def value(self) -> Optional[str]: 98 | return self.tag.get("value") 99 | 100 | def is_selected(self) -> bool: 101 | return self.tag.has_attr("selected") 102 | 103 | def select(self) -> None: 104 | self.tag["selected"] = "selected" 105 | 106 | 107 | class Select(Field): 108 | def options(self) -> List[Option]: 109 | return [Option(o) for o in self.tag.find_all("option")] 110 | 111 | 112 | class File(Input): 113 | @Field.value.setter 114 | def value(self, values: List[str]) -> None: 115 | _values = [] 116 | for value in values: 117 | if hasattr(value, "read"): 118 | _values.append(value) 119 | elif isinstance(value, str): 120 | _values.append(open(value, "rb"))  # binary mode so non-text uploads survive intact 121 | else: 122 | raise ValueError("Value must be a file object or file path") 123 | self._value = _values 124 | 125 | 126 | T = TypeVar("T") 127 | 128 | 129 | class Fields(Sequence[T]): 130 | def __init__(self) -> None: 131 | self._container: List[T] = [] 132 | 133 | def __iter__(self) -> Iterator[Field]: 134 | for field in self._container: 135 | yield field 136 | 137 | def __len__(self) -> int: 138 | return len(self._container) 139 | 140 | @overload 141 | def __getitem__(self, idx: int) -> T: 142 | ... 143 | 144 | @overload 145 | def __getitem__(self, s: slice) -> Sequence[T]: 146 | ...
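# The two @overload signatures above are typing-only stubs: they tell type
# checkers that indexing with an int yields a single item while a slice yields
# a sequence. The undecorated __getitem__ below is the one runtime
# implementation behind both signatures.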
147 | 148 | def __getitem__(self, index: int): 149 | return self._container[index] 150 | 151 | def add(self, field: Field) -> None: 152 | if not isinstance(field, Field): 153 | raise ValueError('Argument "field" must be an instance of Field') 154 | self._container.append(field) 155 | 156 | def get(self, locator: str, field_type: Field = None) -> Field: 157 | result = self.filter_by(locator, field_type) 158 | if len(result) > 1: 159 | raise MultipleFieldsReturned(f"Multiple fields returned for {field_type}") 160 | return result[0] 161 | 162 | def get_submits(self) -> List[Submit]: 163 | return list(self.filter_by_type(self._container, Submit)) 164 | 165 | def filter_by(self, locator: str, field_type: Field = None) -> List[Field]: 166 | fields = self.filter_by_locator(self._container, locator) 167 | if field_type: 168 | fields = self.filter_by_type(fields, field_type) 169 | fields = list(fields) 170 | if not fields: 171 | raise LookupError(f"No fields found for {locator}") 172 | return fields 173 | 174 | @staticmethod 175 | def filter_by_locator( 176 | fields: List[Field], locator: str 177 | ) -> Generator[Field, None, None]: 178 | for field in fields: 179 | if locator in (field.name, field.id, field.label, field.value.strip()): 180 | yield field 181 | 182 | @staticmethod 183 | def filter_by_type( 184 | fields: List[Field], field_type: Field 185 | ) -> Generator[Field, None, None]: 186 | for field in fields: 187 | if isinstance(field, field_type): 188 | yield field 189 | 190 | def list(self) -> List[Field]: 191 | return self._container 192 | -------------------------------------------------------------------------------- /src/robox/_download.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import functools 3 | import mimetypes 4 | from pathlib import Path 5 | from typing import Callable 6 | 7 | import aiofiles 8 | import httpx 9 | 10 | from robox import LOG 11 | 12 | 13 | def get_filename_from_url(response: httpx.Response) -> str: 14 | url = response.request.url 15 | filename = url.path.split("/")[-1] 16 | 17 | if Path(filename).suffix: 18 | return filename 19 | 20 | content_type = response.headers.get("content-type") 21 | if content_type is None: 22 | return filename 23 | 24 | extension = mimetypes.guess_extension(content_type) 25 | if extension is None: 26 | return filename 27 | 28 | return f'{filename.rstrip(".")}{extension}' 29 | 30 | 31 | def setup_destination(url, destination_folder: str) -> Path: 32 | destination = Path(destination_folder).expanduser() 33 | if (destination / url.split("/")[-1]).is_file(): 34 | return destination  # file already exists; callers still need a valid folder path 35 | 36 | destination.mkdir(parents=True, exist_ok=True) 37 | return destination 38 | 39 | 40 | def handle_error(func: Callable) -> Callable: 41 | @functools.wraps(func) 42 | def wrapper(client, url, destination_folder): 43 | destination = setup_destination(url, destination_folder) 44 | try: 45 | if asyncio.iscoroutinefunction(func): 46 | 47 | async def _(): 48 | return await func(client, url, destination) 49 | 50 | return _() 51 | else: 52 | return func(client, url, destination) 53 | except Exception as e: 54 | LOG.error( 55 | f"Downloading from {url} has failed!\nThe exception thrown is {e}" 56 | ) 57 | raise 58 | 59 | return wrapper 60 | 61 | 62 | @handle_error 63 | def download_file(client: httpx.Client, url: str, destination_folder: str) -> str: 64 | with client.stream("GET", url) as response: 65 | response.raise_for_status() 66 | filename = get_filename_from_url(response) 67 | file = destination_folder /
filename 68 | 69 | with file.open("wb") as out_file: 70 | for chunk in response.iter_raw(): 71 | out_file.write(chunk) 72 | return filename 73 | 74 | 75 | @handle_error 76 | async def async_download_file( 77 | client: httpx.AsyncClient, url: str, destination_folder: str 78 | ) -> str: 79 | async with client.stream("GET", url) as response: 80 | response.raise_for_status() 81 | filename = get_filename_from_url(response) 82 | file = destination_folder / filename 83 | 84 | async with aiofiles.open(file, "wb") as out_file: 85 | async for data in response.aiter_bytes(): 86 | if data: 87 | await out_file.write(data) 88 | return filename 89 | -------------------------------------------------------------------------------- /src/robox/_exceptions.py: -------------------------------------------------------------------------------- 1 | class RoboxError(Exception): 2 | ... 3 | 4 | 5 | class MultipleFieldsReturned(RoboxError): 6 | ... 7 | 8 | 9 | class InvalidValue(RoboxError): 10 | ... 11 | 12 | 13 | class ForbiddenByRobots(RoboxError): 14 | ... 15 | 16 | 17 | class RetryError(RoboxError): 18 | ... 19 | -------------------------------------------------------------------------------- /src/robox/_form.py: -------------------------------------------------------------------------------- 1 | import typing as tp 2 | from functools import lru_cache, singledispatch 3 | 4 | from bs4.element import Tag 5 | 6 | from robox._controls import ( 7 | Checkbox, 8 | Field, 9 | Fields, 10 | File, 11 | Input, 12 | Radio, 13 | Select, 14 | Submit, 15 | Textarea, 16 | ) 17 | from robox._exceptions import InvalidValue 18 | 19 | 20 | class Form: 21 | def __init__(self, parsed_form: Tag) -> None: 22 | self.parsed_form = parsed_form 23 | 24 | @property 25 | def action(self) -> str: 26 | return self.parsed_form.get("action", "") 27 | 28 | @property 29 | def method(self) -> str: 30 | return self.parsed_form.get("method", "GET") 31 | 32 | def fill_in(self, locator: str, *, value: str) -> None: 33 | self.fields.get(locator).value = value 34 | 35 | def check(self, locator: str, *, values: tp.List[str]) -> None: 36 | assert isinstance(values, list) 37 | checkboxes = self.fields.filter_by(locator, Checkbox) 38 | for value in values: 39 | checked = False  # reset per value so every unmatched value raises 40 | for checkbox in checkboxes: 41 | if value in checkbox.values() or (value == "on" and not checkbox.value): 42 | checkbox.check() 43 | checked = True 44 | break 45 | if not checked: 46 | raise InvalidValue(f"Invalid value {value!r} for checkboxes matching {locator!r}") 47 | 48 | def choose(self, locator: str, *, option: str) -> None: 49 | radios = self.fields.filter_by(locator, Radio) 50 | checked = False 51 | for radio in radios: 52 | if option in radio.values(): 53 | radio.check() 54 | checked = True 55 | break 56 | if not checked: 57 | raise InvalidValue(f"Option: '{option}' not found in {radios}") 58 | 59 | def select(self, locator: str, *, options: tp.List[str]) -> None: 60 | assert isinstance(options, list) 61 | select = self.fields.get(locator, Select) 62 | if not select.has_multiple(): 63 | if len(options) > 1: 64 | raise ValueError("Cannot select multiple options!") 65 | 66 | available_options = {} 67 | for option in select.options(): 68 | available_options[option.text] = option 69 | available_options[option.value] = option 70 | 71 | not_found_options = [] 72 | for option in options: 73 | if option in available_options: 74 | available_options[option].select() 75 | else: 76 | not_found_options.append(option) 77 | if not_found_options: 78 | raise InvalidValue( 79 | f"The following options:
{not_found_options} were" 80 | f" not found on field: {select}" 81 | ) 82 | 83 | def upload(self, locator: str, *, values: tp.List[str]) -> None: 84 | assert isinstance(values, list) 85 | field = self.fields.get(locator, File) 86 | if not field.has_multiple(): 87 | if len(values) > 1: 88 | raise ValueError("Cannot select multiple options!") 89 | field.value = values 90 | 91 | @property 92 | @lru_cache 93 | def fields(self) -> Fields: 94 | mapping = { 95 | "textarea": Textarea, 96 | "select": Select, 97 | "radio": Radio, 98 | "checkbox": Checkbox, 99 | "file": File, 100 | } 101 | fields = Fields() 102 | for field in self.parsed_form.find_all( 103 | ("input", "button", "select", "textarea") 104 | ): 105 | if not field.attrs.get("name"): 106 | continue 107 | 108 | tag_type = field.attrs.get("type") 109 | 110 | klass = mapping.get(field.name) or mapping.get(tag_type) 111 | if klass: 112 | fields.add(klass(field)) 113 | elif field.name in ("input", "button"): 114 | if tag_type == "submit": 115 | fields.add(Submit(field)) 116 | else: 117 | fields.add(Input(field)) 118 | return fields 119 | 120 | def _set_default_submit(self, submit_button: tp.Union[str, Submit]) -> None: 121 | if isinstance(submit_button, Submit): 122 | if submit_button in self.fields: 123 | submit_button.is_default = True 124 | else: 125 | raise LookupError( 126 | f"Submit button: {submit_button} not found in {self.fields}" 127 | ) 128 | else: 129 | submit = self.fields.get(locator=submit_button, field_type=Submit) 130 | submit.is_default = True 131 | 132 | def to_httpx( 133 | self, submit_button: tp.Union[str, Submit] = None 134 | ) -> tp.Dict[str, tp.Any]: 135 | payload = {} 136 | key = "params" if self.method.lower() == "get" else "data" 137 | payload[key] = {} 138 | if submit_button: 139 | self._set_default_submit(submit_button) 140 | for field in self.fields.list(): 141 | if not field.disabled:  # readonly fields are still submitted; disabled ones are not 142 | serialize(field, payload, key) 143 | return payload 144 | 145 | def __repr__(self) -> str: 146 | msg = f"<{type(self).__name__} method={self.method}" 147 | if self.action: 148 | msg += f" action={self.action}" 149 | msg += f" fields={self.fields.list()}>" 150 | return msg 151 | 152 | 153 | @singledispatch 154 | def serialize(field: Field, payload: tp.Dict[str, dict], key: str) -> None: 155 | raise NotImplementedError(f"Field: {field} not supported") 156 | 157 | 158 | @serialize.register(File) 159 | def _(field: Field, payload: tp.Dict[str, dict], key="files") -> None: 160 | for value in field.value: 161 | payload[key].setdefault(field.name, []).append(value) 162 | 163 | 164 | @serialize.register(Input) 165 | @serialize.register(Textarea) 166 | def _(field: Field, payload: tp.Dict[str, dict], key: str) -> None: 167 | payload[key].update({field.name: field.value}) 168 | 169 | 170 | @serialize.register(Radio) 171 | def _(field: Field, payload: tp.Dict[str, dict], key: str) -> None: 172 | if field.is_checked(): 173 | payload[key].update({field.name: field.value}) 174 | 175 | 176 | @serialize.register(Checkbox) 177 | def _(field: Field, payload: tp.Dict[str, dict], key: str) -> None: 178 | if field.is_checked(): 179 | if not field.value: 180 | payload[key].update({field.name: "on"}) 181 | else: 182 | payload[key].setdefault(field.name, []).append(field.value) 183 | payload[key][field.name].sort() 184 | 185 | 186 | @serialize.register(Select) 187 | def _(field: Field, payload: tp.Dict[str, dict], key: str) -> None: 188 | values = [option.value for option in field.options() if option.is_selected()] 189 | if not
field.has_multiple(): 190 | payload[key].update({field.name: values[0]}) 191 | else: 192 | for value in values: 193 | payload[key].setdefault(field.name, []).append(value) 194 | payload[key][field.name].sort() 195 | 196 | 197 | @serialize.register(Submit) 198 | def _(field: Field, payload: tp.Dict[str, dict], key: str) -> None: 199 | if field.is_default: 200 | payload[key].update({field.name: field.value}) 201 | -------------------------------------------------------------------------------- /src/robox/_history.py: -------------------------------------------------------------------------------- 1 | import typing as tp 2 | from collections import deque 3 | 4 | 5 | class BrowserHistory: 6 | def __init__( 7 | self, location: tp.Any = None, max_back: int = None, max_forward: int = None 8 | ) -> None: 9 | if max_back is not None: 10 | max_back += 1 # +1 for storing the current location 11 | self._back = deque(maxlen=max_back) 12 | self._forward = deque(maxlen=max_forward) 13 | if location is not None: 14 | self.location = location 15 | 16 | @property 17 | def location(self) -> tp.Any: 18 | if self._back: 19 | return self._back[-1] 20 | raise AttributeError("Location has not been set") 21 | 22 | @location.setter 23 | def location(self, value: tp.Any) -> None: 24 | latest_entry = self.latest_entry() 25 | if not latest_entry or latest_entry.url != value.url: 26 | self._back.append(value) 27 | self._forward.clear() 28 | 29 | def back(self, i: int = 1) -> tp.Any: 30 | if i > 0: 31 | for _ in range(min(i, len(self._back) - 1)): 32 | self._forward.appendleft(self._back.pop()) 33 | return self.location 34 | 35 | def forward(self, i: int = 1) -> tp.Any: 36 | if i > 0: 37 | for _ in range(i): 38 | self._back.append(self._forward.popleft()) 39 | return self.location 40 | 41 | def go(self, i: int) -> tp.Any: 42 | if i < 0: 43 | return self.back(-i) 44 | if i > 0: 45 | return self.forward(i) 46 | return self.location 47 | 48 | def get_locations(self) -> tp.List[tp.Any]: 49 | result = [] 50 | # back and current locations 51 | n = len(self._back) - 1 52 | result.extend((i - n, location) for i, location in enumerate(self._back)) 53 | # forward locations 54 | result.extend((i + 1, location) for i, location in enumerate(self._forward)) 55 | result.reverse() 56 | return result 57 | 58 | def latest_entry(self) -> tp.Optional[tp.Any]: 59 | try: 60 | return self._back[-1] 61 | except IndexError: 62 | return None 63 | -------------------------------------------------------------------------------- /src/robox/_link.py: -------------------------------------------------------------------------------- 1 | import typing as tp 2 | 3 | from bs4 import BeautifulSoup, Tag 4 | 5 | 6 | class Link(tp.NamedTuple): 7 | href: str 8 | text: str 9 | 10 | def __repr__(self) -> str: 11 | return f"<{self.__class__.__name__} text={self.text} href={self.href}>" 12 | 13 | 14 | def find_all_a_tags_with_href( 15 | parsed: BeautifulSoup, *args: tp.Any, **kwargs: tp.Any 16 | ) -> tp.List[Tag]: 17 | for a in parsed.find_all("a", href=True, *args, **kwargs): 18 | href = a.get("href") 19 | if href: 20 | yield href, a.text 21 | 22 | 23 | def remove_page_jumps_from_links( 24 | links: tp.Iterator[Link], 25 | ) -> tp.Generator[Link, None, None]: 26 | for href, text in links: 27 | yield href.split("#")[0], text 28 | 29 | 30 | def remove_duplicate_links(links: tp.Iterator[Link]) -> tp.Generator[Link, None, None]: 31 | seen = set() 32 | seen_add = seen.add 33 | for href, text in links: 34 | if not (href in seen or seen_add(href)): 35 | yield href, text 
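# Taken together, these helpers form a small generator pipeline over
# (href, text) pairs; a sketch of how _page.BasePage.get_links composes them:
#
#   links = find_all_a_tags_with_href(parsed)
#   links = remove_page_jumps_from_links(links)  # drop '#fragment' suffixes
#   links = remove_duplicate_links(links)        # keep the first occurrence of each href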
36 | 37 | 38 | def only_internal_links( 39 | links: tp.Iterator[Link], host: str 40 | ) -> tp.Generator[Link, None, None]: 41 | for href, text in links: 42 | if host in href: 43 | yield href, text 44 | -------------------------------------------------------------------------------- /src/robox/_options.py: -------------------------------------------------------------------------------- 1 | import typing as tp 2 | from dataclasses import dataclass, field 3 | 4 | from httpx import NetworkError, TimeoutException 5 | from httpx_cache.cache import BaseCache 6 | 7 | RETRY_STATUS_FORCELIST = (429, 500, 502, 503, 504) 8 | RETRY_METHOD_WHITELIST = ("HEAD", "GET", "OPTIONS") 9 | 10 | 11 | @dataclass(frozen=True) 12 | class Options: 13 | user_agent: str = None 14 | raise_on_4xx_5xx: bool = False 15 | delay_between_requests: tp.Tuple[float, float] = (0.0, 0.0) 16 | soup_kwargs: dict = field(default_factory=dict) 17 | obey_robotstxt: bool = False 18 | history: bool = True 19 | cache: tp.Optional[BaseCache] = None 20 | cacheable_methods: tp.Tuple[str, ...] = ("GET",) 21 | cacheable_status_codes: tp.Tuple[int, ...] = (200, 203, 300, 301, 308) 22 | retry: bool = False 23 | retry_max_attempts: int = 3 24 | retry_status_forcelist: tp.Tuple[int, ...] = RETRY_STATUS_FORCELIST 25 | retry_method_whitelist: tp.Tuple[str, ...] = RETRY_METHOD_WHITELIST 26 | retry_on_exceptions: tp.Tuple[Exception, ...] = (TimeoutException, NetworkError) 27 | retry_multiplier: int = 1 28 | retry_max_delay: int = 100 29 | 30 | def __post_init__(self): 31 | self.soup_kwargs.setdefault("features", "html.parser") 32 | -------------------------------------------------------------------------------- /src/robox/_page.py: -------------------------------------------------------------------------------- 1 | import re 2 | import tempfile 3 | import typing as tp 4 | import webbrowser 5 | from functools import cached_property 6 | from typing import TYPE_CHECKING 7 | from urllib.parse import urljoin 8 | 9 | import httpx 10 | from bs4 import BeautifulSoup, Tag 11 | 12 | from robox._controls import Submit 13 | from robox._form import Form 14 | from robox._link import ( 15 | Link, 16 | find_all_a_tags_with_href, 17 | remove_duplicate_links, 18 | remove_page_jumps_from_links, 19 | ) 20 | from robox._table import Table 21 | 22 | if TYPE_CHECKING: 23 | from robox import Robox 24 | 25 | T = tp.TypeVar("T", bound="Robox") 26 | 27 | 28 | class BasePage: 29 | def __init__(self, response: httpx.Response, robox: T) -> None: 30 | self.response = response 31 | self.content = self.response.content 32 | self.url = self.response.url 33 | self.robox = robox 34 | 35 | @property 36 | def status_code(self) -> int: 37 | return self.response.status_code 38 | 39 | @property 40 | def from_cache(self) -> bool: 41 | try: 42 | return self.response.from_cache 43 | except AttributeError: 44 | return False 45 | 46 | @cached_property 47 | def parsed(self) -> BeautifulSoup: 48 | return BeautifulSoup(self.content, **self.robox.options.soup_kwargs) 49 | 50 | @cached_property 51 | def title(self) -> str: 52 | title = self.parsed.title 53 | if title: 54 | return title.text 55 | 56 | @cached_property 57 | def description(self) -> tp.Optional[str]: 58 | description = self.parsed.find("meta", {"name": "description"}) 59 | if description: 60 | return description["content"] 61 | 62 | def get_form(self, *args: tp.Any, **kwargs: tp.Any) -> tp.Optional[Form]: 63 | form = self.parsed.find(name="form", *args, **kwargs) 64 | if not form: 65 | raise ValueError("No form found") 66 | return 
Form(form) 67 | 68 | def get_forms(self, *args: tp.Any, **kwargs: tp.Any) -> tp.List[Form]: 69 | forms = self.parsed.find_all(name="form", *args, **kwargs) 70 | if not forms: 71 | raise ValueError("No forms found") 72 | return [Form(form) for form in forms] 73 | 74 | def get_tables(self, *args: tp.Any, **kwargs: tp.Any) -> tp.List[Table]: 75 | tables = self.parsed.find_all(name="table", *args, **kwargs) 76 | if not tables: 77 | raise ValueError("No tables found") 78 | return [Table(table) for table in tables] 79 | 80 | def _prepare_referer_header(self) -> tp.Dict[str, str]: 81 | headers = {} 82 | if "Referer" not in self.response.headers: 83 | headers["Referer"] = str(self.response.url) 84 | return headers 85 | 86 | def get_links( 87 | self, only_internal_links: bool = False, *args: tp.Any, **kwargs: tp.Any 88 | ) -> tp.Generator[Link, None, None]: 89 | links = find_all_a_tags_with_href(self.parsed, *args, **kwargs) 90 | links = remove_page_jumps_from_links(links) 91 | links = remove_duplicate_links(links) 92 | if only_internal_links: 93 | links = ((href, text) for href, text in links if self.url.host in href)  # the flag shadows robox._link.only_internal_links, so filter inline 94 | for href, text in links: 95 | yield Link(href=href, text=text.strip()) 96 | 97 | def get_links_by_regex( 98 | self, regex: str, *args: tp.Any, **kwargs: tp.Any 99 | ) -> tp.List[Link]: 100 | return [ 101 | link 102 | for link in self.get_links(*args, **kwargs) 103 | if re.search(regex, link.href) 104 | ] 105 | 106 | def _get_links_by_text( 107 | self, text: str, *args: tp.Any, **kwargs: tp.Any 108 | ) -> tp.List[Link]: 109 | return [ 110 | link 111 | for link in self.get_links(*args, **kwargs) 112 | if text.lower() == link.text.lower() 113 | ] 114 | 115 | def _get_link_text(self, text: str) -> Link: 116 | links = self._get_links_by_text(text) 117 | if not links: 118 | raise ValueError(f"No link with text {text} found") 119 | if len(links) > 1: 120 | raise ValueError(f"Multiple links with text {text} found") 121 | return links[0] 122 | 123 | def debug_page(self) -> None: 124 | with tempfile.NamedTemporaryFile("w", delete=False, suffix=".html") as f: 125 | url = f"file://{f.name}" 126 | f.write(str(self.parsed)) 127 | webbrowser.open(url) 128 | 129 | def __hash__(self) -> int: 130 | return hash(tuple([self.parsed, self.url])) 131 | 132 | def __eq__(self, other: object) -> bool: 133 | return ( 134 | isinstance(other, BasePage) 135 | and self.parsed == other.parsed 136 | and self.url == other.url 137 | ) 138 | 139 | def __repr__(self) -> str: 140 | return f"<{self.__class__.__name__} url={self.url}>" 141 | 142 | 143 | class Page(BasePage): 144 | def submit_form( 145 | self, form: Form, submit_button: tp.Union[str, Submit] = None 146 | ) -> "Page": 147 | payload = form.to_httpx(submit_button) 148 | headers = self._prepare_referer_header() 149 | return self.robox.open( 150 | url=self.response.url.join(form.action), 151 | method=form.method, 152 | headers=headers, 153 | **payload, 154 | ) 155 | 156 | def follow_link(self, link: Link) -> "Page": 157 | return self.robox.open(urljoin(str(self.url), link.href)) 158 | 159 | def follow_link_by_tag(self, tag: Tag) -> "Page": 160 | return self.robox.open(urljoin(str(self.url), tag["href"])) 161 | 162 | def follow_link_by_text(self, text: str) -> "Page": 163 | link = self._get_link_text(text) 164 | return self.follow_link(link) 165 | 166 | 167 | class AsyncPage(BasePage): 168 | async def submit_form( 169 | self, form: Form, submit_button: tp.Union[str, Submit] = None 170 | ) -> "AsyncPage": 171 | payload = form.to_httpx(submit_button) 172 | headers =
self._prepare_referer_header() 173 | return await self.robox.open( 174 | url=self.response.url.join(form.action), 175 | method=form.method, 176 | headers=headers, 177 | **payload, 178 | ) 179 | 180 | async def follow_link(self, link: Link) -> "AsyncPage": 181 | return await self.robox.open(urljoin(str(self.url), link.href)) 182 | 183 | async def follow_link_by_tag(self, tag: Tag) -> "AsyncPage": 184 | return await self.robox.open(urljoin(str(self.url), tag["href"])) 185 | 186 | async def follow_link_by_text(self, text: str) -> "AsyncPage": 187 | link = self._get_link_text(text) 188 | return await self.follow_link(link) 189 | -------------------------------------------------------------------------------- /src/robox/_retry.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import typing as tp 3 | 4 | import tenacity 5 | from httpx import HTTPStatusError 6 | 7 | from robox import LOG 8 | from robox._exceptions import RetryError 9 | from robox._options import RETRY_METHOD_WHITELIST, RETRY_STATUS_FORCELIST, Options 10 | 11 | 12 | def raise_retry_error(retry_state: tenacity.RetryCallState) -> None: 13 | outcome = retry_state.outcome 14 | msg = "Retry failed on {} after {} attempts" 15 | if outcome.failed: 16 | url = outcome.exception().request.url 17 | raise RetryError(msg.format(url, outcome.attempt_number)) 18 | page = outcome.result() 19 | if page.status_code in RETRY_STATUS_FORCELIST: 20 | raise RetryError(msg.format(page.response.request.url, outcome.attempt_number)) 21 | 22 | 23 | def is_exception_with_retry_status_forcelist(e: Exception) -> bool: 24 | return ( 25 | isinstance(e, HTTPStatusError) 26 | and e.response.status_code in RETRY_STATUS_FORCELIST 27 | ) 28 | 29 | 30 | class retry_if_code_in_retry_status_forcelist(tenacity.retry_base): 31 | def __call__(self, retry_state: tenacity.RetryCallState) -> bool: 32 | if retry_state.outcome.failed: 33 | exception = retry_state.outcome.exception() 34 | return is_exception_with_retry_status_forcelist(exception) 35 | page = retry_state.outcome.result() 36 | return page.status_code in RETRY_STATUS_FORCELIST 37 | 38 | 39 | def call_with_retry(open_func: tp.Callable, options: Options) -> tp.Callable: 40 | if options.retry and open_func.method in RETRY_METHOD_WHITELIST: 41 | retry_strategy = ( 42 | retry_if_code_in_retry_status_forcelist() 43 | | tenacity.retry_if_exception_type(options.retry_on_exceptions) 44 | ) 45 | return tenacity.retry( 46 | retry=retry_strategy, 47 | stop=tenacity.stop_after_attempt(options.retry_max_attempts), 48 | retry_error_callback=raise_retry_error, 49 | wait=tenacity.wait_exponential( 50 | multiplier=options.retry_multiplier, 51 | max=options.retry_max_delay, 52 | ), 53 | before=tenacity.before_log(LOG, logging.DEBUG), 54 | after=tenacity.after_log(LOG, logging.DEBUG), 55 | reraise=True, 56 | )(open_func) 57 | return open_func 58 | -------------------------------------------------------------------------------- /src/robox/_robots.py: -------------------------------------------------------------------------------- 1 | import typing as tp 2 | from urllib.parse import urlparse 3 | from urllib.robotparser import RobotFileParser 4 | 5 | robotstxt = {} 6 | 7 | 8 | def resolve_robotstxt_url(url: str) -> str: 9 | url_struct = urlparse(url) 10 | return f"{url_struct.scheme}://{url_struct.netloc}/robots.txt" 11 | 12 | 13 | def ask_robots(url: str, useragent: str = "*") -> tp.Tuple[bool, tp.Optional[int]]: 14 | r_url = resolve_robotstxt_url(url) 15 | if r_url not in 
/src/robox/_robots.py:
--------------------------------------------------------------------------------
1 | import typing as tp
2 | from urllib.parse import urlparse
3 | from urllib.robotparser import RobotFileParser
4 | 
5 | # per-host cache of parsed robots.txt files, keyed by the robots.txt URL
6 | robotstxt: tp.Dict[str, RobotFileParser] = {}
7 | 
8 | 
9 | def resolve_robotstxt_url(url: str) -> str:
10 |     url_struct = urlparse(url)
11 |     return f"{url_struct.scheme}://{url_struct.netloc}/robots.txt"
12 | 
13 | 
14 | def ask_robots(url: str, useragent: str = "*") -> tp.Tuple[bool, tp.Optional[int]]:
15 |     r_url = resolve_robotstxt_url(url)
16 |     if r_url not in robotstxt:
17 |         robotstxt[r_url] = RobotFileParser()
18 |         robotstxt[r_url].set_url(r_url)
19 |         robotstxt[r_url].read()
20 |     return robotstxt[r_url].can_fetch(useragent, url), robotstxt[r_url].crawl_delay(
21 |         useragent
22 |     )
23 | 
24 | 
25 | async def async_ask_robots(
26 |     url: str, useragent: str = "*"
27 | ) -> tp.Tuple[bool, tp.Optional[int]]:
28 |     # RobotFileParser fetches with urllib, so the first call per host
29 |     # blocks; later calls are answered from the in-memory cache above.
30 |     return ask_robots(url, useragent)
--------------------------------------------------------------------------------
/src/robox/_table.py:
--------------------------------------------------------------------------------
1 | import reprlib
2 | import typing as tp
3 | from itertools import product
4 | 
5 | from bs4.element import Tag
6 | 
7 | 
8 | class Table:
9 |     def __init__(self, parsed_table: Tag) -> None:
10 |         self.parsed_table = parsed_table
11 | 
12 |     def _parse_tr(self) -> tp.List[Tag]:
13 |         return self.parsed_table.find_all("tr")
14 | 
15 |     def get_rows(self) -> tp.List[tp.List[str]]:
16 |         rowspans = []  # track pending rowspans
17 |         rows = self._parse_tr()
18 | 
19 |         # first scan: see how many columns we need
20 |         colcount = 0
21 |         for r, row in enumerate(rows):
22 |             cells = row.find_all(["td", "th"], recursive=False)
23 |             colcount = max(
24 |                 colcount,
25 |                 sum(int(c.get("colspan", 1)) or 1 for c in cells[:-1])
26 |                 + len(cells[-1:])
27 |                 + len(rowspans),
28 |             )
29 |             rowspans += [int(c.get("rowspan", 1)) or len(rows) - r for c in cells]
30 |             rowspans = [s - 1 for s in rowspans if s > 1]
31 | 
32 |         table = [[None] * colcount for _ in rows]
33 | 
34 |         # fill matrix from row data
35 |         rowspans = {}
36 |         for row, row_elem in enumerate(rows):
37 |             span_offset = 0
38 |             for col, cell in enumerate(
39 |                 row_elem.find_all(["td", "th"], recursive=False)
40 |             ):
41 |                 col += span_offset
42 |                 while rowspans.get(col, 0):
43 |                     span_offset += 1
44 |                     col += 1
45 | 
46 |                 # fill table data
47 |                 rowspan = rowspans[col] = int(cell.get("rowspan", 1)) or len(rows) - row
48 |                 colspan = int(cell.get("colspan", 1)) or colcount - col
49 |                 # next column is offset by the colspan
50 |                 span_offset += colspan - 1
51 |                 value = cell.get_text()
52 |                 for drow, dcol in product(range(rowspan), range(colspan)):
53 |                     try:
54 |                         table[row + drow][col + dcol] = value
55 |                         rowspans[col + dcol] = rowspan
56 |                     except IndexError:
57 |                         pass
58 | 
59 |             # update rowspan bookkeeping
60 |             rowspans = {c: s - 1 for c, s in rowspans.items() if s > 1}
61 | 
62 |         return table
63 | 
64 |     def __repr__(self) -> str:
65 |         return reprlib.repr(self.get_rows())
--------------------------------------------------------------------------------
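To make the rowspan handling in Table.get_rows concrete, a small self-contained example; the HTML is invented for illustration, and the expected matrix follows from the span-expansion logic above.

from bs4 import BeautifulSoup

from robox._table import Table

html = """
<table>
    <tr><td rowspan="2">A</td><td>B</td></tr>
    <tr><td>C</td></tr>
</table>
"""
table = Table(BeautifulSoup(html, features="html.parser"))
# the spanned cell is copied into every grid position it covers
assert table.get_rows() == [["A", "B"], ["A", "C"]]

--------------------------------------------------------------------------------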
/src/robox/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danclaudiupop/robox/e2c4203aade6401c206f5cfdfd55d1c8d94a7e5f/src/robox/py.typed
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danclaudiupop/robox/e2c4203aade6401c206f5cfdfd55d1c8d94a7e5f/tests/__init__.py
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from bs4 import BeautifulSoup
3 | 
4 | 
5 | @pytest.fixture
6 | def beautiful_soup():
7 |     def _(html):
8 |         return BeautifulSoup(html, features="html.parser")
9 | 
10 |     return _
--------------------------------------------------------------------------------
/tests/test_client.py:
--------------------------------------------------------------------------------
1 | import json
2 | from http.cookiejar import Cookie, CookieJar
3 | from unittest.mock import MagicMock, patch
4 | 
5 | import httpx
6 | import pytest
7 | import respx
8 | 
9 | from robox import AsyncRobox, DictCache, Options, Robox
10 | from robox._exceptions import ForbiddenByRobots, RetryError
11 | 
12 | TEST_URL = "https://foo.bar"
13 | 
14 | 
15 | def test_open(respx_mock):
16 |     respx_mock.get(TEST_URL).respond(200)
17 |     with Robox() as robox:
18 |         page = robox.open(TEST_URL)
19 |         assert page.status_code == 200
20 | 
21 | 
22 | @pytest.mark.asyncio
23 | async def test_async_open(respx_mock):
24 |     async with AsyncRobox() as robox:
25 |         respx_mock.get(TEST_URL).respond(200)
26 |         page = await robox.open(TEST_URL)
27 |         assert page.status_code == 200
28 | 
29 | 
30 | def test_refresh(respx_mock):
31 |     respx_mock.get(TEST_URL).respond(200)
32 |     with Robox() as robox:
33 |         robox.open(TEST_URL)
34 |         assert robox.total_requests == 1
35 |         robox.refresh()
36 |         assert robox.total_requests == 2
37 |         assert len(robox.get_history()) == 1
38 | 
39 | 
40 | def test_back_without_history():
41 |     with pytest.raises(ValueError):
42 |         with Robox() as robox:
43 |             robox.back()
44 | 
45 | 
46 | def test_forward_without_history():
47 |     with pytest.raises(ValueError):
48 |         with Robox() as robox:
49 |             robox.forward()
50 | 
51 | 
52 | @respx.mock(base_url=TEST_URL)
53 | def test_back_and_forward(respx_mock):
54 |     first_url = f"{TEST_URL}/1"
55 |     second_url = f"{TEST_URL}/2"
56 |     respx_mock.get("/1").respond(200)
57 |     respx_mock.get("/2").respond(200)
58 |     with Robox() as robox:
59 |         robox.open(first_url)
60 |         robox.open(second_url)
61 |         robox.back()
62 |         assert robox.current_url == first_url
63 |         robox.forward()
64 |         assert robox.current_url == second_url
65 | 
66 | 
67 | def test_download(respx_mock, tmpdir):
68 |     download_url = f"{TEST_URL}/foo.bin"
69 |     respx_mock.get(download_url).respond(200, text="Foo")
70 |     with Robox() as robox:
71 |         robox.download_file(url=download_url, destination_folder=tmpdir)
72 |         assert (tmpdir / "foo.bin").exists()
73 | 
74 | 
75 | @pytest.mark.asyncio
76 | async def test_async_download(respx_mock, tmpdir):
77 |     download_url = f"{TEST_URL}/foo.bin"
78 |     respx_mock.get(download_url).respond(200, text="Foo")
79 |     async with AsyncRobox() as robox:
80 |         await robox.download_file(url=download_url, destination_folder=tmpdir)
81 |         assert (tmpdir / "foo.bin").exists()
82 | 
83 | 
84 | def test_raise_on_4xx_5xx(respx_mock):
85 |     respx_mock.get(TEST_URL).respond(400)
86 |     with pytest.raises(httpx.HTTPStatusError):
87 |         Robox(options=Options(raise_on_4xx_5xx=True)).open(TEST_URL)
88 | 
89 | 
90 | def test_cache(respx_mock):
91 |     respx_mock.get(TEST_URL).respond(200, html="foo")
92 |     with Robox(options=Options(cache=DictCache())) as robox:
93 |         p1 = robox.open(TEST_URL)
94 |         assert not p1.from_cache
95 |         p2 = robox.open(TEST_URL)
96 |         assert p2.from_cache
97 | 
98 | 
99 | @pytest.mark.asyncio
100 | async def test_async_cache(respx_mock):
101 |     respx_mock.get(TEST_URL).respond(200, html="foo")
102 |     async with AsyncRobox(options=Options(cache=DictCache())) as robox:
103 |         p1 = await robox.open(TEST_URL)
104 |         assert not p1.from_cache
105 |         p2 = await robox.open(TEST_URL)
106 |         assert p2.from_cache
107 | 
108 | 
109 | def test_robots(respx_mock):
110 |     cm = MagicMock()
111 |     cm.getcode.return_value = 200
112 |     cm.read.return_value = b"User-agent: *\nDisallow: /"
113 |     with patch("urllib.request.urlopen", return_value=cm):
114 |         respx_mock.get(TEST_URL).respond(200)
115 |         with pytest.raises(ForbiddenByRobots):
116 |             with Robox(options=Options(obey_robotstxt=True)) as robox:
117 |                 robox.open(TEST_URL)
118 | 
119 | 
120 | def test_retry(respx_mock):
121 |     respx_mock.get(TEST_URL).mock(side_effect=httpx.ConnectError)
122 |     with pytest.raises(RetryError):
123 |         with Robox(options=Options(retry=True, retry_max_attempts=1)) as robox:
124 |             robox.open(TEST_URL)
125 | 
126 | 
127 | def test_retry_raise_on_4xx_5xx(respx_mock):
128 |     respx_mock.get(TEST_URL).respond(500)
129 |     with pytest.raises(RetryError):
130 |         with Robox(
131 |             options=Options(retry=True, retry_max_attempts=1, raise_on_4xx_5xx=True)
132 |         ) as robox:
133 |             robox.open(TEST_URL)
134 | 
135 | 
136 | @pytest.mark.asyncio
137 | async def test_async_retry(respx_mock):
138 |     respx_mock.get(TEST_URL).respond(500)
139 |     with pytest.raises(RetryError):
140 |         async with AsyncRobox(
141 |             options=Options(retry=True, retry_max_attempts=1)
142 |         ) as robox:
143 |             await robox.open(TEST_URL)
144 | 
145 | 
146 | def test_retry_recoverable(respx_mock):
147 |     route = respx_mock.get(TEST_URL)
148 |     route.side_effect = [
149 |         httpx.Response(500),
150 |         httpx.Response(200),
151 |     ]
152 |     with Robox(options=Options(retry=True, retry_max_attempts=2)) as robox:
153 |         page = robox.open(TEST_URL)
154 |         assert page.status_code == 200
155 | 
156 | 
157 | def test_save_and_load_cookies(respx_mock, tmp_path):
158 |     cookies = CookieJar()
159 |     cookie = Cookie(
160 |         version=0,
161 |         name="example-name",
162 |         value="example-value",
163 |         port=None,
164 |         port_specified=False,
165 |         domain="",
166 |         domain_specified=False,
167 |         domain_initial_dot=False,
168 |         path="/",
169 |         path_specified=True,
170 |         secure=False,
171 |         expires=None,
172 |         discard=True,
173 |         comment=None,
174 |         comment_url=None,
175 |         rest={"HttpOnly": ""},
176 |         rfc2109=False,
177 |     )
178 |     cookies.set_cookie(cookie)
179 | 
180 |     respx_mock.get(TEST_URL).respond(200)
181 |     with Robox(cookies=cookies) as robox:
182 |         robox.open(TEST_URL)
183 |         robox.save_cookies(tmp_path / "cookies.json")
184 |         with open(tmp_path / "cookies.json") as f:
185 |             loaded_cookies = json.load(f)
186 |         assert loaded_cookies == {"example-name": "example-value"}
187 |         assert len(robox.cookies) == 1
188 | 
189 |     with Robox() as robox:
190 |         robox.load_cookies(tmp_path / "cookies.json")
191 |         robox.open(TEST_URL)
192 |         assert len(robox.cookies) == 1
--------------------------------------------------------------------------------
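The form controls exercised by the next module can also be driven directly against a parsed tag; a small sketch with invented HTML, using only names that appear in the tests below.

from bs4 import BeautifulSoup

from robox._controls import Checkbox

tag = BeautifulSoup(
    '<input type="checkbox" name="newsletter"/>', features="html.parser"
).find("input")
checkbox = Checkbox(tag)
assert not checkbox.is_checked()
checkbox.check()  # sets the checked attribute on the underlying tag
assert checkbox.tag.has_attr("checked")

--------------------------------------------------------------------------------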
/tests/test_controls.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from bs4 import BeautifulSoup
3 | 
4 | from robox._controls import Checkbox, Input, Option, Select
5 | 
6 | 
7 | class TestCheckboxField:
8 |     def test_checkbox_values(self, beautiful_soup):
9 |         html = """
10 |             <input type="checkbox" name="foo" value="bar"/>
11 |             <input type="checkbox" name="foo" value="Bar"/>
12 |         """
13 |         tag = beautiful_soup(html).find("input")
14 |         checkbox = Checkbox(tag)
15 |         assert list(checkbox.values()) == ["bar", "Bar"]
16 | 
17 |     def test_checkbox_without_value_attribute(self):
18 |         html = '<input type="checkbox" name="foo"/>'
19 |         checkbox = Checkbox(BeautifulSoup(html, features="html.parser").find("input"))
20 |         assert list(checkbox.values()) == []
21 | 
22 |     @pytest.mark.parametrize(
23 |         "html, is_checked",
24 |         [
25 |             ('<input type="checkbox" name="foo"/>', False),
26 |             ('<input type="checkbox" name="foo" checked/>', True),
27 |         ],
28 |     )
29 |     def test_is_checked(self, html, is_checked, beautiful_soup):
30 |         tag = beautiful_soup(html).find("input")
31 |         checkbox = Checkbox(tag)
32 |         assert checkbox.is_checked() is is_checked
33 | 
34 |     def test_check(self, beautiful_soup):
35 |         html = '<input type="checkbox" name="foo"/>'
36 |         tag = beautiful_soup(html).find("input")
37 |         checkbox = Checkbox(tag)
38 |         checkbox.check()
39 |         assert checkbox.tag.has_attr("checked")
40 | 
41 | 
42 | class TestSelectField:
43 |     @pytest.mark.parametrize(
44 |         "html, is_multiple",
45 |         [
46 |             ('<select name="pets"></select>', False),
47 |             ('<select name="pets" multiple></select>', True),
48 |         ],
49 |     )
50 |     def test_select_with_multiple(self, html, is_multiple, beautiful_soup):
51 |         tag = beautiful_soup(html).find("select")
52 |         select = Select(tag)
53 |         assert select.has_multiple() is is_multiple
54 | 
55 |     def test_select_with_options(self, beautiful_soup):
56 |         html = """
57 |             <select name="pets">
58 |                 <option value="dog">Dog</option>
59 |             </select>
60 |         """
61 |         tag = beautiful_soup(html).find("select")
62 |         select = Select(tag)
63 |         assert len(select.options()) == 1
64 |         assert isinstance(select.options()[0], Option)
65 | 
66 |     def test_option(self, beautiful_soup):
67 |         html = '<option value="cat">Cat</option>'
68 |         tag = beautiful_soup(html).find("option")
69 |         option = Option(tag)
70 |         assert option.text == "Cat"
71 |         assert option.value == "cat"
72 | 
73 |     def test_option_is_selected(self, beautiful_soup):
74 |         html = '<option value="cat" selected>Cat</option>'
75 |         tag = beautiful_soup(html).find("option")
76 |         option = Option(tag)
77 |         assert option.is_selected()
78 | 
79 |     def test_select_option(self, beautiful_soup):
80 |         html = '<option value="cat">Cat</option>'
81 |         tag = beautiful_soup(html).find("option")
82 |         option = Option(tag)
83 |         option.select()
84 |         assert option.tag.has_attr("selected")
85 | 
86 | 
87 | @pytest.mark.parametrize(
88 |     "html, expected_result",
89 |     [
90 |         ('<label for="foo">Bar</label><input id="foo" type="text"/>', "Bar"),
91 |         ('<label>Bar<input type="text"/></label>', "Bar"),
92 |     ],
93 | )
94 | def test_field_label(beautiful_soup, html, expected_result):
95 |     tag = beautiful_soup(html).find("input")
96 |     input_field = Input(tag)
97 |     assert input_field.label == expected_result
--------------------------------------------------------------------------------
/tests/test_form.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | 
3 | from robox._exceptions import InvalidValue
4 | from robox._form import Form
5 | 
6 | 
7 | @pytest.fixture
8 | def parsed_select_form(beautiful_soup):
9 |     def generate_form(multiple=False):
10 |         select_form = """
11 |             <form>
12 |                 <label for="pet-select">Pets:</label>
13 |                 <select id="pet-select" name="pets">
14 |                     <option value="cat">Cat</option>
15 |                     <option value="dog">Dog</option>
16 |                 </select>
17 |             </form>
18 |         """
19 |         parsed = beautiful_soup(select_form)
20 |         if multiple:
21 |             parsed.form.find("select")["multiple"] = "multiple"
22 |         return parsed
23 | 
24 |     return generate_form
25 | 
26 | 
27 | def test_select_multiple(parsed_select_form):
28 |     form = Form(parsed_select_form(multiple=True))
29 |     form.select("pets", options=["dog", "Cat"])
30 |     assert form.to_httpx() == {"params": {"pets": ["cat", "dog"]}}
31 | 
32 | 
33 | def test_select_simple(parsed_select_form):
34 |     form = Form(parsed_select_form())
35 |     form.select("pets", options=["dog"])
36 |     assert form.to_httpx() == {"params": {"pets": "dog"}}
37 | 
38 | 
39 | def test_select_invalid_option(parsed_select_form):
40 |     with pytest.raises(InvalidValue) as exc:
41 |         form = Form(parsed_select_form(multiple=True))
42 |         form.select("pets", options=["dog", "hamster"])
43 |     expected_message = (
44 |         "The following options: ['hamster'] were not"
45 |         ' found on field: <select id="pet-select" name="pets">'
46 |     )
47 |     assert exc.value.args[0].startswith(expected_message)
48 | 
49 | 
50 | @pytest.fixture
51 | def parsed_checkbox_form(beautiful_soup):
52 |     checkbox_form = """
53 |         <form>
54 |             <input type="checkbox" id="cat" name="animal" value="cat"/>
55 |             <label for="cat">Cat</label>
56 |             <input type="checkbox" id="dog" name="animal" value="dog" checked/>
57 |             <label for="dog">Dog</label>
58 |         </form>
59 |     """
60 |     return beautiful_soup(checkbox_form)
61 | 
62 | 
63 | def test_checkbox(parsed_checkbox_form):
64 |     form = Form(parsed_checkbox_form)
65 |     form.check("animal", values=["Cat"])
66 |     assert form.to_httpx() == {"params": {"animal": ["cat", "dog"]}}
67 | 
68 | 
69 | def test_checkbox_invalid_option(parsed_checkbox_form):
70 |     with pytest.raises(InvalidValue) as exc:
71 |         form = Form(parsed_checkbox_form)
72 |         form.check("animal", values=["Foo"])
73 |     assert exc.value.args[0].startswith("Invalid value for <input")
74 | 
75 | 
76 | def test_checkbox_without_value(beautiful_soup):
77 |     checkbox_form = """
78 |         <form>
79 |             <div>
80 |                 <input type="checkbox" id="dog" name="dog"/>
81 |                 <label for="dog">Dog</label>
82 |             </div>
83 |         </form>
84 |     """
85 |     parsed = beautiful_soup(checkbox_form)
86 |     form = Form(parsed)
87 |     form.check("dog", values=["on"])
88 |     assert form.to_httpx() == {"params": {"dog": "on"}}
89 | 
90 | 
91 | def test_fill_in_input(beautiful_soup):
92 |     input_form = """
93 |         <form>
94 |             <label for="name">Name:</label>
95 |             <input type="text" id="name" name="name"/>
96 |         </form>
97 |     """
98 |     parsed = beautiful_soup(input_form)
99 |     form = Form(parsed)
100 |     form.fill_in("Name:", value="foo")
101 |     assert form.to_httpx() == {"params": {"name": "foo"}}
102 | 
103 | 
104 | def test_fill_in_textarea(beautiful_soup):
105 |     textarea_form = """
106 |         <form>
107 |             <label for="story">story</label>
108 |             <textarea id="story" name="story">
109 |                 It was a dark and stormy night...
110 |             </textarea>
111 |         </form>
112 |     """
113 |     parsed = beautiful_soup(textarea_form)
114 |     form = Form(parsed)
115 |     form.fill_in("story", value="foo")
116 |     assert form.to_httpx() == {"params": {"story": "foo"}}
117 | 
118 | 
119 | @pytest.fixture
120 | def parsed_input_file_form(beautiful_soup):
121 |     def generate_form(multiple=False):
122 |         image_form = """
123 |             <form>
124 |                 <label for="doc">Upload:</label>
125 |                 <input type="file" id="doc" name="doc"/>
126 |             </form>
127 |         """
128 |         parsed = beautiful_soup(image_form)
129 |         if multiple:
130 |             parsed.form.find("input")["multiple"] = "multiple"
131 |         return parsed
132 | 
133 |     return generate_form
134 | 
135 | 
136 | def test_upload_file(tmp_path, parsed_input_file_form):
137 |     foo_txt = tmp_path / "foo.txt"
138 |     foo_txt.write_text("foo")
139 |     form = Form(parsed_input_file_form())
140 |     with open(foo_txt) as content:
141 |         form.upload("doc", values=[content])
142 |         assert len(form.to_httpx()["params"]["doc"]) == 1
143 | 
144 | 
145 | def test_upload_multiple_files(tmp_path, parsed_input_file_form):
146 |     foo_txt = tmp_path / "foo.txt"
147 |     foo_txt.write_text("foo")
148 |     form = Form(parsed_input_file_form(multiple=True))
149 |     with open(foo_txt) as content:
150 |         form.upload("doc", values=[content, content])
151 |         assert len(form.to_httpx()["params"]["doc"]) == 2
--------------------------------------------------------------------------------
/tests/test_page.py:
--------------------------------------------------------------------------------
1 | from types import SimpleNamespace
2 | 
3 | import pytest
4 | from httpcore import URL
5 | from httpx import Response
6 | 
7 | from robox import Options
8 | from robox._page import Form, Page
9 | 
10 | 
11 | class MockResponse(Response):
12 |     def url(self) -> URL:
13 |         return "https://example.com"
14 | 
15 | 
16 | robox = SimpleNamespace(options=Options())
17 | 
18 | 
19 | @pytest.fixture
20 | def page():
21 |     def _page(html):
22 |         return Page(response=MockResponse(200, html=html), robox=robox)
23 | 
24 |     return _page
25 | 
26 | 
27 | def test_get_links(page):
28 |     page = page(html='<a href="https://foo.bar">foo</a>')
29 |     links = list(page.get_links())
30 |     assert len(links) == 1
31 |     assert links[0].href == "https://foo.bar"
32 |     assert links[0].text == "foo"
33 | 
34 | 
35 | def test_get_links_by_regex(page):
36 |     page = page(html='<a href="https://foo.bar">foo</a>')
37 |     links = list(page.get_links_by_regex(r"foo"))
38 |     assert len(links) == 1
39 |     assert links[0].href == "https://foo.bar"
40 |     assert links[0].text == "foo"
41 | 
42 | 
43 | def test_get_form(page):
44 |     page = page(html="<form></form>")
45 |     form = page.get_form()
46 |     assert isinstance(form, Form)
47 | 
48 | 
49 | def test_get_forms(page):
50 |     page = page(html="<form></form><form></form>")
51 |     forms = page.get_forms()
52 |     assert len(forms) == 2
53 |     assert isinstance(forms[0], Form)
54 |     assert isinstance(forms[1], Form)
55 | 
56 | 
57 | def test_no_form(page):
58 |     with pytest.raises(ValueError):
59 |         page(html="").get_form()
60 | 
61 | 
62 | def test_get_no_forms(page):
63 |     with pytest.raises(ValueError):
64 |         page(html="").get_forms()
65 | 
66 | 
67 | def test_get_no_tables(page):
68 |     with pytest.raises(ValueError):
69 |         page(html="").get_tables()
--------------------------------------------------------------------------------
/tests/test_table.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | 
3 | from robox._table import Table
4 | 
5 | test_data = [
6 |     (
7 |         """
8 |         <table>
9 |             <thead>
10 |                 <tr>
11 |                     <th colspan="2">The table header</th>
12 |                 </tr>
13 |             </thead>
14 |             <tbody>
15 |                 <tr>
16 |                     <td>The table body</td>
17 |                     <td>with two columns</td>
18 |                 </tr>
19 |             </tbody>
20 |         </table>
21 |         """,
22 |         [
23 |             ["The table header", "The table header"],
24 |             ["The table body", "with two columns"],
25 |         ],
26 |     ),
27 |     (
28 |         """
29 |         <table>
30 |             <tr>
31 |                 <td>A</td>
32 |                 <td>B</td>
33 |             </tr>
34 |             <tr>
35 |                 <td rowspan="2">C</td>
36 |                 <td rowspan="2">D</td>
37 |             </tr>
38 |             <tr>
39 |                 <td>E</td>
40 |                 <td>F</td>
41 |             </tr>
42 |             <tr>
43 |                 <td>G</td>
44 |                 <td>H</td>
45 |             </tr>
46 |         </table>
47 |         """,
48 |         [
49 |             ["A", "B", None, None],
50 |             ["C", "D", None, None],
51 |             ["C", "D", "E", "F"],
52 |             ["G", "H", None, None],
53 |         ],
54 |     ),
55 |     (
56 |         """
57 |         <table>
58 |             <tr>
59 |                 <td rowspan="3" colspan="3">A</td>
60 |                 <td>B</td>
61 |                 <td>C</td>
62 |                 <td>D</td>
63 |             </tr>
64 |             <tr>
65 |                 <td colspan="3">E</td>
66 |             </tr>
67 |             <tr>
68 |                 <td>E</td>
69 |                 <td>C</td>
70 |                 <td>C</td>
71 |             </tr>
72 |             <tr>
73 |                 <td>E</td>
74 |                 <td>C</td>
75 |                 <td>C</td>
76 |                 <td>C</td>
77 |                 <td>C</td>
78 |                 <td>C</td>
79 |             </tr>
80 |         </table>
81 |         """,
82 |         [
83 |             ["A", "A", "A", "B", "C", "D"],
84 |             ["A", "A", "A", "E", "E", "E"],
85 |             ["A", "A", "A", "E", "C", "C"],
86 |             ["E", "C", "C", "C", "C", "C"],
87 |         ],
88 |     ),
89 | ]
90 | 
91 | 
92 | @pytest.mark.parametrize(
93 |     "table_html, expected_result", test_data, ids=["t1", "t2", "t3"]
94 | )
95 | def test_table(beautiful_soup, table_html, expected_result):
96 |     parsed = beautiful_soup(table_html)
97 |     table = Table(parsed)
98 |     assert table.get_rows() == expected_result
--------------------------------------------------------------------------------
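Putting the pieces together, a hedged end-to-end sketch: open a page with the client, grab its first table, and print the expanded rows. The URL is illustrative, and it is assumed the fetched page actually contains a <table>.

from robox import Robox

with Robox() as robox:
    page = robox.open("https://example.com/report")  # illustrative URL
    table = page.get_tables()[0]  # raises ValueError if no table is present
    for row in table.get_rows():
        print(row)

--------------------------------------------------------------------------------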