├── .coveragerc ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .pylintrc ├── LICENSE ├── README.md ├── cover.png ├── mypy.ini ├── poetry.lock ├── pyproject.toml ├── script.py ├── src └── tiny_web_crawler │ ├── __init__.py │ ├── core │ ├── __init__.py │ ├── spider.py │ └── spider_settings.py │ ├── logging.py │ ├── middlewares │ └── __init__.py │ ├── networking │ ├── __init__.py │ ├── fetcher.py │ ├── formatter.py │ ├── robots_txt.py │ └── validator.py │ └── webdrivers │ └── __init__.py └── tests ├── __init__.py ├── core ├── __init__.py └── test_spider.py ├── logging ├── __init__.py └── test_logging.py ├── networking ├── __init__.py ├── test_fetcher.py ├── test_formatter.py ├── test_robots_txt.py └── test_validator.py ├── test_crawler.py └── utils.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | relative_files = true 3 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: [push, pull_request] 3 | 4 | jobs: 5 | lint: 6 | runs-on: ubuntu-latest 7 | strategy: 8 | matrix: 9 | python-version: ["3.8", "3.9", "3.10", "3.11"] 10 | steps: 11 | - uses: actions/checkout@v4 12 | - name: Set up Python ${{ matrix.python-version }} 13 | uses: actions/setup-python@v5 14 | with: 15 | python-version: ${{ matrix.python-version }} 16 | - name: Install Poetry 17 | run: curl -sSL https://install.python-poetry.org | python3 - 18 | - name: Install dependencies 19 | run: | 20 | poetry install --with dev 21 | - name: Run linter :pylint 22 | run: | 23 | poetry run pylint src 24 | - name: Run mypy :type_checking 25 | run: | 26 | poetry run mypy --install-types --non-interactive src 27 | 28 | test: 29 | needs: lint 30 | runs-on: ubuntu-latest 31 | strategy: 32 | matrix: 33 | python-version: ["3.8", "3.9", "3.10", "3.11"] 34 | 35 | env: 36 | 
COVERAGE_FILE: ".coverage.${{ matrix.python-version }}" 37 | 38 | steps: 39 | - uses: actions/checkout@v4 40 | - name: Set up Python ${{ matrix.python-version }} 41 | uses: actions/setup-python@v5 42 | with: 43 | python-version: ${{ matrix.python-version }} 44 | - name: Install Poetry 45 | run: curl -sSL https://install.python-poetry.org | python3 - 46 | - name: Install dependencies 47 | run: | 48 | poetry install --with dev 49 | - name: Run tests 50 | run: | 51 | poetry run pytest --cov=./ 52 | - name: Store coverage file 53 | uses: actions/upload-artifact@v4 54 | with: 55 | name: .coverage.${{ matrix.python-version }} 56 | path: .coverage.${{ matrix.python-version }} 57 | 58 | coverage-comment: 59 | name: Create coverage comment 60 | needs: test 61 | runs-on: ubuntu-latest 62 | 63 | permissions: 64 | contents: write 65 | 66 | steps: 67 | - uses: actions/checkout@v4 68 | 69 | - uses: actions/download-artifact@v4 70 | name: Retrieve coverage files 71 | id: download 72 | with: 73 | pattern: .coverage.* 74 | merge-multiple: true 75 | 76 | - uses: py-cov-action/python-coverage-comment-action@v3.24 77 | name: Generate coverage comment 78 | id: coverage_comment 79 | with: 80 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 81 | MERGE_COVERAGE_FILES: true 82 | MINIMUM_GREEN: 80 83 | MINIMUM_ORANGE: 70 84 | 85 | publish: 86 | needs: test 87 | runs-on: ubuntu-latest 88 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') 89 | environment: publish-pip 90 | steps: 91 | - uses: actions/checkout@v4 92 | - name: Set up Python 93 | uses: actions/setup-python@v5 94 | with: 95 | python-version: '3.x' 96 | - name: Install Poetry 97 | run: curl -sSL https://install.python-poetry.org | python3 - 98 | - name: Install dependencies 99 | run: | 100 | poetry install --with dev 101 | - name: Build package 102 | run: poetry build 103 | - name: Publish package 104 | uses: pypa/gh-action-pypi-publish@v1.5.1 105 | with: 106 | user: __token__ 107 | password: ${{ 
secrets.PYPI_API_TOKEN }} 108 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # OS generated files # 7 | ###################### 8 | .DS_Store 9 | .DS_Store? 10 | 11 | 12 | # Virtual environments 13 | .env 14 | .venv 15 | env/ 16 | venv/ 17 | 18 | # Distribution / packaging 19 | build/ 20 | dist/ 21 | *.egg-info/ 22 | *.egg 23 | 24 | # VS Code 25 | .vscode/ 26 | 27 | # Anaconda 28 | *.anaconda 29 | 30 | # Spyder project settings 31 | .spyderproject 32 | 33 | # Jupyter Notebook 34 | .ipynb_checkpoints 35 | 36 | # PyCharm 37 | .idea/ 38 | 39 | # Sphinx documentation 40 | docs/_build/ 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | out.json 55 | out.json 56 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v2.3.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - repo: local 9 | hooks: 10 | - id: pylint 11 | name: Run pylint 12 | entry: poetry run pylint 13 | language: system 14 | types: [python] 15 | args: ["src"] 16 | stages: [commit] 17 | - id: pytest 18 | name: Run pytest 19 | entry: poetry run pytest 20 | language: system 21 | pass_filenames: false 22 | always_run: true 23 | stages: [push] 24 | - repo: https://github.com/pre-commit/mirrors-mypy 25 | rev: "v1.10.0" 26 | hooks: 27 | - id: mypy 28 | args: [--install-types, --non-interactive] 29 | 
-------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MAIN] 2 | 3 | # Analyse import fallback blocks. This can be used to support both Python 2 and 4 | # 3 compatible code, which means that the block might have code that exists 5 | # only in one or another interpreter, leading to false positives when analysed. 6 | analyse-fallback-blocks=no 7 | 8 | # Clear in-memory caches upon conclusion of linting. Useful if running pylint 9 | # in a server-like mode. 10 | clear-cache-post-run=no 11 | 12 | # Load and enable all available extensions. Use --list-extensions to see a list 13 | # of all available extensions. 14 | #enable-all-extensions= 15 | 16 | # In error mode, messages with a category besides ERROR or FATAL are 17 | # suppressed, and no reports are done by default. Error mode is compatible with 18 | # disabling specific errors. 19 | #errors-only= 20 | 21 | # Always return a 0 (non-error) status code, even if lint errors are found. 22 | # This is primarily useful in continuous integration scripts. 23 | #exit-zero= 24 | 25 | # A comma-separated list of package or module names from where C extensions may 26 | # be loaded. Extensions are loaded into the active Python interpreter and may 27 | # run arbitrary code. 28 | extension-pkg-allow-list= 29 | 30 | # A comma-separated list of package or module names from where C extensions may 31 | # be loaded. Extensions are loaded into the active Python interpreter and may 32 | # run arbitrary code. (This is an alternative name to extension-pkg-allow-list 33 | # for backward compatibility.) 34 | extension-pkg-whitelist= 35 | 36 | # Return non-zero exit code if any of these messages/categories are detected, 37 | # even if score is above --fail-under value. Syntax same as enable. Messages 38 | # specified are enabled, while categories only check already-enabled messages.
39 | fail-on= 40 | 41 | # Specify a score threshold under which the program will exit with error. 42 | fail-under=10 43 | 44 | # Interpret the stdin as a python script, whose filename needs to be passed as 45 | # the module_or_package argument. 46 | #from-stdin= 47 | 48 | # Files or directories to be skipped. They should be base names, not paths. 49 | ignore=CVS 50 | 51 | # Add files or directories matching the regular expressions patterns to the 52 | # ignore-list. The regex matches against paths and can be in Posix or Windows 53 | # format. Because '\\' represents the directory delimiter on Windows systems, 54 | # it can't be used as an escape character. 55 | ignore-paths= 56 | 57 | # Files or directories matching the regular expression patterns are skipped. 58 | # The regex matches against base names, not paths. The default value ignores 59 | # Emacs file locks 60 | ignore-patterns=^\.# 61 | 62 | # List of module names for which member attributes should not be checked 63 | # (useful for modules/projects where namespaces are manipulated during runtime 64 | # and thus existing member attributes cannot be deduced by static analysis). It 65 | # supports qualified module names, as well as Unix pattern matching. 66 | ignored-modules= 67 | 68 | # Python code to execute, usually for sys.path manipulation such as 69 | # pygtk.require(). 70 | #init-hook= 71 | 72 | # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the 73 | # number of processors available to use, and will cap the count on Windows to 74 | # avoid hangs. 75 | jobs=1 76 | 77 | # Control the amount of potential inferred values when inferring a single 78 | # object. This can help the performance when dealing with large functions or 79 | # complex, nested conditions. 80 | limit-inference-results=100 81 | 82 | # List of plugins (as comma separated values of python module names) to load, 83 | # usually to register additional checkers. 
84 | load-plugins= 85 | 86 | # Pickle collected data for later comparisons. 87 | persistent=yes 88 | 89 | # Minimum Python version to use for version dependent checks. Will default to 90 | # the version used to run pylint. 91 | py-version=3.12 92 | 93 | # Discover python modules and packages in the file system subtree. 94 | recursive=no 95 | 96 | # Add paths to the list of the source roots. Supports globbing patterns. The 97 | # source root is an absolute path or a path relative to the current working 98 | # directory used to determine a package namespace for modules located under the 99 | # source root. 100 | source-roots= 101 | 102 | # When enabled, pylint would attempt to guess common misconfiguration and emit 103 | # user-friendly hints instead of false-positive error messages. 104 | suggestion-mode=yes 105 | 106 | # Allow loading of arbitrary C extensions. Extensions are imported into the 107 | # active Python interpreter and may run arbitrary code. 108 | unsafe-load-any-extension=no 109 | 110 | # In verbose mode, extra non-checker-related info will be displayed. 111 | #verbose= 112 | 113 | 114 | [BASIC] 115 | 116 | # Naming style matching correct argument names. 117 | argument-naming-style=snake_case 118 | 119 | # Regular expression matching correct argument names. Overrides argument- 120 | # naming-style. If left empty, argument names will be checked with the set 121 | # naming style. 122 | #argument-rgx= 123 | 124 | # Naming style matching correct attribute names. 125 | attr-naming-style=snake_case 126 | 127 | # Regular expression matching correct attribute names. Overrides attr-naming- 128 | # style. If left empty, attribute names will be checked with the set naming 129 | # style. 130 | #attr-rgx= 131 | 132 | # Bad variable names which should always be refused, separated by a comma. 133 | bad-names=foo, 134 | bar, 135 | baz, 136 | toto, 137 | tutu, 138 | tata 139 | 140 | # Bad variable names regexes, separated by a comma. 
If names match any regex, 141 | # they will always be refused 142 | bad-names-rgxs= 143 | 144 | # Naming style matching correct class attribute names. 145 | class-attribute-naming-style=any 146 | 147 | # Regular expression matching correct class attribute names. Overrides class- 148 | # attribute-naming-style. If left empty, class attribute names will be checked 149 | # with the set naming style. 150 | #class-attribute-rgx= 151 | 152 | # Naming style matching correct class constant names. 153 | class-const-naming-style=UPPER_CASE 154 | 155 | # Regular expression matching correct class constant names. Overrides class- 156 | # const-naming-style. If left empty, class constant names will be checked with 157 | # the set naming style. 158 | #class-const-rgx= 159 | 160 | # Naming style matching correct class names. 161 | class-naming-style=PascalCase 162 | 163 | # Regular expression matching correct class names. Overrides class-naming- 164 | # style. If left empty, class names will be checked with the set naming style. 165 | #class-rgx= 166 | 167 | # Naming style matching correct constant names. 168 | const-naming-style=UPPER_CASE 169 | 170 | # Regular expression matching correct constant names. Overrides const-naming- 171 | # style. If left empty, constant names will be checked with the set naming 172 | # style. 173 | #const-rgx= 174 | 175 | # Minimum line length for functions/classes that require docstrings, shorter 176 | # ones are exempt. 177 | docstring-min-length=-1 178 | 179 | # Naming style matching correct function names. 180 | function-naming-style=snake_case 181 | 182 | # Regular expression matching correct function names. Overrides function- 183 | # naming-style. If left empty, function names will be checked with the set 184 | # naming style. 185 | #function-rgx= 186 | 187 | # Good variable names which should always be accepted, separated by a comma. 
188 | good-names=i, 189 | j, 190 | k, 191 | ex, 192 | Run, 193 | _ 194 | 195 | # Good variable names regexes, separated by a comma. If names match any regex, 196 | # they will always be accepted 197 | good-names-rgxs= 198 | 199 | # Include a hint for the correct naming format with invalid-name. 200 | include-naming-hint=no 201 | 202 | # Naming style matching correct inline iteration names. 203 | inlinevar-naming-style=any 204 | 205 | # Regular expression matching correct inline iteration names. Overrides 206 | # inlinevar-naming-style. If left empty, inline iteration names will be checked 207 | # with the set naming style. 208 | #inlinevar-rgx= 209 | 210 | # Naming style matching correct method names. 211 | method-naming-style=snake_case 212 | 213 | # Regular expression matching correct method names. Overrides method-naming- 214 | # style. If left empty, method names will be checked with the set naming style. 215 | #method-rgx= 216 | 217 | # Naming style matching correct module names. 218 | module-naming-style=snake_case 219 | 220 | # Regular expression matching correct module names. Overrides module-naming- 221 | # style. If left empty, module names will be checked with the set naming style. 222 | #module-rgx= 223 | 224 | # Colon-delimited sets of names that determine each other's naming style when 225 | # the name regexes allow several styles. 226 | name-group= 227 | 228 | # Regular expression which should only match function or class names that do 229 | # not require a docstring. 230 | no-docstring-rgx=^_ 231 | 232 | # List of decorators that produce properties, such as abc.abstractproperty. Add 233 | # to this list to register other decorators that produce valid properties. 234 | # These decorators are taken in consideration only for invalid-name. 235 | property-classes=abc.abstractproperty 236 | 237 | # Regular expression matching correct type alias names. If left empty, type 238 | # alias names will be checked with the set naming style. 
239 | #typealias-rgx= 240 | 241 | # Regular expression matching correct type variable names. If left empty, type 242 | # variable names will be checked with the set naming style. 243 | #typevar-rgx= 244 | 245 | # Naming style matching correct variable names. 246 | variable-naming-style=snake_case 247 | 248 | # Regular expression matching correct variable names. Overrides variable- 249 | # naming-style. If left empty, variable names will be checked with the set 250 | # naming style. 251 | #variable-rgx= 252 | 253 | 254 | [CLASSES] 255 | 256 | # Warn about protected attribute access inside special methods 257 | check-protected-access-in-special-methods=no 258 | 259 | # List of method names used to declare (i.e. assign) instance attributes. 260 | defining-attr-methods=__init__, 261 | __new__, 262 | setUp, 263 | asyncSetUp, 264 | __post_init__ 265 | 266 | # List of member names, which should be excluded from the protected access 267 | # warning. 268 | exclude-protected=_asdict,_fields,_replace,_source,_make,os._exit 269 | 270 | # List of valid names for the first argument in a class method. 271 | valid-classmethod-first-arg=cls 272 | 273 | # List of valid names for the first argument in a metaclass class method. 274 | valid-metaclass-classmethod-first-arg=mcs 275 | 276 | 277 | [DESIGN] 278 | 279 | # List of regular expressions of class ancestor names to ignore when counting 280 | # public methods (see R0903) 281 | exclude-too-few-public-methods= 282 | 283 | # List of qualified class names to ignore when counting class parents (see 284 | # R0901) 285 | ignored-parents= 286 | 287 | # Maximum number of arguments for function / method. 288 | max-args=10 289 | 290 | # Maximum number of attributes for a class (see R0902). 291 | max-attributes=15 292 | 293 | # Maximum number of boolean expressions in an if statement (see R0916). 294 | max-bool-expr=5 295 | 296 | # Maximum number of branch for function / method body. 
297 | max-branches=12 298 | 299 | # Maximum number of locals for function / method body. 300 | max-locals=15 301 | 302 | # Maximum number of parents for a class (see R0901). 303 | max-parents=7 304 | 305 | # Maximum number of public methods for a class (see R0904). 306 | max-public-methods=20 307 | 308 | # Maximum number of return / yield for function / method body. 309 | max-returns=6 310 | 311 | # Maximum number of statements in function / method body. 312 | max-statements=50 313 | 314 | # Minimum number of public methods for a class (see R0903). 315 | min-public-methods=2 316 | 317 | 318 | [EXCEPTIONS] 319 | 320 | # Exceptions that will emit a warning when caught. 321 | overgeneral-exceptions=builtins.BaseException,builtins.Exception 322 | 323 | 324 | [FORMAT] 325 | 326 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 327 | expected-line-ending-format= 328 | 329 | # Regexp for a line that is allowed to be longer than the limit. 330 | ignore-long-lines=^\s*(# )??$ 331 | 332 | # Number of spaces of indent required inside a hanging or continued line. 333 | indent-after-paren=4 334 | 335 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 336 | # tab). 337 | indent-string=' ' 338 | 339 | # Maximum number of characters on a single line. 340 | max-line-length=120 341 | 342 | # Maximum number of lines in a module. 343 | max-module-lines=1000 344 | 345 | # Allow the body of a class to be on the same line as the declaration if body 346 | # contains single statement. 347 | single-line-class-stmt=no 348 | 349 | # Allow the body of an if to be on the same line as the test if there is no 350 | # else. 351 | single-line-if-stmt=no 352 | 353 | 354 | [IMPORTS] 355 | 356 | # List of modules that can be imported at any level, not just the top level 357 | # one. 358 | allow-any-import-level= 359 | 360 | # Allow explicit reexports by alias from a package __init__. 
361 | allow-reexport-from-package=no 362 | 363 | # Allow wildcard imports from modules that define __all__. 364 | allow-wildcard-with-all=no 365 | 366 | # Deprecated modules which should not be used, separated by a comma. 367 | deprecated-modules= 368 | 369 | # Output a graph (.gv or any supported image format) of external dependencies 370 | # to the given file (report RP0402 must not be disabled). 371 | ext-import-graph= 372 | 373 | # Output a graph (.gv or any supported image format) of all (i.e. internal and 374 | # external) dependencies to the given file (report RP0402 must not be 375 | # disabled). 376 | import-graph= 377 | 378 | # Output a graph (.gv or any supported image format) of internal dependencies 379 | # to the given file (report RP0402 must not be disabled). 380 | int-import-graph= 381 | 382 | # Force import order to recognize a module as part of the standard 383 | # compatibility libraries. 384 | known-standard-library= 385 | 386 | # Force import order to recognize a module as part of a third party library. 387 | known-third-party=enchant 388 | 389 | # Couples of modules and preferred modules, separated by a comma. 390 | preferred-modules= 391 | 392 | 393 | [LOGGING] 394 | 395 | # The type of string formatting that logging methods do. `old` means using % 396 | # formatting, `new` is for `{}` formatting. 397 | logging-format-style=old 398 | 399 | # Logging modules to check that the string format arguments are in logging 400 | # function parameter format. 401 | logging-modules=logging 402 | 403 | 404 | [MESSAGES CONTROL] 405 | 406 | # Only show warnings with the listed confidence levels. Leave empty to show 407 | # all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE, 408 | # UNDEFINED. 409 | confidence=HIGH, 410 | CONTROL_FLOW, 411 | INFERENCE, 412 | INFERENCE_FAILURE, 413 | UNDEFINED 414 | 415 | # Disable the message, report, category or checker with the given id(s). 
You 416 | # can either give multiple identifiers separated by comma (,) or put this 417 | # option multiple times (only on the command line, not in the configuration 418 | # file where it should appear only once). You can also use "--disable=all" to 419 | # disable everything first and then re-enable specific checks. For example, if 420 | # you want to run only the similarities checker, you can use "--disable=all 421 | # --enable=similarities". If you want to run only the classes checker, but have 422 | # no Warning level messages displayed, use "--disable=all --enable=classes 423 | # --disable=W". 424 | disable=raw-checker-failed, 425 | bad-inline-option, 426 | locally-disabled, 427 | file-ignored, 428 | suppressed-message, 429 | useless-suppression, 430 | deprecated-pragma, 431 | use-symbolic-message-instead, 432 | use-implicit-booleaness-not-comparison-to-string, 433 | use-implicit-booleaness-not-comparison-to-zero, 434 | missing-function-docstring, 435 | missing-class-docstring, 436 | missing-module-docstring 437 | 438 | # Enable the message, report, category or checker with the given id(s). You can 439 | # either give multiple identifier separated by comma (,) or put this option 440 | # multiple time (only on the command line, not in the configuration file where 441 | # it should appear only once). See also the "--disable" option for examples. 442 | enable= 443 | 444 | 445 | [METHOD_ARGS] 446 | 447 | # List of qualified names (i.e., library.method) which require a timeout 448 | # parameter e.g. 'requests.api.get,requests.api.post' 449 | timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request 450 | 451 | 452 | [MISCELLANEOUS] 453 | 454 | # List of note tags to take in consideration, separated by a comma. 455 | notes=FIXME, 456 | XXX, 457 | TODO 458 | 459 | # Regular expression of note tags to take in consideration. 
460 | notes-rgx= 461 | 462 | 463 | [REFACTORING] 464 | 465 | # Maximum number of nested blocks for function / method body 466 | max-nested-blocks=5 467 | 468 | # Complete name of functions that never returns. When checking for 469 | # inconsistent-return-statements if a never returning function is called then 470 | # it will be considered as an explicit return statement and no message will be 471 | # printed. 472 | never-returning-functions=sys.exit,argparse.parse_error 473 | 474 | 475 | [REPORTS] 476 | 477 | # Python expression which should return a score less than or equal to 10. You 478 | # have access to the variables 'fatal', 'error', 'warning', 'refactor', 479 | # 'convention', and 'info' which contain the number of messages in each 480 | # category, as well as 'statement' which is the total number of statements 481 | # analyzed. This score is used by the global evaluation report (RP0004). 482 | evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) 483 | 484 | # Template used to display messages. This is a python new-style format string 485 | # used to format the message information. See doc for all details. 486 | msg-template= 487 | 488 | # Set the output format. Available formats are: text, parseable, colorized, 489 | # json2 (improved json format), json (old json format) and msvs (visual 490 | # studio). You can also give a reporter class, e.g. 491 | # mypackage.mymodule.MyReporterClass. 492 | #output-format= 493 | 494 | # Tells whether to display a full report or only the messages. 495 | reports=no 496 | 497 | # Activate the evaluation score. 
498 | score=yes 499 | 500 | 501 | [SIMILARITIES] 502 | 503 | # Comments are removed from the similarity computation 504 | ignore-comments=yes 505 | 506 | # Docstrings are removed from the similarity computation 507 | ignore-docstrings=yes 508 | 509 | # Imports are removed from the similarity computation 510 | ignore-imports=yes 511 | 512 | # Signatures are removed from the similarity computation 513 | ignore-signatures=yes 514 | 515 | # Minimum lines number of a similarity. 516 | min-similarity-lines=4 517 | 518 | 519 | [SPELLING] 520 | 521 | # Limits count of emitted suggestions for spelling mistakes. 522 | max-spelling-suggestions=4 523 | 524 | # Spelling dictionary name. No available dictionaries : You need to install 525 | # both the python package and the system dependency for enchant to work. 526 | spelling-dict= 527 | 528 | # List of comma separated words that should be considered directives if they 529 | # appear at the beginning of a comment and should not be checked. 530 | spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: 531 | 532 | # List of comma separated words that should not be checked. 533 | spelling-ignore-words= 534 | 535 | # A path to a file that contains the private dictionary; one word per line. 536 | spelling-private-dict-file= 537 | 538 | # Tells whether to store unknown words to the private dictionary (see the 539 | # --spelling-private-dict-file option) instead of raising a message. 540 | spelling-store-unknown-words=no 541 | 542 | 543 | [STRING] 544 | 545 | # This flag controls whether inconsistent-quotes generates a warning when the 546 | # character used as a quote delimiter is used inconsistently within a module. 547 | check-quote-consistency=no 548 | 549 | # This flag controls whether the implicit-str-concat should generate a warning 550 | # on implicit string concatenation in sequences defined over several lines. 
551 | check-str-concat-over-line-jumps=no 552 | 553 | 554 | [TYPECHECK] 555 | 556 | # List of decorators that produce context managers, such as 557 | # contextlib.contextmanager. Add to this list to register other decorators that 558 | # produce valid context managers. 559 | contextmanager-decorators=contextlib.contextmanager 560 | 561 | # List of members which are set dynamically and missed by pylint inference 562 | # system, and so shouldn't trigger E1101 when accessed. Python regular 563 | # expressions are accepted. 564 | generated-members= 565 | 566 | # Tells whether to warn about missing members when the owner of the attribute 567 | # is inferred to be None. 568 | ignore-none=yes 569 | 570 | # This flag controls whether pylint should warn about no-member and similar 571 | # checks whenever an opaque object is returned when inferring. The inference 572 | # can return multiple potential results while evaluating a Python object, but 573 | # some branches might not be evaluated, which results in partial inference. In 574 | # that case, it might be useful to still emit no-member and other checks for 575 | # the rest of the inferred objects. 576 | ignore-on-opaque-inference=yes 577 | 578 | # List of symbolic message names to ignore for Mixin members. 579 | ignored-checks-for-mixins=no-member, 580 | not-async-context-manager, 581 | not-context-manager, 582 | attribute-defined-outside-init 583 | 584 | # List of class names for which member attributes should not be checked (useful 585 | # for classes with dynamically set attributes). This supports the use of 586 | # qualified names. 587 | ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace 588 | 589 | # Show a hint with possible names when a member name was not found. The aspect 590 | # of finding the hint is based on edit distance. 
591 | missing-member-hint=yes 592 | 593 | # The minimum edit distance a name should have in order to be considered a 594 | # similar match for a missing member name. 595 | missing-member-hint-distance=1 596 | 597 | # The total number of similar names that should be taken in consideration when 598 | # showing a hint for a missing member. 599 | missing-member-max-choices=1 600 | 601 | # Regex pattern to define which classes are considered mixins. 602 | mixin-class-rgx=.*[Mm]ixin 603 | 604 | # List of decorators that change the signature of a decorated function. 605 | signature-mutators= 606 | 607 | 608 | [VARIABLES] 609 | 610 | # List of additional names supposed to be defined in builtins. Remember that 611 | # you should avoid defining new builtins when possible. 612 | additional-builtins= 613 | 614 | # Tells whether unused global variables should be treated as a violation. 615 | allow-global-unused-variables=yes 616 | 617 | # List of names allowed to shadow builtins 618 | allowed-redefined-builtins= 619 | 620 | # List of strings which can identify a callback function by name. A callback 621 | # name must start or end with one of those strings. 622 | callbacks=cb_, 623 | _cb 624 | 625 | # A regular expression matching the name of dummy variables (i.e. expected to 626 | # not be used). 627 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ 628 | 629 | # Argument names that match this expression will be ignored. 630 | ignored-argument-names=_.*|^ignored_|^unused_ 631 | 632 | # Tells whether we should check for unused import in __init__ files. 633 | init-import=no 634 | 635 | # List of qualified module names which can have objects that can redefine 636 | # builtins. 
637 | redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io 638 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Indrajith Indraprastham 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![cover](cover.png) 2 | # Tiny Web Crawler 3 | 4 | [![CI](https://github.com/indrajithi/tiny-web-crawler/actions/workflows/ci.yml/badge.svg?branch=master)](https://github.com/indrajithi/tiny-web-crawler/actions/workflows/ci.yml) 5 | [![Coverage badge](https://img.shields.io/badge/dynamic/json?color=brightgreen&label=coverage&query=%24.message&url=https%3A%2F%2Fraw.githubusercontent.com%2Findrajithi%2Ftiny-web-crawler%2Fpython-coverage-comment-action-data%2Fendpoint.json)](https://htmlpreview.github.io/?https://github.com/indrajithi/tiny-web-crawler/blob/python-coverage-comment-action-data/htmlcov/index.html) 6 | [![Stable Version](https://img.shields.io/pypi/v/tiny-web-crawler?label=stable)](https://pypi.org/project/tiny-web-crawler/#history) 7 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 8 | [![Download Stats](https://img.shields.io/pypi/dm/tiny-web-crawler)](https://pypistats.org/packages/tiny-web-crawler) 9 | [![Discord](https://img.shields.io/badge/Discord-Join%20Us-7289DA?logo=discord&style=flat-square)](https://discord.gg/avfTZ7K2) 10 | 11 | 12 | A simple and efficient web crawler for Python. 
13 | 14 | 15 | ## Features 16 | 17 | - Crawl web pages and extract links starting from a root URL recursively 18 | - Concurrent workers and custom delay 19 | - Handle relative and absolute URLs 20 | - Designed with simplicity in mind, making it easy to use and extend for various web crawling tasks 21 | 22 | ## Installation 23 | 24 | Install using pip: 25 | 26 | ```sh 27 | pip install tiny-web-crawler 28 | ``` 29 | 30 | ## Usage 31 | 32 | ```python 33 | from tiny_web_crawler import Spider 34 | from tiny_web_crawler import SpiderSettings 35 | 36 | settings = SpiderSettings( 37 | root_url = 'http://github.com', 38 | max_links = 2 39 | ) 40 | 41 | spider = Spider(settings) 42 | spider.start() 43 | 44 | 45 | # Set workers and delay (default: delay is 0.5 sec and verbose is True) 46 | # If you do not want delay, set delay=0 47 | 48 | settings = SpiderSettings( 49 | root_url = 'https://github.com', 50 | max_links = 5, 51 | max_workers = 5, 52 | delay = 1, 53 | verbose = False 54 | ) 55 | 56 | spider = Spider(settings) 57 | spider.start() 58 | 59 | ``` 60 | 61 | 62 | ## Output Format 63 | 64 | Crawled output sample for `https://github.com` 65 | 66 | ```json 67 | { 68 | "http://github.com": { 69 | "urls": [ 70 | "http://github.com/", 71 | "https://githubuniverse.com/", 72 | "..." 73 | ], 74 | "https://github.com/solutions/ci-cd": { 75 | "urls": [ 76 | "https://github.com/solutions/ci-cd/", 77 | "https://githubuniverse.com/", 78 | "..." 79 | ] 80 | } 81 | } 82 | } 83 | ``` 84 | 85 | 86 | ## Contributing 87 | 88 | Thank you for considering contributing. 89 | 90 | - If you are a first-time contributor, you can pick a [`good-first-issue`](https://github.com/indrajithi/tiny-web-crawler/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22) and get started. 91 | - Please feel free to ask questions. 92 | - Before starting work on an issue, please get it assigned to you so that we can avoid multiple people working on the same issue. 
93 | - We are working toward our first major release. Please check this [`issue`](https://github.com/indrajithi/tiny-web-crawler/issues/24) and see if anything interests you. 94 | 95 | ### Dev setup 96 | 97 | - Install poetry on your system: `pipx install poetry` 98 | - Clone the repo you forked 99 | - Create a venv or use `poetry shell` 100 | - Run `poetry install --with dev` 101 | - `pre-commit install` ([see](https://github.com/indrajithi/tiny-web-crawler/issues/23)) 102 | - `pre-commit install --hook-type pre-push` 103 | 104 | ### Before raising a PR, please make sure you have these checks covered 105 | 106 | - [x] An issue exists or is created which addresses the PR 107 | - [x] Tests are written for the changes 108 | - [x] All lint checks and tests pass 109 | -------------------------------------------------------------------------------- /cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCrawl-AI/datacrawl/7ce2bed8b26f96ee4a0c8d6b671318d4099d90a8/cover.png -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | python_version = 3.12 3 | disallow_untyped_calls = True 4 | disallow_untyped_defs = True 5 | ignore_missing_imports = True 6 | 7 | # Exclude the build directory 8 | exclude = build/ 9 | -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. 2 | 3 | [[package]] 4 | name = "astroid" 5 | version = "3.0.3" 6 | description = "An abstract syntax tree for Python with inference support." 
7 | optional = false 8 | python-versions = ">=3.8.0" 9 | files = [ 10 | {file = "astroid-3.0.3-py3-none-any.whl", hash = "sha256:92fcf218b89f449cdf9f7b39a269f8d5d617b27be68434912e11e79203963a17"}, 11 | {file = "astroid-3.0.3.tar.gz", hash = "sha256:4148645659b08b70d72460ed1921158027a9e53ae8b7234149b1400eddacbb93"}, 12 | ] 13 | 14 | [package.dependencies] 15 | typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""} 16 | 17 | [[package]] 18 | name = "atomicwrites" 19 | version = "1.4.1" 20 | description = "Atomic file writes." 21 | optional = false 22 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 23 | files = [ 24 | {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"}, 25 | ] 26 | 27 | [[package]] 28 | name = "attrs" 29 | version = "23.2.0" 30 | description = "Classes Without Boilerplate" 31 | optional = false 32 | python-versions = ">=3.7" 33 | files = [ 34 | {file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"}, 35 | {file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"}, 36 | ] 37 | 38 | [package.extras] 39 | cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] 40 | dev = ["attrs[tests]", "pre-commit"] 41 | docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] 42 | tests = ["attrs[tests-no-zope]", "zope-interface"] 43 | tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"] 44 | tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] 45 | 46 | [[package]] 47 | name = "beautifulsoup4" 48 | version = "4.12.3" 49 | description = "Screen-scraping library" 50 | optional = false 51 | python-versions = ">=3.6.0" 52 | files = [ 53 | {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = 
"sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"}, 54 | {file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"}, 55 | ] 56 | 57 | [package.dependencies] 58 | soupsieve = ">1.2" 59 | 60 | [package.extras] 61 | cchardet = ["cchardet"] 62 | chardet = ["chardet"] 63 | charset-normalizer = ["charset-normalizer"] 64 | html5lib = ["html5lib"] 65 | lxml = ["lxml"] 66 | 67 | [[package]] 68 | name = "certifi" 69 | version = "2024.6.2" 70 | description = "Python package for providing Mozilla's CA Bundle." 71 | optional = false 72 | python-versions = ">=3.6" 73 | files = [ 74 | {file = "certifi-2024.6.2-py3-none-any.whl", hash = "sha256:ddc6c8ce995e6987e7faf5e3f1b02b302836a0e5d98ece18392cb1a36c72ad56"}, 75 | {file = "certifi-2024.6.2.tar.gz", hash = "sha256:3cd43f1c6fa7dedc5899d69d3ad0398fd018ad1a17fba83ddaf78aa46c747516"}, 76 | ] 77 | 78 | [[package]] 79 | name = "cfgv" 80 | version = "3.4.0" 81 | description = "Validate configuration and produce human readable error messages." 82 | optional = false 83 | python-versions = ">=3.8" 84 | files = [ 85 | {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, 86 | {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, 87 | ] 88 | 89 | [[package]] 90 | name = "charset-normalizer" 91 | version = "3.3.2" 92 | description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
93 | optional = false 94 | python-versions = ">=3.7.0" 95 | files = [ 96 | {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, 97 | {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, 98 | {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, 99 | {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, 100 | {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, 101 | {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, 102 | {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, 103 | {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, 104 | {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, 105 | {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, 106 | {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, 107 | {file = 
"charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, 108 | {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, 109 | {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, 110 | {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, 111 | {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, 112 | {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, 113 | {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, 114 | {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, 115 | {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, 116 | {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, 117 | {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, 118 | {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, 119 | 
{file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, 120 | {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, 121 | {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, 122 | {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, 123 | {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, 124 | {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, 125 | {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, 126 | {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, 127 | {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, 128 | {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, 129 | {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, 130 | {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, 131 | {file = 
"charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, 132 | {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, 133 | {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, 134 | {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, 135 | {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, 136 | {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, 137 | {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, 138 | {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, 139 | {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, 140 | {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, 141 | {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, 142 | {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = 
"sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, 143 | {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, 144 | {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, 145 | {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, 146 | {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, 147 | {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, 148 | {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, 149 | {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, 150 | {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, 151 | {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, 152 | {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, 153 | {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, 154 | {file = 
"charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, 155 | {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, 156 | {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, 157 | {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, 158 | {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, 159 | {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, 160 | {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, 161 | {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, 162 | {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, 163 | {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, 164 | {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, 165 | {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = 
"sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, 166 | {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, 167 | {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, 168 | {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, 169 | {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, 170 | {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, 171 | {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, 172 | {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, 173 | {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, 174 | {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, 175 | {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, 176 | {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, 177 | {file = 
"charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, 178 | {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, 179 | {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, 180 | {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, 181 | {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, 182 | {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, 183 | {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, 184 | {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, 185 | {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, 186 | ] 187 | 188 | [[package]] 189 | name = "colorama" 190 | version = "0.4.6" 191 | description = "Cross-platform colored terminal text." 
192 | optional = false 193 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" 194 | files = [ 195 | {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, 196 | {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, 197 | ] 198 | 199 | [[package]] 200 | name = "coverage" 201 | version = "7.5.3" 202 | description = "Code coverage measurement for Python" 203 | optional = false 204 | python-versions = ">=3.8" 205 | files = [ 206 | {file = "coverage-7.5.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a6519d917abb15e12380406d721e37613e2a67d166f9fb7e5a8ce0375744cd45"}, 207 | {file = "coverage-7.5.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aea7da970f1feccf48be7335f8b2ca64baf9b589d79e05b9397a06696ce1a1ec"}, 208 | {file = "coverage-7.5.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:923b7b1c717bd0f0f92d862d1ff51d9b2b55dbbd133e05680204465f454bb286"}, 209 | {file = "coverage-7.5.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62bda40da1e68898186f274f832ef3e759ce929da9a9fd9fcf265956de269dbc"}, 210 | {file = "coverage-7.5.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8b7339180d00de83e930358223c617cc343dd08e1aa5ec7b06c3a121aec4e1d"}, 211 | {file = "coverage-7.5.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:25a5caf742c6195e08002d3b6c2dd6947e50efc5fc2c2205f61ecb47592d2d83"}, 212 | {file = "coverage-7.5.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:05ac5f60faa0c704c0f7e6a5cbfd6f02101ed05e0aee4d2822637a9e672c998d"}, 213 | {file = "coverage-7.5.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:239a4e75e09c2b12ea478d28815acf83334d32e722e7433471fbf641c606344c"}, 214 | {file = "coverage-7.5.3-cp310-cp310-win32.whl", 
hash = "sha256:a5812840d1d00eafae6585aba38021f90a705a25b8216ec7f66aebe5b619fb84"}, 215 | {file = "coverage-7.5.3-cp310-cp310-win_amd64.whl", hash = "sha256:33ca90a0eb29225f195e30684ba4a6db05dbef03c2ccd50b9077714c48153cac"}, 216 | {file = "coverage-7.5.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f81bc26d609bf0fbc622c7122ba6307993c83c795d2d6f6f6fd8c000a770d974"}, 217 | {file = "coverage-7.5.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7cec2af81f9e7569280822be68bd57e51b86d42e59ea30d10ebdbb22d2cb7232"}, 218 | {file = "coverage-7.5.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55f689f846661e3f26efa535071775d0483388a1ccfab899df72924805e9e7cd"}, 219 | {file = "coverage-7.5.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50084d3516aa263791198913a17354bd1dc627d3c1639209640b9cac3fef5807"}, 220 | {file = "coverage-7.5.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:341dd8f61c26337c37988345ca5c8ccabeff33093a26953a1ac72e7d0103c4fb"}, 221 | {file = "coverage-7.5.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ab0b028165eea880af12f66086694768f2c3139b2c31ad5e032c8edbafca6ffc"}, 222 | {file = "coverage-7.5.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:5bc5a8c87714b0c67cfeb4c7caa82b2d71e8864d1a46aa990b5588fa953673b8"}, 223 | {file = "coverage-7.5.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:38a3b98dae8a7c9057bd91fbf3415c05e700a5114c5f1b5b0ea5f8f429ba6614"}, 224 | {file = "coverage-7.5.3-cp311-cp311-win32.whl", hash = "sha256:fcf7d1d6f5da887ca04302db8e0e0cf56ce9a5e05f202720e49b3e8157ddb9a9"}, 225 | {file = "coverage-7.5.3-cp311-cp311-win_amd64.whl", hash = "sha256:8c836309931839cca658a78a888dab9676b5c988d0dd34ca247f5f3e679f4e7a"}, 226 | {file = "coverage-7.5.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:296a7d9bbc598e8744c00f7a6cecf1da9b30ae9ad51c566291ff1314e6cbbed8"}, 
227 | {file = "coverage-7.5.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:34d6d21d8795a97b14d503dcaf74226ae51eb1f2bd41015d3ef332a24d0a17b3"}, 228 | {file = "coverage-7.5.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e317953bb4c074c06c798a11dbdd2cf9979dbcaa8ccc0fa4701d80042d4ebf1"}, 229 | {file = "coverage-7.5.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:705f3d7c2b098c40f5b81790a5fedb274113373d4d1a69e65f8b68b0cc26f6db"}, 230 | {file = "coverage-7.5.3-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1196e13c45e327d6cd0b6e471530a1882f1017eb83c6229fc613cd1a11b53cd"}, 231 | {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:015eddc5ccd5364dcb902eaecf9515636806fa1e0d5bef5769d06d0f31b54523"}, 232 | {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:fd27d8b49e574e50caa65196d908f80e4dff64d7e592d0c59788b45aad7e8b35"}, 233 | {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:33fc65740267222fc02975c061eb7167185fef4cc8f2770267ee8bf7d6a42f84"}, 234 | {file = "coverage-7.5.3-cp312-cp312-win32.whl", hash = "sha256:7b2a19e13dfb5c8e145c7a6ea959485ee8e2204699903c88c7d25283584bfc08"}, 235 | {file = "coverage-7.5.3-cp312-cp312-win_amd64.whl", hash = "sha256:0bbddc54bbacfc09b3edaec644d4ac90c08ee8ed4844b0f86227dcda2d428fcb"}, 236 | {file = "coverage-7.5.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f78300789a708ac1f17e134593f577407d52d0417305435b134805c4fb135adb"}, 237 | {file = "coverage-7.5.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b368e1aee1b9b75757942d44d7598dcd22a9dbb126affcbba82d15917f0cc155"}, 238 | {file = "coverage-7.5.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f836c174c3a7f639bded48ec913f348c4761cbf49de4a20a956d3431a7c9cb24"}, 239 | {file = 
"coverage-7.5.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:244f509f126dc71369393ce5fea17c0592c40ee44e607b6d855e9c4ac57aac98"}, 240 | {file = "coverage-7.5.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4c2872b3c91f9baa836147ca33650dc5c172e9273c808c3c3199c75490e709d"}, 241 | {file = "coverage-7.5.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:dd4b3355b01273a56b20c219e74e7549e14370b31a4ffe42706a8cda91f19f6d"}, 242 | {file = "coverage-7.5.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:f542287b1489c7a860d43a7d8883e27ca62ab84ca53c965d11dac1d3a1fab7ce"}, 243 | {file = "coverage-7.5.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:75e3f4e86804023e991096b29e147e635f5e2568f77883a1e6eed74512659ab0"}, 244 | {file = "coverage-7.5.3-cp38-cp38-win32.whl", hash = "sha256:c59d2ad092dc0551d9f79d9d44d005c945ba95832a6798f98f9216ede3d5f485"}, 245 | {file = "coverage-7.5.3-cp38-cp38-win_amd64.whl", hash = "sha256:fa21a04112c59ad54f69d80e376f7f9d0f5f9123ab87ecd18fbb9ec3a2beed56"}, 246 | {file = "coverage-7.5.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f5102a92855d518b0996eb197772f5ac2a527c0ec617124ad5242a3af5e25f85"}, 247 | {file = "coverage-7.5.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d1da0a2e3b37b745a2b2a678a4c796462cf753aebf94edcc87dcc6b8641eae31"}, 248 | {file = "coverage-7.5.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8383a6c8cefba1b7cecc0149415046b6fc38836295bc4c84e820872eb5478b3d"}, 249 | {file = "coverage-7.5.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9aad68c3f2566dfae84bf46295a79e79d904e1c21ccfc66de88cd446f8686341"}, 250 | {file = "coverage-7.5.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:2e079c9ec772fedbade9d7ebc36202a1d9ef7291bc9b3a024ca395c4d52853d7"}, 251 | {file = "coverage-7.5.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bde997cac85fcac227b27d4fb2c7608a2c5f6558469b0eb704c5726ae49e1c52"}, 252 | {file = "coverage-7.5.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:990fb20b32990b2ce2c5f974c3e738c9358b2735bc05075d50a6f36721b8f303"}, 253 | {file = "coverage-7.5.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3d5a67f0da401e105753d474369ab034c7bae51a4c31c77d94030d59e41df5bd"}, 254 | {file = "coverage-7.5.3-cp39-cp39-win32.whl", hash = "sha256:e08c470c2eb01977d221fd87495b44867a56d4d594f43739a8028f8646a51e0d"}, 255 | {file = "coverage-7.5.3-cp39-cp39-win_amd64.whl", hash = "sha256:1d2a830ade66d3563bb61d1e3c77c8def97b30ed91e166c67d0632c018f380f0"}, 256 | {file = "coverage-7.5.3-pp38.pp39.pp310-none-any.whl", hash = "sha256:3538d8fb1ee9bdd2e2692b3b18c22bb1c19ffbefd06880f5ac496e42d7bb3884"}, 257 | {file = "coverage-7.5.3.tar.gz", hash = "sha256:04aefca5190d1dc7a53a4c1a5a7f8568811306d7a8ee231c42fb69215571944f"}, 258 | ] 259 | 260 | [package.dependencies] 261 | tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} 262 | 263 | [package.extras] 264 | toml = ["tomli"] 265 | 266 | [[package]] 267 | name = "dill" 268 | version = "0.3.8" 269 | description = "serialize all of Python" 270 | optional = false 271 | python-versions = ">=3.8" 272 | files = [ 273 | {file = "dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7"}, 274 | {file = "dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca"}, 275 | ] 276 | 277 | [package.extras] 278 | graph = ["objgraph (>=1.7.2)"] 279 | profile = ["gprof2dot (>=2022.7.29)"] 280 | 281 | [[package]] 282 | name = "distlib" 283 | version = "0.3.8" 284 | description = "Distribution utilities" 285 | optional = false 286 | python-versions = "*" 287 | 
files = [ 288 | {file = "distlib-0.3.8-py2.py3-none-any.whl", hash = "sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784"}, 289 | {file = "distlib-0.3.8.tar.gz", hash = "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64"}, 290 | ] 291 | 292 | [[package]] 293 | name = "filelock" 294 | version = "3.15.1" 295 | description = "A platform independent file lock." 296 | optional = false 297 | python-versions = ">=3.8" 298 | files = [ 299 | {file = "filelock-3.15.1-py3-none-any.whl", hash = "sha256:71b3102950e91dfc1bb4209b64be4dc8854f40e5f534428d8684f953ac847fac"}, 300 | {file = "filelock-3.15.1.tar.gz", hash = "sha256:58a2549afdf9e02e10720eaa4d4470f56386d7a6f72edd7d0596337af8ed7ad8"}, 301 | ] 302 | 303 | [package.extras] 304 | docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] 305 | testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8.0.1)", "pytest (>=7.4.3)", "pytest-asyncio (>=0.21)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"] 306 | typing = ["typing-extensions (>=4.8)"] 307 | 308 | [[package]] 309 | name = "identify" 310 | version = "2.5.36" 311 | description = "File identification library for Python" 312 | optional = false 313 | python-versions = ">=3.8" 314 | files = [ 315 | {file = "identify-2.5.36-py2.py3-none-any.whl", hash = "sha256:37d93f380f4de590500d9dba7db359d0d3da95ffe7f9de1753faa159e71e7dfa"}, 316 | {file = "identify-2.5.36.tar.gz", hash = "sha256:e5e00f54165f9047fbebeb4a560f9acfb8af4c88232be60a488e9b68d122745d"}, 317 | ] 318 | 319 | [package.extras] 320 | license = ["ukkonen"] 321 | 322 | [[package]] 323 | name = "idna" 324 | version = "3.7" 325 | description = "Internationalized Domain Names in Applications (IDNA)" 326 | optional = false 327 | python-versions = ">=3.5" 328 | files = [ 329 | {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, 330 | {file = 
"idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, 331 | ] 332 | 333 | [[package]] 334 | name = "iniconfig" 335 | version = "2.0.0" 336 | description = "brain-dead simple config-ini parsing" 337 | optional = false 338 | python-versions = ">=3.7" 339 | files = [ 340 | {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, 341 | {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, 342 | ] 343 | 344 | [[package]] 345 | name = "isort" 346 | version = "5.13.2" 347 | description = "A Python utility / library to sort Python imports." 348 | optional = false 349 | python-versions = ">=3.8.0" 350 | files = [ 351 | {file = "isort-5.13.2-py3-none-any.whl", hash = "sha256:8ca5e72a8d85860d5a3fa69b8745237f2939afe12dbf656afbcb47fe72d947a6"}, 352 | {file = "isort-5.13.2.tar.gz", hash = "sha256:48fdfcb9face5d58a4f6dde2e72a1fb8dcaf8ab26f95ab49fab84c2ddefb0109"}, 353 | ] 354 | 355 | [package.extras] 356 | colors = ["colorama (>=0.4.6)"] 357 | 358 | [[package]] 359 | name = "lxml" 360 | version = "5.2.2" 361 | description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." 
362 | optional = false 363 | python-versions = ">=3.6" 364 | files = [ 365 | {file = "lxml-5.2.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:364d03207f3e603922d0d3932ef363d55bbf48e3647395765f9bfcbdf6d23632"}, 366 | {file = "lxml-5.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:50127c186f191b8917ea2fb8b206fbebe87fd414a6084d15568c27d0a21d60db"}, 367 | {file = "lxml-5.2.2-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:74e4f025ef3db1c6da4460dd27c118d8cd136d0391da4e387a15e48e5c975147"}, 368 | {file = "lxml-5.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:981a06a3076997adf7c743dcd0d7a0415582661e2517c7d961493572e909aa1d"}, 369 | {file = "lxml-5.2.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aef5474d913d3b05e613906ba4090433c515e13ea49c837aca18bde190853dff"}, 370 | {file = "lxml-5.2.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1e275ea572389e41e8b039ac076a46cb87ee6b8542df3fff26f5baab43713bca"}, 371 | {file = "lxml-5.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5b65529bb2f21ac7861a0e94fdbf5dc0daab41497d18223b46ee8515e5ad297"}, 372 | {file = "lxml-5.2.2-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bcc98f911f10278d1daf14b87d65325851a1d29153caaf146877ec37031d5f36"}, 373 | {file = "lxml-5.2.2-cp310-cp310-manylinux_2_28_ppc64le.whl", hash = "sha256:b47633251727c8fe279f34025844b3b3a3e40cd1b198356d003aa146258d13a2"}, 374 | {file = "lxml-5.2.2-cp310-cp310-manylinux_2_28_s390x.whl", hash = "sha256:fbc9d316552f9ef7bba39f4edfad4a734d3d6f93341232a9dddadec4f15d425f"}, 375 | {file = "lxml-5.2.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:13e69be35391ce72712184f69000cda04fc89689429179bc4c0ae5f0b7a8c21b"}, 376 | {file = "lxml-5.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3b6a30a9ab040b3f545b697cb3adbf3696c05a3a68aad172e3fd7ca73ab3c835"}, 
377 | {file = "lxml-5.2.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:a233bb68625a85126ac9f1fc66d24337d6e8a0f9207b688eec2e7c880f012ec0"}, 378 | {file = "lxml-5.2.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:dfa7c241073d8f2b8e8dbc7803c434f57dbb83ae2a3d7892dd068d99e96efe2c"}, 379 | {file = "lxml-5.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1a7aca7964ac4bb07680d5c9d63b9d7028cace3e2d43175cb50bba8c5ad33316"}, 380 | {file = "lxml-5.2.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ae4073a60ab98529ab8a72ebf429f2a8cc612619a8c04e08bed27450d52103c0"}, 381 | {file = "lxml-5.2.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ffb2be176fed4457e445fe540617f0252a72a8bc56208fd65a690fdb1f57660b"}, 382 | {file = "lxml-5.2.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:e290d79a4107d7d794634ce3e985b9ae4f920380a813717adf61804904dc4393"}, 383 | {file = "lxml-5.2.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:96e85aa09274955bb6bd483eaf5b12abadade01010478154b0ec70284c1b1526"}, 384 | {file = "lxml-5.2.2-cp310-cp310-win32.whl", hash = "sha256:f956196ef61369f1685d14dad80611488d8dc1ef00be57c0c5a03064005b0f30"}, 385 | {file = "lxml-5.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:875a3f90d7eb5c5d77e529080d95140eacb3c6d13ad5b616ee8095447b1d22e7"}, 386 | {file = "lxml-5.2.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:45f9494613160d0405682f9eee781c7e6d1bf45f819654eb249f8f46a2c22545"}, 387 | {file = "lxml-5.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b0b3f2df149efb242cee2ffdeb6674b7f30d23c9a7af26595099afaf46ef4e88"}, 388 | {file = "lxml-5.2.2-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d28cb356f119a437cc58a13f8135ab8a4c8ece18159eb9194b0d269ec4e28083"}, 389 | {file = "lxml-5.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:657a972f46bbefdbba2d4f14413c0d079f9ae243bd68193cb5061b9732fa54c1"}, 390 | {file = 
"lxml-5.2.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b74b9ea10063efb77a965a8d5f4182806fbf59ed068b3c3fd6f30d2ac7bee734"}, 391 | {file = "lxml-5.2.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:07542787f86112d46d07d4f3c4e7c760282011b354d012dc4141cc12a68cef5f"}, 392 | {file = "lxml-5.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:303f540ad2dddd35b92415b74b900c749ec2010e703ab3bfd6660979d01fd4ed"}, 393 | {file = "lxml-5.2.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2eb2227ce1ff998faf0cd7fe85bbf086aa41dfc5af3b1d80867ecfe75fb68df3"}, 394 | {file = "lxml-5.2.2-cp311-cp311-manylinux_2_28_ppc64le.whl", hash = "sha256:1d8a701774dfc42a2f0b8ccdfe7dbc140500d1049e0632a611985d943fcf12df"}, 395 | {file = "lxml-5.2.2-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:56793b7a1a091a7c286b5f4aa1fe4ae5d1446fe742d00cdf2ffb1077865db10d"}, 396 | {file = "lxml-5.2.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:eb00b549b13bd6d884c863554566095bf6fa9c3cecb2e7b399c4bc7904cb33b5"}, 397 | {file = "lxml-5.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1a2569a1f15ae6c8c64108a2cd2b4a858fc1e13d25846be0666fc144715e32ab"}, 398 | {file = "lxml-5.2.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:8cf85a6e40ff1f37fe0f25719aadf443686b1ac7652593dc53c7ef9b8492b115"}, 399 | {file = "lxml-5.2.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:d237ba6664b8e60fd90b8549a149a74fcc675272e0e95539a00522e4ca688b04"}, 400 | {file = "lxml-5.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0b3f5016e00ae7630a4b83d0868fca1e3d494c78a75b1c7252606a3a1c5fc2ad"}, 401 | {file = "lxml-5.2.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:23441e2b5339bc54dc949e9e675fa35efe858108404ef9aa92f0456929ef6fe8"}, 402 | {file = "lxml-5.2.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:2fb0ba3e8566548d6c8e7dd82a8229ff47bd8fb8c2da237607ac8e5a1b8312e5"}, 403 | {file = 
"lxml-5.2.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:79d1fb9252e7e2cfe4de6e9a6610c7cbb99b9708e2c3e29057f487de5a9eaefa"}, 404 | {file = "lxml-5.2.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6dcc3d17eac1df7859ae01202e9bb11ffa8c98949dcbeb1069c8b9a75917e01b"}, 405 | {file = "lxml-5.2.2-cp311-cp311-win32.whl", hash = "sha256:4c30a2f83677876465f44c018830f608fa3c6a8a466eb223535035fbc16f3438"}, 406 | {file = "lxml-5.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:49095a38eb333aaf44c06052fd2ec3b8f23e19747ca7ec6f6c954ffea6dbf7be"}, 407 | {file = "lxml-5.2.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:7429e7faa1a60cad26ae4227f4dd0459efde239e494c7312624ce228e04f6391"}, 408 | {file = "lxml-5.2.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:50ccb5d355961c0f12f6cf24b7187dbabd5433f29e15147a67995474f27d1776"}, 409 | {file = "lxml-5.2.2-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc911208b18842a3a57266d8e51fc3cfaccee90a5351b92079beed912a7914c2"}, 410 | {file = "lxml-5.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33ce9e786753743159799fdf8e92a5da351158c4bfb6f2db0bf31e7892a1feb5"}, 411 | {file = "lxml-5.2.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ec87c44f619380878bd49ca109669c9f221d9ae6883a5bcb3616785fa8f94c97"}, 412 | {file = "lxml-5.2.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08ea0f606808354eb8f2dfaac095963cb25d9d28e27edcc375d7b30ab01abbf6"}, 413 | {file = "lxml-5.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75a9632f1d4f698b2e6e2e1ada40e71f369b15d69baddb8968dcc8e683839b18"}, 414 | {file = "lxml-5.2.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:74da9f97daec6928567b48c90ea2c82a106b2d500f397eeb8941e47d30b1ca85"}, 415 | {file = "lxml-5.2.2-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = 
"sha256:0969e92af09c5687d769731e3f39ed62427cc72176cebb54b7a9d52cc4fa3b73"}, 416 | {file = "lxml-5.2.2-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:9164361769b6ca7769079f4d426a41df6164879f7f3568be9086e15baca61466"}, 417 | {file = "lxml-5.2.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:d26a618ae1766279f2660aca0081b2220aca6bd1aa06b2cf73f07383faf48927"}, 418 | {file = "lxml-5.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab67ed772c584b7ef2379797bf14b82df9aa5f7438c5b9a09624dd834c1c1aaf"}, 419 | {file = "lxml-5.2.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:3d1e35572a56941b32c239774d7e9ad724074d37f90c7a7d499ab98761bd80cf"}, 420 | {file = "lxml-5.2.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:8268cbcd48c5375f46e000adb1390572c98879eb4f77910c6053d25cc3ac2c67"}, 421 | {file = "lxml-5.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e282aedd63c639c07c3857097fc0e236f984ceb4089a8b284da1c526491e3f3d"}, 422 | {file = "lxml-5.2.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfdc2bfe69e9adf0df4915949c22a25b39d175d599bf98e7ddf620a13678585"}, 423 | {file = "lxml-5.2.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4aefd911793b5d2d7a921233a54c90329bf3d4a6817dc465f12ffdfe4fc7b8fe"}, 424 | {file = "lxml-5.2.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:8b8df03a9e995b6211dafa63b32f9d405881518ff1ddd775db4e7b98fb545e1c"}, 425 | {file = "lxml-5.2.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f11ae142f3a322d44513de1018b50f474f8f736bc3cd91d969f464b5bfef8836"}, 426 | {file = "lxml-5.2.2-cp312-cp312-win32.whl", hash = "sha256:16a8326e51fcdffc886294c1e70b11ddccec836516a343f9ed0f82aac043c24a"}, 427 | {file = "lxml-5.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:bbc4b80af581e18568ff07f6395c02114d05f4865c2812a1f02f2eaecf0bfd48"}, 428 | {file = "lxml-5.2.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:e3d9d13603410b72787579769469af730c38f2f25505573a5888a94b62b920f8"}, 429 | {file = 
"lxml-5.2.2-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:38b67afb0a06b8575948641c1d6d68e41b83a3abeae2ca9eed2ac59892b36706"}, 430 | {file = "lxml-5.2.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c689d0d5381f56de7bd6966a4541bff6e08bf8d3871bbd89a0c6ab18aa699573"}, 431 | {file = "lxml-5.2.2-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:cf2a978c795b54c539f47964ec05e35c05bd045db5ca1e8366988c7f2fe6b3ce"}, 432 | {file = "lxml-5.2.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:739e36ef7412b2bd940f75b278749106e6d025e40027c0b94a17ef7968d55d56"}, 433 | {file = "lxml-5.2.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:d8bbcd21769594dbba9c37d3c819e2d5847656ca99c747ddb31ac1701d0c0ed9"}, 434 | {file = "lxml-5.2.2-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:2304d3c93f2258ccf2cf7a6ba8c761d76ef84948d87bf9664e14d203da2cd264"}, 435 | {file = "lxml-5.2.2-cp36-cp36m-win32.whl", hash = "sha256:02437fb7308386867c8b7b0e5bc4cd4b04548b1c5d089ffb8e7b31009b961dc3"}, 436 | {file = "lxml-5.2.2-cp36-cp36m-win_amd64.whl", hash = "sha256:edcfa83e03370032a489430215c1e7783128808fd3e2e0a3225deee278585196"}, 437 | {file = "lxml-5.2.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:28bf95177400066596cdbcfc933312493799382879da504633d16cf60bba735b"}, 438 | {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3a745cc98d504d5bd2c19b10c79c61c7c3df9222629f1b6210c0368177589fb8"}, 439 | {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b590b39ef90c6b22ec0be925b211298e810b4856909c8ca60d27ffbca6c12e6"}, 440 | {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b336b0416828022bfd5a2e3083e7f5ba54b96242159f83c7e3eebaec752f1716"}, 441 | {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = 
"sha256:c2faf60c583af0d135e853c86ac2735ce178f0e338a3c7f9ae8f622fd2eb788c"}, 442 | {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:4bc6cb140a7a0ad1f7bc37e018d0ed690b7b6520ade518285dc3171f7a117905"}, 443 | {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7ff762670cada8e05b32bf1e4dc50b140790909caa8303cfddc4d702b71ea184"}, 444 | {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:57f0a0bbc9868e10ebe874e9f129d2917750adf008fe7b9c1598c0fbbfdde6a6"}, 445 | {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:a6d2092797b388342c1bc932077ad232f914351932353e2e8706851c870bca1f"}, 446 | {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:60499fe961b21264e17a471ec296dcbf4365fbea611bf9e303ab69db7159ce61"}, 447 | {file = "lxml-5.2.2-cp37-cp37m-win32.whl", hash = "sha256:d9b342c76003c6b9336a80efcc766748a333573abf9350f4094ee46b006ec18f"}, 448 | {file = "lxml-5.2.2-cp37-cp37m-win_amd64.whl", hash = "sha256:b16db2770517b8799c79aa80f4053cd6f8b716f21f8aca962725a9565ce3ee40"}, 449 | {file = "lxml-5.2.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7ed07b3062b055d7a7f9d6557a251cc655eed0b3152b76de619516621c56f5d3"}, 450 | {file = "lxml-5.2.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f60fdd125d85bf9c279ffb8e94c78c51b3b6a37711464e1f5f31078b45002421"}, 451 | {file = "lxml-5.2.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a7e24cb69ee5f32e003f50e016d5fde438010c1022c96738b04fc2423e61706"}, 452 | {file = "lxml-5.2.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23cfafd56887eaed93d07bc4547abd5e09d837a002b791e9767765492a75883f"}, 453 | {file = "lxml-5.2.2-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:19b4e485cd07b7d83e3fe3b72132e7df70bfac22b14fe4bf7a23822c3a35bff5"}, 454 | {file = "lxml-5.2.2-cp38-cp38-manylinux_2_28_x86_64.whl", hash = 
"sha256:7ce7ad8abebe737ad6143d9d3bf94b88b93365ea30a5b81f6877ec9c0dee0a48"}, 455 | {file = "lxml-5.2.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e49b052b768bb74f58c7dda4e0bdf7b79d43a9204ca584ffe1fb48a6f3c84c66"}, 456 | {file = "lxml-5.2.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d14a0d029a4e176795cef99c056d58067c06195e0c7e2dbb293bf95c08f772a3"}, 457 | {file = "lxml-5.2.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:be49ad33819d7dcc28a309b86d4ed98e1a65f3075c6acd3cd4fe32103235222b"}, 458 | {file = "lxml-5.2.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:a6d17e0370d2516d5bb9062c7b4cb731cff921fc875644c3d751ad857ba9c5b1"}, 459 | {file = "lxml-5.2.2-cp38-cp38-win32.whl", hash = "sha256:5b8c041b6265e08eac8a724b74b655404070b636a8dd6d7a13c3adc07882ef30"}, 460 | {file = "lxml-5.2.2-cp38-cp38-win_amd64.whl", hash = "sha256:f61efaf4bed1cc0860e567d2ecb2363974d414f7f1f124b1df368bbf183453a6"}, 461 | {file = "lxml-5.2.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:fb91819461b1b56d06fa4bcf86617fac795f6a99d12239fb0c68dbeba41a0a30"}, 462 | {file = "lxml-5.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d4ed0c7cbecde7194cd3228c044e86bf73e30a23505af852857c09c24e77ec5d"}, 463 | {file = "lxml-5.2.2-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54401c77a63cc7d6dc4b4e173bb484f28a5607f3df71484709fe037c92d4f0ed"}, 464 | {file = "lxml-5.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:625e3ef310e7fa3a761d48ca7ea1f9d8718a32b1542e727d584d82f4453d5eeb"}, 465 | {file = "lxml-5.2.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:519895c99c815a1a24a926d5b60627ce5ea48e9f639a5cd328bda0515ea0f10c"}, 466 | {file = "lxml-5.2.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c7079d5eb1c1315a858bbf180000757db8ad904a89476653232db835c3114001"}, 467 | {file = 
"lxml-5.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:343ab62e9ca78094f2306aefed67dcfad61c4683f87eee48ff2fd74902447726"}, 468 | {file = "lxml-5.2.2-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:cd9e78285da6c9ba2d5c769628f43ef66d96ac3085e59b10ad4f3707980710d3"}, 469 | {file = "lxml-5.2.2-cp39-cp39-manylinux_2_28_ppc64le.whl", hash = "sha256:546cf886f6242dff9ec206331209db9c8e1643ae642dea5fdbecae2453cb50fd"}, 470 | {file = "lxml-5.2.2-cp39-cp39-manylinux_2_28_s390x.whl", hash = "sha256:02f6a8eb6512fdc2fd4ca10a49c341c4e109aa6e9448cc4859af5b949622715a"}, 471 | {file = "lxml-5.2.2-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:339ee4a4704bc724757cd5dd9dc8cf4d00980f5d3e6e06d5847c1b594ace68ab"}, 472 | {file = "lxml-5.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0a028b61a2e357ace98b1615fc03f76eb517cc028993964fe08ad514b1e8892d"}, 473 | {file = "lxml-5.2.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:f90e552ecbad426eab352e7b2933091f2be77115bb16f09f78404861c8322981"}, 474 | {file = "lxml-5.2.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:d83e2d94b69bf31ead2fa45f0acdef0757fa0458a129734f59f67f3d2eb7ef32"}, 475 | {file = "lxml-5.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a02d3c48f9bb1e10c7788d92c0c7db6f2002d024ab6e74d6f45ae33e3d0288a3"}, 476 | {file = "lxml-5.2.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:6d68ce8e7b2075390e8ac1e1d3a99e8b6372c694bbe612632606d1d546794207"}, 477 | {file = "lxml-5.2.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:453d037e09a5176d92ec0fd282e934ed26d806331a8b70ab431a81e2fbabf56d"}, 478 | {file = "lxml-5.2.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:3b019d4ee84b683342af793b56bb35034bd749e4cbdd3d33f7d1107790f8c472"}, 479 | {file = "lxml-5.2.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:cb3942960f0beb9f46e2a71a3aca220d1ca32feb5a398656be934320804c0df9"}, 480 | {file = "lxml-5.2.2-cp39-cp39-win32.whl", hash = 
"sha256:ac6540c9fff6e3813d29d0403ee7a81897f1d8ecc09a8ff84d2eea70ede1cdbf"}, 481 | {file = "lxml-5.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:610b5c77428a50269f38a534057444c249976433f40f53e3b47e68349cca1425"}, 482 | {file = "lxml-5.2.2-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:b537bd04d7ccd7c6350cdaaaad911f6312cbd61e6e6045542f781c7f8b2e99d2"}, 483 | {file = "lxml-5.2.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4820c02195d6dfb7b8508ff276752f6b2ff8b64ae5d13ebe02e7667e035000b9"}, 484 | {file = "lxml-5.2.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a09f6184f17a80897172863a655467da2b11151ec98ba8d7af89f17bf63dae"}, 485 | {file = "lxml-5.2.2-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:76acba4c66c47d27c8365e7c10b3d8016a7da83d3191d053a58382311a8bf4e1"}, 486 | {file = "lxml-5.2.2-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:b128092c927eaf485928cec0c28f6b8bead277e28acf56800e972aa2c2abd7a2"}, 487 | {file = "lxml-5.2.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ae791f6bd43305aade8c0e22f816b34f3b72b6c820477aab4d18473a37e8090b"}, 488 | {file = "lxml-5.2.2-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a2f6a1bc2460e643785a2cde17293bd7a8f990884b822f7bca47bee0a82fc66b"}, 489 | {file = "lxml-5.2.2-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e8d351ff44c1638cb6e980623d517abd9f580d2e53bfcd18d8941c052a5a009"}, 490 | {file = "lxml-5.2.2-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bec4bd9133420c5c52d562469c754f27c5c9e36ee06abc169612c959bd7dbb07"}, 491 | {file = "lxml-5.2.2-pp37-pypy37_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:55ce6b6d803890bd3cc89975fca9de1dff39729b43b73cb15ddd933b8bc20484"}, 492 | {file = "lxml-5.2.2-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8ab6a358d1286498d80fe67bd3d69fcbc7d1359b45b41e74c4a26964ca99c3f8"}, 493 | 
{file = "lxml-5.2.2-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:06668e39e1f3c065349c51ac27ae430719d7806c026fec462e5693b08b95696b"}, 494 | {file = "lxml-5.2.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9cd5323344d8ebb9fb5e96da5de5ad4ebab993bbf51674259dbe9d7a18049525"}, 495 | {file = "lxml-5.2.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89feb82ca055af0fe797a2323ec9043b26bc371365847dbe83c7fd2e2f181c34"}, 496 | {file = "lxml-5.2.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e481bba1e11ba585fb06db666bfc23dbe181dbafc7b25776156120bf12e0d5a6"}, 497 | {file = "lxml-5.2.2-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:9d6c6ea6a11ca0ff9cd0390b885984ed31157c168565702959c25e2191674a14"}, 498 | {file = "lxml-5.2.2-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:3d98de734abee23e61f6b8c2e08a88453ada7d6486dc7cdc82922a03968928db"}, 499 | {file = "lxml-5.2.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:69ab77a1373f1e7563e0fb5a29a8440367dec051da6c7405333699d07444f511"}, 500 | {file = "lxml-5.2.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:34e17913c431f5ae01d8658dbf792fdc457073dcdfbb31dc0cc6ab256e664a8d"}, 501 | {file = "lxml-5.2.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05f8757b03208c3f50097761be2dea0aba02e94f0dc7023ed73a7bb14ff11eb0"}, 502 | {file = "lxml-5.2.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a520b4f9974b0a0a6ed73c2154de57cdfd0c8800f4f15ab2b73238ffed0b36e"}, 503 | {file = "lxml-5.2.2-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5e097646944b66207023bc3c634827de858aebc226d5d4d6d16f0b77566ea182"}, 504 | {file = "lxml-5.2.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:b5e4ef22ff25bfd4ede5f8fb30f7b24446345f3e79d9b7455aef2836437bc38a"}, 505 | {file = "lxml-5.2.2-pp39-pypy39_pp73-win_amd64.whl", hash = 
"sha256:ff69a9a0b4b17d78170c73abe2ab12084bdf1691550c5629ad1fe7849433f324"}, 506 | {file = "lxml-5.2.2.tar.gz", hash = "sha256:bb2dc4898180bea79863d5487e5f9c7c34297414bad54bcd0f0852aee9cfdb87"}, 507 | ] 508 | 509 | [package.extras] 510 | cssselect = ["cssselect (>=0.7)"] 511 | html-clean = ["lxml-html-clean"] 512 | html5 = ["html5lib"] 513 | htmlsoup = ["BeautifulSoup4"] 514 | source = ["Cython (>=3.0.10)"] 515 | 516 | [[package]] 517 | name = "mccabe" 518 | version = "0.7.0" 519 | description = "McCabe checker, plugin for flake8" 520 | optional = false 521 | python-versions = ">=3.6" 522 | files = [ 523 | {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, 524 | {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, 525 | ] 526 | 527 | [[package]] 528 | name = "mypy" 529 | version = "1.10.0" 530 | description = "Optional static typing for Python" 531 | optional = false 532 | python-versions = ">=3.8" 533 | files = [ 534 | {file = "mypy-1.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:da1cbf08fb3b851ab3b9523a884c232774008267b1f83371ace57f412fe308c2"}, 535 | {file = "mypy-1.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:12b6bfc1b1a66095ab413160a6e520e1dc076a28f3e22f7fb25ba3b000b4ef99"}, 536 | {file = "mypy-1.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e36fb078cce9904c7989b9693e41cb9711e0600139ce3970c6ef814b6ebc2b2"}, 537 | {file = "mypy-1.10.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2b0695d605ddcd3eb2f736cd8b4e388288c21e7de85001e9f85df9187f2b50f9"}, 538 | {file = "mypy-1.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:cd777b780312ddb135bceb9bc8722a73ec95e042f911cc279e2ec3c667076051"}, 539 | {file = "mypy-1.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3be66771aa5c97602f382230165b856c231d1277c511c9a8dd058be4784472e1"}, 540 | {file = 
"mypy-1.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8b2cbaca148d0754a54d44121b5825ae71868c7592a53b7292eeb0f3fdae95ee"}, 541 | {file = "mypy-1.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ec404a7cbe9fc0e92cb0e67f55ce0c025014e26d33e54d9e506a0f2d07fe5de"}, 542 | {file = "mypy-1.10.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e22e1527dc3d4aa94311d246b59e47f6455b8729f4968765ac1eacf9a4760bc7"}, 543 | {file = "mypy-1.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:a87dbfa85971e8d59c9cc1fcf534efe664d8949e4c0b6b44e8ca548e746a8d53"}, 544 | {file = "mypy-1.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a781f6ad4bab20eef8b65174a57e5203f4be627b46291f4589879bf4e257b97b"}, 545 | {file = "mypy-1.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b808e12113505b97d9023b0b5e0c0705a90571c6feefc6f215c1df9381256e30"}, 546 | {file = "mypy-1.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f55583b12156c399dce2df7d16f8a5095291354f1e839c252ec6c0611e86e2e"}, 547 | {file = "mypy-1.10.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4cf18f9d0efa1b16478c4c129eabec36148032575391095f73cae2e722fcf9d5"}, 548 | {file = "mypy-1.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:bc6ac273b23c6b82da3bb25f4136c4fd42665f17f2cd850771cb600bdd2ebeda"}, 549 | {file = "mypy-1.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9fd50226364cd2737351c79807775136b0abe084433b55b2e29181a4c3c878c0"}, 550 | {file = "mypy-1.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f90cff89eea89273727d8783fef5d4a934be2fdca11b47def50cf5d311aff727"}, 551 | {file = "mypy-1.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fcfc70599efde5c67862a07a1aaf50e55bce629ace26bb19dc17cece5dd31ca4"}, 552 | {file = "mypy-1.10.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:075cbf81f3e134eadaf247de187bd604748171d6b79736fa9b6c9685b4083061"}, 553 | {file = "mypy-1.10.0-cp38-cp38-win_amd64.whl", 
hash = "sha256:3f298531bca95ff615b6e9f2fc0333aae27fa48052903a0ac90215021cdcfa4f"}, 554 | {file = "mypy-1.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fa7ef5244615a2523b56c034becde4e9e3f9b034854c93639adb667ec9ec2976"}, 555 | {file = "mypy-1.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3236a4c8f535a0631f85f5fcdffba71c7feeef76a6002fcba7c1a8e57c8be1ec"}, 556 | {file = "mypy-1.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a2b5cdbb5dd35aa08ea9114436e0d79aceb2f38e32c21684dcf8e24e1e92821"}, 557 | {file = "mypy-1.10.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92f93b21c0fe73dc00abf91022234c79d793318b8a96faac147cd579c1671746"}, 558 | {file = "mypy-1.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:28d0e038361b45f099cc086d9dd99c15ff14d0188f44ac883010e172ce86c38a"}, 559 | {file = "mypy-1.10.0-py3-none-any.whl", hash = "sha256:f8c083976eb530019175aabadb60921e73b4f45736760826aa1689dda8208aee"}, 560 | {file = "mypy-1.10.0.tar.gz", hash = "sha256:3d087fcbec056c4ee34974da493a826ce316947485cef3901f511848e687c131"}, 561 | ] 562 | 563 | [package.dependencies] 564 | mypy-extensions = ">=1.0.0" 565 | tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} 566 | typing-extensions = ">=4.1.0" 567 | 568 | [package.extras] 569 | dmypy = ["psutil (>=4.0)"] 570 | install-types = ["pip"] 571 | mypyc = ["setuptools (>=50)"] 572 | reports = ["lxml"] 573 | 574 | [[package]] 575 | name = "mypy-extensions" 576 | version = "1.0.0" 577 | description = "Type system extensions for programs checked with the mypy type checker." 
578 | optional = false 579 | python-versions = ">=3.5" 580 | files = [ 581 | {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, 582 | {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, 583 | ] 584 | 585 | [[package]] 586 | name = "nodeenv" 587 | version = "1.9.1" 588 | description = "Node.js virtual environment builder" 589 | optional = false 590 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" 591 | files = [ 592 | {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"}, 593 | {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, 594 | ] 595 | 596 | [[package]] 597 | name = "packaging" 598 | version = "24.1" 599 | description = "Core utilities for Python packages" 600 | optional = false 601 | python-versions = ">=3.8" 602 | files = [ 603 | {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, 604 | {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, 605 | ] 606 | 607 | [[package]] 608 | name = "platformdirs" 609 | version = "4.2.2" 610 | description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
611 | optional = false 612 | python-versions = ">=3.8" 613 | files = [ 614 | {file = "platformdirs-4.2.2-py3-none-any.whl", hash = "sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee"}, 615 | {file = "platformdirs-4.2.2.tar.gz", hash = "sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3"}, 616 | ] 617 | 618 | [package.extras] 619 | docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] 620 | test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"] 621 | type = ["mypy (>=1.8)"] 622 | 623 | [[package]] 624 | name = "pluggy" 625 | version = "1.5.0" 626 | description = "plugin and hook calling mechanisms for python" 627 | optional = false 628 | python-versions = ">=3.8" 629 | files = [ 630 | {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, 631 | {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, 632 | ] 633 | 634 | [package.extras] 635 | dev = ["pre-commit", "tox"] 636 | testing = ["pytest", "pytest-benchmark"] 637 | 638 | [[package]] 639 | name = "pre-commit" 640 | version = "2.21.0" 641 | description = "A framework for managing and maintaining multi-language pre-commit hooks." 
642 | optional = false 643 | python-versions = ">=3.7" 644 | files = [ 645 | {file = "pre_commit-2.21.0-py2.py3-none-any.whl", hash = "sha256:e2f91727039fc39a92f58a588a25b87f936de6567eed4f0e673e0507edc75bad"}, 646 | {file = "pre_commit-2.21.0.tar.gz", hash = "sha256:31ef31af7e474a8d8995027fefdfcf509b5c913ff31f2015b4ec4beb26a6f658"}, 647 | ] 648 | 649 | [package.dependencies] 650 | cfgv = ">=2.0.0" 651 | identify = ">=1.0.0" 652 | nodeenv = ">=0.11.1" 653 | pyyaml = ">=5.1" 654 | virtualenv = ">=20.10.0" 655 | 656 | [[package]] 657 | name = "py" 658 | version = "1.11.0" 659 | description = "library with cross-python path, ini-parsing, io, code, log facilities" 660 | optional = false 661 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 662 | files = [ 663 | {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, 664 | {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, 665 | ] 666 | 667 | [[package]] 668 | name = "pylint" 669 | version = "3.0.2" 670 | description = "python code static checker" 671 | optional = false 672 | python-versions = ">=3.8.0" 673 | files = [ 674 | {file = "pylint-3.0.2-py3-none-any.whl", hash = "sha256:60ed5f3a9ff8b61839ff0348b3624ceeb9e6c2a92c514d81c9cc273da3b6bcda"}, 675 | {file = "pylint-3.0.2.tar.gz", hash = "sha256:0d4c286ef6d2f66c8bfb527a7f8a629009e42c99707dec821a03e1b51a4c1496"}, 676 | ] 677 | 678 | [package.dependencies] 679 | astroid = ">=3.0.1,<=3.1.0-dev0" 680 | colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} 681 | dill = [ 682 | {version = ">=0.2", markers = "python_version < \"3.11\""}, 683 | {version = ">=0.3.7", markers = "python_version >= \"3.12\""}, 684 | {version = ">=0.3.6", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, 685 | ] 686 | isort = ">=4.2.5,<6" 687 | mccabe = ">=0.6,<0.8" 688 | platformdirs = ">=2.2.0" 689 | tomli = 
{version = ">=1.1.0", markers = "python_version < \"3.11\""} 690 | tomlkit = ">=0.10.1" 691 | typing-extensions = {version = ">=3.10.0", markers = "python_version < \"3.10\""} 692 | 693 | [package.extras] 694 | spelling = ["pyenchant (>=3.2,<4.0)"] 695 | testutils = ["gitpython (>3)"] 696 | 697 | [[package]] 698 | name = "pytest" 699 | version = "6.2.5" 700 | description = "pytest: simple powerful testing with Python" 701 | optional = false 702 | python-versions = ">=3.6" 703 | files = [ 704 | {file = "pytest-6.2.5-py3-none-any.whl", hash = "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"}, 705 | {file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"}, 706 | ] 707 | 708 | [package.dependencies] 709 | atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} 710 | attrs = ">=19.2.0" 711 | colorama = {version = "*", markers = "sys_platform == \"win32\""} 712 | iniconfig = "*" 713 | packaging = "*" 714 | pluggy = ">=0.12,<2.0" 715 | py = ">=1.8.2" 716 | toml = "*" 717 | 718 | [package.extras] 719 | testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] 720 | 721 | [[package]] 722 | name = "pytest-cov" 723 | version = "5.0.0" 724 | description = "Pytest plugin for measuring coverage." 
725 | optional = false 726 | python-versions = ">=3.8" 727 | files = [ 728 | {file = "pytest-cov-5.0.0.tar.gz", hash = "sha256:5837b58e9f6ebd335b0f8060eecce69b662415b16dc503883a02f45dfeb14857"}, 729 | {file = "pytest_cov-5.0.0-py3-none-any.whl", hash = "sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652"}, 730 | ] 731 | 732 | [package.dependencies] 733 | coverage = {version = ">=5.2.1", extras = ["toml"]} 734 | pytest = ">=4.6" 735 | 736 | [package.extras] 737 | testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"] 738 | 739 | [[package]] 740 | name = "pyyaml" 741 | version = "6.0.1" 742 | description = "YAML parser and emitter for Python" 743 | optional = false 744 | python-versions = ">=3.6" 745 | files = [ 746 | {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, 747 | {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, 748 | {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, 749 | {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, 750 | {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, 751 | {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, 752 | {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, 753 | {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, 
754 | {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, 755 | {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, 756 | {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, 757 | {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, 758 | {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, 759 | {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, 760 | {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, 761 | {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, 762 | {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, 763 | {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, 764 | {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, 765 | {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, 766 | {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, 767 | {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, 768 | {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, 769 | {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, 770 | {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, 771 | {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, 772 | {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, 773 | {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, 774 | {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, 775 | {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, 776 | {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, 777 | {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, 778 | {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, 779 | {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = 
"sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, 780 | {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, 781 | {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, 782 | {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, 783 | {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, 784 | {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, 785 | {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, 786 | {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, 787 | {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, 788 | {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, 789 | {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, 790 | {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, 791 | {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, 792 | {file = 
"PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, 793 | {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, 794 | {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, 795 | {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, 796 | {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, 797 | ] 798 | 799 | [[package]] 800 | name = "requests" 801 | version = "2.32.3" 802 | description = "Python HTTP for Humans." 803 | optional = false 804 | python-versions = ">=3.8" 805 | files = [ 806 | {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, 807 | {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, 808 | ] 809 | 810 | [package.dependencies] 811 | certifi = ">=2017.4.17" 812 | charset-normalizer = ">=2,<4" 813 | idna = ">=2.5,<4" 814 | urllib3 = ">=1.21.1,<3" 815 | 816 | [package.extras] 817 | socks = ["PySocks (>=1.5.6,!=1.5.7)"] 818 | use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] 819 | 820 | [[package]] 821 | name = "requests-mock" 822 | version = "1.12.1" 823 | description = "Mock out responses from the requests package" 824 | optional = false 825 | python-versions = ">=3.5" 826 | files = [ 827 | {file = "requests-mock-1.12.1.tar.gz", hash = "sha256:e9e12e333b525156e82a3c852f22016b9158220d2f47454de9cae8a77d371401"}, 828 | {file = "requests_mock-1.12.1-py2.py3-none-any.whl", hash = "sha256:b1e37054004cdd5e56c84454cc7df12b25f90f382159087f4b6915aaeef39563"}, 829 | ] 830 | 831 | [package.dependencies] 832 | requests = ">=2.22,<3" 833 | 
834 | [package.extras] 835 | fixture = ["fixtures"] 836 | 837 | [[package]] 838 | name = "responses" 839 | version = "0.13.4" 840 | description = "A utility library for mocking out the `requests` Python library." 841 | optional = false 842 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 843 | files = [ 844 | {file = "responses-0.13.4-py2.py3-none-any.whl", hash = "sha256:d8d0f655710c46fd3513b9202a7f0dcedd02ca0f8cf4976f27fa8ab5b81e656d"}, 845 | {file = "responses-0.13.4.tar.gz", hash = "sha256:9476775d856d3c24ae660bbebe29fb6d789d4ad16acd723efbfb6ee20990b899"}, 846 | ] 847 | 848 | [package.dependencies] 849 | requests = ">=2.0" 850 | six = "*" 851 | urllib3 = ">=1.25.10" 852 | 853 | [package.extras] 854 | tests = ["coverage (>=3.7.1,<6.0.0)", "flake8", "mypy", "pytest (>=4.6)", "pytest (>=4.6,<5.0)", "pytest-cov", "pytest-localserver", "types-mock", "types-requests", "types-six"] 855 | 856 | [[package]] 857 | name = "six" 858 | version = "1.16.0" 859 | description = "Python 2 and 3 compatibility utilities" 860 | optional = false 861 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" 862 | files = [ 863 | {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, 864 | {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, 865 | ] 866 | 867 | [[package]] 868 | name = "soupsieve" 869 | version = "2.5" 870 | description = "A modern CSS selector implementation for Beautiful Soup." 
871 | optional = false 872 | python-versions = ">=3.8" 873 | files = [ 874 | {file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"}, 875 | {file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"}, 876 | ] 877 | 878 | [[package]] 879 | name = "toml" 880 | version = "0.10.2" 881 | description = "Python Library for Tom's Obvious, Minimal Language" 882 | optional = false 883 | python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" 884 | files = [ 885 | {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, 886 | {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, 887 | ] 888 | 889 | [[package]] 890 | name = "tomli" 891 | version = "2.0.1" 892 | description = "A lil' TOML parser" 893 | optional = false 894 | python-versions = ">=3.7" 895 | files = [ 896 | {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, 897 | {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, 898 | ] 899 | 900 | [[package]] 901 | name = "tomlkit" 902 | version = "0.12.5" 903 | description = "Style preserving TOML library" 904 | optional = false 905 | python-versions = ">=3.7" 906 | files = [ 907 | {file = "tomlkit-0.12.5-py3-none-any.whl", hash = "sha256:af914f5a9c59ed9d0762c7b64d3b5d5df007448eb9cd2edc8a46b1eafead172f"}, 908 | {file = "tomlkit-0.12.5.tar.gz", hash = "sha256:eef34fba39834d4d6b73c9ba7f3e4d1c417a4e56f89a7e96e090dd0d24b8fb3c"}, 909 | ] 910 | 911 | [[package]] 912 | name = "typing-extensions" 913 | version = "4.12.2" 914 | description = "Backported and Experimental Type Hints for Python 3.8+" 915 | optional = false 916 | python-versions = ">=3.8" 917 | files = [ 918 | {file = 
"typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, 919 | {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, 920 | ] 921 | 922 | [[package]] 923 | name = "urllib3" 924 | version = "2.2.1" 925 | description = "HTTP library with thread-safe connection pooling, file post, and more." 926 | optional = false 927 | python-versions = ">=3.8" 928 | files = [ 929 | {file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"}, 930 | {file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"}, 931 | ] 932 | 933 | [package.extras] 934 | brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] 935 | h2 = ["h2 (>=4,<5)"] 936 | socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] 937 | zstd = ["zstandard (>=0.18.0)"] 938 | 939 | [[package]] 940 | name = "validators" 941 | version = "0.28.3" 942 | description = "Python Data Validation for Humans™" 943 | optional = false 944 | python-versions = ">=3.8" 945 | files = [ 946 | {file = "validators-0.28.3-py3-none-any.whl", hash = "sha256:53cafa854f13850156259d9cc479b864ee901f6a96e6b109e6fc33f98f37d99f"}, 947 | {file = "validators-0.28.3.tar.gz", hash = "sha256:c6c79840bcde9ba77b19f6218f7738188115e27830cbaff43264bc4ed24c429d"}, 948 | ] 949 | 950 | [[package]] 951 | name = "virtualenv" 952 | version = "20.26.2" 953 | description = "Virtual Python Environment builder" 954 | optional = false 955 | python-versions = ">=3.7" 956 | files = [ 957 | {file = "virtualenv-20.26.2-py3-none-any.whl", hash = "sha256:a624db5e94f01ad993d476b9ee5346fdf7b9de43ccaee0e0197012dc838a0e9b"}, 958 | {file = "virtualenv-20.26.2.tar.gz", hash = "sha256:82bf0f4eebbb78d36ddaee0283d43fe5736b53880b8a8cdcd37390a07ac3741c"}, 959 | ] 960 | 961 | [package.dependencies] 962 | distlib = ">=0.3.7,<1" 963 | filelock = 
">=3.12.2,<4" 964 | platformdirs = ">=3.9.1,<5" 965 | 966 | [package.extras] 967 | docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] 968 | test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] 969 | 970 | [metadata] 971 | lock-version = "2.0" 972 | python-versions = "^3.8" 973 | content-hash = "1c39abeead42adb2ddc75ca11494a22943c681af485bd1e757f87e621ad4590e" 974 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "tiny-web-crawler" 3 | version = "0.5.0" 4 | description = "A simple and efficient web crawler in Python." 
def post_install() -> None:
    """
    Installs the repository's pre-commit hooks through Poetry.

    Runs `poetry run pre-commit install` and raises
    subprocess.CalledProcessError if the command exits with a non-zero status.
    """
    # NOTE(review): pyproject.toml registers this entry point as
    # "scripts:post_install" while this module is named script.py -- confirm
    # that the console script actually resolves.
    command = ["poetry", "run", "pre-commit", "install"]
    subprocess.run(command, check=True)
@dataclass
class Spider:
    """
    A small web crawler that walks pages starting from a root URL.

    Attributes:
        settings (SpiderSettings): The settings that drive this crawl.
        crawl_result (Dict[str, Dict[str, Any]]): Maps every crawled URL to its
            'urls' list (and its 'body' when settings.include_body is set).
        crawl_set (Set[str]): Accepted links that start() may still submit.
        link_count (int): Number of pages counted as crawled so far.
    """

    settings: SpiderSettings

    crawl_result: Dict[str, Dict[str, Any]] = field(default_factory=dict)
    crawl_set: Set[str] = field(default_factory=set)
    link_count: int = 0

    def __post_init__(self) -> None:
        """
        Sets up derived state and the logging level from the settings.
        """
        self.scheme: str = DEFAULT_SCHEME
        # One parsed robots.txt per robots.txt URL, filled lazily.
        self.robots: Dict[str, urllib.robotparser.RobotFileParser] = {}
        self.root_netloc: str = urllib.parse.urlparse(self.settings.root_url).netloc

        set_logging_level(DEBUG if self.settings.verbose else INFO)

        if self.settings.respect_robots_txt:
            return

        logger.warning(
            "Ignoring robots.txt files! You might be at risk of:\n"
            "Agent/IP bans;\n"
            "Disrupted operation;\n"
            "Increased suspicion from anti-bot services;\n"
            "Potential legal action;"
        )

    def save_results(self) -> None:
        """
        Writes the crawl results as JSON to settings.save_to_file, if it is set.
        """
        destination = self.settings.save_to_file
        if not destination:
            return

        with open(destination, 'w', encoding='utf-8') as file:
            json.dump(self.crawl_result, file, indent=4)

    def crawl(self, url: str) -> None:
        """
        Fetches one URL, collects its acceptable links and records them.

        Args:
            url (str): The URL to crawl.
        """
        if not is_valid_url(url):
            logger.debug("Invalid url to crawl: %s", url)
            return

        if url in self.crawl_result:
            logger.debug("URL already crawled: %s", url)
            return

        if self.settings.respect_robots_txt and not self._handle_robots_txt(url):
            logger.debug("Skipped: Url doesn't allow crawling: %s", url)
            return

        logger.debug("Crawling: %s", url)
        soup = fetch_url(url, retries=self.settings.max_retry_attempts)
        if not soup:
            return

        if soup.body:
            anchors = soup.body.find_all('a', href=True)
        else:
            anchors = []

        self.crawl_result[url] = {'urls': []}
        if self.settings.include_body:
            self.crawl_result[url]['body'] = str(soup)

        for anchor in anchors:
            pretty_url = format_url(anchor['href'].lstrip(), url, self.scheme)

            if not self._should_skip_link(pretty_url, url):
                self.crawl_result[url]['urls'].append(pretty_url)
                self.crawl_set.add(pretty_url)
                logger.debug("Link found: %s", pretty_url)

        # The page only counts while the crawl is still under its limit.
        if self.link_count < self.settings.max_links:
            self.link_count += 1
            logger.debug("Links crawled: %s", self.link_count)

    def _should_skip_link(self, pretty_url: str, url: str) -> bool:
        """
        Decides whether a link found on `url` must be left out of the results.

        Args:
            pretty_url (str): The formatted link under consideration.
            url (str): The page on which the link was found.

        Returns:
            bool: True when the link is invalid, already recorded for this page,
            rejected by url_regex, or excluded by the internal/external filters.
        """
        if not is_valid_url(pretty_url):
            logger.debug("Invalid url: %s", pretty_url)
            return True

        already_listed = self.crawl_result[url]['urls']
        if pretty_url in already_listed:
            return True

        if self.settings.url_regex:
            pattern = re.compile(self.settings.url_regex)
            if not pattern.match(pretty_url):
                logger.debug("Skipping: URL didn't match regex: %s", pretty_url)
                return True

        # The link's netloc is only parsed when one of the filters is active.
        if self.settings.internal_links_only or self.settings.external_links_only:
            same_site = self.root_netloc == urllib.parse.urlparse(pretty_url).netloc

            if self.settings.internal_links_only and not same_site:
                logger.debug("Skipping: External link: %s", pretty_url)
                return True

            if self.settings.external_links_only and same_site:
                logger.debug("Skipping: Internal link: %s", pretty_url)
                return True

        return False

    def _handle_robots_txt(self, url: str) -> bool:
        """
        Applies the site's robots.txt rules to `url`.

        Args:
            url (str): The URL about to be crawled.

        Returns:
            bool: False when robots.txt disallows the URL, True otherwise
            (after sleeping for the crawl delay when robots.txt declares one).
        """
        user_agent = requests.utils.default_user_agent()
        robots_url = get_robots_txt_url(url)

        try:
            robot_parser = self.robots[robots_url]
        except KeyError:
            robot_parser = setup_robots_txt_parser(robots_url)
        self.robots[robots_url] = robot_parser

        if not is_robots_txt_allowed(url, robot_parser):
            return False

        crawl_delay = robot_parser.crawl_delay(user_agent)
        if crawl_delay is not None:
            time.sleep(float(crawl_delay))

        return True

    def start(self) -> Dict[str, Dict[str, List[str]]]:
        """
        Runs the crawl from the root URL until max_links pages were counted
        or no submitted crawl is left to wait for.

        Returns:
            Dict[str, Dict[str, List[str]]]: The crawl results.
        """
        with ThreadPoolExecutor(max_workers=self.settings.max_workers) as executor:
            pending = {executor.submit(self.crawl, self.settings.root_url)}

            while self.link_count < self.settings.max_links and pending:
                # Wait for whichever outstanding crawl finishes first.
                finished = next(as_completed(pending))
                pending.remove(finished)

                if finished.exception() is not None:
                    continue

                # Hand out the links discovered so far, pausing after each submit.
                while self.link_count < self.settings.max_links and self.crawl_set:
                    candidate = self.crawl_set.pop()
                    if candidate in self.crawl_result:
                        continue
                    pending.add(executor.submit(self.crawl, candidate))
                    time.sleep(self.settings.delay)

        if self.settings.save_to_file:
            self.save_results()
        logger.debug("Exiting....")
        return self.crawl_result
@dataclass
class GeneralSettings:
    """
    A simple dataclass to store general settings for the Spider class

    Attributes:
        root_url (str): The root URL to start crawling from.
        max_links (int): The maximum number of links to crawl. (Default: 5)
        save_to_file (Optional[str]): The file path to save the crawl results. (Default: None)
        max_workers (int): Max count of concurrent workers. (Default: 1)
        delay (float): Delay between requests. (Default: 0.5)
        verbose (bool): Whether or not to print debug messages (Default: True)
    """

    root_url: str = ""
    max_links: int = 5
    save_to_file: Optional[str] = None
    max_workers: int = 1
    delay: float = 0.5
    verbose: bool = True


@dataclass
class CrawlSettings:
    """
    A simple dataclass to store crawl settings for the Spider class

    Attributes:
        url_regex (Optional[str]): A regular expression against which urls will be matched before crawling
        include_body (bool): Whether or not to include the crawled page's body in crawl_result (Default: False)
        internal_links_only (bool): Whether or not to crawl only internal links (Default: False)
        external_links_only (bool): Whether or not to crawl only external links (Default: False)
        respect_robots_txt (bool): Whether or not to respect website's robots.txt files (Default: True)
        max_retry_attempts (int): How many times a fetch is retried after a transient HTTP error (Default: 5)
    """

    url_regex: Optional[str] = None
    include_body: bool = False
    internal_links_only: bool = False
    external_links_only: bool = False
    respect_robots_txt: bool = True
    max_retry_attempts: int = 5


@dataclass
class SpiderSettings(GeneralSettings, CrawlSettings):
    """
    A simple dataclass that stores all the settings for the Spider class

    Dataclass fields are collected in reverse MRO, so the CrawlSettings fields
    come before the GeneralSettings fields in the generated __init__; pass
    settings by keyword (e.g. root_url=...) rather than by position.

    Raises:
        ValueError: If root_url is missing/empty, or if both internal_links_only
            and external_links_only are set to True.
    """

    def __post_init__(self) -> None:
        # Treat None (and any other empty value) like the empty-string default,
        # so a missing root URL fails here instead of deep inside the crawl.
        if not self.root_url:
            raise ValueError("\"root_url\" argument is required")

        if self.internal_links_only and self.external_links_only:
            raise ValueError("Only one of internal_links_only and external_links_only can be set to True")
class ColorFormatter(logging.Formatter):
    """
    Log formatter that wraps every record in a color chosen by its level.

    The output layout is always ``message_format``; only the surrounding
    colorama color codes change between levels.
    """

    message_format: str = "%(levelname)s %(message)s"

    # Level number -> complete format string (color prefix + layout + reset).
    FORMATS = {
        logging.DEBUG: Fore.LIGHTBLUE_EX + message_format + Fore.RESET,
        logging.INFO: Fore.BLUE + message_format + Fore.RESET,
        logging.WARNING: Fore.YELLOW + message_format + Fore.RESET,
        logging.ERROR: Fore.RED + message_format + Fore.RESET,
        logging.CRITICAL: Fore.RED + message_format + Fore.RESET
    }

    def format(self, record: logging.LogRecord) -> str:
        """
        Formats a log record using the color registered for its level.

        Args:
            record (logging.LogRecord): The record to format.

        Returns:
            str: The formatted (and, for known levels, colored) log line.
        """
        # Levels without an entry in FORMATS (e.g. custom numeric levels) fall
        # back to the plain layout, so they keep their level prefix instead of
        # silently degrading to logging's bare "%(message)s" default.
        log_fmt = self.FORMATS.get(record.levelno, self.message_format)
        return logging.Formatter(log_fmt).format(record)
def fetch_url(url: str, retries: int, attempts: int = 0) -> Optional[BeautifulSoup]:
    """
    Fetches a page and parses it, retrying when a transient HTTP error occurs.

    Args:
        url (str): The URL to fetch.
        retries (int): How many more times the request may be retried after a transient HTTP error.
        attempts (int): How many retries were already made; the wait before the
            next retry is ``attempts + 1`` seconds (default: 0).

    Returns:
        Optional[BeautifulSoup]: The parsed page, or None if the request failed.
    """
    while True:
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            return BeautifulSoup(response.text, 'lxml')
        except requests.exceptions.HTTPError as http_err:
            # Take the status from the exception itself: the local `response`
            # is unbound if the error was raised before the assignment finished.
            # Compare against None explicitly, an error Response is falsy.
            failed_response = http_err.response
            status_code = failed_response.status_code if failed_response is not None else None

            if status_code is not None and is_transient_error(status_code) and retries > 0:
                logger.error("Transient HTTP error occurred: %s. Retrying...", http_err)
                # Linear back-off: wait 1s, 2s, 3s, ... between retries.
                attempts += 1
                retries -= 1
                time.sleep(attempts)
                continue

            logger.error("HTTP error occurred: %s", http_err)
        except requests.exceptions.ConnectionError as conn_err:
            logger.error("Connection error occurred: %s", conn_err)
        except requests.exceptions.Timeout as timeout_err:
            logger.error("Timeout error occurred: %s", timeout_err)
        except requests.exceptions.RequestException as req_err:
            logger.error("Request error occurred: %s", req_err)

        # Every failure that is not retried ends here.
        return None
def is_robots_txt_allowed(url: str, robot_parser: Optional[urllib.robotparser.RobotFileParser] = None) -> bool:
    """
    Checks if the provided URL can be crawled, according to its corresponding robots.txt file

    Args:
        url (str): The URL to check.
        robot_parser (Optional[urllib.robotparser.RobotFileParser]): An already set up parser
            for the site's robots.txt. When omitted, one is created for the robots.txt
            file of the host found in `url`.

    Returns:
        bool: True if the URL can be crawled, False otherwise.
    """

    user_agent = requests.utils.default_user_agent()

    if robot_parser is None:
        # The parser has to read the site's robots.txt, not the page itself:
        # derive the robots.txt location from the URL before setting it up.
        robot_parser = setup_robots_txt_parser(get_robots_txt_url(url))

    return robot_parser.can_fetch(user_agent, url)
12 | """ 13 | return bool(validators.url(url)) 14 | -------------------------------------------------------------------------------- /src/tiny_web_crawler/webdrivers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCrawl-AI/datacrawl/7ce2bed8b26f96ee4a0c8d6b671318d4099d90a8/src/tiny_web_crawler/webdrivers/__init__.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCrawl-AI/datacrawl/7ce2bed8b26f96ee4a0c8d6b671318d4099d90a8/tests/__init__.py -------------------------------------------------------------------------------- /tests/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCrawl-AI/datacrawl/7ce2bed8b26f96ee4a0c8d6b671318d4099d90a8/tests/core/__init__.py -------------------------------------------------------------------------------- /tests/core/test_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCrawl-AI/datacrawl/7ce2bed8b26f96ee4a0c8d6b671318d4099d90a8/tests/core/test_spider.py -------------------------------------------------------------------------------- /tests/logging/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCrawl-AI/datacrawl/7ce2bed8b26f96ee4a0c8d6b671318d4099d90a8/tests/logging/__init__.py -------------------------------------------------------------------------------- /tests/logging/test_logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import responses 3 | 4 | from tiny_web_crawler import Spider 5 | from tiny_web_crawler import SpiderSettings 6 | from tiny_web_crawler.logging 
def test_set_logging_level(caplog) -> None: # type: ignore
    # set_logging_level must change the level of the package logger so that
    # records below that level are dropped.
    logger = get_logger()
    previous_level = logger.level

    try:
        set_logging_level(ERROR)

        logger.info("123")
        logger.error("456")

        assert logger.getEffectiveLevel() == ERROR
        assert "123" not in caplog.text
        assert "456" in caplog.text
    finally:
        # The logger is shared by the whole test session: put its level back
        # so this test does not leak ERROR into whatever runs next.
        logger.setLevel(previous_level)
spider = Spider( 89 | SpiderSettings(root_url="http://example.com", verbose=False)) 90 | spider.start() 91 | 92 | assert "DEBUG" not in caplog.text 93 | assert "ERROR" in caplog.text 94 | assert len(caplog.text) > 0 95 | -------------------------------------------------------------------------------- /tests/networking/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCrawl-AI/datacrawl/7ce2bed8b26f96ee4a0c8d6b671318d4099d90a8/tests/networking/__init__.py -------------------------------------------------------------------------------- /tests/networking/test_fetcher.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import responses 4 | import requests 5 | 6 | from tiny_web_crawler.networking.fetcher import fetch_url 7 | from tiny_web_crawler.logging import ERROR 8 | from tests.utils import setup_mock_response 9 | 10 | 11 | @responses.activate 12 | def test_fetch_url() -> None: 13 | setup_mock_response( 14 | url="http://example.com", 15 | body="link", 16 | status=200 17 | ) 18 | 19 | resp = fetch_url("http://example.com", 1) 20 | 21 | assert resp is not None 22 | assert resp.text == "link" 23 | 24 | 25 | @responses.activate 26 | def test_fetch_url_connection_error(caplog) -> None: # type: ignore 27 | 28 | with caplog.at_level(ERROR): 29 | # Fetch url whose response isn't mocked to raise ConnectionError 30 | resp = fetch_url("http://connection.error", 1) 31 | 32 | assert "Connection error occurred:" in caplog.text 33 | assert resp is None 34 | 35 | 36 | @responses.activate 37 | def test_fetch_url_http_error(caplog) -> None: # type: ignore 38 | error_codes = [403, 404, 412] 39 | 40 | for error_code in error_codes: 41 | setup_mock_response( 42 | url=f"http://http.error/{error_code}", 43 | body="link", 44 | status=error_code 45 | ) 46 | 47 | with caplog.at_level(ERROR): 48 | resp = 
@responses.activate
def test_fetch_url_timeout_error(caplog) -> None: # type: ignore
    # Register a mock whose body is a Timeout exception instance
    setup_mock_response(
        url="http://timeout.error",
        body=requests.exceptions.Timeout(),
        status=408
    )

    with caplog.at_level(ERROR):
        # Fetch the mocked url; fetch_url is expected to log the timeout and return None
        resp = fetch_url("http://timeout.error", 1)

    assert "Timeout error occurred:" in caplog.text
    assert resp is None
115 | def test_fetch_url_transient_error_retry_10(mock_sleep, caplog) -> None: # type: ignore 116 | setup_mock_response( 117 | url="http://transient.error", 118 | body="link", 119 | status=503 120 | ) 121 | 122 | max_retry_attempts = 10 123 | 124 | with caplog.at_level(ERROR): 125 | resp = fetch_url("http://transient.error", max_retry_attempts) 126 | 127 | assert resp is None 128 | 129 | # Assert url was fetched once then retried x ammount of times 130 | assert len(responses.calls) == max_retry_attempts + 1 131 | 132 | # Assert sleep time grew with every request 133 | expected_delays = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 134 | actual_delays = [call.args[0] for call in mock_sleep.call_args_list] 135 | assert actual_delays == expected_delays 136 | 137 | assert "Transient HTTP error occurred:" in caplog.text 138 | 139 | 140 | @patch("time.sleep") 141 | @responses.activate 142 | def test_fetch_url_transient_error_retry_success(mock_sleep, caplog) -> None: # type: ignore 143 | setup_mock_response( 144 | url="http://transient.error", 145 | body="link", 146 | status=503 147 | ) 148 | setup_mock_response( 149 | url="http://transient.error", 150 | body="link", 151 | status=200 152 | ) 153 | 154 | max_retry_attempts = 1 155 | 156 | with caplog.at_level(ERROR): 157 | resp = fetch_url("http://transient.error", max_retry_attempts) 158 | 159 | assert resp is not None 160 | assert resp.text == "link" 161 | 162 | # Assert url was fetched 2 times 163 | assert len(responses.calls) == 2 164 | 165 | # Assert time.sleep was called 166 | mock_sleep.assert_called_once_with(1) 167 | 168 | assert "Transient HTTP error occurred:" in caplog.text 169 | -------------------------------------------------------------------------------- /tests/networking/test_formatter.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from tiny_web_crawler.networking.formatter import format_url, DEFAULT_SCHEME 4 | 5 | @pytest.mark.parametrize( 6 | "url, 
base_url, expected", 7 | [ 8 | ("/test", "http://example.com", "http://example.com/test"), 9 | ("http://example.com/test", "http://example.com", "http://example.com/test"), 10 | ("path1/path2", "http://example.com", "http://example.com/path1/path2"), 11 | ("/path1/path2", "http://example.com", "http://example.com/path1/path2"), 12 | ("path.com", "http://example.com", f"{DEFAULT_SCHEME}path.com"), 13 | ] 14 | ) 15 | def test_format_url(url: str, base_url: str, expected: str) -> None: 16 | assert format_url(url, base_url) == expected 17 | -------------------------------------------------------------------------------- /tests/networking/test_robots_txt.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch, MagicMock 2 | from io import BytesIO 3 | import urllib.robotparser 4 | 5 | import pytest 6 | 7 | from tiny_web_crawler.networking.robots_txt import get_robots_txt_url, is_robots_txt_allowed, setup_robots_txt_parser 8 | 9 | @pytest.mark.parametrize( 10 | "url, expected", 11 | [ 12 | ("http://example", "http://example/robots.txt"), 13 | ("http://example/path", "http://example/robots.txt"), 14 | ("https://example/", "https://example/robots.txt"), 15 | ("http://example/path1/path2/path3/path4", "http://example/robots.txt"), 16 | ("http://example/path#fragment", "http://example/robots.txt"), 17 | ("http://example/path?query=test", "http://example/robots.txt"), 18 | ] 19 | ) 20 | def test_get_robots_txt_url(url: str, expected: str) -> None: 21 | assert get_robots_txt_url(url) == expected 22 | 23 | 24 | @patch('urllib.request.urlopen') 25 | def test_is_robots_txt_allowed_true(mock_urlopen: MagicMock) -> None: 26 | # Mock the response content of robots.txt 27 | mock_response = b"User-agent: *\nAllow: /" 28 | mock_urlopen.return_value = BytesIO(mock_response) 29 | 30 | assert is_robots_txt_allowed("http://example.com") 31 | 32 | 33 | @patch('urllib.request.urlopen') 34 | def 
def test_is_robots_txt_allowed_no_robots_txt() -> None:
    # Check that websites with no robots.txt are set as crawlable.
    # The robots.txt download is answered with a mocked 404 so the test does
    # not depend on network access or on what example.com currently serves.
    import urllib.error  # pylint: disable=import-outside-toplevel

    not_found = urllib.error.HTTPError(
        "http://example.com/robots.txt", 404, "Not Found", None, BytesIO()  # type: ignore
    )

    with patch('urllib.request.urlopen', side_effect=not_found):
        assert is_robots_txt_allowed("http://example.com")
SpiderSettings 11 | from tiny_web_crawler.logging import DEBUG, WARNING, ERROR 12 | from tests.utils import setup_mock_response 13 | 14 | @responses.activate 15 | def test_crawl() -> None: 16 | setup_mock_response( 17 | url="http://example.com", 18 | body="link", 19 | status=200, 20 | ) 21 | setup_mock_response( 22 | url="http://example.com/test", 23 | body="link", 24 | status=200, 25 | ) 26 | 27 | spider = Spider( 28 | SpiderSettings(root_url="http://example.com", 29 | max_links=10) 30 | ) 31 | spider.crawl("http://example.com") 32 | 33 | assert "http://example.com" in spider.crawl_result 34 | assert spider.crawl_result["http://example.com"]["urls"] == [ 35 | "http://example.com/test" 36 | ] 37 | 38 | spider.crawl("http://example.com/test") 39 | 40 | assert "http://example.com/test" in spider.crawl_result 41 | assert spider.crawl_result["http://example.com/test"]["urls"] == [ 42 | "http://example.com" 43 | ] 44 | 45 | 46 | @responses.activate 47 | def test_crawl_invalid_url(caplog) -> None: # type: ignore 48 | spider = Spider( 49 | SpiderSettings(root_url="http://example.com") 50 | ) 51 | 52 | with caplog.at_level(DEBUG): 53 | spider.crawl("invalid_url") 54 | 55 | assert "Invalid url to crawl:" in caplog.text 56 | assert spider.crawl_result == {} 57 | 58 | 59 | @responses.activate 60 | def test_crawl_already_crawled_url(caplog) -> None: # type: ignore 61 | setup_mock_response( 62 | url="http://example.com", 63 | body="link", 64 | status=200, 65 | ) 66 | 67 | spider = Spider( 68 | SpiderSettings(root_url="http://example.com") 69 | ) 70 | 71 | with caplog.at_level(DEBUG): 72 | spider.crawl("http://example.com") 73 | spider.crawl("http://example.com") 74 | 75 | assert "URL already crawled:" in caplog.text 76 | assert spider.crawl_result == { 77 | "http://example.com": {"urls": ["http://example.com"]} 78 | } 79 | 80 | 81 | @responses.activate 82 | def test_crawl_unfetchable_url() -> None: 83 | setup_mock_response( 84 | url="http://example.com", 85 | body="link", 86 | 
status=404, 87 | ) 88 | 89 | spider = Spider( 90 | SpiderSettings(root_url="http://example.com") 91 | ) 92 | 93 | spider.crawl("http://example.com") 94 | assert spider.crawl_result == {} 95 | 96 | 97 | @responses.activate 98 | def test_crawl_found_invalid_url(caplog) -> None: # type: ignore 99 | setup_mock_response( 100 | url="http://example.com", 101 | body="link", 102 | status=200, 103 | ) 104 | 105 | spider = Spider( 106 | SpiderSettings(root_url="http://example.com") 107 | ) 108 | 109 | with caplog.at_level(DEBUG): 110 | spider.crawl("http://example.com") 111 | 112 | assert "Invalid url:" in caplog.text 113 | assert spider.crawl_result == {"http://example.com": {"urls": []}} 114 | 115 | 116 | @responses.activate 117 | def test_crawl_found_duplicate_url() -> None: 118 | setup_mock_response( 119 | url="http://example.com", 120 | body="link1" 121 | + "link2", 122 | status=200, 123 | ) 124 | 125 | spider = Spider( 126 | SpiderSettings(root_url="http://example.com") 127 | ) 128 | spider.crawl("http://example.com") 129 | 130 | assert spider.crawl_result == { 131 | "http://example.com": {"urls": ["http://duplicate.com"]} 132 | } 133 | 134 | 135 | @responses.activate 136 | def test_crawl_no_urls_in_page() -> None: 137 | setup_mock_response( 138 | url="http://example.com", body="", status=200 139 | ) 140 | 141 | spider = Spider( 142 | SpiderSettings(root_url="http://example.com") 143 | ) 144 | spider.crawl("http://example.com") 145 | 146 | assert spider.crawl_result == {"http://example.com": {"urls": []}} 147 | 148 | 149 | @responses.activate 150 | def test_save_results() -> None: 151 | spider = Spider( 152 | SpiderSettings(root_url="http://example.com", 153 | max_links=10, 154 | save_to_file="out.json") 155 | ) 156 | spider.crawl_result = {"http://example.com": {"urls": ["http://example.com/test"]}} 157 | 158 | with patch("builtins.open", mock_open()) as mocked_file: 159 | spider.save_results() 160 | mocked_file.assert_called_once_with("out.json", "w", 
encoding="utf-8") 161 | 162 | 163 | @responses.activate 164 | def test_url_regex() -> None: 165 | setup_mock_response( 166 | url="http://example.com", 167 | body="link" 168 | + "link", 169 | status=200, 170 | ) 171 | 172 | # This regex matches strings starting with "http://example.com/" 173 | # And only have numeric characters after it 174 | regex = r"http://example\.com/[0-9]+" 175 | 176 | spider = Spider( 177 | SpiderSettings(root_url="http://example.com", 178 | url_regex=regex) 179 | ) 180 | spider.start() 181 | 182 | assert spider.crawl_result["http://example.com"]["urls"] == [ 183 | "http://example.com/123" 184 | ] 185 | 186 | assert ( 187 | "http://example.com/test" 188 | not in spider.crawl_result["http://example.com"]["urls"] 189 | ) 190 | 191 | 192 | @responses.activate 193 | def test_include_body() -> None: 194 | setup_mock_response( 195 | url="http://example.com", 196 | body="link", 197 | status=200, 198 | ) 199 | setup_mock_response( 200 | url="http://example.com/test", 201 | body="This is a header", 202 | status=200, 203 | ) 204 | 205 | spider = Spider( 206 | SpiderSettings(root_url="http://example.com", 207 | include_body=True) 208 | ) 209 | spider.start() 210 | 211 | assert ( 212 | spider.crawl_result["http://example.com"]["body"] 213 | == 'link' 214 | ) 215 | assert ( 216 | spider.crawl_result["http://example.com/test"]["body"] 217 | == "This is a header" 218 | ) 219 | 220 | 221 | @responses.activate 222 | def test_internal_links_only(caplog) -> None: # type: ignore 223 | setup_mock_response( 224 | url="http://internal.com", 225 | body="link" 226 | +"link", 227 | status=200, 228 | ) 229 | 230 | spider = Spider( 231 | SpiderSettings(root_url="http://internal.com", 232 | internal_links_only=True) 233 | ) 234 | 235 | with caplog.at_level(DEBUG): 236 | spider.crawl("http://internal.com") 237 | 238 | assert "Skipping: External link:" in caplog.text 239 | assert spider.crawl_result == {"http://internal.com": {"urls": ["http://internal.com/test"]}} 240 | 
241 | 242 | @responses.activate 243 | def test_external_links_only(caplog) -> None: # type: ignore 244 | setup_mock_response( 245 | url="http://internal.com", 246 | body="link" 247 | +"link", 248 | status=200, 249 | ) 250 | 251 | spider = Spider( 252 | SpiderSettings(root_url="http://internal.com", 253 | external_links_only=True) 254 | ) 255 | 256 | with caplog.at_level(DEBUG): 257 | spider.crawl("http://internal.com") 258 | 259 | assert "Skipping: Internal link:" in caplog.text 260 | assert spider.crawl_result == {"http://internal.com": {"urls": ["http://external.com/test"]}} 261 | 262 | 263 | @responses.activate 264 | def test_external_and_internal_links_only() -> None: 265 | with pytest.raises(ValueError): 266 | Spider(SpiderSettings(root_url="http://example.com", 267 | internal_links_only=True, 268 | external_links_only=True) 269 | ) 270 | 271 | 272 | @patch.object(Spider, "crawl") 273 | @patch.object(Spider, "save_results") 274 | def test_start(mock_save_results: MagicMock, mock_crawl: MagicMock) -> None: 275 | spider = Spider( 276 | SpiderSettings(root_url="http://example.com", 277 | max_links=10) 278 | ) 279 | mock_crawl.side_effect = lambda url: spider.crawl_result.update( 280 | {url: {"urls": ["http://example.com/test"]}} 281 | ) 282 | print(mock_save_results) 283 | 284 | spider.start() 285 | 286 | assert mock_crawl.call_count == 1 287 | assert "http://example.com" in spider.crawl_result 288 | assert spider.crawl_result["http://example.com"]["urls"] == [ 289 | "http://example.com/test" 290 | ] 291 | 292 | 293 | @patch.object(Spider, "crawl") 294 | @patch.object(Spider, "save_results") 295 | def test_start_with_save_to_file( 296 | mock_save_results: MagicMock, mock_crawl: MagicMock 297 | ) -> None: 298 | spider = Spider( 299 | SpiderSettings(root_url="http://example.com", 300 | max_links=10, 301 | save_to_file="file.txt") 302 | ) 303 | mock_crawl.side_effect = lambda url: spider.crawl_result.update( 304 | {url: {"urls": ["http://example.com/test"]}} 305 | ) 
306 | 307 | spider.start() 308 | 309 | assert mock_crawl.call_count == 1 310 | assert "http://example.com" in spider.crawl_result 311 | assert spider.crawl_result["http://example.com"]["urls"] == [ 312 | "http://example.com/test" 313 | ] 314 | 315 | mock_save_results.assert_called_once() 316 | 317 | 318 | @responses.activate 319 | @patch('urllib.request.urlopen') 320 | def test_respect_robots_txt(mock_urlopen, caplog) -> None: # type: ignore 321 | setup_mock_response( 322 | url="http://crawlable.com", 323 | body="link", 324 | status=200 325 | ) 326 | setup_mock_response( 327 | url="http://notcrawlable.com", 328 | body="link", 329 | status=200 330 | ) 331 | 332 | mock_urlopen.side_effect = lambda url: ( 333 | BytesIO(b"User-agent: *\nAllow: /") if url == "http://crawlable.com/robots.txt" else 334 | BytesIO(b"User-agent: *\nDisallow: /") if url == "http://notcrawlable.com/robots.txt" else 335 | urllib.error.URLError(f"No mock for {url}")) 336 | 337 | spider = Spider( 338 | SpiderSettings(root_url="http://crawlable.com", 339 | respect_robots_txt=True) 340 | ) 341 | 342 | with caplog.at_level(DEBUG): 343 | spider.start() 344 | 345 | assert spider.crawl_result == { 346 | "http://crawlable.com": { 347 | "urls": ["http://notcrawlable.com"] 348 | } 349 | } 350 | 351 | assert "Skipped: Url doesn't allow crawling:" in caplog.text 352 | 353 | assert "http://notcrawlable.com/robots.txt" in spider.robots 354 | 355 | 356 | @responses.activate 357 | @patch('urllib.request.urlopen') 358 | def test_respect_robots_txt_allowed(mock_urlopen, caplog) -> None: # type: ignore 359 | setup_mock_response( 360 | url="http://crawlable.com", 361 | body="link", 362 | status=200 363 | ) 364 | 365 | mock_urlopen.side_effect = lambda url: ( 366 | BytesIO(b"User-agent: *\nAllow: /") if url == "http://crawlable.com/robots.txt" else 367 | urllib.error.URLError(f"No mock for {url}")) 368 | 369 | spider = Spider( 370 | SpiderSettings(root_url="http://crawlable.com", 371 | respect_robots_txt=True) 372 | 
) 373 | 374 | with caplog.at_level(DEBUG): 375 | spider.crawl("http://crawlable.com") 376 | 377 | assert spider.crawl_result == { 378 | "http://crawlable.com":{ 379 | "urls": ["http://crawlable.com"] 380 | } 381 | } 382 | 383 | 384 | 385 | @responses.activate 386 | @patch('urllib.request.urlopen') 387 | def test_respect_robots_txt_not_allowed(mock_urlopen, caplog) -> None: # type: ignore 388 | setup_mock_response( 389 | url="http://notcrawlable.com", 390 | body="link", 391 | status=200 392 | ) 393 | 394 | mock_urlopen.side_effect = lambda url: ( 395 | BytesIO(b"User-agent: *\nDisallow: /") if url == "http://notcrawlable.com/robots.txt" else 396 | urllib.error.URLError(f"No mock for {url}")) 397 | 398 | spider = Spider( 399 | SpiderSettings(root_url="http://notcrawlable.com", 400 | respect_robots_txt=True) 401 | ) 402 | 403 | with caplog.at_level(DEBUG): 404 | spider.crawl("http://notcrawlable.com") 405 | 406 | assert spider.crawl_result == {} 407 | 408 | assert "Skipped: Url doesn't allow crawling:" in caplog.text 409 | 410 | assert "http://notcrawlable.com/robots.txt" in spider.robots 411 | 412 | 413 | @responses.activate 414 | @patch('urllib.request.urlopen') 415 | def test_respect_robots_txt_disabled(mock_urlopen, caplog) -> None: # type: ignore 416 | setup_mock_response( 417 | url="http://crawlable.com", 418 | body="link", 419 | status=200 420 | ) 421 | setup_mock_response( 422 | url="http://notcrawlable.com", 423 | body="link", 424 | status=200 425 | ) 426 | 427 | mock_urlopen.side_effect = lambda url: ( 428 | BytesIO(b"User-agent: *\nAllow: /") if url == "http://crawlable.com/robots.txt" else 429 | BytesIO(b"User-agent: *\nDisallow: /") if url == "http://notcrawlable.com/robots.txt" else 430 | urllib.error.URLError(f"No mock for {url}")) 431 | 432 | with caplog.at_level(WARNING): 433 | spider = Spider( 434 | SpiderSettings(root_url="http://crawlable.com", 435 | respect_robots_txt=False) 436 | ) 437 | 438 | assert "Ignoring robots.txt files! 
You might be at risk of:" in caplog.text 439 | 440 | 441 | with caplog.at_level(DEBUG): 442 | spider.start() 443 | 444 | assert spider.crawl_result == { 445 | "http://crawlable.com": { 446 | "urls": ["http://notcrawlable.com"] 447 | }, 448 | "http://notcrawlable.com": { 449 | "urls": ["http://crawlable.com"] 450 | } 451 | } 452 | 453 | assert not "Skipped: Url doesn't allow crawling:" in caplog.text 454 | 455 | assert "http://notcrawlable.com/robots.txt" not in spider.robots 456 | 457 | 458 | @responses.activate 459 | @patch('urllib.request.urlopen') 460 | @patch('time.sleep', return_value=None) 461 | def test_respect_robots_txt_crawl_delay(mock_sleep, mock_urlopen, caplog) -> None: # type: ignore 462 | setup_mock_response( 463 | url="http://crawlable.com", 464 | body="link", 465 | status=200 466 | ) 467 | 468 | mock_urlopen.side_effect = lambda url: ( 469 | BytesIO(b"User-agent: *\nAllow: /\ncrawl-delay: 1") if url == "http://crawlable.com/robots.txt" else 470 | urllib.error.URLError(f"No mock for {url}")) 471 | 472 | spider = Spider( 473 | SpiderSettings(root_url="http://crawlable.com", 474 | respect_robots_txt=True) 475 | ) 476 | 477 | with caplog.at_level(DEBUG): 478 | spider.crawl("http://crawlable.com") 479 | 480 | assert mock_sleep.call_count == 1 481 | mock_sleep.assert_called_with(1.0) 482 | 483 | assert spider.crawl_result == { 484 | "http://crawlable.com": { 485 | "urls": ["http://notcrawlable.com"] 486 | } 487 | } 488 | 489 | 490 | def test_crawl_no_root_url() -> None: 491 | with pytest.raises(ValueError): 492 | Spider(SpiderSettings(verbose=False)) 493 | 494 | 495 | @patch("time.sleep") 496 | @responses.activate 497 | def test_crawl_url_transient_retry(mock_sleep, caplog) -> None: # type: ignore 498 | setup_mock_response( 499 | url="http://transient.error", 500 | body="link", 501 | status=503 502 | ) 503 | 504 | spider = Spider( 505 | SpiderSettings(root_url="http://transient.error", 506 | respect_robots_txt=False) 507 | ) 508 | 509 | with 
caplog.at_level(ERROR): 510 | spider.crawl("http://transient.error") 511 | 512 | assert spider.crawl_result == {} 513 | 514 | assert len(responses.calls) == 6 515 | 516 | expected_delays = [1, 2, 3, 4, 5] 517 | actual_delays = [call.args[0] for call in mock_sleep.call_args_list] 518 | assert actual_delays == expected_delays 519 | 520 | assert "Transient HTTP error occurred:" in caplog.text 521 | 522 | 523 | @patch("time.sleep") 524 | @responses.activate 525 | def test_crawl_url_transient_retry_custom_retry_amount(mock_sleep, caplog) -> None: # type: ignore 526 | setup_mock_response( 527 | url="http://transient.error", 528 | body="link", 529 | status=503 530 | ) 531 | 532 | spider = Spider( 533 | SpiderSettings(root_url="http://transient.error", 534 | max_retry_attempts=10, 535 | respect_robots_txt=False) 536 | ) 537 | 538 | with caplog.at_level(ERROR): 539 | spider.crawl("http://transient.error") 540 | 541 | assert spider.crawl_result == {} 542 | 543 | assert len(responses.calls) == 11 544 | 545 | expected_delays = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 546 | actual_delays = [call.args[0] for call in mock_sleep.call_args_list] 547 | assert actual_delays == expected_delays 548 | 549 | assert "Transient HTTP error occurred:" in caplog.text 550 | -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | import responses 3 | 4 | def setup_mock_response(url: str, status: int, body: Union[str, Exception]) -> None: 5 | responses.add( 6 | responses.GET, 7 | url, 8 | body=body, 9 | status=status, 10 | content_type="text/html" 11 | ) 12 | --------------------------------------------------------------------------------