├── .coveragerc
├── .flake8
├── .gitignore
├── .isort.cfg
├── CHANGELOG
├── LICENSE
├── Makefile
├── README.rst
├── docs
│   ├── conf.py
│   ├── index.rst
│   └── requirements.txt
├── pyproject.toml
├── pytest.ini
├── readthedocs.yml
├── ruff.toml
├── tests
│   ├── __init__.py
│   ├── conftest.py
│   ├── test_attributes.py
│   ├── test_convert.py
│   ├── test_exclude.py
│   ├── test_fuzzy.py
│   ├── test_generate.py
│   ├── test_magnet.py
│   ├── test_partial_size.py
│   ├── test_read.py
│   ├── test_reuse.py
│   ├── test_stream.py
│   ├── test_utils.py
│   ├── test_validate.py
│   ├── test_verify_content.py
│   ├── test_verify_filesize.py
│   └── test_write.py
├── torf
│   ├── __init__.py
│   ├── __init__.pyi
│   ├── _errors.py
│   ├── _errors.pyi
│   ├── _generate.py
│   ├── _magnet.py
│   ├── _magnet.pyi
│   ├── _reuse.py
│   ├── _stream.py
│   ├── _stream.pyi
│   ├── _torrent.py
│   ├── _torrent.pyi
│   ├── _utils.py
│   ├── _utils.pyi
│   └── py.typed
└── tox.ini

/.coveragerc:
--------------------------------------------------------------------------------
 1 | [run]
 2 | source = torf
 3 | command_line = -m pytest
 4 | branch = True
 5 | concurrency = multiprocessing
 6 |
 7 | [report]
 8 | show_missing = True
 9 | skip_covered = False
10 |
11 | [html]
12 | directory = /tmp/htmlcov
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
 1 | [flake8]
 2 | ignore =
 3 |     # visually indented line with same indent as next logical line
 4 |     E129,
 5 |     # missing whitespace before ':'
 6 |     E203,
 7 |     # multiple spaces before operator
 8 |     E221,
 9 |     # missing whitespace after ','
10 |     E231,
11 |     # too many leading '#' for block comment
12 |     E266,
13 |     # multiple spaces after keyword
14 |     E271,
15 |     # multiple spaces before keyword
16 |     E272,
17 |     # line too long
18 |     E501,
19 |     # expected 2 blank lines
20 |     E302,
21 |     # too many blank lines
22 |     E303,
23 |     # expected 2 blank lines after class or function definition
24 |     E305,
25 |     # multiple spaces after ','
26 |     E241,
27 |     # multiple statements on one line (colon)
28 |     E701,
29 |     # multiple statements on one line (def)
30 |     E704,
31 |     # line break before binary operator
32 |     W503,
33 |     # line break after binary operator
34 |     W504,
35 |     # invalid escape sequence '\ '
36 |     W605,
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *.egg-info/
 5 | pypirc
 6 |
 7 | # Pytest cache
 8 | .cache
 9 | .pytest_cache
10 | .tox
11 |
12 | # Virtual environment
13 | venv
14 | .python-version
--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
 1 | [settings]
 2 | line_length = 120
 3 | wrap_length = 10
 4 | multi_line_output = 3
 5 | include_trailing_comma = true
--------------------------------------------------------------------------------
/CHANGELOG:
--------------------------------------------------------------------------------
 1 | 2025-02-24 4.3.0
 2 | - Torrent.read_stream() can now read `bytes` and `bytearray` objects in addition to file-like
 3 |   objects.
 4 | - Provide type annotations for the public API.
 5 |
 6 |
 7 | 2024-06-13 4.2.7
 8 | - Exclude tests from the package.
 9 |
10 |
11 | 2024-03-25 4.2.6
12 | - Validate creation date if it exists.
13 |
14 |
15 | 2024-03-09 4.2.5
16 | - Bugfix: Include symbolic links in the torrent's files.
17 | 18 | 19 | 2023-12-04 4.2.4 20 | - Bugfix: Remove hardcoded minimum and maximum piece sizes from 21 | Torrent.calculate_piece_size(). 22 | 23 | 24 | 2023-12-01 4.2.3 25 | - Torrent.calculate_piece_size() now returns 16 MiB for content sizes bigger 26 | than 16 GiB. (Thanks @cannibalChipper) 27 | 28 | 29 | 2023-09-11 4.2.2 30 | - Fix a bug that was introduced in 4.2.0. Instead of forcibly decoding all 31 | encodings as UTF-8, go back to defaulting to undecoded bytes (like before) 32 | and only perform a decode-and-replace-invalid-characters routine on known 33 | strings like ["info"]["name"]. 34 | 35 | 36 | 2023-07-02 4.2.1 37 | - Setting Torrent.piece_size_min/max now also sets Torrent.piece_size if it is 38 | too small/big. 39 | 40 | 41 | 2023-04-16 4.2.0 42 | - When reading torrent files, strings are now always decoded as UTF-8 (except 43 | for ["info"]["pieces"]). This makes it harder to store binary data in 44 | non-standard fields, but it also means ["info"]["name"] is always a unicode 45 | string. 46 | - Allow any "piece length" that is divisible by 16 KiB. 47 | Previously, power of 2 was required. 48 | 49 | 50 | 2023-01-26 4.1.4 51 | - Fix "Too many open files" error when creating a torrent with loads of small 52 | files. 53 | 54 | 55 | 2023-01-09 4.1.3 56 | - Try to reduce memory usage before being out of memory. 57 | - Terminate piece hashing threads if they are idle for more than 58 | 500 milliseconds. 59 | 60 | 61 | 2022-11-24 4.1.2 62 | - Bugfix: Torrent's piece_size_min and piece_size_max arguments are no longer 63 | ignored. 64 | - Torrent.piece_size_min and Torrent.piece_size_max are now properties. They 65 | default to the new class attributes piece_size_min_default and 66 | piece_size_max_default. 67 | - Torrent.calculate_piece_size(): New arguments: min_size, max_size specify 68 | the minimum and maximum piece size. 69 | 70 | 71 | 2022-11-15 4.1.1 72 | - Bugfix: Don't crash if "creation date" is something weird like an empty 73 | string. 74 | 75 | 76 | 2022-11-09 4.1.0 77 | - Torrent now accepts piece_size_min and piece_size_max arguments. 78 | - Bugfix: Don't reuse a torrent that has a smaller piece_size than 79 | piece_size_min. 80 | 81 | 82 | 2022-09-22 4.0.4 83 | - Bugfix: Expect non-standard fields (e.g. "md5sum") in file list when reusing 84 | torrent. 85 | 86 | 87 | 2022-07-02 4.0.3 88 | - Bugfix: Copy file order from reused torrent. 89 | 90 | 91 | 2022-06-19 4.0.2 92 | - Bugfix: Don't reuse a torrent that has a bigger piece_size than 93 | piece_size_max. 94 | 95 | 96 | 2022-05-17 4.0.1 97 | - Bugfix: RecursionError when pickling File objects 98 | 99 | 100 | 2022-05-05 4.0.0 101 | - Torrent.verify(): The "skip_on_error" argument was removed. 102 | - The new TorrentFileStream class lets you operate on a torrent's stream of 103 | concatenated files. 104 | - The new attribute Torrent.location provides the file system path without the 105 | torrent's name. 106 | - The new Torrent.reuse() method copies piece hashes from an existing torrent 107 | file with the same name and file tree. It can also find a matching torrent 108 | in a bunch of directories. 109 | 110 | 111 | 2020-10-29 3.1.3 112 | - Bugfix: Exclude tests from package. 113 | 114 | 115 | 2020-10-25 3.1.2 116 | - Bugfix: Preserve binary values of fields that aren't part of the BitTorrent 117 | specification. 
118 | (Thanks, @ayasechan) 119 | 120 | 121 | 2020-09-26 3.1.1 122 | - Bugfix: Fix poor performance when torrent contains huge number of files 123 | (Thanks, @mon) 124 | 125 | 126 | 2020-08-11 3.1.0 127 | - Bugfix: Gracefully handle empty string for "url-list" and other URL lists in 128 | metainfo 129 | - The new properties Torrent.include_globs and Torrent.include_regexs can be 130 | used to keep files even if they match an exclude pattern. 131 | 132 | 133 | 2020-06-20 3.0.2 134 | - Bugfix: Torrent.validate() and Torrent.read_stream() now raise MetainfoError 135 | if the "info" field is not a dictionary. 136 | - Bugfix: Magnet.from_string() raised ValueError instead of MagnetError if the 137 | "xl" parameter wasn't a number. 138 | - Bugfix: Magnet.from_string() strips whitespace from the beginning and end 139 | before parsing the URI. 140 | - Bugfix: Magnet.kt used commas to separate keywords. (I don't know why.) 141 | - Bugfix: Gracefully handle empty path components in torrent file 142 | (e.g. "foo//bar") 143 | - Torrent.private is now None instead of False if there is no "private" field 144 | in the "info" section of the metainfo. 145 | 146 | 147 | 2020-04-07 3.0.1 148 | - Make things work with Python 3.6. 149 | - Magnet.as_torrent is now a method called Magnet.torrent(). 150 | 151 | 152 | 2020-04-02 3.0.0 153 | - Depend on flatbencode instead of bencoder.pyx 154 | - Bug fixed: Setting the "private" property to False removed the flag from the 155 | metainfo which could potentially change the info hash if a torrent file had 156 | the flag explicitly disabled. 157 | - Bug fixed: Torrent.read() validated if the "validate" argument was False 158 | - Hashing pieces uses multiple threads for better performance. 159 | - Support for the "md5sum" field was dropped. Calculating MD5 hashes besides 160 | the SHA1 hashes is no longer easily possible due to multithreading and it's 161 | unclear to me if/how this field is even useful. 162 | - The new methods Torrent.verify_content() and Torrent.verify_filesize() check 163 | if on-disk data matches a given torrent file. 164 | - The property Torrent.exclude was replaced by Torrent.exclude_globs and 165 | Torrent.exclude_regexs. These return special lists that filter files when 166 | changed. 167 | - Torrent.path is a path-like object. 168 | - Torrent.files, Torrent.filepaths, Torrent.trackers, Torrent.webseeds and 169 | Torrent.httpseeds are mutable lists of path-like objects or URLs that 170 | automatically synchronize with Torrent.metainfo when changed. 171 | - Torrent.filetree uses File objects as leaf nodes. File is a path-like 172 | object that also stores the file size. 173 | - The new class attributes Torrent.piece_size_min and Torrent.piece_size_max 174 | can be used to quickly specify piece size limits. Setting the piece_size 175 | property to an out-of-bounds piece size or returning one with 176 | Torrent.calculate_piece_size() raises PieceSizeError. 177 | - Torrent.validate() is better at finding invalid stuff in the metainfo. 178 | - Exceptions were added and removed. If you don't catch TorfError, make sure 179 | you're expecting the correct exceptions. 180 | - Except for ReadError and WriteError, exceptions no longer have an "errno" 181 | property. 182 | - When setting the Torrent.path property to None, only "pieces" is removed 183 | from the metainfo. "piece length", "pieces", "length" and "files" are kept. 184 | "name" is only changed when a new path is set. 185 | - The license was changed to GPLv3. 
186 | 187 | 188 | 2019-07-01 2.1.0 189 | - Keep piece size smaller for large torrents and use more pieces to 190 | compensate. 191 | - Implement your own piece size calculation of arbitrary complexity by simply 192 | overloading Torrent.calculate_piece_size(). 193 | 194 | 195 | 2019-04-04 2.0.0 196 | - Use proper version number scheme 197 | - Raise PieceSizeError if 'piece_size' is set to a number that isn't a power 198 | of two 199 | 200 | 201 | 2018-06-25 1.5 202 | - New methods read_stream() and write_stream() to import/export a torrent from 203 | any file-like object 204 | 205 | 206 | 2018-06-15 1.4 207 | - New method: calculate_piece_size() 208 | - Piece size is now automatically calculated when path is set instead of 209 | calculating it on demand when requested 210 | - Setting piece size to a non-number now raises ValueError instead of 211 | RuntimeError 212 | - Exclude patterns are now matched against every part of a file's path, not 213 | just the last part (i.e. the file name) 214 | - Setting torrent.path to '.' or '..' now sets the correct name 215 | - Torrent instances are equal (==) if their metainfo is equal 216 | - Torrent instances are hashable 217 | - Torrent instances can be copied with the copy() method or the copy module 218 | from the standard library 219 | 220 | 221 | 2018-04-07 1.3 222 | - Fix 'filepaths' attribute when setting a custom name 223 | 224 | 225 | 2018-02-19 1.2 226 | - Don't leave an empty file when calling write() on an invalid torrent 227 | 228 | 229 | 2018-02-18 1.1 230 | - 'announce' in metainfo is now a single URL instead of the first tier (Marcin 231 | Kurczewski) 232 | 233 | 234 | 2018-02-01 1.0 235 | - Nothing changed except that this is now the final 1.0 version 236 | 237 | 238 | 2018-01-13 1.0rc5 239 | - Fixed a bug where overwriting a torrent file resulted in corrupt torrent 240 | - Added property 'pieces' that returns the number of pieces 241 | - Added property 'filetree' that can be used to create a beautiful tree of a 242 | torrent's content 243 | - Added property 'is_ready' that is True when torrent can be exported 244 | - When reading torrent files, don't read large files all the way to EOF before 245 | failing 246 | - All exceptions now have an 'errno' attribute (see 'errno' module) 247 | 248 | 249 | 2018-01-03 1.0rc4 250 | - Torrent.write() and Torrent.read() take a file path instead of an opened 251 | file object 252 | - Some exception names have changed 253 | - Allow reading arbitrary bencoded data with validation turned off 254 | - Default 'created_by' value is now 'torf/' 255 | 256 | 257 | 2017-12-27 1.0rc3 258 | - Reduce entropy with 'randomize_infohash' enabled (some parsers seem to have 259 | issues with large integers) 260 | 261 | 262 | 2017-12-25 1.0rc2 263 | - Add 'randomize_infohash' as a Torrent argument 264 | - Add 'name' as a Torrent argument 265 | - Call generate() callback again when all pieces are hashed 266 | - Validate 'metainfo' when accessing 'infohash' property 267 | 268 | 269 | 2017-12-21 1.0rc1 270 | Initial release 271 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | VENV_PATH?=venv 2 | PYTHON?=python3 3 | 4 | clean: 5 | find . -name "*.pyc" -delete 6 | find . 
-name "__pycache__" -delete 7 | rm -rf dist build 8 | rm -rf .pytest_cache 9 | rm -rf .tox 10 | rm -rf .coverage .coverage.* 11 | rm -rf "$(VENV_PATH)" *.egg-info 12 | 13 | venv: 14 | "$(PYTHON)" -m venv "$(VENV_PATH)" 15 | "$(VENV_PATH)"/bin/pip install --editable '.[dev]' 16 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | torf 2 | ==== 3 | 4 | torf provides a ``Torrent`` and a ``Magnet`` class. 5 | 6 | `torf-cli `_ and `torf-gui 7 | `_ provide user interfaces for torf. 8 | 9 | This project started as a fork of `dottorrent 10 | `_ but turned into a rewrite. 11 | 12 | Features 13 | -------- 14 | 15 | - Create a ``Torrent`` instance from a path to the torrent's content or by 16 | reading an existing ``.torrent`` file 17 | - High-level access to standard metainfo fields via properties 18 | - Low-level access to arbitrary metainfo fields via ``metainfo`` property 19 | - Optional metainfo validation with helpful error messages 20 | - Generate a `BTIH magnet URI 21 | `_ from a ``.torrent`` file 22 | (the reverse is also possible but the resulting torrent is incomplete due to 23 | the lack of information in magnet URIs) 24 | - Use multiple CPU cores to compute piece hashes 25 | - Randomize the info hash to help with cross-seeding 26 | - Conveniently re-use piece hashes from an existing torrent file 27 | 28 | Example 29 | ------- 30 | 31 | .. code:: python 32 | 33 | from torf import Torrent 34 | t = Torrent(path='path/to/content', 35 | trackers=['https://tracker1.example.org:1234/announce', 36 | 'https://tracker2.example.org:5678/announce'], 37 | comment='This is a comment') 38 | t.private = True 39 | t.generate() 40 | t.write('my.torrent') 41 | 42 | Documentation 43 | ------------- 44 | 45 | Everything should be explained in the docstrings. Read it with ``pydoc3 46 | torf.Torrent`` or ``pydoc3 torf.Magnet``. 47 | 48 | Documentation is also available at `torf.readthedocs.io 49 | `_ or `torf.readthedocs.io/en/latest 50 | `_ for the development version. 51 | 52 | Installation 53 | ------------ 54 | 55 | torf is available on `PyPI `_. 56 | 57 | The latest development version is in the master branch on `GitHub 58 | `_. 59 | 60 | Contributing 61 | ------------ 62 | 63 | I consider this project feature complete, but feel free to request new features 64 | or improvements. Bug reports are always welcome, of course. 65 | 66 | License 67 | ------- 68 | 69 | `GPLv3+ `_ 70 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | master_doc = 'index' 2 | 3 | extensions = [ 4 | 'sphinx.ext.autodoc', 5 | ] 6 | autodoc_member_order = 'bysource' 7 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | `torf `_ Reference 2 | ================================================== 3 | 4 | .. autoclass:: torf.Torrent 5 | :members: 6 | :member-order: bysource 7 | 8 | .. autoclass:: torf.Magnet 9 | :members: 10 | :member-order: bysource 11 | 12 | .. autoclass:: torf.File 13 | :members: 14 | :member-order: bysource 15 | 16 | .. autoclass:: torf.Filepath 17 | :members: 18 | :member-order: bysource 19 | 20 | .. autoclass:: torf.TorrentFileStream 21 | :members: 22 | :member-order: bysource 23 | 24 | .. 
autoexception:: torf.TorfError 25 | :members: 26 | 27 | .. autoexception:: torf.URLError 28 | :members: 29 | 30 | .. autoexception:: torf.PieceSizeError 31 | :members: 32 | 33 | .. autoexception:: torf.MetainfoError 34 | :members: 35 | 36 | .. autoexception:: torf.BdecodeError 37 | :members: 38 | 39 | .. autoexception:: torf.MagnetError 40 | :members: 41 | 42 | .. autoexception:: torf.PathError 43 | :members: 44 | 45 | .. autoexception:: torf.CommonPathError 46 | :members: 47 | 48 | .. autoexception:: torf.VerifyNotDirectoryError 49 | :members: 50 | 51 | .. autoexception:: torf.VerifyIsDirectoryError 52 | :members: 53 | 54 | .. autoexception:: torf.VerifyFileSizeError 55 | :members: 56 | 57 | .. autoexception:: torf.VerifyContentError 58 | :members: 59 | 60 | .. autoexception:: torf.ReadError 61 | :members: 62 | 63 | .. autoexception:: torf.WriteError 64 | :members: 65 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx_rtd_theme 2 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "torf" 3 | description = "Python 3 module for creating and parsing torrent files and magnet URIs" 4 | readme = "README.rst" 5 | license = {text = "GPL-3.0-or-later"} 6 | authors = [ 7 | { name="Random User", email="rndusr@posteo.de" }, 8 | ] 9 | keywords = ["bittorrent", "torrent", "magnet"] 10 | dynamic = ["version"] # Get version from PROJECT/__version__ 11 | classifiers = [ 12 | "Development Status :: 5 - Production/Stable", 13 | "Intended Audience :: Developers", 14 | "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", 15 | "Operating System :: OS Independent", 16 | "Programming Language :: Python :: 3.8", 17 | "Programming Language :: Python :: 3.9", 18 | "Programming Language :: Python :: 3.10", 19 | "Programming Language :: Python :: 3.11", 20 | "Programming Language :: Python :: 3.12", 21 | "Programming Language :: Python :: 3.13", 22 | "Topic :: Software Development :: Libraries", 23 | ] 24 | requires-python = ">=3.8" 25 | dependencies = [ 26 | "flatbencode==0.2.*", 27 | ] 28 | 29 | [project.optional-dependencies] 30 | dev = [ 31 | "pytest", 32 | "pytest-xdist", 33 | "pytest-httpserver", 34 | "pytest-mock", 35 | 36 | "tox", 37 | 38 | "coverage", 39 | "pytest-cov", 40 | 41 | "ruff", 42 | "flake8", 43 | "isort", 44 | "mypy", 45 | ] 46 | 47 | [project.urls] 48 | Repository = "https://github.com/rndusr/torf" 49 | Documentation = "https://torf.readthedocs.io/" 50 | "Bug Tracker" = "https://github.com/rndusr/torf/issues" 51 | Changelog = "https://raw.githubusercontent.com/rndusr/torf/master/CHANGELOG" 52 | 53 | 54 | [build-system] 55 | requires = ["setuptools"] 56 | build-backend = "setuptools.build_meta" 57 | 58 | [tool.setuptools.packages.find] 59 | include = ["torf*"] 60 | 61 | [tool.setuptools.dynamic] 62 | version = {attr = "torf.__version__"} 63 | 64 | [tool.mypy] 65 | strict = true 66 | pretty = true 67 | exclude = [ 68 | "torf/_reuse.py", # Not part of the public API 69 | "torf/_generate.py", # Not part of the public API 70 | "tests/", 71 | "docs/", 72 | ] 73 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = --log-level=DEBUG 
--numprocesses=9 3 | log_format = %(message)s 4 | -------------------------------------------------------------------------------- /readthedocs.yml: -------------------------------------------------------------------------------- 1 | # https://docs.readthedocs.io/en/stable/config-file/v2.html 2 | version: 2 3 | 4 | build: 5 | os: ubuntu-22.04 6 | tools: 7 | python: "3.12" 8 | 9 | sphinx: 10 | builder: html 11 | configuration: docs/conf.py 12 | fail_on_warning: true 13 | 14 | python: 15 | install: 16 | - requirements: docs/requirements.txt 17 | - method: pip 18 | path: . 19 | -------------------------------------------------------------------------------- /ruff.toml: -------------------------------------------------------------------------------- 1 | line-length = 120 2 | 3 | lint.select = [ 4 | "E", # pycodestyle 5 | "F", # pyflakes 6 | "I", # isort 7 | ] 8 | 9 | [lint.per-file-ignores] 10 | "__init__.py" = [ 11 | # imported but unused 12 | "F401", 13 | ] 14 | "tests/*" = [ 15 | # Line too long 16 | "E501", 17 | ] 18 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import itertools 3 | import logging 4 | import os 5 | import random 6 | 7 | import torf 8 | 9 | debug = logging.getLogger('test').debug 10 | 11 | def display_filespecs(filespecs, piece_size): 12 | filecount = len(filespecs) 13 | header = ['.' + ' ' * (((4 * filecount) + (2 * filecount - 1)) + 2 - 1)] 14 | for i in range(8): 15 | header.append(str(i) + ' ' * (piece_size - 1)) 16 | line = (', '.join(f'{fn}:{fs:2d}' for fn,fs in filespecs), 17 | ' - ', 18 | ''.join(fn * fs for fn,fs in filespecs)) 19 | debug(f'\n{"".join(header)}\n{"".join(line)}') 20 | 21 | class fuzzylist(list): 22 | """ 23 | List that is fuzzily equal to other lists 24 | 25 | >>> x = fuzzylist('a', 'b', 'c', maybe=('x', 'y', 'z')) 26 | >>> x 27 | ['a', 'b', 'c'] 28 | >>> x == ['z', 'b', 'a', 'c', 'y'] 29 | True 30 | 31 | Limit the number of optional items: 32 | 33 | >>> x = fuzzylist('a', 'b', 'c', maybe=('x', 'x')) 34 | >>> x == ['a', 'x', 'b', 'x', 'c'] 35 | True 36 | >>> x == ['a', 'x', 'b', 'x', 'c', 'x'] 37 | False 38 | 39 | `max_maybe_items` also allows you to limit the number of optional items: 40 | 41 | >>> x = fuzzylist('a', 'b', 'c', maybe=('x', 'y', 'z'), max_maybe_items={'x':1}) 42 | >>> x == ['a', 'x', 'b', 'z', 'c'] 43 | True 44 | >>> x == ['a', 'x', 'b', 'x', 'c'] 45 | False 46 | 47 | Unlike `set(...) == set(...)`, this doesn't remove duplicate items and 48 | allows unhashable items. 49 | """ 50 | def __init__(self, *args, maybe=(), max_maybe_items={}): 51 | self.maybe = list(maybe) 52 | self.max_maybe_items = dict(max_maybe_items) 53 | super().__init__(args) 54 | 55 | def __eq__(self, other): 56 | if tuple(self) != tuple(other): 57 | # Check if either list contains any disallowed items, accepting 58 | # items from `maybe`. 59 | other_maybe = getattr(other, 'maybe', []) 60 | for item in self: 61 | if item not in other and item not in other_maybe: 62 | return False 63 | self_maybe = self.maybe 64 | for item in other: 65 | if item not in self and item not in self_maybe: 66 | return False 67 | # Check if either list contains an excess of items. 
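            # (An item may occur as often as it occurs in the other list and
            # its `maybe` items combined, or up to its `max_maybe_items` limit
            # (default 1), whichever is greater.)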
68 | other_max = getattr(other, 'max_maybe_items', {}) 69 | for item in itertools.chain(self, self.maybe): 70 | maxcount = max(other_max.get(item, 1), 71 | (other + other_maybe).count(item)) 72 | if self.count(item) > maxcount: 73 | return False 74 | self_max = self.max_maybe_items 75 | for item in itertools.chain(other, other_maybe): 76 | maxcount = max(self_max.get(item, 1), 77 | (self + self_maybe).count(item)) 78 | if other.count(item) > maxcount: 79 | return False 80 | return True 81 | 82 | def __ne__(self, other): 83 | return not self.__eq__(other) 84 | 85 | def __bool__(self): 86 | return len(self) > 0 or len(self.maybe) > 0 87 | 88 | def __add__(self, other): 89 | items = super().__add__(other) 90 | maybe = self.maybe + getattr(other, 'maybe', []) 91 | max_maybe_items = {**self.max_maybe_items, **getattr(other, 'max_maybe_items', {})} 92 | return type(self)(*items, maybe=maybe, max_maybe_items=max_maybe_items) 93 | 94 | def __repr__(self): 95 | s = f'{type(self).__name__}(' 96 | s += '[' + ', '.join(repr(item) for item in super().__iter__()) + ']' 97 | if self.maybe: 98 | s += f', maybe={repr(self.maybe)}' 99 | if self.max_maybe_items: 100 | s += f', max_maybe_items={repr(self.max_maybe_items)}' 101 | return s + ')' 102 | 103 | class fuzzydict(dict): 104 | """ 105 | Dictionary that ignores empty `fuzzylist` values when determining equality, 106 | e.g. fuzzydict(x=fuzzylist()) == {} 107 | """ 108 | def __eq__(self, other): 109 | if super().__eq__(other): 110 | return True 111 | elif not isinstance(other, dict): 112 | return NotImplemented 113 | keys_same = set(self).intersection(other) 114 | for k in keys_same: 115 | if self[k] != other[k]: 116 | return False 117 | keys_diff = set(self).difference(other) 118 | for k in keys_diff: 119 | sv = self.get(k, fuzzylist()) 120 | ov = other.get(k, fuzzylist()) 121 | if sv != ov: 122 | return False 123 | return True 124 | 125 | def __repr__(self): 126 | return f'{type(self).__name__}({super().__repr__()})' 127 | 128 | def ComparableException(exc): 129 | """ 130 | Horrible hack that allows us to compare exceptions comfortably 131 | 132 | `exc1 == exc2` is True if both exceptions have the same type and the same 133 | message. Type checking with issubclass() and isinstance() also works as 134 | expected. 135 | """ 136 | # Make the returned class object an instance of the type of `exc` and the 137 | # returned Comparable* class. 
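    # The metaclass below forwards issubclass() and isinstance() checks so
    # that they succeed for both the original exception class and the
    # generated Comparable* class.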
138 |     class ComparableExceptionMeta(type):
139 |         _cls = type(exc)
140 |
141 |         @classmethod
142 |         def __subclasscheck__(mcls, cls):
143 |             return issubclass(cls, mcls._cls) or issubclass(cls, mcls)
144 |
145 |         @classmethod
146 |         def __instancecheck__(mcls, inst):
147 |             return isinstance(inst, mcls._cls) or isinstance(inst, mcls)
148 |
149 |     # Make subclass of the same name with "Comparable" prepended
150 |     clsname = 'Comparable' + type(exc).__name__
151 |     bases = (type(exc),)
152 |
153 |     def __eq__(self, other, _real_cls=type(exc)):
154 |         return isinstance(other, (type(self), _real_cls)) and str(self) == str(other)
155 |
156 |     def __hash__(self):
157 |         return hash(str(self))
158 |
159 |     attrs = {}
160 |     attrs['__eq__'] = __eq__
161 |     attrs['__hash__'] = __hash__
162 |     cls = ComparableExceptionMeta(clsname, bases, attrs)
163 |     if isinstance(exc, torf.TorfError):
164 |         return cls(*exc.posargs, **exc.kwargs)
165 |     else:
166 |         raise exc
167 |
168 | def random_positions(stream):
169 |     """Return list of 1 to 5 random indexes in `stream`"""
170 |     positions = random.sample(range(len(stream)), k=min(len(stream), 5))
171 |     return sorted(positions[:random.randint(1, len(positions))])
172 |
173 | def change_file_size(filepath, original_size):
174 |     """Randomly change size of `filepath` on disk and return new contents"""
175 |     diff_range = list(range(-original_size, original_size + 1))
176 |     diff_range.remove(0)
177 |     diff = random.choice(diff_range)
178 |     data = bytearray(open(filepath, 'rb').read())
179 |     debug(f' Original data ({len(data)} bytes): {data}')
180 |     if diff > 0:
181 |         # Add `diff` bytes at `pos`
182 |         pos = random.choice(range(original_size + 1))
183 |         data[pos:pos] = b'\xA0' * diff
184 |     elif diff < 0:
185 |         # Remove `abs(diff)` bytes at `pos`
186 |         pos = random.choice(range(original_size - abs(diff) + 1))
187 |         data[pos : pos + abs(diff)] = ()
188 |     with open(filepath, 'wb') as f:
189 |         f.write(data)
190 |         f.truncate()
191 |     assert os.path.getsize(filepath) == original_size + diff
192 |     debug(f' Changed data ({len(data)} bytes): {data}')
193 |     with open(filepath, 'rb') as f:
194 |         return f.read()
195 |
196 | def round_up_to_multiple(n, x):
197 |     """Round `n` up to the next multiple of `x`"""
198 |     return n - n % -x  # n % -x is in (-x, 0], so subtracting it rounds up
199 |
200 | def round_down_to_multiple(n, x):
201 |     """Round `n` down to the previous multiple of `x`"""
202 |     if n % x != 0:
203 |         return round_up_to_multiple(n, x) - x
204 |     else:
205 |         return n
206 |
207 | def file_range(filename, filespecs):
208 |     """Return `filename`'s first and last byte index in stream"""
209 |     pos = 0
210 |     for fn,size in filespecs:
211 |         if fn == filename:
212 |             return pos, pos + size - 1
213 |         pos += size
214 |     raise RuntimeError(f'Could not find {filename} in {filespecs}')
215 |
216 | def file_piece_indexes(filename, filespecs, piece_size, exclusive=False):
217 |     """
218 |     Return list of indexes of pieces that contain bytes from `filename`
219 |
220 |     If `exclusive` is True, don't include pieces that contain bytes from
221 |     multiple files.
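
    For example, with `filespecs=[('a', 3), ('b', 5)]` and `piece_size=4`,
    piece 0 contains bytes from both files while piece 1 contains only 'b':

    >>> file_piece_indexes('b', [('a', 3), ('b', 5)], 4)
    [0, 1]
    >>> file_piece_indexes('b', [('a', 3), ('b', 5)], 4, exclusive=True)
    [1]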
222 | """ 223 | file_beg,file_end = file_range(filename, filespecs) 224 | first_piece_index_pos = round_down_to_multiple(file_beg, piece_size) 225 | piece_indexes = [] 226 | for pos in range(first_piece_index_pos, file_end + 1, piece_size): 227 | if not exclusive or len(pos2files(pos, filespecs, piece_size)) == 1: 228 | piece_indexes.append(pos // piece_size) 229 | return piece_indexes 230 | 231 | def pos2files(pos, filespecs, piece_size, include_file_at_pos=True): 232 | """ 233 | Calculate which piece the byte at `pos` belongs to and return a list of file 234 | names of those files that are covered by that piece. 235 | """ 236 | p = 0 237 | filenames = [] 238 | for filename,filesize in filespecs: 239 | filepos_beg = p 240 | filepos_end = filepos_beg + filesize - 1 241 | first_piece_index = filepos_beg // piece_size 242 | last_piece_index = filepos_end // piece_size 243 | first_piece_index_pos_beg = first_piece_index * piece_size 244 | last_piece_index_pos_end = (last_piece_index + 1) * piece_size - 1 245 | if first_piece_index_pos_beg <= pos <= last_piece_index_pos_end: 246 | filenames.append(filename) 247 | p += filesize 248 | 249 | if not include_file_at_pos: 250 | file_at_pos,_ = pos2file(pos, filespecs, piece_size) 251 | return [f for f in filenames if f != file_at_pos] 252 | else: 253 | return filenames 254 | 255 | def pos2file(pos, filespecs, piece_size): 256 | """Return file name and relative position of `pos` in file""" 257 | p = 0 258 | for filename,filesize in filespecs: 259 | if p <= pos < p + filesize: 260 | return (filename, pos - p) 261 | p += filesize 262 | raise RuntimeError(f'Could not find file at position {pos} in {filespecs}') 263 | 264 | def calc_piece_indexes(filespecs, piece_size, files_missing=(), files_missized=()): 265 | """ 266 | Turn a list of (filename, filesize) tuples into a dictionary that maps file 267 | names to the piece indexes they cover. Pieces that overlap multiple files 268 | belong to the last file they cover. 269 | """ 270 | piece_indexes = collections.defaultdict(lambda: fuzzylist()) 271 | pos = 0 272 | for i, (filename, filesize) in enumerate(filespecs): 273 | # Piece indexes that cover only one file must be reported for that file. 274 | exclusive_file_pis = file_piece_indexes(filename, filespecs, piece_size, exclusive=True) 275 | # Piece indexes that cover multiple files may be reported for any of 276 | # those files. 277 | multiple_file_pis = [ 278 | pi for pi in file_piece_indexes(filename, filespecs, piece_size, exclusive=False) 279 | if pi not in exclusive_file_pis 280 | ] 281 | piece_indexes[filename].extend(exclusive_file_pis) 282 | piece_indexes[filename].maybe.extend(multiple_file_pis) 283 | pos += filesize 284 | 285 | # Remove empty lists 286 | for k in tuple(piece_indexes): 287 | if not piece_indexes[k]: 288 | del piece_indexes[k] 289 | 290 | # For each missing/missized file, the first piece of the file may get two 291 | # calls, one for the "no such file"/"wrong file size" error and one for the 292 | # "corrupt piece" error. 
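    # The extra call is not guaranteed to happen, so the piece index goes into
    # the optional `maybe` list instead of the mandatory part.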
293 | for filepath in itertools.chain(files_missing, files_missized): 294 | filename = os.path.basename(filepath) 295 | file_beg,file_end = file_range(filename, filespecs) 296 | piece_index = file_beg // piece_size 297 | if piece_index not in piece_indexes[filename].maybe: 298 | piece_indexes[filename].maybe.append(piece_index) 299 | 300 | return fuzzydict(piece_indexes) 301 | 302 | def calc_good_pieces(filespecs, piece_size, files_missing, corruption_positions, files_missized): 303 | """ 304 | Same as `calc_piece_indexes`, but exclude corrupt pieces and pieces of 305 | missing or missized files 306 | """ 307 | debug('* Calculating good pieces') 308 | all_piece_indexes = calc_piece_indexes(filespecs, piece_size, files_missing, files_missized) 309 | bad_pis = {corrpos // piece_size for corrpos in corruption_positions} 310 | debug(f' missing files: {files_missing}') 311 | debug(f' missized files: {files_missized}') 312 | debug(f' all piece_indexes: {all_piece_indexes}') 313 | debug(f' corrupt piece_indexes: {bad_pis}') 314 | 315 | # Find pieces that exclusively belong to missing or missized files 316 | for filepath in itertools.chain(files_missing, files_missized): 317 | file_beg,file_end = file_range(os.path.basename(filepath), filespecs) 318 | first_bad_pi = file_beg // piece_size 319 | last_bad_pi = file_end // piece_size 320 | bad_pis.update(range(first_bad_pi, last_bad_pi + 1)) 321 | debug(f' combined bad piece_indexes: {bad_pis}') 322 | 323 | # Remove pieces that are in bad_pis 324 | good_pieces = collections.defaultdict(lambda: fuzzylist()) 325 | for fname,all_pis in all_piece_indexes.items(): 326 | # Maintain mandatory and optional piece_indexes from all_piece_indexes 327 | for pi in itertools.chain(all_pis, all_pis.maybe): 328 | if pi not in bad_pis: 329 | debug(f' filename={fname}: piece_index={pi}: good') 330 | if pi in all_pis.maybe: 331 | good_pieces[fname].maybe.append(pi) 332 | else: 333 | good_pieces[fname].append(pi) 334 | else: 335 | debug(f' filename={fname}: piece_index={pi}: bad') 336 | 337 | good_pieces = fuzzydict(good_pieces) 338 | debug(f' corruptions and missing/missized files removed: {good_pieces}') 339 | return good_pieces 340 | 341 | def skip_good_pieces(good_pieces, filespecs, piece_size, corruption_positions): 342 | """ 343 | For each file in `good_pieces`, remove piece_indexes between the first 344 | corruption and the end of the file 345 | """ 346 | debug('* Skipping good pieces after corruptions') 347 | # Find out which piece_indexes should be skipped 348 | skipped_pis = set() 349 | for corrpos in sorted(corruption_positions): 350 | corr_pi = corrpos // piece_size 351 | affected_files = pos2files(corrpos, filespecs, piece_size) 352 | debug(f' corruption at position {corrpos}, piece_index {corr_pi}: {affected_files}') 353 | for file in affected_files: 354 | file_pis_exclusive = file_piece_indexes(file, filespecs, piece_size, exclusive=True) 355 | debug(f' {file}: piece_indexes exclusive: {file_pis_exclusive}') 356 | file_pis = file_piece_indexes(file, filespecs, piece_size, exclusive=False) 357 | debug(f' piece_indexes non-exclusive: {file_pis}') 358 | try: 359 | first_corr_index_in_file = file_pis.index(corr_pi) 360 | except ValueError: 361 | # Skip all pieces in `file` that don't contain bytes from other files 362 | debug(f' piece_index {corr_pi} is not part of {file}: {file_pis_exclusive}') 363 | skipped_pis.update(file_pis_exclusive) 364 | else: 365 | # Skip all pieces after the first corrupted piece in `file` 366 | skip_pis = 
file_pis[first_corr_index_in_file + 1:] 367 | debug(f' skipping piece_indexes after corruption: {skip_pis}') 368 | skipped_pis.update(skip_pis) 369 | 370 | # Make skipped piece_indexes optional while unskipped piece_indexes stay 371 | # mandatory. 372 | debug(f' skipping piece_indexes: {skipped_pis}') 373 | good_pieces_skipped = collections.defaultdict(lambda: fuzzylist()) 374 | for fname,pis in good_pieces.items(): 375 | for pi in pis: 376 | if pi in skipped_pis: 377 | good_pieces_skipped[fname].maybe.append(pi) 378 | else: 379 | good_pieces_skipped[fname].append(pi) 380 | return fuzzydict(good_pieces_skipped) 381 | 382 | def calc_corruptions(filespecs, piece_size, corruption_positions): 383 | """Map file names to (piece_index, exception) tuples""" 384 | exceptions = [] 385 | reported = set() 386 | for corrpos in sorted(corruption_positions): 387 | corr_pi = corrpos // piece_size 388 | if corr_pi not in reported: 389 | filepath, _ = pos2file(corrpos, filespecs, piece_size) 390 | exc = ComparableException(torf.VerifyContentError(filepath, corr_pi, piece_size, filespecs)) 391 | exceptions.append(exc) 392 | reported.add(corr_pi) 393 | return fuzzylist(*exceptions) 394 | 395 | def skip_corruptions(all_corruptions, filespecs, piece_size, corruption_positions, files_missing, files_missized): 396 | """Make every non-first corruption optional""" 397 | debug(f'Skipping corruptions: {all_corruptions}') 398 | pis_seen = set() 399 | files_seen = set() 400 | corruptions = fuzzylist() 401 | files_autoskipped = set(str(f) for f in itertools.chain(files_missing, files_missized)) 402 | debug(f' missing or missized: {files_autoskipped}') 403 | for exc in all_corruptions: 404 | # Corruptions for files we haven't seen yet must be reported 405 | if any(f not in files_seen and f not in files_autoskipped 406 | for f in exc.files): 407 | debug(f' mandatory: {exc}') 408 | files_seen.update(exc.files) 409 | pis_seen.add(exc.piece_index) 410 | corruptions.append(exc) 411 | # Corruptions for files we already have seen may still be reported 412 | # because skipping is racy and it's impossible to predict how many 413 | # pieces are processed before the skip manifests. 414 | else: 415 | debug(f' optional: {exc}') 416 | corruptions.maybe.append(exc) 417 | pis_seen.add(exc.piece_index) 418 | 419 | # Because we fake skipped files, their last piece is reported as corrupt if 420 | # it contains bytes from the next file even if there is no corruption in the 421 | # skipped file's last piece. But this is not guaranteed because it's 422 | # possible the corrupt file is fully processed before its corruption is 423 | # noticed. 
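    # Therefore, for every corruption, add an optional error for the last
    # piece of the last affected file.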
424 |     for corrpos in corruption_positions:
425 |         # Find all files that are affected by the corruption
426 |         affected_files = pos2files(corrpos, filespecs, piece_size)
427 |         debug(f' affected_files: {affected_files}')
428 |         # Find piece_index of the end of the last affected file
429 |         _,file_end = file_range(affected_files[-1], filespecs)
430 |         piece_index = file_end // piece_size
431 |         debug(f' {affected_files[-1]} ends at piece_index {piece_index}')
432 |         # Add optional exception for that piece
433 |         exc = ComparableException(torf.VerifyContentError(piece_index, piece_size, filespecs))
434 |         if exc not in itertools.chain(corruptions, corruptions.maybe):
435 |             debug(f'Adding possible exception for last affected file {affected_files[-1]}: {exc}')
436 |             corruptions.maybe.append(exc)
437 |
438 |     return corruptions
439 |
440 | def calc_pieces_done(filespecs_abspath, piece_size, files_missing=(), files_missized=()):
441 |     debug('* Calculating pieces_done')
442 |     # The callback gets the number of verified pieces (pieces_done). This
443 |     # function calculates the expected values for that argument.
444 |     #
445 |     # It's not as simple as range(1, pieces_total + 1). For example, if a
446 |     # file is missing, we get the same pieces_done value two times, once for "No
447 |     # such file" and maybe again for "Corrupt piece" if the piece contains parts
448 |     # of another file.
449 |
450 |     # Every pieces_done value is reported at least once
451 |     total_size = sum(filesize for _,filesize in filespecs_abspath)
452 |     pieces_done_list = list((pi // piece_size) + 1
453 |                             for pi in range(0, total_size, piece_size))
454 |     debug(f' progress reports: {pieces_done_list}')
455 |     # List of pieces_done values that may appear multiple times
456 |     maybes = set()
457 |     # Map pieces_done values to the number of times they may appear
458 |     max_maybe_items = collections.defaultdict(lambda: 1)
459 |
460 |     # Missing or missized files are reported in addition to progress reports
461 |     files_missing = {str(filepath) for filepath in files_missing}
462 |     debug(f' files_missing: {files_missing}')
463 |     files_missized = {str(filepath) for filepath in files_missized}
464 |     debug(f' files_missized: {files_missized}')
465 |     for filepath in files_missing.union(files_missized):
466 |         # Because we're multithreaded, we can't expect the missing/missized file
467 |         # to be reported at its first piece. We can't predict at all when the
468 |         # error is reported. The only thing we can safely say is that for each
469 |         # missing/missized file, every pieces_done value *may* occur one additional time.
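        # Allow each pieces_done value to occur once more per missing/missized
        # file.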
470 | for pieces_done_value in pieces_done_list: 471 | maybes.add(pieces_done_value) 472 | max_maybe_items[pieces_done_value] += 1 473 | 474 | fuzzy_pieces_done_list = fuzzylist(*pieces_done_list, 475 | maybe=sorted(maybes), 476 | max_maybe_items=max_maybe_items) 477 | return fuzzy_pieces_done_list 478 | -------------------------------------------------------------------------------- /tests/test_convert.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from datetime import datetime 3 | 4 | import pytest 5 | 6 | import torf 7 | 8 | 9 | def test_valid_metainfo(): 10 | t = torf.Torrent(created_by=None) 11 | now = datetime.now() 12 | t.metainfo['foo'] = now 13 | t.metainfo['baz'] = {'one': True, 'two': 2.34, 14 | 'bam': ['x', 'y', ('z',False)]} 15 | 16 | exp = OrderedDict([(b'baz', OrderedDict([(b'bam', [b'x', b'y', [b'z', 0]]), 17 | (b'one', 1), 18 | (b'two', 2)])), 19 | (b'foo', int(now.timestamp())), 20 | (b'info', OrderedDict())]) 21 | 22 | assert t.convert() == exp 23 | 24 | 25 | def test_invalid_metainfo(): 26 | t = torf.Torrent() 27 | 28 | t.metainfo['invalid'] = lambda foo: 'bar' 29 | with pytest.raises(torf.MetainfoError) as excinfo: 30 | t.convert() 31 | assert excinfo.match("Invalid value: .*lambda") 32 | 33 | t.metainfo['invalid'] = {'arf': int} 34 | with pytest.raises(torf.MetainfoError) as excinfo: 35 | t.convert() 36 | assert excinfo.match("Invalid value: ") 37 | 38 | t.metainfo['invalid'] = [3, ['a', 'b', {str: 'c'}], 4, 5] 39 | with pytest.raises(torf.MetainfoError) as excinfo: 40 | t.convert() 41 | assert excinfo.match("Invalid key: ") 42 | 43 | t.metainfo['invalid'] = {'x': [3, ['a', 'b', {Exception, 'c'}], 4, 5]} 44 | with pytest.raises(torf.MetainfoError) as excinfo: 45 | t.convert() 46 | assert excinfo.match("Invalid value: ") 47 | 48 | 49 | def test_metainfo_sort_order(create_torrent): 50 | torrent = create_torrent() 51 | md_conv = torrent.convert() 52 | exp_keys = sorted(bytes(key, encoding='utf-8', errors='replace') 53 | for key in torrent.metainfo) 54 | assert list(md_conv) == exp_keys 55 | 56 | exp_info_keys = sorted(bytes(key, encoding='utf-8', errors='replace') 57 | for key in torrent.metainfo['info']) 58 | assert list(md_conv[b'info']) == exp_info_keys 59 | -------------------------------------------------------------------------------- /tests/test_exclude.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | 6 | @pytest.fixture 7 | def content(tmp_path): 8 | content = tmp_path / 'content' ; content.mkdir() # noqa: E702 9 | for i in range(1, 5): 10 | ext = 'jpg' if i % 2 == 0 else 'txt' 11 | (content / f'file{i}.{ext}').write_text('') 12 | subdir = content / 'subdir' ; subdir.mkdir() # noqa: E702 13 | 14 | for i in range(1, 4): 15 | ext = 'jpg' if i % 2 == 0 else 'pdf' 16 | (subdir / f'file{i}.{ext}').write_text('') 17 | return content 18 | 19 | def test_exclude_when_path_is_None(create_torrent): 20 | torrent = create_torrent() 21 | torrent.metainfo['info']['files'] = [{'length': 6, 'path': ['file1.txt']}, 22 | {'length': 6, 'path': ['file2.jpg']}, 23 | {'length': 6, 'path': ['file3.txt']}] 24 | torrent.path = None 25 | torrent.exclude_globs.append('*.jpg') 26 | assert torrent.metainfo['info']['files'] == [{'length': 6, 'path': ['file1.txt']}, 27 | {'length': 6, 'path': ['file3.txt']}] 28 | torrent.exclude_regexs.append('file3') 29 | assert torrent.metainfo['info']['files'] == [{'length': 6, 'path': 
['file1.txt']}] 30 | assert torrent.path is None 31 | 32 | def test_exclude_with_singlefile_torrent_and_existing_path(create_torrent, content): 33 | torrent = create_torrent(path=content / 'file1.txt') 34 | assert torrent.metainfo['info']['name'] == 'file1.txt' 35 | assert torrent.metainfo['info']['length'] == 6 36 | torrent.exclude_globs.append('*.txt') 37 | assert torrent.metainfo['info']['name'] == 'file1.txt' 38 | assert 'length' not in torrent.metainfo['info'] 39 | 40 | def test_exclude_with_singlefile_torrent_and_nonexisting_path(create_torrent): 41 | torrent = create_torrent() 42 | torrent.metainfo['info']['name'] = 'foo.txt' 43 | torrent.metainfo['info']['length'] = 123 44 | torrent.exclude_regexs.append(r'fo+\.txt') 45 | assert torrent.metainfo['info']['name'] == 'foo.txt' 46 | assert 'length' not in torrent.metainfo['info'] 47 | 48 | def test_exclude_with_multifile_torrent_and_existing_path(create_torrent, content): 49 | torrent = create_torrent(path=content) 50 | assert torrent.metainfo['info']['files'] == [{'length': 6, 'path': ['file1.txt']}, 51 | {'length': 6, 'path': ['file2.jpg']}, 52 | {'length': 6, 'path': ['file3.txt']}, 53 | {'length': 6, 'path': ['file4.jpg']}, 54 | {'length': 6, 'path': ['subdir', 'file1.pdf']}, 55 | {'length': 6, 'path': ['subdir', 'file2.jpg']}, 56 | {'length': 6, 'path': ['subdir', 'file3.pdf']}] 57 | torrent.exclude_regexs.extend((r'.*1\....$', rf'^{torrent.name}/subdir/.*\.pdf$')) 58 | assert torrent.metainfo['info']['files'] == [{'length': 6, 'path': ['file2.jpg']}, 59 | {'length': 6, 'path': ['file3.txt']}, 60 | {'length': 6, 'path': ['file4.jpg']}, 61 | {'length': 6, 'path': ['subdir', 'file2.jpg']}] 62 | 63 | def test_exclude_with_multifile_torrent_and_nonexisting_path(create_torrent): 64 | torrent = create_torrent() 65 | torrent.metainfo['info']['name'] = 'content' 66 | torrent.metainfo['info']['files'] = [{'length': 6, 'path': ['file1.txt']}, 67 | {'length': 6, 'path': ['file2.jpg']}, 68 | {'length': 6, 'path': ['file3.txt']}, 69 | {'length': 6, 'path': ['subdir', 'file1.pdf']}, 70 | {'length': 6, 'path': ['subdir', 'file2.jpg']}, 71 | {'length': 6, 'path': ['subdir', 'file3.pdf']}] 72 | torrent.exclude_globs.extend(('*.jpg', '*/subdir/*3.*')) 73 | assert torrent.metainfo['info']['files'] == [{'length': 6, 'path': ['file1.txt']}, 74 | {'length': 6, 'path': ['file3.txt']}, 75 | {'length': 6, 'path': ['subdir', 'file1.pdf']}] 76 | 77 | def test_exclude_globs_can_be_set(create_torrent, content): 78 | torrent = create_torrent(path=content) 79 | torrent.exclude_globs = (f'*{os.sep}file2.*',) 80 | assert torrent.metainfo['info']['files'] == [{'length': 6, 'path': ['file1.txt']}, 81 | {'length': 6, 'path': ['file3.txt']}, 82 | {'length': 6, 'path': ['file4.jpg']}, 83 | {'length': 6, 'path': ['subdir', 'file1.pdf']}, 84 | {'length': 6, 'path': ['subdir', 'file3.pdf']}] 85 | 86 | def test_exclude_regexs_can_be_set(create_torrent, content): 87 | torrent = create_torrent(path=content) 88 | torrent.exclude_regexs = (f'{os.sep}subdir{os.sep}',) 89 | assert torrent.metainfo['info']['files'] == [{'length': 6, 'path': ['file1.txt']}, 90 | {'length': 6, 'path': ['file2.jpg']}, 91 | {'length': 6, 'path': ['file3.txt']}, 92 | {'length': 6, 'path': ['file4.jpg']}] 93 | 94 | def test_exclude_globs_and_exclude_regexs_are_combined(create_torrent, content): 95 | torrent = create_torrent(path=content) 96 | torrent.exclude_globs = ('*.jpg',) 97 | torrent.exclude_regexs = ('txt$',) 98 | assert torrent.metainfo['info']['files'] == [{'length': 6, 'path': ['subdir', 
'file1.pdf']}, 99 | {'length': 6, 'path': ['subdir', 'file3.pdf']}] 100 | 101 | def test_more_exclude_globs_tests(create_torrent, tmp_path): 102 | (tmp_path / 'content' / 'foo' / 'bar').mkdir(parents=True) 103 | (tmp_path / 'content' / 'bar' / 'baz').mkdir(parents=True) 104 | (tmp_path / 'content' / 'foo' / 'file_bar').write_text('data') 105 | (tmp_path / 'content' / 'foo' / 'bar' / 'file2').write_text('data') 106 | (tmp_path / 'content' / 'bar' / 'file3').write_text('data') 107 | (tmp_path / 'content' / 'bar' / 'baz' / 'file4').write_text('data') 108 | 109 | torrent = create_torrent(path=tmp_path / 'content') 110 | assert torrent.metainfo['info']['name'] == 'content' 111 | assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['bar', 'baz', 'file4']}, 112 | {'length': 4, 'path': ['bar', 'file3']}, 113 | {'length': 4, 'path': ['foo', 'bar', 'file2']}, 114 | {'length': 4, 'path': ['foo', 'file_bar']}] 115 | torrent.exclude_globs = ('*oo/*',) 116 | assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['bar', 'baz', 'file4']}, 117 | {'length': 4, 'path': ['bar', 'file3']}] 118 | torrent.exclude_globs = ('*/ba*',) 119 | assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['foo', 'file_bar']}] 120 | torrent.exclude_globs = ('*baz*',) 121 | assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['bar', 'file3']}, 122 | {'length': 4, 'path': ['foo', 'bar', 'file2']}, 123 | {'length': 4, 'path': ['foo', 'file_bar']}] 124 | torrent.exclude_globs = ('*/file[23]',) 125 | assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['bar', 'baz', 'file4']}, 126 | {'length': 4, 'path': ['foo', 'file_bar']}] 127 | torrent.exclude_globs = ('*Z*',) 128 | assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['bar', 'file3']}, 129 | {'length': 4, 'path': ['foo', 'bar', 'file2']}, 130 | {'length': 4, 'path': ['foo', 'file_bar']}] 131 | 132 | def test_more_exclude_regexs_tests(create_torrent, tmp_path): 133 | (tmp_path / 'content' / 'foo' / 'bar').mkdir(parents=True) 134 | (tmp_path / 'content' / 'bar' / 'baz').mkdir(parents=True) 135 | (tmp_path / 'content' / 'foo' / 'file_bar').write_text('data') 136 | (tmp_path / 'content' / 'foo' / 'bar' / 'file2').write_text('data') 137 | (tmp_path / 'content' / 'bar' / 'file3').write_text('data') 138 | (tmp_path / 'content' / 'bar' / 'baz' / 'file4').write_text('data') 139 | 140 | torrent = create_torrent(path=tmp_path / 'content') 141 | assert torrent.metainfo['info']['name'] == 'content' 142 | assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['bar', 'baz', 'file4']}, 143 | {'length': 4, 'path': ['bar', 'file3']}, 144 | {'length': 4, 'path': ['foo', 'bar', 'file2']}, 145 | {'length': 4, 'path': ['foo', 'file_bar']}] 146 | torrent.exclude_regexs = ('^content/foo',) 147 | assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['bar', 'baz', 'file4']}, 148 | {'length': 4, 'path': ['bar', 'file3']}] 149 | torrent.exclude_regexs = ('.*(?:_bar|2)$',) 150 | assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['bar', 'baz', 'file4']}, 151 | {'length': 4, 'path': ['bar', 'file3']}] 152 | 153 | def test_include_globs_take_precedence(create_torrent, tmp_path): 154 | (tmp_path / 'content' / 'foo' / 'bar').mkdir(parents=True) 155 | (tmp_path / 'content' / 'bar' / 'baz').mkdir(parents=True) 156 | (tmp_path / 'content' / 'foo' / 'file_bar').write_text('data') 157 | (tmp_path / 'content' / 'foo' / 'bar' / 'file2').write_text('data') 158 | (tmp_path / 'content' / 'bar' 
/ 'file3').write_text('data') 159 | (tmp_path / 'content' / 'bar' / 'baz' / 'file4').write_text('data') 160 | 161 | torrent = create_torrent(path=tmp_path / 'content') 162 | assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['bar', 'baz', 'file4']}, 163 | {'length': 4, 'path': ['bar', 'file3']}, 164 | {'length': 4, 'path': ['foo', 'bar', 'file2']}, 165 | {'length': 4, 'path': ['foo', 'file_bar']}] 166 | torrent.exclude_globs = ('*foo*',) 167 | torrent.include_globs = ('*foo/*/file?',) 168 | assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['bar', 'baz', 'file4']}, 169 | {'length': 4, 'path': ['bar', 'file3']}, 170 | {'length': 4, 'path': ['foo', 'bar', 'file2']}] 171 | 172 | def test_include_regexs_take_precedence(create_torrent, tmp_path): 173 | (tmp_path / 'content' / 'foo' / 'bar').mkdir(parents=True) 174 | (tmp_path / 'content' / 'bar' / 'baz').mkdir(parents=True) 175 | (tmp_path / 'content' / 'foo' / 'file_bar').write_text('data') 176 | (tmp_path / 'content' / 'foo' / 'bar' / 'file2').write_text('data') 177 | (tmp_path / 'content' / 'bar' / 'file3').write_text('data') 178 | (tmp_path / 'content' / 'bar' / 'baz' / 'file4').write_text('data') 179 | 180 | torrent = create_torrent(path=tmp_path / 'content') 181 | assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['bar', 'baz', 'file4']}, 182 | {'length': 4, 'path': ['bar', 'file3']}, 183 | {'length': 4, 'path': ['foo', 'bar', 'file2']}, 184 | {'length': 4, 'path': ['foo', 'file_bar']}] 185 | torrent.exclude_regexs = ('file.$',) 186 | torrent.include_regexs = ('file[23]',) 187 | assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['bar', 'file3']}, 188 | {'length': 4, 'path': ['foo', 'bar', 'file2']}, 189 | {'length': 4, 'path': ['foo', 'file_bar']}] 190 | -------------------------------------------------------------------------------- /tests/test_fuzzy.py: -------------------------------------------------------------------------------- 1 | from . 
import fuzzydict, fuzzylist 2 | 3 | 4 | def test_fuzzylist(): 5 | x = fuzzylist('a', 'b', 'c', maybe=('x', 'y', 'z'), max_maybe_items={'x':1}) 6 | assert x != ['a', 'b'] 7 | assert not x == ['a', 'b'] 8 | assert x == ['a', 'c', 'b'] 9 | assert not x != ['a', 'c', 'b'] 10 | assert x == ['a', 'x', 'c', 'y', 'b'] 11 | assert not x != ['a', 'x', 'c', 'y', 'b'] 12 | assert x == ['a', 'x', 'b', 'z', 'c', 'y'] 13 | assert not x != ['a', 'x', 'b', 'z', 'c', 'y'] 14 | assert x != ['a', 'l', 'b', 'z', 'c', 'y'] 15 | assert not x == ['a', 'l', 'b', 'z', 'c', 'y'] 16 | assert x != ['x', 'b', 'x', 'a', 'c', 'y'] 17 | assert not x == ['x', 'b', 'x', 'a', 'c', 'y'] 18 | assert fuzzylist(0) == fuzzylist(maybe=(0,)) 19 | assert fuzzylist(maybe=(0,)) == fuzzylist(0) 20 | assert fuzzylist(0) != fuzzylist(maybe=(1,)) 21 | assert fuzzylist(maybe=(1,)) != fuzzylist(0) 22 | assert [1, 1, 2, 3] != fuzzylist(1, 2, 3) 23 | assert fuzzylist(1, 2, 3) != [1, 1, 2, 3] 24 | assert fuzzylist(0, 0, 1) == fuzzylist(0, 1, maybe=[0]) 25 | assert fuzzylist(0, 1, maybe=[0]) == fuzzylist(0, 0, 1) 26 | 27 | def test_fuzzydict(): 28 | assert fuzzydict(a='foo', b=fuzzylist(maybe=(1, 2, 3))) == {'a': 'foo'} 29 | assert fuzzydict(a='foo', b=fuzzylist(maybe=(1, 2, 3))) == {'a': 'foo', 'b': []} 30 | assert fuzzydict(a='foo', b=fuzzylist(maybe=(1, 2, 3))) != {'a': 'foo', 'b': ['bar']} 31 | assert fuzzydict(a='foo', b=fuzzylist(maybe=(1, 2, 3))) != {'b': []} 32 | assert fuzzydict(a='foo', b=fuzzylist(maybe=(1, 2, 3))) != {} 33 | assert fuzzydict(b=fuzzylist(maybe=(1, 2, 3))) == {} 34 | assert fuzzydict(b=fuzzylist(maybe=(1, 2, 3))) == {'x': fuzzylist(maybe=(4, 5, 6))} 35 | -------------------------------------------------------------------------------- /tests/test_generate.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import os 3 | from collections import defaultdict 4 | from pathlib import Path 5 | from unittest import mock 6 | 7 | import pytest 8 | 9 | import torf 10 | 11 | from . 
import * # noqa: F403 12 | 13 | 14 | def test_no_path(): 15 | t = torf.Torrent() 16 | with pytest.raises(RuntimeError) as e: 17 | t.generate() 18 | assert str(e.value) == 'generate() called with no path specified' 19 | 20 | 21 | def test_with_empty_file(create_file): 22 | # Create content so we can set path 23 | content_path = create_file('file.jpg', '') 24 | t = torf.Torrent(content_path) 25 | content_path.write_text('') 26 | with pytest.raises(torf.PathError) as e: 27 | t.generate() 28 | assert str(e.value) == f'{t.path}: Empty or all files excluded' 29 | 30 | 31 | def test_with_empty_directory(create_dir): 32 | # Create content so we can set path 33 | content_path = create_dir('empty', ('a file', '')) 34 | t = torf.Torrent(content_path) 35 | (content_path / 'a file').unlink() 36 | with pytest.raises(torf.ReadError) as e: 37 | t.generate() 38 | assert str(e.value) == f'{content_path / "a file"}: No such file or directory' 39 | 40 | 41 | def test_nonexisting_path(create_file): 42 | content_path = create_file('file.jpg', '') 43 | t = torf.Torrent(content_path) 44 | content_path.unlink() 45 | with pytest.raises(torf.ReadError) as e: 46 | t.generate() 47 | assert str(e.value) == f'{content_path}: No such file or directory' 48 | 49 | 50 | def test_with_all_files_excluded(create_dir): 51 | # Create content so we can set path 52 | content_path = create_dir('content', 53 | ('a.jpg', ''), 54 | ('b.jpg', ''), 55 | ('c.jpg', '')) 56 | t = torf.Torrent(content_path, exclude_globs=['*.jpg']) 57 | with pytest.raises(torf.PathError) as e: 58 | t.generate() 59 | assert str(e.value) == f'{t.path}: Empty or all files excluded' 60 | 61 | 62 | def test_unreadable_basedir_in_multifile_torrent(create_dir): 63 | content_path = create_dir('content', 64 | ('a.jpg', ''), 65 | ('b.jpg', ''), 66 | ('c.jpg', '')) 67 | t = torf.Torrent(content_path) 68 | old_mode = os.stat(content_path).st_mode 69 | try: 70 | os.chmod(content_path, mode=0o222) 71 | with pytest.raises(torf.ReadError) as e: 72 | t.generate() 73 | assert str(e.value) == f'{content_path / "a.jpg"}: Permission denied' 74 | finally: 75 | os.chmod(content_path, mode=old_mode) 76 | 77 | 78 | def test_unreadable_file_in_multifile_torrent(create_dir): 79 | content_path = create_dir('content', 80 | ('a.jpg', ''), 81 | ('b.jpg', ''), 82 | ('c.jpg', '')) 83 | t = torf.Torrent(content_path) 84 | old_mode = os.stat(content_path).st_mode 85 | try: 86 | os.chmod(content_path / 'b.jpg', mode=0o222) 87 | with pytest.raises(torf.ReadError) as e: 88 | t.generate() 89 | assert str(e.value) == f'{content_path / "b.jpg"}: Permission denied' 90 | finally: 91 | os.chmod(content_path, mode=old_mode) 92 | 93 | 94 | def test_metainfo_with_singlefile_torrent(create_file, random_seed): 95 | with random_seed(0): 96 | content_path = create_file('file.jpg', torf.Torrent.piece_size_min_default * 10.123) 97 | # exp_* values come from these commands: 98 | # $ mktorrent -l 15 /tmp/pytest-of-*/pytest-current/test_metainfo_with_singlefile_current/file.jpg 99 | # $ btcheck -i file.jpg.torrent -n | grep Hash 100 | # $ python3 -c "from flatbencode import decode; print(decode(open('file.jpg.torrent', 'rb').read())[b'info'][b'pieces'])" 101 | exp_infohash = 'e7e02c57df57f30f5e66a69bfa210e9c61a5a8f6' 102 | exp_pieces = (b"<\x9c7\x80\xa5\xf6-\xb7)\xd0A\x1d\xb5\x1b\xacw\x10\x91\x9c\xe8\xb4\x16" 103 | b"\x00bg\xbc`\xc5\xc2\xf86\x88\xb2~\xd6E\xeeZ\xb0d\xcd\x9ek(\xc746G\x17" 104 | b"\xab\xa6'/D\xba\xd9\xf0d\x81\xe3\xf5C\x82JQ\xde\xb5\x17w\xda\xbc\xb7Ek" 105 | 
b"\nHU\xcd\x1f\xd6C\xcb!\xb0CW\\\xc4\x8d\xad9\xbe\xb4V\x8a7\xdf\x9a\xabV" 106 | b"\xa6\xe5\xee3\x81\xe5I\xa7\xfe#\xcb\xea\xc3\x8e\xc4\x00\x91\xdb\x00\xaf") 107 | _check_metainfo(content_path, 2**15, exp_infohash, exp_pieces) 108 | 109 | def test_metainfo_with_multifile_torrent(create_dir, random_seed): 110 | with random_seed(0): 111 | content_path = create_dir('content', 112 | ('a.jpg', torf.Torrent.piece_size_min_default * 1.123), 113 | ('b.jpg', torf.Torrent.piece_size_min_default * 2.456), 114 | ('c.jpg', torf.Torrent.piece_size_min_default * 3.789)) 115 | # exp_* values come from these commands: 116 | # $ mktorrent -l 15 /tmp/pytest-of-*/pytest-current/test_metainfo_with_multifile_tcurrent/content/ 117 | # $ btcheck -i content.torrent -n | grep Hash 118 | # $ python3 -c "from flatbencode import decode; print(decode(open('content.torrent', 'rb').read())[b'info'][b'pieces'])" 119 | exp_infohash = 'b36eeca9231867ebf650ed82a54216617408d2ce' 120 | exp_pieces = (b'\x84{\x9eM\x16\xa9\xe9\xf7V\xb8\xb3\xc2\xb8Q\xfaw\xea \xb9\xdc' 121 | b'\xf2\xc0\x0e\rXE\x85g\xe6k\x1dt\xa6\xca\x7f/\xb5)A"5!\xb9\xda\xe2' 122 | b'"\x15c^\x0e\xf7\x91|\x06V\xdc}\xd9\xb0<./\x0fBe\xcb\xd8*\xae\xd1"' 123 | b'\x05\n\x1b\xf3\x18\x1c\xd7u\xe3') 124 | _check_metainfo(content_path, 2**15, exp_infohash, exp_pieces) 125 | 126 | def _check_metainfo(content_path, piece_size, exp_infohash, exp_pieces): 127 | exp_hashes = tuple(exp_pieces[i : i + 20] 128 | for i in range(0, len(exp_pieces), 20)) 129 | t = torf.Torrent(content_path) 130 | t.piece_size = piece_size 131 | t.generate() 132 | assert t.infohash == exp_infohash 133 | assert t.infohash_base32 == base64.b32encode(base64.b16decode(exp_infohash.upper())) 134 | assert t.metainfo['info']['pieces'] == exp_pieces 135 | assert t.hashes == exp_hashes 136 | assert t.piece_size == piece_size 137 | assert t.metainfo['info']['piece length'] == piece_size 138 | 139 | 140 | def test_callback_is_called_with_correct_arguments(filespecs, piece_size, create_file, create_dir, forced_piece_size): 141 | display_filespecs(filespecs, piece_size) # noqa: F405 142 | if len(filespecs) == 1: 143 | content_path = create_file(filespecs[0][0], filespecs[0][1]) 144 | else: 145 | content_path = create_dir('content', *filespecs) 146 | 147 | exp_pieces_done = 1 148 | seen_filepaths = defaultdict(lambda: 0) 149 | 150 | def assert_cb_args(torrent, filepath, pieces_done, pieces_total): 151 | nonlocal exp_pieces_done 152 | assert torrent is t 153 | assert pieces_done == exp_pieces_done 154 | exp_pieces_done += 1 155 | assert isinstance(filepath, os.PathLike) 156 | seen_filepaths[filepath.name] += 1 157 | assert pieces_total == t.pieces 158 | 159 | with forced_piece_size(piece_size): 160 | t = torf.Torrent(content_path) 161 | cb = mock.Mock(side_effect=assert_cb_args) 162 | success = t.generate(callback=cb, interval=0) 163 | 164 | assert success is True 165 | assert t.piece_size == piece_size 166 | assert cb.call_count == t.pieces 167 | 168 | exp_filepaths = defaultdict(lambda: 0) 169 | for pos in range(0, t.size, piece_size): 170 | files = pos2files(pos, filespecs, piece_size) # noqa: F405 171 | exp_filepaths[files[-1]] += 1 172 | 173 | assert seen_filepaths == exp_filepaths 174 | 175 | 176 | def test_callback_is_called_at_interval(filespecs, piece_size, create_file, create_dir, 177 | forced_piece_size, monkeypatch): 178 | display_filespecs(filespecs, piece_size) # noqa: F405 179 | if len(filespecs) == 1: 180 | content_path = create_file(filespecs[0][0], filespecs[0][1]) 181 | else: 182 | content_path = 
create_dir('content', *filespecs) 183 | 184 | with forced_piece_size(piece_size): 185 | t = torf.Torrent(content_path) 186 | monkeypatch.setattr(torf._generate, 'time_monotonic', 187 | mock.Mock(side_effect=range(int(1e9)))) 188 | for interval in (1, 2, 3): 189 | cb = mock.Mock(return_value=None) 190 | success = t.generate(callback=cb, interval=interval) 191 | assert success is True 192 | 193 | if interval > 1 and t.pieces % interval == 0: 194 | exp_call_count = t.pieces // interval + t.pieces % interval + 1 195 | else: 196 | exp_call_count = t.pieces // interval + t.pieces % interval 197 | assert cb.call_count == exp_call_count 198 | 199 | 200 | def test_callback_cancels(piece_size, create_file, forced_piece_size, mocker): 201 | def maybe_cancel(torrent, filepath, pieces_done, pieces_total): 202 | if pieces_done / pieces_total > 0.1: 203 | return 'STOP THE PRESSES!' 204 | 205 | cb = mock.Mock(side_effect=maybe_cancel) 206 | piece_count = 1000 207 | content_path = create_file('file.jpg', piece_size * piece_count) 208 | 209 | with forced_piece_size(piece_size): 210 | t = torf.Torrent(content_path) 211 | success = t.generate(callback=cb, interval=0, threads=1) 212 | assert success is False 213 | assert cb.call_count < piece_count 214 | 215 | 216 | def test_callback_raises_exception(piece_size, create_file, forced_piece_size): 217 | # We need a large file size so we can test that the hashers actually stop 218 | # before all pieces are hashed. 219 | content_path = create_file('file.jpg', piece_size * 1000) 220 | with forced_piece_size(piece_size): 221 | with mock.patch('torf._generate.sha1') as sha1_mock: 222 | def mock_digest(): 223 | return b'\x00' * 20 224 | 225 | sha1_mock.return_value.digest.side_effect = mock_digest 226 | cb = mock.Mock(side_effect=Exception('Argh!')) 227 | 228 | t = torf.Torrent(content_path) 229 | with pytest.raises(Exception) as e: 230 | t.generate(callback=cb) 231 | 232 | assert str(e.value) == 'Argh!' 
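            # The exception from the first callback call propagates out of
            # generate(), so the callback must have run exactly once: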
233 | cb.assert_called_once_with(t, Path(content_path), 1, t.pieces) 234 | # The pool of hashers should be stopped before all pieces are hashed 235 | assert sha1_mock.call_count < t.pieces 236 | assert not t.is_ready 237 | -------------------------------------------------------------------------------- /tests/test_partial_size.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import torf 4 | 5 | 6 | def test_partial_size__singlefile__providing_correct_name(tmp_path): 7 | (tmp_path / 'content.jpg').write_text('some data') 8 | t = torf.Torrent(tmp_path / 'content.jpg') 9 | assert t.partial_size('content.jpg') == 9 10 | assert t.partial_size(['content.jpg']) == 9 11 | 12 | def test_partial_size__singlefile__providing_wrong_name(tmp_path): 13 | (tmp_path / 'content.jpg').write_text('some data') 14 | t = torf.Torrent(tmp_path / 'content.jpg') 15 | for path in ('foo.jpg', ['foo.jpg']): 16 | with pytest.raises(torf.PathError) as excinfo: 17 | t.partial_size(path) 18 | assert excinfo.match('^foo.jpg: Unknown path$') 19 | 20 | def test_partial_size__singlefile__providing_path(tmp_path): 21 | (tmp_path / 'content.jpg').write_text('some data') 22 | t = torf.Torrent(tmp_path / 'content.jpg') 23 | for path in ('bar/foo.jpg', ['bar', 'foo.jpg']): 24 | with pytest.raises(torf.PathError) as excinfo: 25 | t.partial_size(path) 26 | assert excinfo.match('^bar/foo.jpg: Unknown path$') 27 | 28 | 29 | def test_partial_size__multifile__providing_path_to_file(tmp_path): 30 | (tmp_path / 'content').mkdir() 31 | (tmp_path / 'content' / 'file1.jpg').write_text('some data') 32 | (tmp_path / 'content' / 'file2.jpg').write_text('some other data') 33 | (tmp_path / 'content' / 'subcontent').mkdir() 34 | (tmp_path / 'content' / 'subcontent' / 'file3.jpg').write_text('some more data') 35 | t = torf.Torrent(tmp_path / 'content') 36 | for path in ('content/file1.jpg', ['content', 'file1.jpg']): 37 | assert t.partial_size(path) == 9 38 | for path in ('content/file2.jpg', ['content', 'file2.jpg']): 39 | assert t.partial_size(path) == 15 40 | for path in ('content/subcontent/file3.jpg', ['content', 'subcontent', 'file3.jpg']): 41 | assert t.partial_size(path) == 14 42 | 43 | def test_partial_size__multifile__providing_path_to_dir(tmp_path): 44 | (tmp_path / 'content').mkdir() 45 | (tmp_path / 'content' / 'file1.jpg').write_text('some data') 46 | (tmp_path / 'content' / 'file2.jpg').write_text('some other data') 47 | (tmp_path / 'content' / 'subcontent1').mkdir() 48 | (tmp_path / 'content' / 'subcontent1' / 'file3.jpg').write_text('some more data') 49 | (tmp_path / 'content' / 'subcontent1' / 'file4.jpg').write_text('and even more data') 50 | (tmp_path / 'content' / 'subcontent2').mkdir() 51 | (tmp_path / 'content' / 'subcontent2' / 'file5.jpg').write_text('some more other data') 52 | (tmp_path / 'content' / 'subcontent2' / 'file6.jpg').write_text('and even more other data') 53 | t = torf.Torrent(tmp_path / 'content') 54 | for path in ('content', ['content']): 55 | assert t.partial_size(path) == 100 56 | for path in ('content/subcontent1', ['content', 'subcontent1']): 57 | assert t.partial_size(path) == 32 58 | for path in ('content/subcontent2', ['content', 'subcontent2']): 59 | assert t.partial_size(path) == 44 60 | 61 | def test_partial_size__multifile__providing_unknown_path(tmp_path): 62 | (tmp_path / 'content').mkdir() 63 | (tmp_path / 'content' / 'file1.jpg').write_text('some data') 64 | (tmp_path / 'content' / 'file2.jpg').write_text('some other data') 65 | 
(tmp_path / 'content' / 'subcontent').mkdir() 66 | (tmp_path / 'content' / 'subcontent' / 'file3.jpg').write_text('some more data') 67 | t = torf.Torrent(tmp_path / 'content') 68 | for path in ('content/subcontent/file1.jpg', ['content', 'subcontent', 'file1.jpg']): 69 | with pytest.raises(torf.PathError) as excinfo: 70 | t.partial_size(path) 71 | assert excinfo.match('^content/subcontent/file1.jpg: Unknown path$') 72 | for path in ('content/file3.jpg', ['content', 'file3.jpg']): 73 | with pytest.raises(torf.PathError) as excinfo: 74 | t.partial_size(path) 75 | assert excinfo.match('^content/file3.jpg: Unknown path$') 76 | for path in ('file1.jpg', ['file1.jpg']): 77 | with pytest.raises(torf.PathError) as excinfo: 78 | t.partial_size(path) 79 | assert excinfo.match('^file1.jpg: Unknown path$') 80 | -------------------------------------------------------------------------------- /tests/test_read.py: -------------------------------------------------------------------------------- 1 | import io 2 | from collections import OrderedDict 3 | from datetime import datetime 4 | from hashlib import sha1 5 | from pathlib import Path 6 | 7 | import flatbencode as bencode 8 | import pytest 9 | 10 | import torf 11 | from torf import _utils 12 | 13 | 14 | def test_non_bencoded_data(): 15 | fo = io.BytesIO(b'not valid bencoded data') 16 | with pytest.raises(torf.BdecodeError) as excinfo: 17 | torf.Torrent.read_stream(fo) 18 | assert excinfo.match('^Invalid metainfo format$') 19 | 20 | 21 | def test_unreadable_stream(): 22 | class Unreadable(io.BytesIO): 23 | def read(self, *args, **kwargs): 24 | raise OSError('Refusing to read') 25 | fo = Unreadable(b'foo') 26 | with pytest.raises(torf.ReadError) as excinfo: 27 | torf.Torrent.read_stream(fo) 28 | assert excinfo.match('^Unable to read$') 29 | 30 | 31 | def test_validate_when_reading_stream(valid_singlefile_metainfo): 32 | del valid_singlefile_metainfo[b'info'][b'name'] 33 | fo = io.BytesIO(bencode.encode(valid_singlefile_metainfo)) 34 | 35 | with pytest.raises(torf.MetainfoError) as excinfo: 36 | torf.Torrent.read_stream(fo, validate=True) 37 | assert excinfo.match(r"^Invalid metainfo: Missing 'name' in \['info'\]$") 38 | fo.seek(0) 39 | t = torf.Torrent.read_stream(fo, validate=False) 40 | assert isinstance(t, torf.Torrent) 41 | 42 | def test_validate_when_reading_file(tmp_path, valid_singlefile_metainfo): 43 | del valid_singlefile_metainfo[b'info'][b'length'] 44 | torrent_file = tmp_path / 'invalid.torrent' 45 | with open(torrent_file, 'wb') as f: 46 | f.write(bencode.encode(valid_singlefile_metainfo)) 47 | 48 | with pytest.raises(torf.MetainfoError) as excinfo: 49 | torf.Torrent.read(torrent_file, validate=True) 50 | assert excinfo.match("^Invalid metainfo: Missing 'length' or 'files' in 'info'$") 51 | t = torf.Torrent.read(torrent_file, validate=False) 52 | assert isinstance(t, torf.Torrent) 53 | 54 | 55 | def test_successful_read(valid_singlefile_metainfo): 56 | fo = io.BytesIO(bencode.encode(valid_singlefile_metainfo)) 57 | t = torf.Torrent.read_stream(fo) 58 | assert t.path is None 59 | assert t.files == (Path(str(valid_singlefile_metainfo[b'info'][b'name'], encoding='utf-8')),) 60 | assert t.filepaths == () 61 | assert t.name == str(valid_singlefile_metainfo[b'info'][b'name'], encoding='utf-8') 62 | assert t.size == valid_singlefile_metainfo[b'info'][b'length'] 63 | assert t.infohash == sha1(bencode.encode(valid_singlefile_metainfo[b'info'])).hexdigest() 64 | assert t.comment == str(valid_singlefile_metainfo[b'comment'], encoding='utf-8') 65 | 
assert t.creation_date == datetime.fromtimestamp(valid_singlefile_metainfo[b'creation date']) 66 | assert t.created_by == str(valid_singlefile_metainfo[b'created by'], encoding='utf-8') 67 | assert t.private is bool(valid_singlefile_metainfo[b'info'][b'private']) 68 | assert t.piece_size == valid_singlefile_metainfo[b'info'][b'piece length'] 69 | 70 | 71 | def test_single_tracker(valid_singlefile_metainfo): 72 | valid_singlefile_metainfo[b'announce'] = b'http://lonelyhost/announce' 73 | valid_singlefile_metainfo.pop(b'announce-list', None) 74 | fo = io.BytesIO(bencode.encode(valid_singlefile_metainfo)) 75 | t = torf.Torrent.read_stream(fo) 76 | assert t.trackers == [[str(valid_singlefile_metainfo[b'announce'], encoding='utf-8')]] 77 | 78 | def test_multiple_trackers(valid_singlefile_metainfo): 79 | valid_singlefile_metainfo[b'announce-list'] = [[b'http://localhost', b'http://foohost'], 80 | [b'http://bazhost']] 81 | valid_singlefile_metainfo.pop(b'announce', None) 82 | fo = io.BytesIO(bencode.encode(valid_singlefile_metainfo)) 83 | t = torf.Torrent.read_stream(fo) 84 | assert t.trackers == [[str(url, encoding='utf-8') for url in tier] for tier 85 | in valid_singlefile_metainfo[b'announce-list']] 86 | 87 | 88 | def test_validate_nondict(): 89 | data = b'3:foo' 90 | with pytest.raises(torf.BdecodeError) as excinfo: 91 | torf.Torrent.read_stream(io.BytesIO(data), validate=True) 92 | assert excinfo.match("^Invalid metainfo format$") 93 | 94 | with pytest.raises(torf.BdecodeError) as excinfo: 95 | torf.Torrent.read_stream(io.BytesIO(data), validate=False) 96 | assert excinfo.match("^Invalid metainfo format$") 97 | 98 | def test_validate_missing_info(): 99 | data = OrderedDict([(b'foo', b'bar')]) 100 | with pytest.raises(torf.MetainfoError) as excinfo: 101 | torf.Torrent.read_stream(io.BytesIO(bencode.encode(data)), validate=True) 102 | assert excinfo.match(r"^Invalid metainfo: Missing 'info'$") 103 | 104 | t = torf.Torrent.read_stream(io.BytesIO(bencode.encode(data)), validate=False) 105 | assert t.metainfo == {'foo': 'bar', 'info': {}} 106 | 107 | def test_validate_info_not_a_dictionary(): 108 | data = OrderedDict([(b'info', 1)]) 109 | 110 | with pytest.raises(torf.MetainfoError) as excinfo: 111 | torf.Torrent.read_stream(io.BytesIO(bencode.encode(data)), validate=True) 112 | assert excinfo.match(r"^Invalid metainfo: \['info'\] must be dict, not int: 1$") 113 | 114 | with pytest.raises(torf.MetainfoError) as excinfo: 115 | torf.Torrent.read_stream(io.BytesIO(bencode.encode(data)), validate=False) 116 | assert excinfo.match(r"^Invalid metainfo: \['info'\] must be dict, not int: 1$") 117 | 118 | def test_validate_missing_pieces(): 119 | data = OrderedDict([(b'info', {b'name': b'Foo', 120 | b'piece length': 16384})]) 121 | fo = io.BytesIO(bencode.encode(data)) 122 | with pytest.raises(torf.MetainfoError) as excinfo: 123 | torf.Torrent.read_stream(fo, validate=True) 124 | assert excinfo.match(r"^Invalid metainfo: Missing 'pieces' in \['info'\]$") 125 | 126 | 127 | def test_read_nonstandard_data_without_validation(): 128 | data = OrderedDict([ 129 | (b'foo', b'bar'), 130 | (b'number', 17), 131 | (b'list', [1, b'two']), 132 | (b'dict', OrderedDict([ 133 | (b'yes', 1), 134 | (b'no', 0), 135 | ])) 136 | ]) 137 | fo = io.BytesIO(bencode.encode(data)) 138 | t = torf.Torrent.read_stream(fo, validate=False) 139 | assert t.metainfo['foo'] == 'bar' 140 | assert t.metainfo['number'] == 17 141 | assert t.metainfo['list'] == [1, 'two'] 142 | assert t.metainfo['dict'] == {'yes': 1, 'no': 0} 143 | assert 
t.metainfo['info'] == {} 144 | 145 | def test_read_from_unreadable_file(valid_singlefile_metainfo, tmp_path): 146 | f = (tmp_path / 'a.torrent') 147 | f.write_bytes(bencode.encode(valid_singlefile_metainfo)) 148 | f.chmod(mode=0o222) 149 | with pytest.raises(torf.ReadError) as excinfo: 150 | torf.Torrent.read(str(f)) 151 | assert excinfo.match(f'^{f}: Permission denied$') 152 | 153 | def test_read_from_invalid_file(tmp_path): 154 | f = tmp_path / 'a.torrent' 155 | f.write_bytes(b'this is not metainfo') 156 | with pytest.raises(torf.BdecodeError) as excinfo: 157 | torf.Torrent.read(f) 158 | assert excinfo.match(f'^{f}: Invalid torrent file format$') 159 | 160 | def test_read_from_nonexisting_file(tmp_path): 161 | f = tmp_path / 'a.torrent' 162 | with pytest.raises(torf.ReadError) as excinfo: 163 | torf.Torrent.read(f) 164 | assert excinfo.match(f'^{f}: No such file or directory$') 165 | 166 | def test_read_from_proper_torrent_file(valid_multifile_metainfo, tmp_path): 167 | f = tmp_path / 'a.torrent' 168 | f.write_bytes(bencode.encode(valid_multifile_metainfo)) 169 | t = torf.Torrent.read(f) 170 | exp_info = valid_multifile_metainfo[b'info'] 171 | assert t.path is None 172 | assert t.files == tuple(Path(str(b'/'.join([exp_info[b'name']] + f[b'path']), encoding='utf-8')) 173 | for f in exp_info[b'files']) 174 | assert t.filepaths == () 175 | assert t.name == str(exp_info[b'name'], encoding='utf-8') 176 | assert t.size == sum(f[b'length'] for f in exp_info[b'files']) 177 | assert t.infohash == sha1(bencode.encode(exp_info)).hexdigest() 178 | assert t.comment == str(valid_multifile_metainfo[b'comment'], encoding='utf-8') 179 | assert t.creation_date == datetime.fromtimestamp(valid_multifile_metainfo[b'creation date']) 180 | assert t.created_by == str(valid_multifile_metainfo[b'created by'], encoding='utf-8') 181 | assert t.private is bool(exp_info[b'private']) 182 | assert t.piece_size == exp_info[b'piece length'] 183 | 184 | @pytest.mark.parametrize('bytes_type', (bytes, bytearray), ids=lambda t: t.__name__) 185 | def test_read_from_bytes(bytes_type, valid_multifile_metainfo, tmp_path): 186 | bytes = bytes_type(bencode.encode(valid_multifile_metainfo)) 187 | t = torf.Torrent.read_stream(bytes) 188 | exp_info = valid_multifile_metainfo[b'info'] 189 | assert t.path is None 190 | assert t.files == tuple(Path(str(b'/'.join([exp_info[b'name']] + f[b'path']), encoding='utf-8')) 191 | for f in exp_info[b'files']) 192 | assert t.filepaths == () 193 | assert t.name == str(exp_info[b'name'], encoding='utf-8') 194 | assert t.size == sum(f[b'length'] for f in exp_info[b'files']) 195 | assert t.infohash == sha1(bencode.encode(exp_info)).hexdigest() 196 | assert t.comment == str(valid_multifile_metainfo[b'comment'], encoding='utf-8') 197 | assert t.creation_date == datetime.fromtimestamp(valid_multifile_metainfo[b'creation date']) 198 | assert t.created_by == str(valid_multifile_metainfo[b'created by'], encoding='utf-8') 199 | assert t.private is bool(exp_info[b'private']) 200 | assert t.piece_size == exp_info[b'piece length'] 201 | 202 | @pytest.mark.parametrize('bytes_type', (bytes, bytearray), ids=lambda t: t.__name__) 203 | def test_read_from_too_many_bytes(bytes_type, valid_multifile_metainfo, tmp_path): 204 | bytes = bytes_type(b'x' * (torf.Torrent.MAX_TORRENT_FILE_SIZE + 1)) 205 | with pytest.raises(ValueError, match=( 206 | r'^Size of stream exceeds Torrent.MAX_TORRENT_FILE_SIZE: ' 207 | f'{torf.Torrent.MAX_TORRENT_FILE_SIZE + 1} > {torf.Torrent.MAX_TORRENT_FILE_SIZE}$' 208 | )): 209 | 
torf.Torrent.read_stream(bytes)
210 |
211 | def test_read_from_invalid_type(valid_multifile_metainfo, tmp_path):
212 |     obj = 123
213 |     with pytest.raises(TypeError, match=r'^Expected bytes, bytearray or a readable file-like object, got int$'):
214 |         torf.Torrent.read_stream(obj)
215 |
216 |
217 | def test_reading_converts_private_flag_to_bool(tmp_path, valid_singlefile_metainfo):
218 |     valid_singlefile_metainfo[b'info'][b'private'] = 1
219 |     fo = io.BytesIO(bencode.encode(valid_singlefile_metainfo))
220 |     torrent = torf.Torrent.read_stream(fo)
221 |     assert torrent.metainfo['info']['private'] is True
222 |
223 |     valid_singlefile_metainfo[b'info'][b'private'] = 0
224 |     fo = io.BytesIO(bencode.encode(valid_singlefile_metainfo))
225 |     torrent = torf.Torrent.read_stream(fo)
226 |     assert torrent.metainfo['info']['private'] is False
227 |
228 | def test_reading_torrent_without_private_flag(tmp_path, valid_singlefile_metainfo):
229 |     valid_singlefile_metainfo[b'info'][b'private'] = 1
230 |     fo = io.BytesIO(bencode.encode(valid_singlefile_metainfo))
231 |     torrent = torf.Torrent.read_stream(fo)
232 |     assert torrent.metainfo['info']['private'] is True
233 |     assert torrent.private is True
234 |
235 |     del valid_singlefile_metainfo[b'info'][b'private']
236 |     fo = io.BytesIO(bencode.encode(valid_singlefile_metainfo))
237 |     torrent = torf.Torrent.read_stream(fo)
238 |     assert 'private' not in torrent.metainfo['info']
239 |     assert torrent.private is None
240 |
241 | def test_reading_torrent_without_creation_date(tmp_path, valid_singlefile_metainfo):
242 |     del valid_singlefile_metainfo[b'creation date']
243 |     fo = io.BytesIO(bencode.encode(valid_singlefile_metainfo))
244 |     torrent = torf.Torrent.read_stream(fo)
245 |     # "creation date" is a top-level metainfo key, not part of ["info"]
246 |     assert 'creation date' not in torrent.metainfo
247 |     assert torrent.creation_date is None
248 |
249 |
250 | def test_read_from_torrent_file_with_empty_path_components(valid_multifile_metainfo, tmp_path):
251 |     valid_multifile_metainfo[b'info'][b'files'][0][b'path'] = [b'', b'foo', b'', b'', b'bar', b'']
252 |     f = (tmp_path / 'foo.torrent')
253 |     f.write_bytes(bencode.encode(valid_multifile_metainfo))
254 |     t = torf.Torrent.read(str(f))
255 |     exp_path = f'{valid_multifile_metainfo[b"info"][b"name"].decode()}/foo/bar'
256 |     assert exp_path in tuple(str(f) for f in t.files)
257 |
258 |
259 | def test_read_nonutf8_encoded_metainfo(valid_multifile_metainfo, tmp_path):
260 |     valid_multifile_metainfo[b'comment'] = 'A çommقnt'.encode('CP1256')
261 |     valid_multifile_metainfo[b'created by'] = 'bДd ЗncodeЯ'.encode('CP866')
262 |     valid_multifile_metainfo[b'info'][b'name'] = 'Thê ñãme'.encode('CP860')
263 |     valid_multifile_metainfo[b'info'][b'source'] = 'Þhe ßource'.encode('CP861')
264 |     valid_multifile_metainfo[b'info'][b'files'] = [
265 |         {
266 |             b'path': [
267 |                 'FΩO'.encode('ISO8859-7'),
268 |                 'BAΓ'.encode('ISO8859-7'),
269 |                 'βAZ'.encode('ISO8859-7'),
270 |             ],
271 |             b'length': 124,
272 |         },
273 |     ]
274 |
275 |     f = (tmp_path / 'test.torrent')
276 |     f.write_bytes(bencode.encode(valid_multifile_metainfo))
277 |
278 |     t = torf.Torrent.read(str(f))
279 |     assert t.name == 'Th� ��me'
280 |     assert t.comment == 'A �omm�nt'
281 |     assert t.created_by == 'b�d �ncode�'
282 |     assert t.source == '�he �ource'
283 |     assert t.files == [
284 |         _utils.File('Th� ��me/F�O/BA�/�AZ', size=124),
285 |     ]
286 |
--------------------------------------------------------------------------------
/tests/test_validate.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import
pytest 4 | 5 | import torf 6 | 7 | 8 | def test_wrong_info_type(generated_singlefile_torrent): 9 | t = generated_singlefile_torrent 10 | for typ in (bytearray, list, tuple): 11 | t.metainfo['info'] = typ() 12 | with pytest.raises(torf.MetainfoError) as excinfo: 13 | t.validate() 14 | assert str(excinfo.value) == (f"Invalid metainfo: ['info'] " 15 | f"must be dict, not {typ.__qualname__}: {t.metainfo['info']}") 16 | 17 | def test_length_and_files_in_info(generated_multifile_torrent): 18 | t = generated_multifile_torrent 19 | t.metainfo['info']['length'] = 123 20 | with pytest.raises(torf.MetainfoError) as excinfo: 21 | t.validate() 22 | assert str(excinfo.value) == "Invalid metainfo: ['info'] includes both 'length' and 'files'" 23 | 24 | 25 | def test_wrong_name_type(generated_singlefile_torrent): 26 | t = generated_singlefile_torrent 27 | t.metainfo['info']['name'] = 123 28 | with pytest.raises(torf.MetainfoError) as excinfo: 29 | t.validate() 30 | assert str(excinfo.value) == ("Invalid metainfo: ['info']['name'] " 31 | "must be str or bytes, not int: 123") 32 | 33 | def test_wrong_piece_length_type(generated_singlefile_torrent): 34 | t = generated_singlefile_torrent 35 | t.metainfo['info']['piece length'] = [700] 36 | with pytest.raises(torf.MetainfoError) as excinfo: 37 | t.validate() 38 | assert str(excinfo.value) == ("Invalid metainfo: ['info']['piece length'] " 39 | "must be int, not list: [700]") 40 | 41 | @pytest.mark.parametrize( 42 | argnames='piece_length, exp_exception', 43 | argvalues=( 44 | (-1, torf.MetainfoError("['info']['piece length'] is invalid: -1")), 45 | (0, torf.MetainfoError("['info']['piece length'] is invalid: 0")), 46 | (16385, torf.MetainfoError("['info']['piece length'] is invalid: 16385")), 47 | ), 48 | ) 49 | def test_piece_length_not_divisible_by_16_kib(piece_length, exp_exception, generated_singlefile_torrent): 50 | t = generated_singlefile_torrent 51 | t.metainfo['info']['piece length'] = piece_length 52 | with pytest.raises(type(exp_exception)) as excinfo: 53 | t.validate() 54 | assert str(excinfo.value) == str(exp_exception) 55 | 56 | def test_wrong_pieces_type(generated_singlefile_torrent): 57 | t = generated_singlefile_torrent 58 | t.metainfo['info']['pieces'] = 'many' 59 | with pytest.raises(torf.MetainfoError) as excinfo: 60 | t.validate() 61 | assert str(excinfo.value) == ("Invalid metainfo: ['info']['pieces'] " 62 | "must be bytes, not str: 'many'") 63 | 64 | def test_pieces_is_empty(generated_singlefile_torrent): 65 | t = generated_singlefile_torrent 66 | t.metainfo['info']['pieces'] = bytes() 67 | with pytest.raises(torf.MetainfoError) as excinfo: 68 | t.validate() 69 | assert str(excinfo.value) == "Invalid metainfo: ['info']['pieces'] is empty" 70 | 71 | def test_invalid_number_of_bytes_in_pieces(generated_singlefile_torrent): 72 | t = generated_singlefile_torrent 73 | t.path = None 74 | t.metainfo['info']['piece length'] = 512 * 1024 75 | for i in range(1, 10): 76 | t.metainfo['info']['length'] = i * t.metainfo['info']['piece length'] 77 | t.metainfo['info']['pieces'] = bytes(os.urandom(i * 20)) 78 | t.validate() 79 | 80 | for j in ((i * 20) + 1, (i * 20) - 1): 81 | t.metainfo['info']['pieces'] = bytes(os.urandom(j)) 82 | with pytest.raises(torf.MetainfoError) as excinfo: 83 | t.validate() 84 | assert str(excinfo.value) == ("Invalid metainfo: length of ['info']['pieces'] " 85 | "is not divisible by 20") 86 | 87 | def test_wrong_creation_date_type(generated_singlefile_torrent): 88 | t = generated_singlefile_torrent 89 | t.metainfo['creation 
date'] = 'hello' 90 | with pytest.raises(torf.MetainfoError) as excinfo: 91 | t.validate() 92 | assert str(excinfo.value) == "Invalid metainfo: ['creation date'] must be int or datetime, not str: 'hello'" 93 | 94 | def test_singlefile__unexpected_number_of_bytes_in_pieces(generated_singlefile_torrent): 95 | t = generated_singlefile_torrent 96 | t.path = None # Don't complain about wrong file size 97 | t.metainfo['info']['length'] = 1024 * 1024 98 | t.metainfo['info']['piece length'] = int(1024 * 1024 / 8) 99 | 100 | t.metainfo['info']['pieces'] = os.urandom(20 * 9) 101 | with pytest.raises(torf.MetainfoError) as excinfo: 102 | t.validate() 103 | assert str(excinfo.value) == 'Invalid metainfo: Expected 8 pieces but there are 9' 104 | 105 | t.metainfo['info']['pieces'] = os.urandom(20 * 7) 106 | with pytest.raises(torf.MetainfoError) as excinfo: 107 | t.validate() 108 | assert str(excinfo.value) == 'Invalid metainfo: Expected 8 pieces but there are 7' 109 | 110 | def test_multifile__unexpected_number_of_bytes_in_pieces(generated_multifile_torrent): 111 | t = generated_multifile_torrent 112 | t.path = None # Don't complain about wrong file size 113 | 114 | total_size = 0 115 | for i,file in enumerate(t.metainfo['info']['files'], start=1): 116 | file['length'] = 1024 * 1024 * i + 123 117 | total_size += file['length'] 118 | 119 | import math 120 | t.metainfo['info']['piece length'] = int(1024 * 1024 / 8) 121 | piece_count = math.ceil(total_size / t.metainfo['info']['piece length']) 122 | 123 | t.metainfo['info']['pieces'] = os.urandom(20 * (piece_count + 1)) 124 | with pytest.raises(torf.MetainfoError) as excinfo: 125 | t.validate() 126 | assert str(excinfo.value) == 'Invalid metainfo: Expected 49 pieces but there are 50' 127 | 128 | t.metainfo['info']['pieces'] = os.urandom(20 * (piece_count - 1)) 129 | with pytest.raises(torf.MetainfoError) as excinfo: 130 | t.validate() 131 | assert str(excinfo.value) == 'Invalid metainfo: Expected 49 pieces but there are 48' 132 | 133 | 134 | def test_no_announce_is_ok(generated_singlefile_torrent): 135 | t = generated_singlefile_torrent 136 | if 'announce' in t.metainfo: 137 | del t.metainfo['announce'] 138 | t.validate() 139 | 140 | def test_wrong_announce_type(generated_singlefile_torrent): 141 | t = generated_singlefile_torrent 142 | for typ in (bytearray, list, tuple): 143 | t.metainfo['announce'] = typ() 144 | with pytest.raises(torf.MetainfoError) as excinfo: 145 | t.validate() 146 | assert str(excinfo.value) == (f"Invalid metainfo: ['announce'] " 147 | f"must be str, not {typ.__qualname__}: {t.metainfo['announce']}") 148 | 149 | def test_invalid_announce_url(generated_singlefile_torrent): 150 | t = generated_singlefile_torrent 151 | for url in ('123', 'http://123:xxx/announce'): 152 | t.metainfo['announce'] = url 153 | with pytest.raises(torf.MetainfoError) as excinfo: 154 | t.validate() 155 | assert str(excinfo.value) == f"Invalid metainfo: ['announce'] is invalid: {url!r}" 156 | 157 | def test_no_announce_list_is_ok(generated_singlefile_torrent): 158 | t = generated_singlefile_torrent 159 | if 'announce-list' in t.metainfo: 160 | del t.metainfo['announce-list'] 161 | t.validate() 162 | 163 | def test_wrong_announce_list_type(generated_singlefile_torrent): 164 | t = generated_singlefile_torrent 165 | 166 | # announce-list must be a list 167 | for value in (3, 'foo', None, lambda: None): 168 | t.metainfo['announce-list'] = value 169 | with pytest.raises(torf.MetainfoError) as excinfo: 170 | t.validate() 171 | assert str(excinfo.value) == (f"Invalid 
metainfo: ['announce-list'] " 172 | f"must be Iterable, not {type(value).__qualname__}: " 173 | f"{t.metainfo['announce-list']!r}") 174 | 175 | # Each item in announce-list must be a list 176 | for tier in (3, 'foo', None, lambda: None): 177 | for lst in ([tier], 178 | [tier, []], 179 | [[], tier], 180 | [[], tier, []]): 181 | t.metainfo['announce-list'] = lst 182 | with pytest.raises(torf.MetainfoError) as excinfo: 183 | t.validate() 184 | tier_index = lst.index(tier) 185 | assert str(excinfo.value) == (f"Invalid metainfo: ['announce-list'][{tier_index}] " 186 | f"must be Iterable, not {type(tier).__qualname__}: {tier!r}") 187 | 188 | # Each item in each list in announce-list must be a string 189 | for typ in (bytearray, set): 190 | url = typ() 191 | for tier in ([url], 192 | ['http://localhost:123/', url], 193 | [url, 'http://localhost:123/'], 194 | ['http://localhost:123/', url, 'http://localhost:456/']): 195 | url_index = tier.index(url) 196 | for lst in ([tier], 197 | [tier, []], 198 | [[], tier], 199 | [[], tier, []]): 200 | tier_index = lst.index(tier) 201 | t.metainfo['announce-list'] = lst 202 | with pytest.raises(torf.MetainfoError) as excinfo: 203 | t.validate() 204 | assert str(excinfo.value) == (f"Invalid metainfo: ['announce-list'][{tier_index}][{url_index}] " 205 | f"must be str, not {typ.__qualname__}: {url!r}") 206 | 207 | def test_invalid_url_in_announce_list(generated_singlefile_torrent): 208 | t = generated_singlefile_torrent 209 | for url in ('123', 'http://123:xxx/announce'): 210 | for tier in ([url], 211 | ['http://localhost:123/', url], 212 | [url, 'http://localhost:123/'], 213 | ['http://localhost:123/', url, 'http://localhost:456/']): 214 | url_index = tier.index(url) 215 | for lst in ([tier], 216 | [tier, []], 217 | [[], tier], 218 | [[], tier, []]): 219 | tier_index = lst.index(tier) 220 | t.metainfo['announce-list'] = lst 221 | with pytest.raises(torf.MetainfoError) as excinfo: 222 | t.validate() 223 | assert str(excinfo.value) == (f"Invalid metainfo: ['announce-list'][{tier_index}][{url_index}] " 224 | f"is invalid: {url!r}") 225 | 226 | def test_no_announce_and_no_announce_list_when_torrent_is_private(generated_singlefile_torrent): 227 | t = generated_singlefile_torrent 228 | t.metainfo['info']['private'] = True 229 | if 'announce' in t.metainfo: 230 | del t.metainfo['announce'] 231 | if 'announce-list' in t.metainfo: 232 | del t.metainfo['announce-list'] 233 | t.validate() 234 | assert t.generate() is True 235 | assert t.is_ready is True 236 | 237 | 238 | def test_singlefile_wrong_length_type(generated_singlefile_torrent): 239 | t = generated_singlefile_torrent 240 | t.metainfo['info']['length'] = 'foo' 241 | with pytest.raises(torf.MetainfoError) as excinfo: 242 | t.validate() 243 | assert str(excinfo.value) == ("Invalid metainfo: ['info']['length'] " 244 | "must be int or float, not str: 'foo'") 245 | 246 | def test_singlefile_wrong_md5sum_type(generated_singlefile_torrent): 247 | t = generated_singlefile_torrent 248 | t.metainfo['info']['md5sum'] = 0 249 | with pytest.raises(torf.MetainfoError) as excinfo: 250 | t.validate() 251 | assert str(excinfo.value) == ("Invalid metainfo: ['info']['md5sum'] " 252 | "must be str, not int: 0") 253 | 254 | t.metainfo['info']['md5sum'] = 'Z8b329da9893e34099c7d8ad5cb9c940' 255 | with pytest.raises(torf.MetainfoError) as excinfo: 256 | t.validate() 257 | assert str(excinfo.value) == ("Invalid metainfo: ['info']['md5sum'] is invalid: " 258 | "'Z8b329da9893e34099c7d8ad5cb9c940'") 259 | 260 | 261 | def 
test_multifile_wrong_files_type(generated_multifile_torrent): 262 | t = generated_multifile_torrent 263 | t._path = None 264 | t.metainfo['info']['files'] = 'foo' 265 | with pytest.raises(torf.MetainfoError) as excinfo: 266 | t.validate() 267 | assert str(excinfo.value) == ("Invalid metainfo: ['info']['files'] " 268 | "must be Iterable, not str: 'foo'") 269 | 270 | def test_multifile_wrong_path_type(generated_multifile_torrent): 271 | t = generated_multifile_torrent 272 | t._path = None 273 | t.metainfo['info']['files'][0]['path'] = 'foo/bar/baz' 274 | with pytest.raises(torf.MetainfoError) as excinfo: 275 | t.validate() 276 | assert str(excinfo.value) == ("Invalid metainfo: ['info']['files'][0]['path'] " 277 | "must be Iterable, not str: 'foo/bar/baz'") 278 | 279 | def test_multifile_wrong_path_item_type(generated_multifile_torrent): 280 | t = generated_multifile_torrent 281 | t._path = None 282 | t.metainfo['info']['files'][1]['path'][0] = 17 283 | with pytest.raises(torf.MetainfoError) as excinfo: 284 | t.validate() 285 | assert str(excinfo.value) == ("Invalid metainfo: ['info']['files'][1]['path'][0] " 286 | "must be str or bytes, not int: 17") 287 | 288 | def test_multifile_wrong_length_type(generated_multifile_torrent): 289 | t = generated_multifile_torrent 290 | t._path = None 291 | t.metainfo['info']['files'][2]['length'] = ['this', 'is', 'not', 'a', 'length'] 292 | with pytest.raises(torf.MetainfoError) as excinfo: 293 | t.validate() 294 | assert str(excinfo.value) == ("Invalid metainfo: ['info']['files'][2]['length'] " 295 | "must be int or float, not list: ['this', 'is', 'not', 'a', 'length']") 296 | 297 | def test_multifile_wrong_md5sum_type(generated_multifile_torrent): 298 | t = generated_multifile_torrent 299 | t.metainfo['info']['files'][0]['md5sum'] = 0 300 | with pytest.raises(torf.MetainfoError) as excinfo: 301 | t.validate() 302 | assert str(excinfo.value) == ("Invalid metainfo: ['info']['files'][0]['md5sum'] " 303 | "must be str, not int: 0") 304 | 305 | t.metainfo['info']['files'][0]['md5sum'] = 'Z8b329da9893e34099c7d8ad5cb9c940' 306 | with pytest.raises(torf.MetainfoError) as excinfo: 307 | t.validate() 308 | assert str(excinfo.value) == ("Invalid metainfo: ['info']['files'][0]['md5sum'] is invalid: " 309 | "'Z8b329da9893e34099c7d8ad5cb9c940'") 310 | 311 | 312 | def assert_missing_metainfo(torrent, *keys): 313 | md = torrent.metainfo 314 | for key in keys[:-1]: 315 | md = md[key] 316 | del md[keys[-1]] 317 | with pytest.raises(torf.MetainfoError) as excinfo: 318 | torrent.validate() 319 | assert excinfo.match(rf"Invalid metainfo: Missing {keys[-1]!r} in \['info'\]") 320 | 321 | def test_singlefile_missing_info_path(generated_singlefile_torrent): 322 | assert_missing_metainfo(generated_singlefile_torrent, 'info', 'name') 323 | 324 | def test_singlefile_missing_info_piece_length(generated_singlefile_torrent): 325 | assert_missing_metainfo(generated_singlefile_torrent, 'info', 'piece length') 326 | 327 | def test_singlefile_missing_info_pieces(generated_singlefile_torrent): 328 | assert_missing_metainfo(generated_singlefile_torrent, 'info', 'pieces') 329 | 330 | def test_multifile_missing_info_path(generated_multifile_torrent): 331 | assert_missing_metainfo(generated_multifile_torrent, 'info', 'name') 332 | 333 | def test_multifile_missing_info_piece_length(generated_multifile_torrent): 334 | assert_missing_metainfo(generated_multifile_torrent, 'info', 'piece length') 335 | 336 | def test_multifile_missing_info_pieces(generated_multifile_torrent): 337 | 
assert_missing_metainfo(generated_multifile_torrent, 'info', 'pieces') 338 | 339 | def test_multifile_missing_info_files_0_length(generated_multifile_torrent): 340 | assert_missing_metainfo(generated_multifile_torrent, 'info', 'files', 0, 'length') 341 | 342 | def test_multifile_missing_info_files_1_length(generated_multifile_torrent): 343 | assert_missing_metainfo(generated_multifile_torrent, 'info', 'files', 1, 'length') 344 | 345 | def test_multifile_missing_info_files_1_path(generated_multifile_torrent): 346 | assert_missing_metainfo(generated_multifile_torrent, 'info', 'files', 1, 'path') 347 | 348 | def test_multifile_missing_info_files_2_path(generated_multifile_torrent): 349 | assert_missing_metainfo(generated_multifile_torrent, 'info', 'files', 2, 'path') 350 | 351 | 352 | def assert_mismatching_filesizes(torrent): 353 | torrent.validate() # Should validate 354 | 355 | for torrent_path, fs_path in zip(torrent.files, torrent.filepaths): 356 | # Remember file content 357 | with open(fs_path, 'rb') as f: 358 | orig_fs_path_content = f.read() 359 | 360 | # Change file size 361 | with open(fs_path, 'ab') as f: 362 | f.write(b'foo') 363 | 364 | # Expect validation error 365 | mi_size = torrent.partial_size(torrent_path) 366 | fs_size = os.path.getsize(fs_path) 367 | assert fs_size == mi_size + len('foo') 368 | with pytest.raises(torf.MetainfoError) as excinfo: 369 | torrent.validate() 370 | assert str(excinfo.value) == (f'Invalid metainfo: Mismatching file sizes in metainfo ({mi_size}) ' 371 | f'and file system ({fs_size}): {fs_path}') 372 | 373 | # Restore original file content 374 | with open(fs_path, 'wb') as f: 375 | f.write(orig_fs_path_content) 376 | 377 | torrent.validate() # Should validate again 378 | 379 | def test_singlefile_mismatching_filesize(generated_singlefile_torrent): 380 | assert_mismatching_filesizes(generated_singlefile_torrent) 381 | 382 | def test_multifile_mismatching_filesize(generated_multifile_torrent): 383 | assert_mismatching_filesizes(generated_multifile_torrent) 384 | -------------------------------------------------------------------------------- /tests/test_verify_filesize.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | from pathlib import Path 4 | from unittest import mock 5 | 6 | import pytest 7 | 8 | import torf 9 | 10 | 11 | def test_validate_is_called_first(monkeypatch): 12 | torrent = torf.Torrent() 13 | mock_validate = mock.MagicMock(side_effect=torf.MetainfoError('Mock error')) 14 | monkeypatch.setattr(torrent, 'validate', mock_validate) 15 | with pytest.raises(torf.MetainfoError) as excinfo: 16 | torrent.verify_filesize('some/path') 17 | assert excinfo.match('^Invalid metainfo: Mock error$') 18 | mock_validate.assert_called_once_with() 19 | 20 | 21 | @pytest.mark.parametrize( 22 | argnames='callback_return_values, exp_calls, exp_success', 23 | argvalues=( 24 | ([None], 1, True), 25 | ([True], 1, False), 26 | ([False], 1, False), 27 | ([''], 1, False), 28 | ), 29 | ) 30 | def test_success_with_singlefile_torrent(callback_return_values, exp_calls, exp_success, create_file, create_torrent_file): 31 | content_path = create_file('file.jpg', '') 32 | with create_torrent_file(path=content_path) as torrent_file: 33 | torrent = torf.Torrent.read(torrent_file) 34 | 35 | # Without callback 36 | return_value = torrent.verify_filesize(content_path) 37 | assert return_value is True 38 | 39 | # With callback 40 | def assert_call(t, fs_path, t_path, files_done, files_total, exc): 41 | 
assert t == torrent 42 | assert fs_path == content_path 43 | assert files_done == 1 44 | assert files_total == 1 45 | assert exc is None 46 | return callback_return_values.pop(0) 47 | 48 | cb = mock.MagicMock() 49 | cb.side_effect = assert_call 50 | assert torrent.verify_filesize(content_path, callback=cb) is exp_success 51 | assert cb.call_count == exp_calls 52 | 53 | 54 | 55 | @pytest.mark.parametrize( 56 | argnames='callback_return_values, exp_calls, exp_success', 57 | argvalues=( 58 | ([None, None, None], 3, True), 59 | ([None, None, True], 3, False), 60 | ([None, True], 2, False), 61 | ([True], 1, False), 62 | ), 63 | ) 64 | def test_success_with_multifile_torrent(callback_return_values, exp_calls, exp_success, create_dir, create_torrent_file): 65 | content_path = create_dir('content', 66 | ('a.jpg', 'some data'), 67 | ('b.jpg', 'some other data'), 68 | ('c.jpg', 'some more data')) 69 | with create_torrent_file(path=content_path) as torrent_file: 70 | torrent = torf.Torrent.read(torrent_file) 71 | 72 | assert os.path.exists(content_path / 'a.jpg') 73 | assert os.path.exists(content_path / 'b.jpg') 74 | assert os.path.exists(content_path / 'c.jpg') 75 | 76 | # Without callback 77 | assert torrent.verify_filesize(content_path) is True 78 | 79 | # With callback 80 | def assert_call(t, fs_path, t_path, files_done, files_total, exc): 81 | assert t == torrent 82 | assert files_done == cb.call_count 83 | assert files_total == 3 84 | if cb.call_count == 1: 85 | assert fs_path == content_path / 'a.jpg' 86 | assert t_path == Path(*(content_path / 'a.jpg').parts[-2:]) 87 | assert exc is None 88 | elif cb.call_count == 2: 89 | assert fs_path == content_path / 'b.jpg' 90 | assert t_path == Path(*(content_path / 'b.jpg').parts[-2:]) 91 | assert exc is None 92 | elif cb.call_count == 3: 93 | assert fs_path == content_path / 'c.jpg' 94 | assert t_path == Path(*(content_path / 'c.jpg').parts[-2:]) 95 | assert exc is None 96 | return callback_return_values.pop(0) 97 | 98 | cb = mock.MagicMock() 99 | cb.side_effect = assert_call 100 | assert torrent.verify_filesize(content_path, callback=cb) is exp_success 101 | assert cb.call_count == exp_calls 102 | 103 | 104 | @pytest.mark.parametrize( 105 | argnames='callback_return_values, exp_calls', 106 | argvalues=( 107 | ([None], 1), 108 | ([True], 1), 109 | ([False], 1), 110 | ([''], 1), 111 | ), 112 | ) 113 | def test_file_in_singlefile_torrent_doesnt_exist(callback_return_values, exp_calls, create_file, create_torrent_file): 114 | content_path = create_file('file.jpg', '') 115 | with create_torrent_file(path=content_path) as torrent_file: 116 | torrent = torf.Torrent.read(torrent_file) 117 | 118 | # Without callback 119 | with pytest.raises(torf.ReadError) as excinfo: 120 | torrent.verify_filesize('/some/nonexisting/path') 121 | assert excinfo.match('^/some/nonexisting/path: No such file or directory$') 122 | 123 | # With callback 124 | def assert_call(t, fs_path, t_path, files_done, files_total, exc): 125 | assert t == torrent 126 | assert fs_path == Path('/some/nonexisting/path') 127 | assert files_done == 1 128 | assert files_total == 1 129 | assert str(exc) == '/some/nonexisting/path: No such file or directory' 130 | return callback_return_values.pop(0) 131 | 132 | cb = mock.MagicMock() 133 | cb.side_effect = assert_call 134 | assert torrent.verify_filesize('/some/nonexisting/path', callback=cb) is False 135 | assert cb.call_count == exp_calls 136 | 137 | 138 | @pytest.mark.parametrize( 139 | argnames='callback_return_values, exp_calls', 140 | 
argvalues=( 141 | ([None, None, None], 3), 142 | ([None, True], 2), 143 | ([True], 1), 144 | ), 145 | ) 146 | def test_file_in_multifile_torrent_doesnt_exist(callback_return_values, exp_calls, create_dir, create_torrent_file): 147 | content_path = create_dir('content', 148 | ('a.jpg', 'some data'), 149 | ('b.jpg', 'some other data'), 150 | ('c.jpg', 'some more data')) 151 | with create_torrent_file(path=content_path) as torrent_file: 152 | torrent = torf.Torrent.read(torrent_file) 153 | 154 | os.remove(content_path / 'a.jpg') 155 | os.remove(content_path / 'c.jpg') 156 | assert not os.path.exists(content_path / 'a.jpg') 157 | assert os.path.exists(content_path / 'b.jpg') 158 | assert not os.path.exists(content_path / 'c.jpg') 159 | 160 | # Without callback 161 | with pytest.raises(torf.ReadError) as excinfo: 162 | torrent.verify_filesize(content_path) 163 | assert excinfo.match(f'^{content_path / "a.jpg"}: No such file or directory$') 164 | 165 | # With callback 166 | def assert_call(t, fs_path, t_path, files_done, files_total, exc): 167 | assert t == torrent 168 | assert files_done == cb.call_count 169 | assert files_total == 3 170 | if cb.call_count == 1: 171 | assert fs_path == content_path / 'a.jpg' 172 | assert t_path == Path(*(content_path / 'a.jpg').parts[-2:]) 173 | assert str(exc) == f'{fs_path}: No such file or directory' 174 | elif cb.call_count == 2: 175 | assert fs_path == content_path / 'b.jpg' 176 | assert t_path == Path(*(content_path / 'b.jpg').parts[-2:]) 177 | assert exc is None 178 | elif cb.call_count == 3: 179 | assert fs_path == content_path / 'c.jpg' 180 | assert t_path == Path(*(content_path / 'c.jpg').parts[-2:]) 181 | assert str(exc) == f'{fs_path}: No such file or directory' 182 | return callback_return_values.pop(0) 183 | 184 | cb = mock.MagicMock() 185 | cb.side_effect = assert_call 186 | assert torrent.verify_filesize(content_path, callback=cb) is False 187 | assert cb.call_count == exp_calls 188 | 189 | 190 | @pytest.mark.parametrize( 191 | argnames='callback_return_values, exp_calls', 192 | argvalues=( 193 | ([None], 1), 194 | ([True], 1), 195 | (['yes'], 1), 196 | ), 197 | ) 198 | def test_file_in_singlefile_torrent_has_wrong_size(callback_return_values, exp_calls, create_file, create_torrent_file): 199 | content_path = create_file('file.jpg', '') 200 | with create_torrent_file(path=content_path) as torrent_file: 201 | torrent = torf.Torrent.read(torrent_file) 202 | 203 | content_path.write_text('') 204 | assert os.path.getsize(content_path) != torrent.size 205 | 206 | # Without callback 207 | with pytest.raises(torf.VerifyFileSizeError) as excinfo: 208 | torrent.verify_filesize(content_path) 209 | assert excinfo.match(f'^{content_path}: Too big: 22 instead of 12 bytes$') 210 | 211 | # With callback 212 | def assert_call(t, fs_path, t_path, files_done, files_total, exc): 213 | assert t == torrent 214 | assert fs_path == content_path 215 | assert t_path == Path(Path(content_path).name) 216 | assert files_done == cb.call_count 217 | assert files_total == 1 218 | assert str(exc) == f'{content_path}: Too big: 22 instead of 12 bytes' 219 | return callback_return_values.pop(0) 220 | 221 | cb = mock.MagicMock() 222 | cb.side_effect = assert_call 223 | assert torrent.verify_filesize(content_path, callback=cb) is False 224 | assert cb.call_count == exp_calls 225 | 226 | 227 | @pytest.mark.parametrize( 228 | argnames='callback_return_values, exp_calls', 229 | argvalues=( 230 | ([None, None, True], 3), 231 | ([None, True, None], 2), 232 | (['yes', None, None], 1), 
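        # Any callback return value other than None cancels verification,
        # so a non-None entry caps the expected number of callback calls.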
233 | ), 234 | ) 235 | def test_file_in_multifile_torrent_has_wrong_size(callback_return_values, exp_calls, create_dir, create_torrent_file): 236 | content_path = create_dir('content', 237 | ('a.jpg', 100), 238 | ('b.jpg', 200), 239 | ('c.jpg', 300)) 240 | with create_torrent_file(path=content_path) as torrent_file: 241 | torrent = torf.Torrent.read(torrent_file) 242 | 243 | (content_path / 'b.jpg').write_bytes(b'\x00' * 201) 244 | (content_path / 'c.jpg').write_bytes(b'\x00' * 299) 245 | assert len((content_path / 'b.jpg').read_bytes()) == 201 246 | assert len((content_path / 'c.jpg').read_bytes()) == 299 247 | 248 | # Without callback 249 | with pytest.raises(torf.VerifyFileSizeError) as excinfo: 250 | torrent.verify_filesize(content_path) 251 | assert excinfo.match(f'^{content_path / "b.jpg"}: Too big: 201 instead of 200 bytes$') 252 | 253 | # With callback 254 | def assert_call(t, fs_path, t_path, files_done, files_total, exc): 255 | assert t == torrent 256 | assert files_done == cb.call_count 257 | assert files_total == 3 258 | if cb.call_count == 1: 259 | assert fs_path == content_path / 'a.jpg' 260 | assert t_path == Path(content_path.name, 'a.jpg') 261 | assert exc is None 262 | elif cb.call_count == 2: 263 | assert fs_path == content_path / 'b.jpg' 264 | assert t_path == Path(content_path.name, 'b.jpg') 265 | assert str(exc) == f'{fs_path}: Too big: 201 instead of 200 bytes' 266 | elif cb.call_count == 3: 267 | assert fs_path == content_path / 'c.jpg' 268 | assert t_path == Path(content_path.name, 'c.jpg') 269 | assert str(exc) == f'{fs_path}: Too small: 299 instead of 300 bytes' 270 | return callback_return_values.pop(0) 271 | 272 | cb = mock.MagicMock() 273 | cb.side_effect = assert_call 274 | assert torrent.verify_filesize(content_path, callback=cb) is False 275 | assert cb.call_count == exp_calls 276 | 277 | 278 | @pytest.mark.parametrize( 279 | argnames='callback_return_values, exp_calls', 280 | argvalues=( 281 | ([None, None, True], 1), 282 | ([True, None, None], 1), 283 | ([None], 1), 284 | ), 285 | ) 286 | def test_path_is_directory_and_torrent_contains_single_file(callback_return_values, exp_calls, create_file, create_dir, create_torrent_file): 287 | content_data = b'\x00' * 1001 288 | content_path = create_file('content', content_data) 289 | with create_torrent_file(path=content_path) as torrent_file: 290 | torrent = torf.Torrent.read(torrent_file) 291 | 292 | os.remove(content_path) 293 | content_path = create_dir('content', ('content', content_data)) 294 | assert os.path.isdir(content_path) 295 | 296 | # Without callback 297 | with pytest.raises(torf.VerifyIsDirectoryError) as excinfo: 298 | torrent.verify_filesize(content_path) 299 | assert excinfo.match(f'^{content_path}: Is a directory$') 300 | 301 | # With callback 302 | def assert_call(t, fs_path, t_path, files_done, files_total, exc): 303 | assert t == torrent 304 | assert files_done == 1 305 | assert files_total == 1 306 | assert fs_path == Path(content_path) 307 | assert t_path == Path(Path(content_path).name) 308 | assert str(exc) == f'{content_path}: Is a directory' 309 | return callback_return_values.pop(0) 310 | 311 | cb = mock.MagicMock() 312 | cb.side_effect = assert_call 313 | assert torrent.verify_filesize(content_path, callback=cb) is False 314 | assert cb.call_count == exp_calls 315 | 316 | 317 | @pytest.mark.parametrize( 318 | argnames='callback_return_values, exp_calls', 319 | argvalues=( 320 | ([None, None, None], 2), 321 | ([None, True], 2), 322 | ([False], 1), 323 | ), 324 | ) 325 | def 
test_path_is_file_and_torrent_contains_directory(callback_return_values, exp_calls, create_file, create_dir, create_torrent_file): 326 | content_path = create_dir('content', 327 | ('a.jpg', b'\x00' * 1234), 328 | ('b.jpg', b'\x00' * 234)) 329 | with create_torrent_file(path=content_path) as torrent_file: 330 | torrent = torf.Torrent.read(torrent_file) 331 | 332 | shutil.rmtree(content_path) 333 | assert not os.path.exists(content_path) 334 | 335 | create_file('content', 'some data') 336 | assert os.path.isfile(content_path) 337 | 338 | # Without callback 339 | with pytest.raises(torf.ReadError) as excinfo: 340 | torrent.verify_filesize(content_path) 341 | assert excinfo.match(f'^{content_path / "a.jpg"}: No such file or directory$') 342 | 343 | # With callback 344 | def assert_call(t, fs_path, t_path, files_done, files_total, exc): 345 | assert t == torrent 346 | assert files_done == cb.call_count 347 | assert files_total == 2 348 | if cb.call_count == 1: 349 | assert fs_path == content_path / 'a.jpg' 350 | assert t_path == Path(content_path.name, 'a.jpg') 351 | assert str(exc) == f'{fs_path}: No such file or directory' 352 | elif cb.call_count == 2: 353 | assert fs_path == content_path / 'b.jpg' 354 | assert t_path == Path(content_path.name, 'b.jpg') 355 | assert str(exc) == f'{fs_path}: No such file or directory' 356 | return callback_return_values.pop(0) 357 | 358 | cb = mock.MagicMock() 359 | cb.side_effect = assert_call 360 | assert torrent.verify_filesize(content_path, callback=cb) is False 361 | assert cb.call_count == exp_calls 362 | 363 | 364 | @pytest.mark.parametrize( 365 | argnames='callback_return_values, exp_calls', 366 | argvalues=( 367 | ([None, None, None], 3), 368 | ([None, None, 'cancel'], 3), 369 | ([None, ()], 2), 370 | ([0], 1), 371 | ), 372 | ) 373 | def test_parent_path_of_multifile_torrent_is_unreadable(callback_return_values, exp_calls, create_dir, create_torrent_file): 374 | content_path = create_dir('content', 375 | ('unreadable1/b/c/a.jpg', 'a data'), 376 | ('unreadable2/b/c/b.jpg', 'b data'), 377 | ('readable/b/c/c.jpg', 'c data')) 378 | with create_torrent_file(path=content_path) as torrent_file: 379 | torrent = torf.Torrent.read(torrent_file) 380 | unreadable_path1_mode = os.stat(content_path / 'unreadable1').st_mode 381 | unreadable_path2_mode = os.stat(content_path / 'unreadable2').st_mode 382 | try: 383 | os.chmod((content_path / 'unreadable1'), mode=0o222) 384 | os.chmod((content_path / 'unreadable2'), mode=0o222) 385 | 386 | # NOTE: We would expect "Permission denied" here, but 387 | # os.path.exists() can't look inside .../content/unreadable1/ and 388 | # thus raises "No such file or directory". 
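        # (os.path.exists() returns False on any OSError, including EACCES,
        # so the missing execute bit on the parent directory surfaces as
        # ENOENT rather than "Permission denied".)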
389 | 390 | # Without callback 391 | with pytest.raises(torf.ReadError) as excinfo: 392 | torrent.verify_filesize(content_path) 393 | assert excinfo.match(f'^{content_path / "unreadable1/b/c/a.jpg"}: No such file or directory$') 394 | 395 | # With callback 396 | def assert_call(t, fs_path, t_path, files_done, files_total, exc): 397 | assert t == torrent 398 | assert files_done == cb.call_count 399 | assert files_total == 3 400 | if cb.call_count == 1: 401 | assert fs_path == content_path / 'readable/b/c/c.jpg' 402 | assert t_path == Path(content_path.name, 'readable/b/c/c.jpg') 403 | assert exc is None 404 | elif cb.call_count == 2: 405 | assert fs_path == content_path / 'unreadable1/b/c/a.jpg' 406 | assert t_path == Path(content_path.name, 'unreadable1/b/c/a.jpg') 407 | assert str(exc) == f'{fs_path}: No such file or directory' 408 | elif cb.call_count == 3: 409 | assert fs_path == Path(content_path / 'unreadable2/b/c/b.jpg') 410 | assert t_path == Path(content_path.name, 'unreadable2/b/c/b.jpg') 411 | assert str(exc) == f'{fs_path}: No such file or directory' 412 | return callback_return_values.pop(0) 413 | 414 | cb = mock.MagicMock() 415 | cb.side_effect = assert_call 416 | assert torrent.verify_filesize(content_path, callback=cb) is False 417 | assert cb.call_count == exp_calls 418 | finally: 419 | os.chmod((content_path / 'unreadable1'), mode=unreadable_path1_mode) 420 | os.chmod((content_path / 'unreadable2'), mode=unreadable_path2_mode) 421 | 422 | 423 | @pytest.mark.parametrize( 424 | argnames='callback_return_values, exp_calls', 425 | argvalues=( 426 | ([None, None, None], 1), 427 | (['abort', None, None], 1), 428 | ([range(123), None, None], 1), 429 | ([123, None, None], 1), 430 | ), 431 | ) 432 | def test_parent_path_of_singlefile_torrent_is_unreadable(callback_return_values, exp_calls, create_dir, create_torrent_file): 433 | parent_path = create_dir('parent', 434 | ('file.jpg', b'\x00' * 123)) 435 | content_file = str(parent_path / 'file.jpg') 436 | with create_torrent_file(path=content_file) as torrent_file: 437 | torrent = torf.Torrent.read(torrent_file) 438 | 439 | parent_path_mode = os.stat(parent_path).st_mode 440 | try: 441 | os.chmod(parent_path, mode=0o222) 442 | 443 | # NOTE: We would expect "Permission denied" here, but 444 | # os.path.exists() can't look inside "parent" directory and thus 445 | # raises "No such file or directory". 
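        # (Same caveat as in the multifile test above: exists() masks
        # EACCES as ENOENT.)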
446 |
447 |         # Without callback
448 |         with pytest.raises(torf.ReadError) as excinfo:
449 |             torrent.verify_filesize(content_file)
450 |         assert excinfo.match(f'^{content_file}: No such file or directory$')
451 |
452 |         # With callback
453 |         def assert_call(t, fs_path, t_path, files_done, files_total, exc):
454 |             assert t == torrent
455 |             assert files_done == 1
456 |             assert files_total == 1
457 |             assert fs_path == Path(content_file)
458 |             assert t_path == Path(Path(content_file).name)
459 |             assert str(exc) == f'{content_file}: No such file or directory'
460 |             return callback_return_values.pop(0)
461 |
462 |         cb = mock.MagicMock()
463 |         cb.side_effect = assert_call
464 |         assert torrent.verify_filesize(content_file, callback=cb) is False
465 |         assert cb.call_count == exp_calls
466 |     finally:
467 |         os.chmod(parent_path, mode=parent_path_mode)
468 |
469 |
470 | def test_callback_raises_exception(create_dir, create_torrent_file):
471 |     content_path = create_dir('content',
472 |                               ('a.jpg', b'\x00' * 123),
473 |                               ('b.jpg', b'\x00' * 456),
474 |                               ('c.jpg', b'\x00' * 789))
475 |     with create_torrent_file(path=content_path) as torrent_file:
476 |         def assert_call(t, fs_path, t_path, files_done, files_total, exc):
477 |             assert t == torrent
478 |             assert files_done == cb.call_count
479 |             assert files_total == 3
480 |             if cb.call_count == 1:
481 |                 assert fs_path == content_path / 'a.jpg'
482 |                 assert t_path == Path(content_path.name, 'a.jpg')
483 |                 assert exc is None
484 |             elif cb.call_count == 2:
485 |                 raise RuntimeError("I'm off")
486 |             elif cb.call_count == 3:
487 |                 assert fs_path == content_path / 'c.jpg'
488 |                 assert t_path == Path(content_path.name, 'c.jpg')
489 |                 assert exc is None
490 |             return None
491 |
492 |         torrent = torf.Torrent.read(torrent_file)
493 |         cb = mock.MagicMock()
494 |         cb.side_effect = assert_call
495 |
496 |         with pytest.raises(RuntimeError) as excinfo:
497 |             torrent.verify_filesize(content_path, callback=cb)
498 |         assert excinfo.match("^I'm off$")
499 |         assert cb.call_count == 2
500 |
--------------------------------------------------------------------------------
/tests/test_write.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 |
4 | import flatbencode as bencode
5 | import pytest
6 |
7 | import torf
8 |
9 |
10 | def test_successful_write(generated_singlefile_torrent, tmp_path):
11 |     f = tmp_path / 'a.torrent'
12 |     generated_singlefile_torrent.write(f)
13 |     bytes_written = open(f, 'rb').read()
14 |     bytes_expected = generated_singlefile_torrent.dump()
15 |     assert bytes_written == bytes_expected
16 |
17 |
18 | def test_write_with_creation_date(generated_singlefile_torrent, tmp_path):
19 |     f = tmp_path / 'a.torrent'
20 |     now = int(time.time())
21 |     generated_singlefile_torrent.creation_date = now
22 |     generated_singlefile_torrent.write(f)
23 |     metainfo = bencode.decode(open(f, 'rb').read())
24 |     assert metainfo[b'creation date'] == now
25 |
26 |
27 | def test_write_to_file_without_permission(generated_singlefile_torrent, tmp_path):
28 |     (tmp_path / 'test_dir').mkdir()
29 |     (tmp_path / 'test_dir').chmod(0o444)
30 |
31 |     with pytest.raises(torf.WriteError) as excinfo:
32 |         generated_singlefile_torrent.write(tmp_path / 'test_dir' / 'a.torrent')
33 |     assert excinfo.match(f'^{tmp_path / "test_dir" / "a.torrent"}: Permission denied$')
34 |
35 |
36 | def test_write_to_existing_file(generated_singlefile_torrent, tmp_path):
37 |     (tmp_path / 'a.torrent').write_text('something')
38 |
39 |     with
pytest.raises(torf.WriteError) as excinfo: 40 | generated_singlefile_torrent.write(tmp_path / 'a.torrent') 41 | assert excinfo.match(f'^{tmp_path / "a.torrent"}: File exists$') 42 | 43 | generated_singlefile_torrent.write(tmp_path / 'a.torrent', overwrite=True) 44 | bytes_written = open(tmp_path / 'a.torrent', 'rb').read() 45 | bytes_expected = generated_singlefile_torrent.dump() 46 | assert bytes_written == bytes_expected 47 | 48 | 49 | def test_existing_file_is_unharmed_if_dump_fails(generated_singlefile_torrent, tmp_path): 50 | (tmp_path / 'a.torrent').write_text('something') 51 | del generated_singlefile_torrent.metainfo['info']['length'] 52 | 53 | with pytest.raises(torf.MetainfoError): 54 | generated_singlefile_torrent.write(tmp_path / 'a.torrent', overwrite=True) 55 | old_content = open(tmp_path / 'a.torrent', 'r').read() 56 | assert old_content == 'something' 57 | 58 | 59 | def test_new_file_is_not_created_if_dump_fails(generated_singlefile_torrent, tmp_path): 60 | f = tmp_path / 'a.torrent' 61 | del generated_singlefile_torrent.metainfo['info']['length'] 62 | 63 | with pytest.raises(torf.MetainfoError): 64 | generated_singlefile_torrent.write(f) 65 | assert not os.path.exists(f) 66 | 67 | 68 | def test_overwriting_larger_torrent_file_truncates_first(generated_singlefile_torrent, tmp_path): 69 | f = (tmp_path / 'large.file') 70 | f.write_text('x' * 1000000) 71 | assert os.path.getsize(f) == 1e6 72 | 73 | generated_singlefile_torrent.write(str(f), overwrite=True) 74 | assert os.path.exists(f) 75 | assert os.path.getsize(f) < 1e6 76 | assert torf.Torrent.read(str(f)).name == os.path.basename(generated_singlefile_torrent.path) 77 | -------------------------------------------------------------------------------- /torf/__init__.py: -------------------------------------------------------------------------------- 1 | # This file is part of torf. 2 | # 3 | # torf is free software: you can redistribute it and/or modify it under the 4 | # terms of the GNU General Public License as published by the Free Software 5 | # Foundation, either version 3 of the License, or (at your option) any later 6 | # version. 7 | # 8 | # torf is distributed in the hope that it will be useful, but WITHOUT ANY 9 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU General Public License 13 | # along with torf. If not, see . 14 | 15 | # flake8: noqa 16 | 17 | """ 18 | Create and parse torrent files and magnet URIs 19 | """ 20 | 21 | __version__ = '4.3.0' 22 | 23 | from ._errors import * 24 | from ._magnet import Magnet 25 | from ._stream import TorrentFileStream 26 | from ._torrent import Torrent 27 | from ._utils import File, Filepath 28 | -------------------------------------------------------------------------------- /torf/__init__.pyi: -------------------------------------------------------------------------------- 1 | # This file is part of torf. 2 | # 3 | # torf is free software: you can redistribute it and/or modify it under the 4 | # terms of the GNU General Public License as published by the Free Software 5 | # Foundation, either version 3 of the License, or (at your option) any later 6 | # version. 7 | # 8 | # torf is distributed in the hope that it will be useful, but WITHOUT ANY 9 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | # A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU General Public License 13 | # along with torf. If not, see . 14 | 15 | # flake8: noqa 16 | 17 | __version__: str = ... 18 | 19 | from ._errors import * 20 | from ._magnet import Magnet as Magnet 21 | from ._stream import TorrentFileStream as TorrentFileStream 22 | from ._torrent import Torrent as Torrent 23 | from ._utils import File as File 24 | from ._utils import Filepath as Filepath 25 | -------------------------------------------------------------------------------- /torf/_errors.py: -------------------------------------------------------------------------------- 1 | # This file is part of torf. 2 | # 3 | # torf is free software: you can redistribute it and/or modify it under the 4 | # terms of the GNU General Public License as published by the Free Software 5 | # Foundation, either version 3 of the License, or (at your option) any later 6 | # version. 7 | # 8 | # torf is distributed in the hope that it will be useful, but WITHOUT ANY 9 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU General Public License 13 | # along with torf. If not, see . 14 | 15 | import os 16 | 17 | 18 | class TorfError(Exception): 19 | """Base exception for all exceptions raised by torf""" 20 | def __init__(self, msg, *posargs, **kwargs): 21 | super().__init__(msg) 22 | self.posargs = posargs 23 | self.kwargs = kwargs 24 | 25 | 26 | class URLError(TorfError): 27 | """Invalid URL""" 28 | def __init__(self, url): 29 | self._url = url 30 | super().__init__(f'{url}: Invalid URL', url) 31 | 32 | @property 33 | def url(self): 34 | """The invalid URL""" 35 | return self._url 36 | 37 | 38 | class PieceSizeError(TorfError): 39 | """Invalid piece size""" 40 | def __init__(self, size, min=None, max=None): 41 | self._size = size 42 | self._min = min 43 | self._max = max 44 | if min is not None and max is not None: 45 | super().__init__(f'Piece size must be between {min} and {max}: {size}', 46 | size, min=min, max=max) 47 | else: 48 | super().__init__(f'Piece size must be divisible by 16 KiB: {size}', 49 | size) 50 | 51 | @property 52 | def size(self): 53 | """The invalid piece size""" 54 | return self._size 55 | 56 | @property 57 | def min(self): 58 | """Smallest allowed piece size or ``None``""" 59 | return self._min 60 | 61 | @property 62 | def max(self): 63 | """Largest allowed piece size or ``None``""" 64 | return self._max 65 | 66 | 67 | class MetainfoError(TorfError): 68 | """Invalid torrent metainfo""" 69 | def __init__(self, msg): 70 | super().__init__(f'Invalid metainfo: {msg}', msg) 71 | 72 | 73 | class BdecodeError(TorfError): 74 | """Failed to decode bencoded byte sequence""" 75 | def __init__(self, filepath=None): 76 | self._filepath = filepath 77 | if filepath is None: 78 | super().__init__('Invalid metainfo format') 79 | else: 80 | super().__init__(f'{filepath}: Invalid torrent file format', filepath) 81 | 82 | @property 83 | def filepath(self): 84 | """Path of the offending torrent file or ``None``""" 85 | return self._filepath 86 | 87 | 88 | class MagnetError(TorfError): 89 | """Invalid magnet URI or value""" 90 | def __init__(self, uri, reason=None): 91 | self._uri = uri 92 | self._reason = reason 93 | if reason is not None: 94 | super().__init__(f'{uri}: {reason}', uri, reason=reason) 95 | else: 96 | super().__init__(f'{uri}: 
Invalid magnet URI', uri) 97 | 98 | @property 99 | def uri(self): 100 | """The invalid URI""" 101 | return self._uri 102 | 103 | @property 104 | def reason(self): 105 | """Why URI is invalid""" 106 | return self._reason 107 | 108 | 109 | class PathError(TorfError): 110 | """General invalid or unexpected path""" 111 | def __init__(self, path, msg): 112 | self._path = path 113 | super().__init__(f'{path}: {msg}', path, msg) 114 | 115 | @property 116 | def path(self): 117 | """Path of the offending file or directory""" 118 | return self._path 119 | 120 | 121 | class CommonPathError(TorfError): 122 | """Files don't share parent directory""" 123 | def __init__(self, filepaths): 124 | self._filepaths = filepaths 125 | filepaths_str = ', '.join(str(fp) for fp in filepaths) 126 | super().__init__(f'No common parent path: {filepaths_str}', filepaths) 127 | 128 | @property 129 | def filepaths(self): 130 | """Paths to offending files""" 131 | return self._filepaths 132 | 133 | 134 | class VerifyIsDirectoryError(TorfError): 135 | """Expected file but found directory""" 136 | def __init__(self, path): 137 | self._path = path 138 | super().__init__(f'{path}: Is a directory', path) 139 | 140 | @property 141 | def path(self): 142 | """Path of the offending directory""" 143 | return self._path 144 | 145 | 146 | class VerifyNotDirectoryError(TorfError): 147 | """Expected (link to) directory, but found something else""" 148 | def __init__(self, path): 149 | self._path = path 150 | super().__init__(f'{path}: Not a directory', path) 151 | 152 | @property 153 | def path(self): 154 | """Path of the offending non-directory""" 155 | return self._path 156 | 157 | 158 | class VerifyFileSizeError(TorfError): 159 | """Unexpected file size""" 160 | def __init__(self, filepath, actual_size, expected_size): 161 | self._filepath = filepath 162 | self._actual_size = actual_size 163 | self._expected_size = expected_size 164 | if actual_size > expected_size: 165 | super().__init__(f'{filepath}: Too big: {actual_size} instead of {expected_size} bytes', 166 | filepath, actual_size=actual_size, expected_size=expected_size) 167 | elif actual_size < expected_size: 168 | super().__init__(f'{filepath}: Too small: {actual_size} instead of {expected_size} bytes', 169 | filepath, actual_size=actual_size, expected_size=expected_size) 170 | else: 171 | raise RuntimeError(f'Unjustified: actual_size={actual_size} == expected_size={expected_size}') 172 | 173 | @property 174 | def filepath(self): 175 | """Path of the offending file""" 176 | return self._filepath 177 | 178 | @property 179 | def actual_size(self): 180 | """Size as reported by the file system""" 181 | return self._actual_size 182 | 183 | @property 184 | def expected_size(self): 185 | """Size as specified in the metainfo""" 186 | return self._expected_size 187 | 188 | 189 | class VerifyContentError(TorfError): 190 | """On-disk data does not match hashes in metainfo""" 191 | def __init__(self, filepath, piece_index, piece_size, file_sizes): 192 | self._filepath = filepath 193 | self._piece_index = piece_index 194 | self._piece_size = piece_size 195 | msg = f'Corruption in piece {piece_index + 1}' 196 | 197 | if len(file_sizes) < 1: 198 | raise RuntimeError(f'file_sizes argument is empty: {file_sizes!r}') 199 | elif len(file_sizes) == 1: 200 | corrupt_files = (file_sizes[0][0],) 201 | else: 202 | corrupt_files = [] 203 | 204 | # Find the slice in the whole stream of files that contains the 205 | # corruption (piece_index=0 is the first piece) 206 | err_i_beg = piece_index * 
piece_size 207 | err_i_end = err_i_beg + piece_size 208 | 209 | # Find the files that are covered by the corrupt piece 210 | cur_pos = 0 211 | for filepath,filesize in file_sizes: 212 | # `file` is possibly corrupt if: 213 | # 1. The corrupt piece STARTS between the beginning and the end 214 | # of the file in the stream. 215 | # 2. The corrupt piece ENDS between the beginning and the end 216 | # of the file in the stream. 217 | # 3. Both beginning and end of the file are between beginning 218 | # and end of the corrupt piece (i.e. file fits in one piece). 219 | file_i_beg = cur_pos 220 | file_i_end = file_i_beg + filesize 221 | if (file_i_beg <= err_i_beg < file_i_end or 222 | file_i_beg < err_i_end <= file_i_end or 223 | (file_i_beg >= err_i_beg and file_i_end < err_i_end)): 224 | corrupt_files.append(filepath) 225 | cur_pos += filesize 226 | 227 | if len(corrupt_files) == 1: 228 | msg += f' in {corrupt_files[0]}' 229 | else: 230 | msg += (', at least one of these files is corrupt: ' + 231 | ', '.join(str(f) for f in corrupt_files)) 232 | 233 | self._files = tuple(corrupt_files) 234 | super().__init__(msg, filepath, piece_index, piece_size, file_sizes) 235 | 236 | @property 237 | def filepath(self): 238 | """Path to file that caused the piece corruption""" 239 | return self._filepath 240 | 241 | @property 242 | def piece_index(self): 243 | """Index of the corrupt piece in the stream of concatenated files""" 244 | return self._piece_index 245 | 246 | @property 247 | def piece_size(self): 248 | """Size of the corrupt piece in bytes""" 249 | return self._piece_size 250 | 251 | @property 252 | def files(self): 253 | """Potentially corrupt neighboring files""" 254 | return self._files 255 | 256 | 257 | class ReadError(TorfError): 258 | """Unreadable file or stream""" 259 | def __init__(self, errno, path=None): 260 | self._errno = errno 261 | self._path = path 262 | msg = os.strerror(errno) if errno else 'Unable to read' 263 | if path is None: 264 | super().__init__(f'{msg}', errno) 265 | else: 266 | super().__init__(f'{path}: {msg}', errno, path) 267 | 268 | @property 269 | def path(self): 270 | """Path of the offending file or ``None``""" 271 | return self._path 272 | 273 | @property 274 | def errno(self): 275 | """POSIX error number from errno.h""" 276 | return self._errno 277 | 278 | 279 | class MemoryError(TorfError, MemoryError): 280 | """ 281 | Out of memory 282 | 283 | See also :class:`MemoryError`. 
284 | """ 285 | 286 | 287 | class WriteError(TorfError): 288 | """Unwritable file or stream""" 289 | def __init__(self, errno, path=None): 290 | self._errno = errno 291 | self._path = path 292 | msg = os.strerror(errno) if errno else 'Unable to write' 293 | if path is None: 294 | super().__init__(f'{msg}', path) 295 | else: 296 | super().__init__(f'{path}: {msg}', errno, path) 297 | 298 | @property 299 | def path(self): 300 | """Path of the offending file or ``None``""" 301 | return self._path 302 | 303 | @property 304 | def errno(self): 305 | """POSIX error number from errno.h""" 306 | return self._errno 307 | 308 | 309 | class ConnectionError(TorfError): 310 | """Unwritable file or stream""" 311 | def __init__(self, url, msg='Failed'): 312 | self._url = url 313 | self._msg = str(msg) 314 | super().__init__(f'{url}: {msg}', url, msg) 315 | 316 | @property 317 | def url(self): 318 | """URL that caused the exception""" 319 | return self._url 320 | -------------------------------------------------------------------------------- /torf/_errors.pyi: -------------------------------------------------------------------------------- 1 | from _typeshed import StrPath 2 | 3 | from ._utils import Filepaths 4 | 5 | class TorfError(Exception): 6 | def __init__(self, msg: str, *posargs: object, **kwargs: object) -> None: ... 7 | 8 | class URLError(TorfError): 9 | def __init__(self, url: str) -> None: ... 10 | @property 11 | def url(self) -> str: ... 12 | 13 | class PieceSizeError(TorfError): 14 | def __init__(self, size: int, min: int | None = None, max: int | None = None) -> None: ... 15 | @property 16 | def size(self) -> int: ... 17 | @property 18 | def min(self) -> int | None: ... 19 | @property 20 | def max(self) -> int | None: ... 21 | 22 | class MetainfoError(TorfError): 23 | def __init__(self, msg: str) -> None: ... 24 | 25 | class BdecodeError(TorfError): 26 | def __init__(self, filepath: StrPath | None = None) -> None: ... 27 | @property 28 | def filepath(self) -> StrPath | None: ... 29 | 30 | class MagnetError(TorfError): 31 | def __init__(self, uri: str, reason: str | None = None) -> None: ... 32 | @property 33 | def uri(self) -> str: ... 34 | @property 35 | def reason(self) -> str | None: ... 36 | 37 | class PathError(TorfError): 38 | def __init__(self, path: StrPath, msg: str) -> None: ... 39 | @property 40 | def path(self) -> StrPath: ... 41 | 42 | class CommonPathError(TorfError): 43 | def __init__(self, filepaths: Filepaths) -> None: ... 44 | @property 45 | def filepaths(self) -> Filepaths: ... 46 | 47 | class VerifyIsDirectoryError(TorfError): 48 | def __init__(self, path: StrPath) -> None: ... 49 | @property 50 | def path(self) -> StrPath: ... 51 | 52 | class VerifyNotDirectoryError(TorfError): 53 | def __init__(self, path: StrPath) -> None: ... 54 | @property 55 | def path(self) -> StrPath: ... 56 | 57 | class VerifyFileSizeError(TorfError): 58 | def __init__(self, filepath: StrPath, actual_size: int | None, expected_size: int) -> None: ... 59 | @property 60 | def filepath(self) -> StrPath: ... 61 | @property 62 | def actual_size(self) -> int | None: ... 63 | @property 64 | def expected_size(self) -> int: ... 65 | 66 | class VerifyContentError(TorfError): 67 | def __init__( 68 | self, filepath: StrPath, piece_index: int, piece_size: int, file_sizes: tuple[tuple[str, int], ...] 69 | ) -> None: ... 70 | @property 71 | def filepath(self) -> StrPath: ... 72 | @property 73 | def piece_index(self) -> int: ... 74 | @property 75 | def piece_size(self) -> int: ... 
76 | @property 77 | def files(self) -> tuple[str, ...]: ... 78 | 79 | class ReadError(TorfError): 80 | def __init__(self, errno: int, path: StrPath | None = None) -> None: ... 81 | @property 82 | def path(self) -> StrPath | None: ... 83 | @property 84 | def errno(self) -> int: ... 85 | 86 | class MemoryError(TorfError, MemoryError): ... # type: ignore[misc] 87 | 88 | class WriteError(TorfError): 89 | def __init__(self, errno: int, path: StrPath | None = None) -> None: ... 90 | @property 91 | def path(self) -> StrPath | None: ... 92 | @property 93 | def errno(self) -> int: ... 94 | 95 | class ConnectionError(TorfError): 96 | def __init__(self, url: str, msg: str = 'Failed') -> None: ... 97 | @property 98 | def url(self) -> str: ... 99 | -------------------------------------------------------------------------------- /torf/_generate.py: -------------------------------------------------------------------------------- 1 | # This file is part of torf. 2 | # 3 | # torf is free software: you can redistribute it and/or modify it under the 4 | # terms of the GNU General Public License as published by the Free Software 5 | # Foundation, either version 3 of the License, or (at your option) any later 6 | # version. 7 | # 8 | # torf is distributed in the hope that it will be useful, but WITHOUT ANY 9 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU General Public License 13 | # along with torf. If not, see <https://www.gnu.org/licenses/>. 14 | 15 | import errno 16 | import logging 17 | import os 18 | import queue 19 | import threading 20 | from hashlib import sha1 21 | from time import monotonic as time_monotonic 22 | 23 | from . import _errors as errors 24 | from ._stream import TorrentFileStream 25 | 26 | QUEUE_CLOSED = object() 27 | 28 | _debug = logging.getLogger('torf').debug 29 | 30 | def _thread_name(): 31 | return threading.current_thread().name 32 | 33 | def _pretty_bytes(b): 34 | if isinstance(b, (bytes, bytearray)) and len(b) > 8: 35 | # return b[:8].hex() + '...' + b[-8:].hex() 36 | return b[:8] + b'...' 
+ b[-8:] 37 | else: 38 | return b 39 | 40 | 41 | class Worker: 42 | """ 43 | Wrapper around a :class:`threading.Thread` that re-raises any exceptions 44 | from the thread when joined 45 | """ 46 | 47 | def __init__(self, name, worker, start=True, fail_ok=False): 48 | self._exception = None 49 | self._name = str(name) 50 | self._worker = worker 51 | self._thread = threading.Thread(name=self._name, target=self._run_and_catch_exceptions) 52 | if start: 53 | self.start(fail_ok=fail_ok) 54 | 55 | @property 56 | def exception(self): 57 | return self._exception 58 | 59 | @property 60 | def name(self): 61 | return self._name 62 | 63 | @property 64 | def is_running(self): 65 | return self._thread.is_alive() 66 | 67 | def _run_and_catch_exceptions(self): 68 | try: 69 | self._worker() 70 | except BaseException as e: 71 | self._exception = e 72 | 73 | def start(self, fail_ok=False): 74 | if not self._thread.is_alive(): 75 | try: 76 | self._thread.start() 77 | except RuntimeError as e: 78 | if fail_ok: 79 | _debug(f'{self.name}: Failed to start thread: {e!r} - but that\'s ok') 80 | else: 81 | _debug(f'{self.name}: Failed to start thread: {e!r}') 82 | raise 83 | else: 84 | _debug(f'{self.name}: Started') 85 | 86 | def join(self, *args, **kwargs): 87 | if self.is_running: 88 | self._thread.join(*args, **kwargs) 89 | if self._exception: 90 | raise self._exception 91 | 92 | 93 | class Reader(Worker): 94 | """ 95 | :class:`Worker` subclass that reads files in pieces and pushes them to a 96 | queue 97 | """ 98 | 99 | def __init__(self, *, torrent, queue_size, path=None): 100 | self._torrent = torrent 101 | self._path = path 102 | self._piece_queue = queue.Queue(maxsize=queue_size) 103 | self._stop = False 104 | self._memory_error_timestamp = -1 105 | super().__init__(name='reader', worker=self._push_pieces) 106 | 107 | def _push_pieces(self): 108 | stream = TorrentFileStream(self._torrent) 109 | try: 110 | iter_pieces = stream.iter_pieces(self._path, oom_callback=self._handle_oom) 111 | for piece_index, (piece, filepath, exceptions) in enumerate(iter_pieces): 112 | # _debug(f'{_thread_name()}: Read #{piece_index}') 113 | if self._stop: 114 | _debug(f'{_thread_name()}: Stopped reading') 115 | break 116 | elif exceptions: 117 | self._push_piece(piece_index=piece_index, filepath=filepath, exceptions=exceptions) 118 | elif piece: 119 | self._push_piece(piece_index=piece_index, filepath=filepath, piece=piece) 120 | else: 121 | # `piece` is None because of missing file, and the exception 122 | # was already sent for the first `piece_index` of that file 123 | self._push_piece(piece_index=piece_index, filepath=filepath) 124 | 125 | # _debug(f'{_thread_name()}: {self._piece_queue.qsize()} pieces queued') 126 | 127 | except BaseException as e: 128 | _debug(f'{_thread_name()}: Exception while reading: {e!r}') 129 | raise 130 | 131 | finally: 132 | self._piece_queue.put(QUEUE_CLOSED) 133 | _debug(f'{_thread_name()}: Piece queue is now exhausted') 134 | stream.close() 135 | 136 | def _push_piece(self, *, piece_index, filepath, piece=None, exceptions=()): 137 | # _debug(f'{_thread_name()}: Pushing #{piece_index}: {filepath}: {_pretty_bytes(piece)}, {exceptions!r}') 138 | self._piece_queue.put((piece_index, filepath, piece, exceptions)) 139 | 140 | def _handle_oom(self, exception): 141 | # Reduce piece_queue.maxsize by 10% at most every 100ms until the MemoryErrors stop 142 | now = time_monotonic() 143 | time_diff = now - self._memory_error_timestamp 144 | if time_diff >= 0.1: 145 | old_maxsize = self._piece_queue.maxsize 146 | new_maxsize 
= max(1, int(old_maxsize * 0.9)) 147 | if new_maxsize != old_maxsize: 148 | _debug(f'{_thread_name()}: Reducing piece_queue.maxsize to {new_maxsize}') 149 | self._piece_queue.maxsize = new_maxsize 150 | self._memory_error_timestamp = now 151 | else: 152 | raise errors.ReadError(errno.ENOMEM, exception) 153 | 154 | def stop(self): 155 | """Stop reading and close the piece queue""" 156 | if not self._stop: 157 | _debug(f'{_thread_name()}: {type(self).__name__}: Setting stop flag') 158 | self._stop = True 159 | 160 | @property 161 | def piece_queue(self): 162 | """ 163 | :class:`queue.Queue` instance that gets evenly sized pieces from the 164 | concatenated stream of files 165 | """ 166 | return self._piece_queue 167 | 168 | 169 | class HasherPool: 170 | """ 171 | Wrapper around one or more :class:`Worker` instances that each read a piece 172 | from :attr:`Reader.piece_queue`, feed it to :func:`~.hashlib.sha1`, and push 173 | the resulting hash to :attr:`hash_queue` 174 | """ 175 | 176 | def __init__(self, hasher_threads, piece_queue): 177 | self._piece_queue = piece_queue 178 | self._hash_queue = queue.Queue() 179 | self._finalize_event = threading.Event() 180 | 181 | # Janitor takes care of closing the hash queue, removing idle hashers, etc 182 | self._janitor = Worker( 183 | name='janitor', 184 | worker=self._janitor_thread, 185 | start=False, 186 | ) 187 | 188 | # Hashers read from piece_queue and push to hash_queue 189 | self._hashers = [ 190 | Worker( 191 | name='hasher1', 192 | # One hasher is vital and may not die from boredom 193 | worker=lambda: self._hasher_thread(is_vital=True), 194 | start=False, 195 | ), 196 | ] 197 | for i in range(2, hasher_threads + 1): 198 | self._hashers.append( 199 | Worker( 200 | name=f'hasher{i}', 201 | # All other hashers should die if they are bored 202 | worker=lambda: self._hasher_thread(is_vital=False), 203 | start=False, 204 | ) 205 | ) 206 | 207 | # Start threads manually after they were created to prevent race 208 | # conditions and make sure all required threads are running 209 | self._janitor.start(fail_ok=False) 210 | 211 | # Hashers are allowed to fail (e.g. because of OS limits), but we need 212 | # at least one to start successfully 213 | self._hashers[0].start(fail_ok=False) 214 | for hasher in self._hashers[1:]: 215 | hasher.start(fail_ok=True) 216 | 217 | def _hasher_thread(self, is_vital=True): 218 | piece_queue = self._piece_queue 219 | handle_piece = self._handle_piece 220 | while True: 221 | # _debug(f'{_thread_name()}: Waiting for next task') 222 | try: 223 | task = piece_queue.get(timeout=0.5) 224 | except queue.Empty: 225 | if not is_vital: 226 | _debug(f'{_thread_name()}: I am bored, byeee!') 227 | break 228 | else: 229 | _debug(f'{_thread_name()}: I am bored, but needed.') 230 | else: 231 | if task is QUEUE_CLOSED: 232 | _debug(f'{_thread_name()}: piece_queue is closed') 233 | # Repeat QUEUE_CLOSED to the next sibling. This ensures 234 | # there is always one more QUEUE_CLOSED queued than running 235 | # threads. Otherwise, one thread might consume multiple 236 | # QUEUE_CLOSED and leave other threads running forever. 
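                        # Sketch of that invariant with e.g. three hashers: the
                        # Reader enqueues one QUEUE_CLOSED, and every hasher that
                        # consumes it puts one back before terminating:
                        #
                        #   reader:  put(QUEUE_CLOSED)             # 1 sentinel queued
                        #   hasher1: get() -> QUEUE_CLOSED, put()  # still 1 queued
                        #   hasher2: get() -> QUEUE_CLOSED, put()  # still 1 queued
                        #   hasher3: get() -> QUEUE_CLOSED, put()  # still 1 queued
                        #
                        # so every consumer sees exactly one sentinel.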
237 | piece_queue.put(QUEUE_CLOSED) 238 | # Signal janitor to initiate shutdown procedure 239 | self._finalize_event.set() 240 | break 241 | else: 242 | handle_piece(*task) 243 | 244 | def _handle_piece(self, piece_index, filepath, piece, exceptions): 245 | if exceptions: 246 | # _debug(f'{_thread_name()}: Forwarding exceptions for #{piece_index}: {exceptions!r}') 247 | self._hash_queue.put((piece_index, filepath, None, exceptions)) 248 | 249 | elif piece: 250 | piece_hash = sha1(piece).digest() 251 | # _debug(f'{_thread_name()}: Hashed #{piece_index}: ' 252 | # f'{_pretty_bytes(piece)} [{len(piece)} bytes] -> {piece_hash}') 253 | self._hash_queue.put((piece_index, filepath, piece_hash, ())) 254 | 255 | else: 256 | # _debug(f'{_thread_name()}: Nothing to hash for #{piece_index}: {piece!r}') 257 | self._hash_queue.put((piece_index, filepath, None, ())) 258 | 259 | def _janitor_thread(self): 260 | while True: 261 | _debug(f'{_thread_name()}: Waiting for finalize event') 262 | finalization_initiated = self._finalize_event.wait(timeout=1.0) 263 | if finalization_initiated: 264 | self._wait_for_hashers() 265 | _debug(f'{_thread_name()}: Closing hash queue') 266 | self._hash_queue.put(QUEUE_CLOSED) 267 | break 268 | 269 | else: 270 | # Remove terminated idle hashers 271 | for hasher in tuple(self._hashers): 272 | if not hasher.is_running: 273 | _debug(f'{_thread_name()}: Pruning {hasher.name}') 274 | self._hashers.remove(hasher) 275 | 276 | _debug(f'{_thread_name()}: Terminating') 277 | 278 | def _wait_for_hashers(self): 279 | while True: 280 | # _debug(f'{_thread_name()}: Hashers running: {[h.name for h in self._hashers if h.is_running]}') 281 | if all(not h.is_running for h in self._hashers): 282 | _debug(f'{_thread_name()}: All hashers terminated') 283 | break 284 | 285 | def join(self): 286 | """Block until all threads have terminated""" 287 | for hasher in self._hashers: 288 | _debug(f'{_thread_name()}: Joining {hasher.name}') 289 | hasher.join() 290 | _debug(f'{_thread_name()}: Joined all hashers') 291 | 292 | _debug(f'{_thread_name()}: Joining {self._janitor.name}') 293 | self._janitor.join() 294 | _debug(f'{_thread_name()}: Joined {self._janitor.name}') 295 | 296 | @property 297 | def hash_queue(self): 298 | """:class:`queue.Queue` instance that gets piece hashes""" 299 | return self._hash_queue 300 | 301 | 302 | class Collector: 303 | """ 304 | Consume items from :attr:`HasherPool.hash_queue` and ensure proper 305 | termination of all threads if anything goes wrong or the user cancels the 306 | operation 307 | """ 308 | 309 | def __init__(self, torrent, reader, hashers, callback=None): 310 | self._reader = reader 311 | self._hashers = hashers 312 | self._callback = callback 313 | self._hashes_unsorted = [] 314 | self._pieces_seen = [] 315 | self._pieces_total = torrent.pieces 316 | 317 | def collect(self): 318 | """ 319 | Read piece hashes from :attr:`HasherPool.hash_queue` 320 | 321 | When this method returns, :attr:`hashes` is an ordered sequence of 322 | collected piece hashes. 323 | 324 | Exceptions from :class:`Reader`, :class:`HasherPool` or the provided 325 | callback are raised after all threads are terminated and joined. 
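        A minimal sketch of the wiring, using only names from this module
        (queue size and thread count are arbitrary):

            reader = Reader(torrent=torrent, queue_size=8)
            hashers = HasherPool(4, reader.piece_queue)
            hashes = Collector(torrent, reader, hashers).collect()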
326 | 327 | :return: the same value as :attr:`hashes` 328 | """ 329 | try: 330 | hash_queue = self._hashers.hash_queue 331 | while True: 332 | # _debug(f'{_thread_name()}: Waiting for next piece hash') 333 | task = hash_queue.get() 334 | # _debug(f'{_thread_name()}: Got task: {task}') 335 | if task is QUEUE_CLOSED: 336 | break 337 | else: 338 | self._collect(*task) 339 | 340 | except BaseException as e: 341 | _debug(f'{_thread_name()}: Exception while dequeueing piece hashes: {e!r}') 342 | self._cancel() 343 | raise 344 | 345 | finally: 346 | self._finalize() 347 | 348 | return self.hashes 349 | 350 | def _collect(self, piece_index, filepath, piece_hash, exceptions): 351 | # _debug(f'{_thread_name()}: Collecting #{piece_index}: {_pretty_bytes(piece_hash)}, {exceptions}') 352 | 353 | # Remember which pieces were hashed to count them and for sanity checking 354 | assert piece_index not in self._pieces_seen 355 | self._pieces_seen.append(piece_index) 356 | 357 | # Collect piece 358 | if not exceptions and piece_hash: 359 | self._hashes_unsorted.append((piece_index, piece_hash)) 360 | 361 | # If there is no callback, raise first exception 362 | if exceptions and not self._callback: 363 | raise exceptions[0] 364 | 365 | # Report progress/exceptions and allow callback to cancel 366 | if self._callback: 367 | # _debug(f'{_thread_name()}: Collector callback: {self._callback}') 368 | maybe_cancel = self._callback( 369 | piece_index, len(self._pieces_seen), self._pieces_total, 370 | filepath, piece_hash, exceptions, 371 | ) 372 | # _debug(f'{_thread_name()}: Collector callback return value: {maybe_cancel}') 373 | if maybe_cancel is not None: 374 | self._cancel() 375 | 376 | def _cancel(self): 377 | # NOTE: We don't need to stop HasherPool or Collector.collect() because 378 | # they will stop when Reader._push_pieces() pushes QUEUE_CLOSED. 379 | # They will process the pieces in the queue, but that shouldn't 380 | # take long unless the Reader's queue size is too big. 381 | self._reader.stop() 382 | 383 | def _finalize(self): 384 | _debug(f'{_thread_name()}: Joining {self._reader}') 385 | self._reader.join() 386 | _debug(f'{_thread_name()}: Joining {self._hashers}') 387 | self._hashers.join() 388 | _debug(f'{_thread_name()}: hash_queue has {self._hashers.hash_queue.qsize()} items left') 389 | 390 | @property 391 | def hashes(self): 392 | """Ordered sequence of piece hashes""" 393 | return tuple(hash for index, hash in sorted(self._hashes_unsorted)) 394 | 395 | 396 | class _IntervaledCallback: 397 | """ 398 | Callable that calls `callback`, but only if at least `interval` seconds 399 | elapsed since the previous call 400 | """ 401 | def __init__(self, callback, interval=0): 402 | self._callback = callback 403 | self._interval = interval 404 | self._prev_call_time = -1 405 | 406 | def __call__(self, *args, force=False): 407 | now = time_monotonic() 408 | diff = now - self._prev_call_time 409 | # _debug(f'{_thread_name()}: Callback? {force=} or {diff=} >= {self._interval=}') 410 | if force or diff >= self._interval: 411 | self._prev_call_time = now 412 | # _debug(f'{_thread_name()}: Callback! 
{args=}') 413 | return self._callback(*args) 414 | 415 | 416 | class _TranslatingCallback: 417 | def __init__(self, callback, interval, torrent): 418 | self._callback = callback 419 | self._torrent = torrent 420 | self._intervaled_callback = _IntervaledCallback( 421 | callback=self._call_callback, 422 | interval=interval, 423 | ) 424 | 425 | def __call__(self, piece_index, pieces_done, pieces_total, filepath, piece_hash, exceptions): 426 | force = self._force_callback(piece_index, pieces_done, pieces_total, 427 | filepath, piece_hash, exceptions) 428 | return self._intervaled_callback(piece_index, pieces_done, pieces_total, 429 | filepath, piece_hash, exceptions, 430 | force=force) 431 | 432 | def _force_callback(self, piece_index, pieces_done, pieces_total, filepath, piece_hash, exceptions): 433 | # Figure out if we must ignore the interval for this call. This method 434 | # is called for every hashed piece and should be as efficient as 435 | # possible. 436 | raise NotImplementedError('You must implement this method!') 437 | 438 | def _call_callback(self, piece_index, pieces_done, pieces_total, filepath, piece_hash, exceptions): 439 | # Translate arguments for the actual callback. This method is only 440 | # called at intervals (e.g. once per second). 441 | raise NotImplementedError('You must implement this method!') 442 | 443 | 444 | class GenerateCallback(_TranslatingCallback): 445 | """ 446 | Translate arguments from :class:`Collector` to what's specified by 447 | :meth:`~.Torrent.generate` 448 | """ 449 | 450 | def _force_callback(self, piece_index, pieces_done, pieces_total, filepath, piece_hash, exceptions): 451 | return exceptions or pieces_done >= pieces_total 452 | 453 | def _call_callback(self, piece_index, pieces_done, pieces_total, filepath, piece_hash, exceptions): 454 | if exceptions: 455 | # Torrent creation errors are always fatal and must be raised 456 | raise exceptions[0] 457 | elif self._callback: 458 | # Report progress and allow callback to cancel 459 | return self._callback(self._torrent, filepath, pieces_done, pieces_total) 460 | 461 | 462 | class VerifyCallback(_TranslatingCallback): 463 | """ 464 | Translate arguments from :class:`Collector` to what's specified by 465 | :meth:`~.Torrent.verify` 466 | """ 467 | def __init__(self, *args, path, **kwargs): 468 | super().__init__(*args, **kwargs) 469 | 470 | # Store piece hashes from the torrent for quick access 471 | self._exp_hashes = self._torrent.hashes 472 | 473 | # Map expected file system paths to expected file sizes 474 | # NOTE: The last segment in `path` is supposed to be the torrent name so 475 | # we must remove the name stored in the torrent file from each 476 | # `file`. This allows verification of any renamed file/directory 477 | # against a torrent. 
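        # For example (hypothetical values): a torrent named "foo" that
        # contains File('foo/a/b.jpg'), verified against path='/mnt/bar',
        # maps the file to '/mnt/bar/a/b.jpg', i.e. the on-disk name "bar"
        # replaces the stored name "foo".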
478 | self._exp_file_sizes = tuple( 479 | ( 480 | os.sep.join((str(path), *file.parts[1:])), 481 | self._torrent.partial_size(file), 482 | ) 483 | for file in self._torrent.files 484 | ) 485 | 486 | def _force_callback(self, piece_index, pieces_done, pieces_total, filepath, piece_hash, exceptions): 487 | return ( 488 | # Always report exceptions 489 | exceptions 490 | # Always report completion 491 | or pieces_done >= pieces_total 492 | # Always report hash mismatch 493 | or piece_hash is not None and piece_hash != self._exp_hashes[piece_index] 494 | ) 495 | 496 | def _call_callback(self, piece_index, pieces_done, pieces_total, filepath, piece_hash, exceptions): 497 | if ( 498 | # Don't add verification error if there are other errors 499 | not exceptions 500 | # Piece hash was calculated and doesn't match 501 | and piece_hash is not None and piece_hash != self._exp_hashes[piece_index] 502 | ): 503 | exceptions = (errors.VerifyContentError( 504 | filepath, piece_index, self._torrent.piece_size, self._exp_file_sizes, 505 | ),) 506 | 507 | if self._callback: 508 | # Callback can raise exception or handle it otherwise 509 | def call_callback(fpath, exception): 510 | return self._callback( 511 | self._torrent, fpath, 512 | pieces_done, pieces_total, piece_index, 513 | piece_hash, exception, 514 | ) 515 | 516 | if exceptions: 517 | # Call callback for each exception until it indicates 518 | # cancellation by returning anything truthy 519 | for exception in exceptions: 520 | fpath = self._get_path_from_exception(exception) 521 | maybe_cancel = call_callback(fpath, exception) 522 | if maybe_cancel is not None: 523 | return maybe_cancel 524 | else: 525 | # Report progress and return cancellation indicator 526 | return call_callback(filepath, None) 527 | 528 | elif exceptions: 529 | # Default to raising first exception 530 | raise exceptions[0] 531 | 532 | @staticmethod 533 | def _get_path_from_exception(exception): 534 | for attr in ('filepath', 'path'): 535 | try: 536 | return getattr(exception, attr) 537 | except AttributeError: 538 | pass 539 | 540 | raise RuntimeError(f'Failed to get path from {exception!r}') 541 | -------------------------------------------------------------------------------- /torf/_magnet.py: -------------------------------------------------------------------------------- 1 | # This file is part of torf. 2 | # 3 | # torf is free software: you can redistribute it and/or modify it under the 4 | # terms of the GNU General Public License as published by the Free Software 5 | # Foundation, either version 3 of the License, or (at your option) any later 6 | # version. 7 | # 8 | # torf is distributed in the hope that it will be useful, but WITHOUT ANY 9 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU General Public License 13 | # along with torf. If not, see . 14 | 15 | import base64 16 | import binascii 17 | import io 18 | import re 19 | import time 20 | import urllib 21 | from collections import abc, defaultdict 22 | 23 | from . import _errors as error 24 | from . 
import _utils as utils 25 | 26 | 27 | class Magnet(): 28 | """ 29 | BTIH Magnet URI 30 | 31 | :param str xt: eXact Topic: Info hash (Base 16 or 32) 32 | :param str dn: Display Name: Name of the torrent 33 | :param int xl: eXact Length: Size in bytes 34 | :param list tr: TRacker: Iterable of tracker URLs 35 | :param str xs: eXact Source: Torrent file URL 36 | :param str as\_: Acceptable Source: Fallback torrent file URL 37 | :param list ws: WebSeeds: Iterable of webseed URLs (see BEP19) 38 | :param list kt: Keyword Topic: List of search keywords 39 | 40 | All keyword arguments that start with ``x_`` go into the :attr:`x` 41 | dictionary with the part after the underscore as the key. They appear as 42 | ``x.<key>`` in the rendered URI. 43 | 44 | References: 45 | | https://www.bittorrent.org/beps/bep_0009.html 46 | | https://en.wikipedia.org/wiki/Magnet_URL 47 | | http://magnet-uri.sourceforge.net/magnet-draft-overview.txt 48 | | https://wiki.theory.org/index.php/BitTorrent_Magnet-URI_Webseeding 49 | | http://shareaza.sourceforge.net/mediawiki/index.php/Magnet_URI_scheme 50 | """ 51 | 52 | _INFOHASH_REGEX = re.compile(r'^([0-9a-f]{40}|[a-z2-7]{32})$', flags=re.IGNORECASE) 53 | _XT_REGEX = re.compile(r'^urn:btih:([0-9a-f]{40}|[a-z2-7]{32})$', flags=re.IGNORECASE) 54 | 55 | def __init__(self, xt, *, dn=None, xl=None, tr=None, xs=None, as_=None, ws=None, kt=None, **kwargs): 56 | self._tr = utils.MonitoredList(type=utils.URL) 57 | self._ws = utils.MonitoredList(type=utils.URL) 58 | self.xt = xt 59 | self.dn = dn 60 | self.xl = xl 61 | self.tr = tr 62 | self.xs = xs 63 | self.as_ = as_ 64 | self.ws = ws 65 | self.kt = kt 66 | 67 | self._x = defaultdict(lambda: None) 68 | for key in tuple(kwargs): 69 | if key.startswith('x_'): 70 | self._x[key[2:]] = kwargs.pop(key) 71 | 72 | if kwargs: 73 | key, value = next(iter(kwargs.items())) 74 | raise TypeError(f'Unrecognized argument: {key}={value!r}') 75 | 76 | @property 77 | def dn(self): 78 | """Display Name: Name of the torrent or ``None``""" 79 | return self._dn 80 | 81 | @dn.setter 82 | def dn(self, value): 83 | self._dn = str(value).replace('\n', ' ') if value is not None else None 84 | 85 | @property 86 | def xt(self): 87 | """ 88 | eXact Topic: URN containing the info hash as base 16 or base 32 89 | 90 | Example: 91 | 92 | urn:btih:3bb9561e35b06175bb6d2c2330578dc83846cc5d 93 | 94 | For convenience, this property may be set to the info hash without the 95 | ``urn:btih`` part. 
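        For example (reusing the info hash from the example above):

            >>> m = Magnet('3bb9561e35b06175bb6d2c2330578dc83846cc5d')
            >>> m.xt
            'urn:btih:3bb9561e35b06175bb6d2c2330578dc83846cc5d'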
96 | 97 | :raises MagnetError: if set to an invalid value 98 | """ 99 | return f'urn:btih:{self._infohash}' 100 | 101 | @xt.setter 102 | def xt(self, value): 103 | value = str(value) 104 | if self._INFOHASH_REGEX.match(value): 105 | self._infohash = value 106 | else: 107 | match = self._XT_REGEX.match(value) 108 | if match: 109 | self._infohash = match.group(1) 110 | if not hasattr(self, '_infohash'): 111 | raise error.MagnetError(value, 'Invalid exact topic ("xt")') 112 | 113 | @property 114 | def infohash(self): 115 | """ 116 | Info hash as base 16 or base 32 117 | 118 | :raises MagnetError: if set to an invalid value 119 | """ 120 | return self._infohash 121 | 122 | @infohash.setter 123 | def infohash(self, value): 124 | value = str(value) 125 | match = self._INFOHASH_REGEX.match(value) 126 | if match: 127 | self._infohash = value 128 | else: 129 | raise error.MagnetError(value, 'Invalid info hash') 130 | 131 | @property 132 | def xl(self): 133 | """ 134 | eXact Length: Size in bytes or ``None`` 135 | 136 | :raises MagnetError: if set to an invalid value 137 | """ 138 | return self._xl 139 | 140 | @xl.setter 141 | def xl(self, value): 142 | if value is not None: 143 | try: 144 | value = int(value) 145 | except ValueError: 146 | raise error.MagnetError(value, 'Invalid exact length ("xl")') 147 | else: 148 | if value < 1: 149 | raise error.MagnetError(value, 'Must be 1 or larger') 150 | else: 151 | self._xl = value 152 | else: 153 | self._xl = None 154 | 155 | @property 156 | def tr(self): 157 | """ 158 | TRackers: List of tracker URLs, single tracker URL or ``None`` 159 | 160 | :raises URLError: if any of the URLs is invalid 161 | """ 162 | return self._tr 163 | 164 | @tr.setter 165 | def tr(self, value): 166 | if value is None: 167 | self._tr.clear() 168 | elif isinstance(value, str): 169 | self._tr.replace((value,)) 170 | else: 171 | self._tr.replace(value) 172 | 173 | @property 174 | def xs(self): 175 | """ 176 | eXact Source: Torrent file URL or ``None`` 177 | 178 | :raises URLError: if the URL is invalid 179 | """ 180 | return self._xs 181 | 182 | @xs.setter 183 | def xs(self, value): 184 | self._xs = utils.URL(value) if value is not None else None 185 | 186 | @property 187 | def as_(self): 188 | """ 189 | Acceptable Source: Fallback torrent file URL or ``None`` 190 | 191 | (The trailing underscore is needed because "as" is a keyword in Python.) 192 | 193 | :raises URLError: if the URL is invalid 194 | """ 195 | return self._as 196 | 197 | @as_.setter 198 | def as_(self, value): 199 | self._as = utils.URL(value) if value is not None else None 200 | 201 | @property 202 | def ws(self): 203 | """ 204 | WebSeeds: List of webseed URLs, single webseed URL or ``None`` 205 | 206 | See BEP19. 
207 | 208 | :raises URLError: if any of the URLs is invalid 209 | """ 210 | return self._ws 211 | 212 | @ws.setter 213 | def ws(self, value): 214 | if value is None: 215 | self._ws.clear() 216 | elif isinstance(value, str): 217 | self._ws.replace((value,)) 218 | else: 219 | self._ws.replace(value) 220 | 221 | @property 222 | def kt(self): 223 | """Keyword Topic: List of search keywords or ``None``""" 224 | return self._kt 225 | 226 | @kt.setter 227 | def kt(self, value): 228 | if value is None: 229 | self._kt = [] 230 | elif isinstance(value, str): 231 | self._kt = [value] 232 | elif isinstance(value, abc.Iterable): 233 | self._kt = [str(v) for v in value] if value is not None else None 234 | else: 235 | raise error.MagnetError(value, 'Invalid keyword topic ("kt")') 236 | 237 | @property 238 | def x(self): 239 | """ 240 | Mapping of custom keys to their values 241 | 242 | For example, "x.pe" (a peer address) would be accessed as 243 | ``magnet.x['pe']``. 244 | """ 245 | return self._x 246 | 247 | def torrent(self): 248 | """:class:`Torrent` instance""" 249 | # Prevent circular import issues 250 | from ._torrent import Torrent 251 | torrent = Torrent() 252 | torrent.name = self.dn 253 | if self.tr: 254 | torrent.trackers = self.tr 255 | if self.ws: 256 | torrent.webseeds = self.ws 257 | if self.xl: 258 | torrent._metainfo['info']['length'] = self.xl 259 | if hasattr(self, '_info'): 260 | torrent.metainfo['info'] = self._info 261 | elif len(self.infohash) == 40: 262 | torrent._infohash = self.infohash 263 | else: 264 | # Convert base 32 to base 16 (SHA1) 265 | torrent._infohash = base64.b16encode( 266 | base64.b32decode(self.infohash)).decode('utf-8').lower() 267 | return torrent 268 | 269 | def get_info(self, validate=True, timeout=60, callback=None): 270 | """ 271 | Download the torrent's "info" section 272 | 273 | Try the following sources in this order: :attr:`xs`, :attr:`as`, 274 | :attr:`tr` 275 | 276 | :meth:`torrent` can only return a complete torrent if this method is 277 | called first. 278 | 279 | :param validate: Whether to ensure the downloaded "info" section is 280 | valid 281 | :param timeout: Give up after this many seconds 282 | :type timeout: int, float 283 | :param callback callable: Callable that is called with a 284 | :class:`TorfError` instance if a source is specified but fails 285 | 286 | :return: ``True`` if the "info" section was successfully downloaded, 287 | ``False`` otherwise 288 | """ 289 | def success(): 290 | return hasattr(self, '_info') 291 | 292 | torrent_urls = [] 293 | if self.xs: torrent_urls.append(self.xs) # noqa: E701 294 | if self.as_: torrent_urls.append(self.as_) # noqa: E701 295 | torrent_urls.extend((url.rstrip('/') + '.torrent' for url in self.ws)) 296 | # I couldn't find any documentation for the "/file?info_hash=..." GET request, but 297 | # it seems to work for HTTP trackers. 
298 | # https://stackoverflow.com/a/1019588 299 | for url in self.tr: 300 | if url.scheme in ('http', 'https'): 301 | infohash_enc = urllib.parse.quote_from_bytes(binascii.unhexlify(self.infohash)) 302 | torrent_urls.append(f'{url.scheme}://{url.netloc}/file?info_hash={infohash_enc}') 303 | 304 | start = time.monotonic() 305 | for url in torrent_urls: 306 | to = timeout - (time.monotonic() - start) 307 | try: 308 | torrent = utils.download(url, timeout=to) 309 | except error.ConnectionError as e: 310 | if callback: 311 | callback(e) 312 | else: 313 | self._set_info_from_torrent(torrent, validate, callback) 314 | if success() or to <= 0: 315 | break 316 | 317 | return success() 318 | 319 | def _set_info_from_torrent(self, torrent_data, validate=True, callback=False): 320 | """Extract "info" section from `torrent_data` for :meth:`torrent`""" 321 | # Prevent circular import issues 322 | from ._torrent import Torrent 323 | stream = io.BytesIO(torrent_data) 324 | try: 325 | torrent = Torrent.read_stream(stream, validate=validate) 326 | except error.TorfError as e: 327 | if callback: 328 | callback(e) 329 | else: 330 | if validate and self.infohash != torrent.infohash: 331 | raise error.MetainfoError(f'Mismatching info hashes: {self.infohash} != {torrent.infohash}') 332 | elif torrent.metainfo['info']: 333 | self._info = torrent.metainfo['info'] 334 | 335 | _KNOWN_PARAMETERS = ('xt', 'dn', 'xl', 'tr', 'xs', 'as', 'ws', 'kt') 336 | 337 | @classmethod 338 | def from_string(cls, uri): 339 | """ 340 | Create :class:`Magnet` URI from string 341 | 342 | :raises URLError: if `uri` contains an invalid URL (e.g. :attr:`tr`) 343 | :raises MagnetError: if `uri` is not a valid magnet URI 344 | """ 345 | info = urllib.parse.urlparse(uri.strip(), scheme='magnet', allow_fragments=False) 346 | if not info.scheme == 'magnet': 347 | raise error.MagnetError(uri, 'Not a magnet URI') 348 | else: 349 | query = urllib.parse.parse_qs(info.query) 350 | 351 | # Check for unknown parameters 352 | for key in query: 353 | if key not in cls._KNOWN_PARAMETERS and not key.startswith('x_'): 354 | raise error.MagnetError(uri, f'{key}: Unknown parameter') 355 | 356 | if 'xt' not in query: 357 | raise error.MagnetError(uri, 'Missing exact topic ("xt")') 358 | elif len(query['xt']) > 1: 359 | raise error.MagnetError(uri, 'Multiple exact topics ("xt")') 360 | else: 361 | self = cls(xt=query['xt'][0]) 362 | 363 | # Parameters that accept only one value 364 | for param,attr,name,parse in (('dn', 'dn', 'display name', lambda v: v), 365 | ('xl', 'xl', 'exact length', lambda v: v), 366 | ('xs', 'xs', 'exact source', lambda v: v), 367 | ('as', 'as_', 'acceptable source', lambda v: v), 368 | ('kt', 'kt', 'keyword topic', lambda v: v.split())): 369 | if param in query: 370 | if len(query[param]) > 1: 371 | raise error.MagnetError(uri, f'Multiple {name}s ("{param}")') 372 | else: 373 | setattr(self, attr, parse(query[param][0])) 374 | 375 | # Parameters that accept multiple values 376 | for param,name in (('tr', 'tracker'), 377 | ('ws', 'webseed')): 378 | if param in query: 379 | setattr(self, param, query[param]) 380 | 381 | return self 382 | 383 | def __str__(self): 384 | uri = [f'magnet:?xt={self.xt}'] 385 | 386 | for key in ('dn', 'xl', 'xs', 'as_'): 387 | value = getattr(self, f'{key}') 388 | if value is not None: 389 | if isinstance(value, str): 390 | uri.append(f'{key.rstrip("_")}={utils.urlquote(value)}') 391 | else: 392 | uri.append(f'{key.rstrip("_")}={value}') 393 | 394 | if self.kt: 395 | uri.append(f'kt={"+".join(utils.urlquote(k) for k in self.kt)}')
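        # "tr" and "ws" may occur multiple times in a magnet URI, so each URL
        # below becomes its own key=value pair, e.g. (made-up tracker URLs):
        # magnet:?xt=urn:btih:...&tr=http://a/announce&tr=http://b/announce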
396 | 397 | for key in ('tr', 'ws'): 398 | seq = getattr(self, f'{key}') 399 | if seq is not None: 400 | for item in seq: 401 | uri.append(f'{key}={utils.urlquote(item)}') 402 | 403 | for key,value in self._x.items(): 404 | uri.append(f'x.{key}={utils.urlquote(value)}') 405 | 406 | return '&'.join(uri) 407 | 408 | def __repr__(self): 409 | clsname = type(self).__name__ 410 | kwargs = {} 411 | for param in self._KNOWN_PARAMETERS: 412 | if param == 'as': 413 | param = 'as_' 414 | value = getattr(self, param) 415 | if value: 416 | kwargs[param] = value 417 | for k,v in self.x.items(): 418 | kwargs[f'x_{k}'] = v 419 | kwargs_str = ', '.join(f'{k}={repr(v)}' for k,v in kwargs.items()) 420 | return f'{clsname}({kwargs_str})' 421 | -------------------------------------------------------------------------------- /torf/_magnet.pyi: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from collections.abc import Iterable 3 | from re import Pattern 4 | from typing import Any, Callable 5 | 6 | from typing_extensions import Self 7 | 8 | from ._errors import TorfError 9 | from ._torrent import Torrent 10 | from ._utils import URL, MonitoredList 11 | 12 | class Magnet: 13 | _INFOHASH_REGEX: Pattern[str] = ... 14 | _XT_REGEX: Pattern[str] = ... 15 | def __init__( 16 | self, 17 | xt: str, 18 | *, 19 | dn: str | None = None, 20 | xl: int | None = None, 21 | tr: Iterable[str] | str | None = None, 22 | xs: str | None = None, 23 | as_: str | None = None, 24 | ws: Iterable[str] | str | None = None, 25 | kt: Iterable[str] | str | None = None, 26 | **kwargs: Any, 27 | ) -> None: ... 28 | @property 29 | def dn(self) -> str | None: ... 30 | @dn.setter 31 | def dn(self, value: str) -> None: ... 32 | @property 33 | def xt(self) -> str: ... 34 | @xt.setter 35 | def xt(self, value: str) -> None: ... 36 | @property 37 | def infohash(self) -> str: ... 38 | @infohash.setter 39 | def infohash(self, value: str) -> None: ... 40 | @property 41 | def xl(self) -> int | None: ... 42 | @xl.setter 43 | def xl(self, value: int) -> None: ... 44 | @property 45 | def tr(self) -> MonitoredList[str]: ... 46 | @tr.setter 47 | def tr(self, value: Iterable[str] | str | None) -> None: ... 48 | @property 49 | def xs(self) -> URL | None: ... 50 | @xs.setter 51 | def xs(self, value: str | None) -> None: ... 52 | @property 53 | def as_(self) -> URL | None: ... 54 | @as_.setter 55 | def as_(self, value: str | None) -> None: ... 56 | @property 57 | def ws(self) -> MonitoredList[str]: ... 58 | @ws.setter 59 | def ws(self, value: Iterable[str] | str | None) -> None: ... 60 | @property 61 | def kt(self) -> list[str] | None: ... 62 | @kt.setter 63 | def kt(self, value: Iterable[str] | str | None) -> None: ... 64 | @property 65 | def x(self) -> defaultdict[str, Any]: ... 66 | def torrent(self) -> Torrent: ... 67 | def get_info( 68 | self, validate: bool = True, timeout: int = 60, callback: Callable[[TorfError], None] | None = None 69 | ) -> bool: ... 70 | 71 | _KNOWN_PARAMETERS: tuple[str, ...] = ... 72 | @classmethod 73 | def from_string(cls, uri: str) -> Self: ... 74 | def __str__(self) -> str: ... 75 | def __repr__(self) -> str: ... 76 | -------------------------------------------------------------------------------- /torf/_reuse.py: -------------------------------------------------------------------------------- 1 | import errno 2 | import os 3 | 4 | from . import _errors as error 5 | from . import _generate as generate 6 | from . 
import _stream as stream 7 | 8 | 9 | class find_torrent_files: 10 | """Iterator over ``(torrent_file, torrent_file_counter, exception)`` tuples""" 11 | 12 | def __init__(self, *paths, max_file_size=float('inf')): 13 | self._paths = paths 14 | self._counter = 0 15 | self._max_file_size = max_file_size 16 | 17 | def __iter__(self): 18 | """ 19 | Find torrent files recursively beneath each path in `paths` 20 | 21 | Each item is a 3-tuple that contains the torrent file path or 22 | ``None``, a counter that increments for each torrent file, and an 23 | exception or ``None``. 24 | """ 25 | for path in self._paths: 26 | yield from self._find(path) 27 | 28 | def _find(self, path): 29 | if os.path.isdir(path): 30 | try: 31 | for name in os.listdir(path): 32 | subpath = os.sep.join((str(path), name)) 33 | yield from self._find(subpath) 34 | except OSError as e: 35 | yield None, self._counter, error.ReadError(e.errno, str(path)) 36 | 37 | elif os.path.basename(path).lower().endswith('.torrent'): 38 | try: 39 | file_size = os.path.getsize(path) 40 | except OSError: 41 | self._counter += 1 42 | yield path, self._counter, error.ReadError(errno.ENOENT, str(path)) 43 | else: 44 | if file_size <= self._max_file_size: 45 | self._counter += 1 46 | yield path, self._counter, None 47 | 48 | elif not os.path.exists(path): 49 | yield None, self._counter, error.ReadError(errno.ENOENT, str(path)) 50 | 51 | @property 52 | def total(self): 53 | """Total number of torrents beneath all paths""" 54 | # Get a sequence of all torrents without changing self._counter. 55 | items = tuple(type(self)(*self._paths, max_file_size=self._max_file_size)) 56 | if items: 57 | # Last item should contain the number of torrents found. 58 | return items[-1][1] 59 | else: 60 | return 0 61 | 62 | 63 | def is_file_match(torrent, candidate): 64 | """ 65 | Whether `torrent` contains the same files as `candidate` 66 | 67 | Both arguments are :class:`~.Torrent` objects. 68 | 69 | The torrents match if they both share the same ``name`` and ``files`` or 70 | ``name`` and ``length`` fields in their :attr:`~.Torrent.metainfo`. 71 | `candidate`'s :attr:`~.Torrent.piece_size` must also lie between 72 | `torrent`'s :attr:`~.Torrent.piece_size_min` and :attr:`~.Torrent.piece_size_max`. 73 | 74 | This is a quick check that doesn't require any system calls. 75 | """ 76 | # Compare relative file paths and file sizes. 77 | # File order is ignored (file lists are sorted before comparison). 
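    # For example, _get_filepaths_and_sizes() reduces a hypothetical torrent
    # named "foo" with two files to:
    #
    #     [('foo/a.jpg', 123), ('foo/b/c.jpg', 456)]
    #
    # and two torrents match if they agree on these sorted lists.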
78 | torrent_info, candidate_info = torrent.metainfo['info'], candidate.metainfo['info'] 79 | 80 | # Don't bother doing anything else if the names are different 81 | if torrent_info['name'] != candidate_info['name']: 82 | return False 83 | 84 | torrent_id = _get_filepaths_and_sizes(torrent_info) 85 | candidate_id = _get_filepaths_and_sizes(candidate_info) 86 | if torrent_id == candidate_id: 87 | if torrent.piece_size_min <= candidate.piece_size <= torrent.piece_size_max: 88 | return True 89 | 90 | return False 91 | 92 | def _get_filepaths_and_sizes(info): 93 | name = info['name'] 94 | 95 | # Singlefile torrent 96 | length = info.get('length', None) 97 | if length: 98 | return [(name, length)] 99 | 100 | # Multifile torrent 101 | files = info.get('files', None) 102 | if files: 103 | files_and_sizes = [] 104 | for file in files: 105 | files_and_sizes.append(( 106 | os.sep.join((name, *file['path'])), 107 | file['length'], 108 | )) 109 | return sorted(files_and_sizes) 110 | 111 | else: 112 | raise RuntimeError(f'Unable to find files: {info!r}') 113 | 114 | 115 | def is_content_match(torrent, candidate): 116 | """ 117 | Whether `torrent` contains the same files as `candidate` 118 | 119 | Both arguments are :class:`~.Torrent` objects. 120 | 121 | A few piece hashes from each file are compared to the corresponding 122 | hashes from `candidate` to detect file name/size collisions (i.e. same 123 | paths and sizes but different content). 124 | 125 | This is relatively slow and should only be used after :func:`is_file_match` 126 | returned ``True``. 127 | """ 128 | if not torrent.path: 129 | raise RuntimeError(f'Torrent does not have a file system path: {torrent!r}') 130 | 131 | # Compare the first, middle and last piece hash of each file 132 | with stream.TorrentFileStream(candidate, content_path=torrent.path) as tfs: 133 | check_piece_indexes = set() 134 | for file in torrent.files: 135 | all_file_piece_indexes = tfs.get_piece_indexes_of_file(file) 136 | middle_piece_indexes = all_file_piece_indexes[len(all_file_piece_indexes) // 2 :][:1] 137 | some_file_piece_indexes = ( 138 | all_file_piece_indexes[:1] 139 | + middle_piece_indexes 140 | + all_file_piece_indexes[-1:] 141 | ) 142 | check_piece_indexes.update(some_file_piece_indexes) 143 | 144 | for piece_index in sorted(check_piece_indexes): 145 | if not tfs.verify_piece(piece_index): 146 | return False 147 | return True 148 | 149 | 150 | def copy(from_torrent, to_torrent): 151 | """ 152 | Copy ``pieces``, ``piece length`` and ``files`` from `from_torrent` to 153 | `to_torrent` 154 | """ 155 | source_info = from_torrent.metainfo['info'] 156 | to_torrent.metainfo['info']['pieces'] = source_info['pieces'] 157 | to_torrent.metainfo['info']['piece length'] = source_info['piece length'] 158 | if 'files' in from_torrent.metainfo['info']: 159 | # Confirm both file lists are identical while ignoring order 160 | def make_sortable(files): 161 | return [tuple(f.items()) for f in files] 162 | 163 | # Only include "length" and "path" fields 164 | source_files = [ 165 | {'length': file['length'], 'path': file['path']} 166 | for file in source_info['files'] 167 | ] 168 | 169 | assert sorted(make_sortable(to_torrent.metainfo['info']['files'])) \ 170 | == sorted(make_sortable(source_files)) 171 | 172 | # Copy file order from `source_info` 173 | to_torrent.metainfo['info']['files'] = source_files 174 | 175 | 176 | class ReuseCallback(generate._IntervaledCallback): 177 | def __init__(self, *args, torrent, torrent_files_total, **kwargs): 178 | super().__init__(*args, **kwargs) 179 | self._torrent = torrent 180 | 
--------------------------------------------------------------------------------
/torf/_stream.pyi:
--------------------------------------------------------------------------------
1 | from collections.abc import Iterable, Iterator
2 | from types import TracebackType
3 | from typing import Callable
4 |
5 | from _typeshed import StrPath
6 | from typing_extensions import Self
7 |
8 | from ._errors import MemoryError, TorfError
9 | from ._torrent import Torrent
10 | from ._utils import File
11 |
12 | class TorrentFileStream:
13 |     def __init__(self, torrent: Torrent, content_path: StrPath | None = None) -> None: ...
14 |     def __enter__(self) -> Self: ...
15 |     def __exit__(
16 |         self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: TracebackType | None
17 |     ) -> None: ...
18 |     def close(self) -> None: ...
19 |     @property
20 |     def max_piece_index(self) -> int: ...
21 |     def get_file_position(self, file: File) -> int: ...
22 |     def get_file_at_position(self, position: int, content_path: StrPath | None = None) -> File: ...
23 |     def get_piece_indexes_of_file(self, file: File, exclusive: bool = False) -> list[int]: ...
24 |     def get_files_at_byte_range(
25 |         self, first_byte_index: int, last_byte_index: int, content_path: StrPath | None = None
26 |     ) -> list[File]: ...
27 |     def get_byte_range_of_file(self, file: File) -> tuple[int, int]: ...
28 |     def get_files_at_piece_index(self, piece_index: int, content_path: StrPath | None = None) -> list[File]: ...
29 |     def get_absolute_piece_indexes(self, file: File, relative_piece_indexes: Iterable[int]) -> list[int]: ...
30 |     def get_relative_piece_indexes(self, file: File, relative_piece_indexes: Iterable[int]) -> list[int]: ...
31 |     def get_piece(
32 |         self, piece_index: int, content_path: StrPath | None = None
33 |     ) -> bytes: ...  # Docstrings say it can be `None` but from what I can see it can never be None?
34 |
35 |     max_open_files: int = 10
36 |     def iter_pieces(
37 |         self, content_path: StrPath | None = None, oom_callback: Callable[[MemoryError], None] | None = None
38 |     ) -> Iterator[tuple[bytes | None, File, tuple[TorfError, ...]]]: ...
39 |     def get_piece_hash(self, piece_index: int, content_path: StrPath | None = None) -> bytes | None: ...
40 |     def verify_piece(self, piece_index: int, content_path: StrPath | None = None) -> bool | None: ...
41 |
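A minimal sketch of the TorrentFileStream API stubbed above. It assumes TorrentFileStream is exported at package level (as the 4.0.0 changelog entry suggests) and that example.torrent plus its content exist; all paths are placeholders.

    from torf import Torrent, TorrentFileStream

    torrent = Torrent.read('example.torrent')
    with TorrentFileStream(torrent, content_path='/path/to/content') as tfs:
        print('number of pieces:', tfs.max_piece_index + 1)
        # Hash piece 0 of the on-disk content and compare it to the stored hash
        print('piece 0 ok:', tfs.verify_piece(0))
        # Which file(s) does piece 0 cover?
        for file in tfs.get_files_at_piece_index(0):
            print(file, file.size)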
--------------------------------------------------------------------------------
/torf/_torrent.pyi:
--------------------------------------------------------------------------------
1 | import sys
2 | from collections import OrderedDict
3 | from collections.abc import Iterable
4 | from datetime import datetime
5 | from pathlib import Path
6 | from re import Pattern
7 | from typing import Any, Callable, Literal, Protocol
8 |
9 | from _typeshed import StrPath
10 | from typing_extensions import Self
11 |
12 | from . import __version__
13 | from ._errors import TorfError
14 | from ._magnet import Magnet
15 | from ._utils import File, Filepath, Filepaths, Files, MonitoredList, Trackers, URLs
16 |
17 | if sys.version_info < (3, 11):
18 |     from typing_extensions import NotRequired, Required, TypedDict
19 | else:
20 |     from typing import NotRequired, Required, TypedDict
21 |
22 | class _WritableBinaryStream(Protocol):
23 |     def seek(self, offset: int, whence: int = 0) -> int: ...
24 |     def seekable(self) -> bool: ...
25 |     def truncate(self, size: int | None = None) -> int: ...
26 |     def write(self, s: bytes) -> int: ...
27 |
28 | class _ReadableBinaryStream(Protocol):
29 |     def read(self, n: int = -1) -> bytes: ...
30 |
31 | _PACKAGE_NAME: str = ...
32 | NCORES: int = ...
33 | DEFAULT_TORRENT_NAME: Literal["UNNAMED TORRENT"] = ...
34 |
35 |
36 | class _FilesDict(TypedDict):
37 |     length: int
38 |     path: list[str]
39 |
40 | _InfoDict = TypedDict(
41 |     "_InfoDict",
42 |     {
43 |         "name": str,
44 |         "piece length": int,
45 |         "pieces": bytes,
46 |         "length": NotRequired[int],
47 |         "files": NotRequired[list[_FilesDict]],
48 |         "private": NotRequired[bool],
49 |         "source": NotRequired[str]
50 |     }
51 | )
52 | """See BEP 0003: https://www.bittorrent.org/beps/bep_0003.html"""
53 |
54 | _MetaInfo = TypedDict(
55 |     "_MetaInfo",
56 |     {
57 |         "info": Required[_InfoDict],
58 |         "announce": str,
59 |         "announce-list": list[list[str]],
60 |         "comment": str,
61 |         "created by": str,
62 |         "creation date": datetime,
63 |         "url-list": list[str],
64 |     },
65 |     total=False
66 | )
67 |
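    # --- Editor's example (not part of _torrent.pyi) --------------------------
    # The TypedDicts above describe the shape of Torrent.metainfo at runtime;
    # field names follow BEP 0003. Assumes 'example.torrent' exists; the path
    # is a placeholder.
    from torf import Torrent

    t = Torrent.read('example.torrent')
    info = t.metainfo['info']
    print(info['name'], info['piece length'])
    if 'files' in info:
        for f in info['files']:                  # multifile torrent
            print('/'.join(f['path']), f['length'])
    else:
        print(info['length'])                    # singlefile torrent
    # ---------------------------------------------------------------------------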
68 | class Torrent:
69 |     def __init__(
70 |         self,
71 |         path: StrPath | None = None,
72 |         name: str | None = None,
73 |         exclude_globs: Iterable[str] = (),
74 |         exclude_regexs: Iterable[str] = (),
75 |         include_globs: Iterable[str] = (),
76 |         include_regexs: Iterable[str] = (),
77 |         trackers: Iterable[str] | str | None = None,
78 |         webseeds: Iterable[str] | str | None = None,
79 |         httpseeds: Iterable[str] | str | None = None,
80 |         private: bool | None = None,
81 |         comment: str | None = None,
82 |         source: str | None = None,
83 |         creation_date: int | float | datetime | None = None,
84 |         created_by: str | None = f"{_PACKAGE_NAME} {__version__}",
85 |         piece_size: int | None = None,
86 |         piece_size_min: int | None = None,
87 |         piece_size_max: int | None = None,
88 |         randomize_infohash: bool = False,
89 |     ) -> None: ...
90 |     @property
91 |     def metainfo(self) -> _MetaInfo: ...
92 |     @property
93 |     def path(self) -> Path | None: ...
94 |     @path.setter
95 |     def path(self, value: StrPath | None) -> None: ...
96 |     @property
97 |     def location(self) -> Path | None: ...
98 |     @property
99 |     def files(self) -> Files: ...
100 |     @files.setter
101 |     def files(self, files: Iterable[File]) -> None: ...
102 |     @property
103 |     def filepaths(self) -> Filepaths: ...
104 |     @filepaths.setter
105 |     def filepaths(self, filepaths: Iterable[Filepath]) -> None: ...
106 |     @property
107 |     def exclude_globs(self) -> MonitoredList[str]: ...
108 |     @exclude_globs.setter
109 |     def exclude_globs(self, value: Iterable[str]) -> None: ...
110 |     @property
111 |     def include_globs(self) -> MonitoredList[str]: ...
112 |     @include_globs.setter
113 |     def include_globs(self, value: Iterable[str]) -> None: ...
114 |     @property
115 |     def exclude_regexs(self) -> MonitoredList[Pattern[str]]: ...
116 |     @exclude_regexs.setter
117 |     def exclude_regexs(self, value: Iterable[str]) -> None: ...
118 |     @property
119 |     def include_regexs(self) -> MonitoredList[Pattern[str]]: ...
120 |     @include_regexs.setter
121 |     def include_regexs(self, value: Iterable[str]) -> None: ...
122 |     @property
123 |     def filetree(self) -> dict[str, dict[str, File]]: ...
124 |     @property
125 |     def name(self) -> str | None: ...
126 |     @name.setter
127 |     def name(self, value: str | None) -> None: ...
128 |     @property
129 |     def mode(self) -> Literal["singlefile", "multifile"] | None: ...
130 |     @property
131 |     def size(self) -> int: ...
132 |     def partial_size(self, path: StrPath | Iterable[StrPath]) -> int: ...
133 |     @property
134 |     def piece_size(self) -> int: ...
135 |     @piece_size.setter
136 |     def piece_size(self, value: int | None) -> None: ...
137 |     @property
138 |     def piece_size_min(self) -> int: ...
139 |     @piece_size_min.setter
140 |     def piece_size_min(self, piece_size_min: int | None) -> None: ...
141 |     @property
142 |     def piece_size_max(self) -> int: ...
143 |     @piece_size_max.setter
144 |     def piece_size_max(self, piece_size_max: int | None) -> None: ...
145 |
146 |     piece_size_min_default: int = ...
147 |     piece_size_max_default: int = ...
148 |
149 |     @classmethod
150 |     def calculate_piece_size(cls, size: int, min_size: int | None = None, max_size: int | None = None) -> int: ...
151 |     @property
152 |     def pieces(self) -> int: ...
153 |     @property
154 |     def hashes(self) -> tuple[bytes, ...]: ...
155 |     @property
156 |     def trackers(self) -> Trackers: ...
157 |     @trackers.setter
158 |     def trackers(self, value: str | Iterable[str] | None) -> None: ...
159 |     @property
160 |     def webseeds(self) -> URLs: ...
161 |     @webseeds.setter
162 |     def webseeds(self, value: str | Iterable[str] | None) -> None: ...
163 |     @property
164 |     def httpseeds(self) -> URLs: ...
165 |     @httpseeds.setter
166 |     def httpseeds(self, value: str | Iterable[str] | None) -> None: ...
167 |     @property
168 |     def private(self) -> bool | None: ...
169 |     @private.setter
170 |     def private(self, value: bool | None) -> None: ...
171 |     @property
172 |     def comment(self) -> str | None: ...
173 |     @comment.setter
174 |     def comment(self, value: str | None) -> None: ...
175 |     @property
176 |     def creation_date(self) -> datetime | None: ...
177 |     @creation_date.setter
178 |     def creation_date(self, value: int | float | datetime | None) -> None: ...
179 |     @property
180 |     def created_by(self) -> str | None: ...
181 |     @created_by.setter
182 |     def created_by(self, value: str | None) -> None: ...
183 |     @property
184 |     def source(self) -> str | None: ...
185 |     @source.setter
186 |     def source(self, value: str | None) -> None: ...
187 |     @property
188 |     def infohash(self) -> str: ...
189 |     @property
190 |     def infohash_base32(self) -> bytes: ...
191 |     @property
192 |     def randomize_infohash(self) -> bool: ...
193 |     @randomize_infohash.setter
194 |     def randomize_infohash(self, value: bool) -> None: ...
195 |     @property
196 |     def is_ready(self) -> bool: ...
197 |     def generate(
198 |         self,
199 |         threads: int | None = None,
200 |         callback: Callable[[Torrent, str, int, int], Any] | None = None,
201 |         interval: float = 0,
202 |     ) -> bool: ...
203 |     def verify(
204 |         self,
205 |         path: StrPath,
206 |         threads: int | None = None,
207 |         callback: Callable[[Torrent, str, int, int, int, bytes | None, TorfError | None], Any] | None = None,
208 |         interval: float = 0,
209 |     ) -> bool: ...
210 |     def verify_filesize(
211 |         self, path: StrPath, callback: Callable[[Torrent, str, str, int, int, TorfError | None], Any] | None = None
212 |     ) -> bool: ...
213 |     def validate(self) -> None: ...
214 |     def convert(self) -> OrderedDict[bytes, Any]: ...
215 |     def dump(self, validate: bool = True) -> bytes: ...
216 |     def write_stream(self, stream: _WritableBinaryStream, validate: bool = True) -> None: ...
217 |     def write(self, filepath: StrPath, validate: bool = True, overwrite: bool = False) -> None: ...
218 |     def magnet(self, name: bool = True, size: bool = True, trackers: bool = True, tracker: bool = False) -> Magnet: ...
219 |
220 |     MAX_TORRENT_FILE_SIZE: int = ...
221 |
222 |     @classmethod
223 |     def read_stream(cls, stream: bytes | bytearray | _ReadableBinaryStream, validate: bool = True) -> Self: ...
224 |     @classmethod
225 |     def read(cls, filepath: StrPath, validate: bool = True) -> Self: ...
226 |     def copy(self) -> Self: ...
227 |     def reuse(
228 |         self,
229 |         path: StrPath,
230 |         callback: Callable[[Torrent, str | None, int, int, bool | None, TorfError | None], Any] | None = None,
231 |         interval: float = 0,
232 |     ) -> bool: ...
233 |     def __repr__(self) -> str: ...
234 |     def __eq__(self, other: object) -> bool: ...
235 |
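For orientation, a minimal end-to-end use of the Torrent class stubbed above: create, hash, write, and inspect. All paths and URLs are placeholders; the callback parameters follow the generate() signature in the stub.

    from torf import Torrent

    t = Torrent(
        path='/path/to/content',
        trackers=['https://tracker.example.org/announce'],
        comment='Example torrent',
        private=True,
    )
    # Callback parameters per the stub: (torrent, filepath, pieces_done, pieces_total)
    t.generate(callback=lambda tor, fp, done, total: print(f'{done}/{total}'))
    t.write('/path/to/example.torrent')
    print(t.infohash)
    print(t.magnet())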
--------------------------------------------------------------------------------
/torf/_utils.pyi:
--------------------------------------------------------------------------------
1 | import os
2 | import pathlib
3 | import sys
4 | from collections.abc import Container, Iterable, MutableSequence
5 | from functools import partial
6 | from typing import Any, Callable, TypeVar, overload
7 |
8 | from _typeshed import StrPath
9 | from typing_extensions import Self
10 |
11 | T = TypeVar("T")
12 |
13 | class MonitoredList(MutableSequence[T]):
14 |     def __init__(
15 |         self, items: Iterable[T] = (), callback: Callable[[Self], None] | None = None, type: type[T] | None = None
16 |     ) -> None: ...
17 |     @overload
18 |     def __getitem__(self, index: int, /) -> T: ...
19 |     @overload
20 |     def __getitem__(self, index: slice, /) -> Self: ...
21 |     @overload
22 |     def __delitem__(self, index: int, /) -> None: ...
23 |     @overload
24 |     def __delitem__(self, index: slice, /) -> None: ...
25 |     @overload
26 |     def __setitem__(self, index: int, value: T, /) -> None: ...
27 |     @overload
28 |     def __setitem__(self, index: slice, value: Iterable[T], /) -> None: ...
29 |     def insert(self, index: int, value: T) -> None: ...
30 |     def replace(self, items: Iterable[T]) -> None: ...
31 |     def clear(self) -> None: ...
32 |     def __len__(self) -> int: ...
33 |     def __eq__(self, other: object) -> bool: ...
34 |     def __ne__(self, other: object) -> bool: ...
35 |     def __add__(self, other: object) -> Self: ...
36 |     def __repr__(self) -> str: ...
37 |
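    # --- Editor's example (not part of _utils.pyi) ----------------------------
    # MonitoredList backs attributes like Torrent.filepaths and Torrent.webseeds.
    # The callback type above suggests the list passes itself to the callback
    # whenever it is mutated; this sketch assumes exactly that behavior
    # (internal API, shown only to clarify the stub).
    from torf._utils import MonitoredList

    lst = MonitoredList([1, 2, 3], callback=lambda l: print('changed:', list(l)))
    lst.append(4)   # expected to print: changed: [1, 2, 3, 4]
    del lst[0]      # expected to print: changed: [2, 3, 4]
    # ---------------------------------------------------------------------------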
38 | class File(os.PathLike[str]):
39 |     def __fspath__(self) -> str: ...
40 |     def __reduce__(self) -> tuple[partial[Self], tuple[()]]: ...
41 |     def __init__(self, path: StrPath | Iterable[StrPath], size: int) -> None: ...
42 |     @property
43 |     def size(self) -> int: ...
44 |     def __getattr__(self, name: str) -> Any: ...
45 |     def __str__(self) -> str: ...
46 |     def __eq__(self, other: object) -> bool: ...
47 |     def __hash__(self) -> int: ...
48 |     def __gt__(self, other: object) -> bool: ...
49 |     def __lt__(self, other: object) -> bool: ...
50 |     def __ge__(self, other: object) -> bool: ...
51 |     def __le__(self, other: object) -> bool: ...
52 |     def __repr__(self) -> str: ...
53 |
54 | class Files(MonitoredList[File]):
55 |     def __init__(self, files: str | Iterable[str], callback: Callable[[Self], None] | None = None): ...
56 |
57 | # There is special recognition in Mypy for `sys.platform`, not `os.name`
58 | if sys.platform == "win32":
59 |     PathBase = pathlib.WindowsPath
60 | else:
61 |     PathBase = pathlib.PosixPath
62 |
63 | class Filepath(PathBase):
64 |     def __eq__(self, other: object) -> bool: ...
65 |     def __hash__(self) -> int: ...
66 |
67 | class Filepaths(MonitoredList[Filepath]):
68 |     def __init__(self, filepaths: str | Iterable[str], callback: Callable[[Self], None] | None = None) -> None: ...
69 |     @overload
70 |     def __setitem__(self, index: int, path: StrPath, /) -> None: ...
71 |     @overload
72 |     def __setitem__(self, index: slice, path: Iterable[StrPath], /) -> None: ...
73 |     def insert(self, index: int, path: StrPath) -> None: ...
74 |
75 | class URL(str):
76 |     def __new__(cls, s: str) -> Self: ...
77 |     def __init__(self, url: str) -> None: ...
78 |     @property
79 |     def scheme(self) -> str: ...
80 |     @property
81 |     def netloc(self) -> str: ...
82 |     @property
83 |     def hostname(self) -> str | None: ...
84 |     @property
85 |     def port(self) -> int | None: ...
86 |     @property
87 |     def path(self) -> str: ...
88 |     @property
89 |     def params(self) -> str: ...
90 |     @property
91 |     def query(self) -> str: ...
92 |     @property
93 |     def fragment(self) -> str: ...
94 |
95 | class URLs(MonitoredList[URL]):
96 |     def __init__(
97 |         self,
98 |         urls: str | Iterable[str],
99 |         callback: Callable[[Self], None] | None = None,
100 |         _get_known_urls: Callable[[], Container[str]] = lambda: (),
101 |     ): ...
102 |
103 | class Trackers(MutableSequence[URLs]):
104 |     def __init__(self, tiers: str | Iterable[str], callback: Callable[[Self], None] | None = None) -> None: ...
105 |     @property
106 |     def flat(self) -> tuple[URL, ...]: ...
107 |     @overload
108 |     def __getitem__(self, index: int, /) -> URLs: ...
109 |     @overload
110 |     def __getitem__(self, index: slice, /) -> Self: ...
111 |     @overload
112 |     def __delitem__(self, index: int, /) -> None: ...
113 |     @overload
114 |     def __delitem__(self, index: slice, /) -> None: ...
115 |     @overload
116 |     def __setitem__(self, index: int, value: URLs, /) -> None: ...
117 |     @overload
118 |     def __setitem__(self, index: slice, value: Iterable[URLs], /) -> None: ...
119 |     def insert(self, index: int, value: Iterable[str]) -> None: ...
120 |     def replace(self, tiers: Iterable[str]) -> None: ...
121 |     def clear(self) -> None: ...
122 |     def __len__(self) -> int: ...
123 |     def __eq__(self, other: object) -> bool: ...
124 |     def __ne__(self, other: object) -> bool: ...
125 |     def __add__(self, other: str | Iterable[str]) -> Self: ...
126 |     def __repr__(self) -> str: ...
127 |
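Trackers models BitTorrent announce-list tiers (tiers of URLs) and backs the public Torrent.trackers attribute. A small sketch with placeholder URLs; exactly how torf groups a flat URL list into tiers is not specified by the stub, so the printed tier structure is indicative only.

    from torf import Torrent

    t = Torrent()
    t.trackers = ['https://a.example.org/announce', 'https://b.example.org/announce']
    print(t.trackers.flat)                  # all announce URLs as a flat tuple
    print(t.metainfo.get('announce-list'))  # the same URLs grouped into tiers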
--------------------------------------------------------------------------------
/torf/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rndusr/torf/dc647cfd631699e968a3f99af5670c1b4312b4f3/torf/py.typed
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist = py313, py312, py311, py310, py39, py38, lint
3 |
4 | [testenv]
5 | deps =
6 |     pytest
7 |     pytest-xdist
8 |     pytest-httpserver
9 |     pytest-mock
10 | commands =
11 |     pytest {posargs}
12 |
13 | [testenv:lint]
14 | deps =
15 |     flake8
16 |     isort
17 |     ruff
18 |     sphinx
19 |     mypy
20 | commands =
21 |     flake8 torf tests
22 |     isort --check-only torf tests
23 |     ruff check .
24 |     mypy .
25 |     sphinx-build -E -j auto -q -W --keep-going docs /tmp/sphinx-docs-build
26 |     rm -r /tmp/sphinx-docs-build
27 |     git clean docs --force --quiet
28 | allowlist_externals =
29 |     rm
30 |     git
--------------------------------------------------------------------------------