├── .coveragerc ├── .github └── workflows │ └── build.yml ├── .gitignore ├── .isort.cfg ├── .prettierrc.yaml ├── .pylintrc ├── .vscode ├── env ├── launch.json └── settings.json ├── CHANGELOG.md ├── LICENSE.txt ├── README.md ├── dev-requirements.txt ├── mypy.ini ├── pytest.ini ├── setup.cfg ├── setup.py ├── src └── xz │ ├── __init__.py │ ├── block.py │ ├── common.py │ ├── file.py │ ├── io.py │ ├── open.py │ ├── py.typed │ ├── strategy.py │ ├── stream.py │ ├── typing.py │ └── utils.py ├── tests ├── conftest.py ├── integration │ ├── conftest.py │ ├── files │ │ ├── check-crc32.json │ │ ├── check-crc32.xz │ │ ├── check-crc64.json │ │ ├── check-crc64.xz │ │ ├── check-none.json │ │ ├── check-none.xz │ │ ├── check-sha256.json │ │ ├── check-sha256.xz │ │ ├── example.xz │ │ ├── few-blocks.json │ │ ├── few-blocks.xz │ │ ├── many-blocks.json │ │ ├── many-blocks.xz │ │ ├── one-stream-with-padding.json │ │ ├── one-stream-with-padding.xz │ │ ├── several-filters.json │ │ ├── several-filters.xz │ │ ├── several-streams-with-padding.json │ │ ├── several-streams-with-padding.xz │ │ ├── several-streams.json │ │ ├── several-streams.xz │ │ ├── various-block-sizes.json │ │ ├── various-block-sizes.xz │ │ ├── various-stream-checks-stream-padding-and-block-sizes.json │ │ ├── various-stream-checks-stream-padding-and-block-sizes.xz │ │ ├── various-stream-checks.json │ │ └── various-stream-checks.xz │ ├── test_file_read.py │ ├── test_file_write.py │ ├── test_generate_files.py │ ├── test_ram_usage.py │ └── test_readme.py └── unit │ ├── test_attr_proxy.py │ ├── test_block.py │ ├── test_common.py │ ├── test_file.py │ ├── test_floordict.py │ ├── test_ioabstract.py │ ├── test_iocombiner.py │ ├── test_ioproxy.py │ ├── test_iostatic.py │ ├── test_open.py │ ├── test_parse_mode.py │ └── test_stream.py └── tox.ini /.coveragerc: -------------------------------------------------------------------------------- 1 | [html] 2 | directory = coverage 3 | 4 | [paths] 5 | source = 6 | src/xz/ 7 | 
.tox/py*/lib/python*/site-packages/xz/ 8 | .tox/py*/site-packages/xz/ 9 | 10 | [report] 11 | exclude_lines = 12 | pragma: no cover 13 | def __repr__ 14 | def __str__ 15 | if __name__ == "__main__": 16 | show_missing = True 17 | 18 | [run] 19 | branch = True 20 | source = xz 21 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: [push, pull_request] 4 | 5 | env: 6 | PY_COLORS: 1 7 | 8 | jobs: 9 | tests-py: 10 | runs-on: ubuntu-latest 11 | strategy: 12 | matrix: 13 | python: 14 | - "3.7" 15 | - "3.8" 16 | - "3.9" 17 | - "3.10" 18 | - "3.11" 19 | - "pypy-3.7" 20 | - "pypy-3.8" 21 | - "pypy-3.9" 22 | steps: 23 | - uses: actions/checkout@v3 24 | - name: Setup Python ${{ matrix.python }} 25 | uses: actions/setup-python@v4 26 | with: 27 | python-version: ${{ matrix.python }} 28 | - name: Install dependencies 29 | run: pip install tox 30 | - name: Run tests 31 | run: tox -e py 32 | 33 | tests-misc: 34 | runs-on: ubuntu-latest 35 | strategy: 36 | matrix: 37 | env: [generate-integration-files, type, lint, format] 38 | steps: 39 | - uses: actions/checkout@v3 40 | - name: Setup Python 41 | uses: actions/setup-python@v4 42 | with: 43 | python-version: "3.11" 44 | - name: Install dependencies 45 | run: pip install tox 46 | - name: Run ${{ matrix.env }} 47 | run: tox -e ${{ matrix.env }} 48 | 49 | build: 50 | needs: [tests-py, tests-misc] 51 | runs-on: ubuntu-latest 52 | steps: 53 | - uses: actions/checkout@v3 54 | with: 55 | # fetch all commits for setuptools_scm 56 | fetch-depth: 0 57 | - name: Setup Python 58 | uses: actions/setup-python@v4 59 | with: 60 | python-version: "3.11" 61 | - name: Build 62 | run: python setup.py sdist bdist_wheel 63 | - name: Save build artifacts 64 | uses: actions/upload-artifact@v3 65 | with: 66 | name: dist 67 | path: dist 68 | 69 | publish: 70 | if: startsWith(github.ref, 
'refs/tags') 71 | needs: build 72 | runs-on: ubuntu-latest 73 | steps: 74 | - name: Restore build artifacts 75 | uses: actions/download-artifact@v3 76 | with: 77 | name: dist 78 | path: dist 79 | - name: Publish to PyPI 80 | uses: pypa/gh-action-pypi-publish@release/v1 81 | with: 82 | password: ${{ secrets.pypi_password }} 83 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /env 2 | __pycache__ 3 | 4 | /build 5 | /dist 6 | *.egg-info 7 | /.eggs 8 | 9 | /.coverage 10 | /coverage 11 | /.pytest_cache 12 | /.tox 13 | 14 | /src/xz/_version.py 15 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | force_sort_within_sections = True 3 | profile = black 4 | src_paths = src 5 | -------------------------------------------------------------------------------- /.prettierrc.yaml: -------------------------------------------------------------------------------- 1 | printWidth: 88 2 | proseWrap: always 3 | tabWidth: 2 4 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [BASIC] 2 | good-names = 3 | i, 4 | j, 5 | k, 6 | ex, 7 | _, 8 | T, 9 | 10 | [MESSAGES CONTROL] 11 | disable = 12 | missing-class-docstring, 13 | missing-function-docstring, 14 | missing-module-docstring, 15 | too-few-public-methods, 16 | too-many-arguments, 17 | too-many-branches, 18 | too-many-instance-attributes, 19 | too-many-locals, 20 | 21 | [SIMILARITIES] 22 | ignore-imports=yes 23 | -------------------------------------------------------------------------------- /.vscode/env: -------------------------------------------------------------------------------- 1 | # for vscode + extensions import when outside of src 
(e.g. in tests) 2 | PYTHONPATH=./src 3 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "name": "Python: Current File (Integrated Terminal)", 6 | "type": "python", 7 | "request": "launch", 8 | "program": "${file}", 9 | "console": "integratedTerminal" 10 | } 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "files.exclude": { 3 | "env": true, 4 | "**/__pycache__": true, 5 | ".eggs": true, 6 | "**/*.egg-info": true, 7 | ".coverage": true, 8 | ".pytest_cache": true, 9 | ".tox": true 10 | }, 11 | "editor.detectIndentation": false, 12 | "editor.formatOnSave": true, 13 | "editor.insertSpaces": true, 14 | "editor.tabSize": 4, 15 | "files.insertFinalNewline": true, 16 | "[json]": { 17 | "editor.defaultFormatter": "esbenp.prettier-vscode" 18 | }, 19 | "[markdown]": { 20 | "editor.defaultFormatter": "esbenp.prettier-vscode", 21 | "editor.rulers": [88], 22 | "editor.wordWrap": "on", 23 | "editor.wordWrapColumn": 88 24 | }, 25 | "[python]": { 26 | "editor.codeActionsOnSave": { 27 | "source.organizeImports": true 28 | } 29 | }, 30 | "[yaml]": { 31 | "editor.defaultFormatter": "esbenp.prettier-vscode", 32 | "editor.tabSize": 2 33 | }, 34 | "python.envFile": "${workspaceFolder}/.vscode/env", 35 | "python.formatting.provider": "black", 36 | "python.linting.pylintEnabled": true, 37 | "python.testing.pytestEnabled": true, 38 | "python.sortImports.args": ["-sp .isort.cfg"] 39 | } 40 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be 
documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/), and this project 6 | adheres to [Semantic Versioning](https://semver.org/). 7 | 8 | ## [0.5.0] - 2023-02-27 9 | 10 | [0.5.0]: https://github.com/rogdham/python-xz/releases/tag/v0.5.0 11 | 12 | ### :boom: Breaking changes 13 | 14 | - End of Python 3.6 support 15 | 16 | ### :house: Internal 17 | 18 | - Necessary code changes following dev dependency update: black, pylint, pytest 19 | - Refactor a descriptor following PEP 487 20 | - Add tests for CPython 3.11 and PyPy 3.9 21 | - Use CPython 3.11 for misc. tests 22 | - Update Github actions dependencies 23 | - Import typing modules impacted by PEP 585 based on Python version 24 | 25 | ## [0.4.0] - 2022-01-09 26 | 27 | [0.4.0]: https://github.com/rogdham/python-xz/releases/tag/v0.4.0 28 | 29 | ### :rocket: Added 30 | 31 | - Advanced users may use the new `block_read_strategy` argument of `XZFile`/`xz.open` to 32 | customize the strategy for freeing block readers, and implement a different tradeoff 33 | between memory consumption and read speed when alternating reads between several 34 | blocks; the following strategies are provided: `RollingBlockReadStrategy` and 35 | `KeepBlockReadStrategy` 36 | 37 | ### :bug: Fixes 38 | 39 | - Free memory after a block is fully read 40 | - Free memory of LZMA decompressors when many blocks are partially read; this is a 41 | tradeoff defaulting to keeping the last 8 LZMA decompressors used 42 | - Typing: use `BinaryIO` instead of `IO[bytes]` 43 | 44 | ### :house: Internal 45 | 46 | - Specify the Python versions required in package metadata 47 | - Test the `mode` attribute of objects returned by `xz.open`/`XZFile` 48 | - Minor improvements in some docstrings 49 | 50 | ## [0.3.1] - 2021-12-26 51 | 52 | [0.3.1]: https://github.com/rogdham/python-xz/releases/tag/v0.3.1 53 | 54 | ### :house: Internal 55 | 56 | - Add tests for CPython 3.10 and PyPy 3.8 57 | - Use CPython 3.10 for misc. 
tests 58 | - Clarify which Python versions are supported in readme 59 | - Fix some linting issues found by latest versions of mypy/pylint 60 | 61 | ## [0.3.0] - 2021-11-07 62 | 63 | [0.3.0]: https://github.com/rogdham/python-xz/releases/tag/v0.3.0 64 | 65 | ### :boom: Breaking changes 66 | 67 | - The `filename` argument of `XZFile` is now mandatory; this change should have very 68 | limited impact as not providing it makes no sense and would have raised a `TypeError`, 69 | plus it was already mandatory on `xz.open` 70 | 71 | ### :rocket: Added 72 | 73 | - Type hints 74 | 75 | ### :house: Internal 76 | 77 | - Type validation with mypy 78 | - Distribute `py.typed` file in conformance with [PEP 561] 79 | 80 | [pep 561]: https://www.python.org/dev/peps/pep-0561/ 81 | 82 | ## [0.2.0] - 2021-10-23 83 | 84 | [0.2.0]: https://github.com/rogdham/python-xz/releases/tag/v0.2.0 85 | 86 | ### :rocket: Added 87 | 88 | - Write modes (`w`, `x`, `r+`, `w+`, `x+`) :tada: 89 | - Allow to `seek` past the end of the fileobj 90 | - Calling `len` on a fileobj gives its length, and `bool` tells if it is empty 91 | - Export useful constants and functions from `lzma` for easy access: checks, filters, 92 | etc. 
93 | 94 | ### :house: Internal 95 | 96 | - Test that no warnings are generated 97 | - Change development status to Alpha 98 | 99 | ## [0.1.2] - 2021-09-19 100 | 101 | [0.1.2]: https://github.com/rogdham/python-xz/releases/tag/v0.1.2 102 | 103 | ### :rocket: Added 104 | 105 | - Add `__version__` attribute to module, despite [PEP 396] being rejected 106 | 107 | [pep 396]: https://www.python.org/dev/peps/pep-0396/ 108 | 109 | ## [0.1.1] - 2021-05-14 110 | 111 | [0.1.1]: https://github.com/rogdham/python-xz/releases/tag/v0.1.1 112 | 113 | ### :rocket: Added 114 | 115 | - Implementation of the `fileno` method 116 | 117 | ## [0.1.0] - 2021-05-13 118 | 119 | [0.1.0]: https://github.com/rogdham/python-xz/releases/tag/v0.1.0 120 | 121 | ### :rocket: Added 122 | 123 | - Initial public release :tada: 124 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2020 Rogdham 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | # python-xz 4 | 5 | Pure Python implementation of the XZ file format with random access support 6 | 7 | _Leveraging the lzma module for fast (de)compression_ 8 | 9 | [![GitHub build status](https://img.shields.io/github/actions/workflow/status/rogdham/python-xz/build.yml?branch=master)](https://github.com/rogdham/python-xz/actions?query=branch:master) 10 | [![Release on PyPI](https://img.shields.io/pypi/v/python-xz)](https://pypi.org/project/python-xz/) 11 | [![Code coverage](https://img.shields.io/badge/coverage-100%25-brightgreen)](https://github.com/rogdham/python-xz/search?q=fail+under&type=Code) 12 | [![Mypy type checker](https://img.shields.io/badge/type_checker-mypy-informational)](https://mypy.readthedocs.io/) 13 | [![MIT License](https://img.shields.io/pypi/l/python-xz)](https://github.com/Rogdham/python-xz/blob/master/LICENSE.txt) 14 | 15 | --- 16 | 17 | [📖 Documentation](https://github.com/rogdham/python-xz/#usage)   |   [📃 Changelog](./CHANGELOG.md) 18 | 19 |
20 | 21 | --- 22 | 23 | An XZ file can be composed of several streams and blocks. This allows for fast random 24 | access when reading, but this is not supported by Python's builtin `lzma` module (which 25 | would read all previous blocks for nothing). 26 | 27 |
28 | 29 | | | [lzma] | [lzmaffi] | python-xz | 30 | | :---------------: | :---------------: | :------------------: | :------------------: | 31 | | module type | builtin | cffi (C extension) | pure Python | 32 | | 📄 **read** | | | | 33 | | random access | ❌ no<sup>1</sup> | ✔️ yes<sup>2</sup> | ✔️ yes<sup>2</sup> | 34 | | several blocks | ✔️ yes | ✔️✔️ yes<sup>3</sup> | ✔️✔️ yes<sup>3</sup> | 35 | | several streams | ✔️ yes | ✔️ yes | ✔️✔️ yes<sup>4</sup> | 36 | | stream padding | ❌ no<sup>5</sup> | ✔️ yes | ✔️ yes | 37 | | 📝 **write** | | | | 38 | | `w` mode | ✔️ yes | ✔️ yes | ✔️ yes | 39 | | `x` mode | ✔️ yes | ❌ no | ✔️ yes | 40 | | `a` mode | ✔️ new stream | ✔️ new stream | ⏳ planned | 41 | | `r+`/`w+`/… modes | ❌ no | ❌ no | ✔️ yes | 42 | | several blocks | ❌ no | ❌ no | ✔️ yes | 43 | | several streams | ❌ no<sup>6</sup> | ❌ no<sup>6</sup> | ✔️ yes | 44 | | stream padding | ❌ no | ❌ no | ⏳ planned | 45 | 46 |
47 | 48 |
49 | Notes 50 | 51 | 1. Reading from a position will read the file from the very beginning 52 | 2. Reading from a position will read the file from the beginning of the block 53 | 3. Block positions available with the `block_boundaries` attribute 54 | 4. Stream positions available with the `stream_boundaries` attribute 55 | 5. Related [issue](https://github.com/python/cpython/issues/88300) 56 | 6. Possible by manually closing and re-opening in append mode 57 | 58 |
59 | 60 | [lzma]: https://docs.python.org/3/library/lzma.html 61 | [lzmaffi]: https://github.com/r3m0t/backports.lzma 62 | 63 | --- 64 | 65 | ## Install 66 | 67 | Install `python-xz` with pip: 68 | 69 | ```sh 70 | $ python -m pip install python-xz 71 | ``` 72 | 73 | _An unofficial package for conda is [also available][conda package], see [issue #5][#5] 74 | for more information._ 75 | 76 | [conda package]: https://anaconda.org/conda-forge/python-xz 77 | [#5]: https://github.com/Rogdham/python-xz/issues/5 78 | 79 | ## Usage 80 | 81 | The API is similar to [lzma]: you can use either `xz.open` or `xz.XZFile`. 82 | 83 | ### Read mode 84 | 85 | ```python 86 | >>> with xz.open('example.xz') as fin: 87 | ... fin.read(18) 88 | ... fin.stream_boundaries # 2 streams 89 | ... fin.block_boundaries # 4 blocks in first stream, 2 blocks in second stream 90 | ... fin.seek(1000) 91 | ... fin.read(31) 92 | ... 93 | b'Hello, world! \xf0\x9f\x91\x8b' 94 | [0, 2000] 95 | [0, 500, 1000, 1500, 2000, 3000] 96 | 1000 97 | b'\xe2\x9c\xa8 Random access is fast! \xf0\x9f\x9a\x80' 98 | ``` 99 | 100 | Opening in text mode works as well, but notice that seek arguments as well as boundaries 101 | are still in bytes (just like with `lzma.open`). 102 | 103 | ```python 104 | >>> with xz.open('example.xz', 'rt') as fin: 105 | ... fin.read(15) 106 | ... fin.stream_boundaries 107 | ... fin.block_boundaries 108 | ... fin.seek(1000) 109 | ... fin.read(26) 110 | ... 111 | 'Hello, world! 👋' 112 | [0, 2000] 113 | [0, 500, 1000, 1500, 2000, 3000] 114 | 1000 115 | '✨ Random access is fast! 🚀' 116 | ``` 117 | 118 | ### Write mode 119 | 120 | Writing is only supported from the end of file. It is however possible to truncate the 121 | file first. Note that truncating is only supported on block boundaries. 122 | 123 | ```python 124 | >>> with xz.open('test.xz', 'w') as fout: 125 | ... fout.write(b'Hello, world!\n') 126 | ... fout.write(b'This sentence is still in the previous block\n') 127 | ... 
fout.change_block() 128 | ... fout.write(b'But this one is in its own!\n') 129 | ... 130 | 14 131 | 45 132 | 28 133 | ``` 134 | 135 | Advanced usage: 136 | 137 | - Modes like `r+`/`w+`/`x+` allow opening for both reading and writing at the same time; 138 | however, in the current implementation, a block with writing in progress is 139 | automatically closed when reading data from it. 140 | - The `check`, `preset` and `filters` arguments to `xz.open` and `xz.XZFile` allow 141 | configuring the default values for new streams and blocks. 142 | - Change block with the `change_block` method (the `preset` and `filters` attributes can 143 | be changed beforehand to apply to the new block). 144 | - Change stream with the `change_stream` method (the `check` attribute can be changed 145 | beforehand to apply to the new stream). 146 | 147 | --- 148 | 149 | ## FAQ 150 | 151 | ### How does random access work? 152 | 153 | XZ files are made of a number of streams, and each stream is composed of a number of 154 | blocks. This can be seen with `xz --list`: 155 | 156 | ```sh 157 | $ xz --list file.xz 158 | Strms Blocks Compressed Uncompressed Ratio Check Filename 159 | 1 13 16.8 MiB 297.9 MiB 0.056 CRC64 file.xz 160 | ``` 161 | 162 | To read data from the middle of the 10th block, we will decompress the 10th block from 163 | its start until we reach the middle (and drop that decompressed data), then return 164 | the decompressed data from that point. 165 | 166 | Choosing a good block size is a tradeoff between seeking time during random access and 167 | compression ratio. 168 | 169 | ### How can I create XZ files optimized for random-access? 170 | 171 | You can open the file for writing and use the `change_block` method to create several 172 | blocks.
173 | 174 | Other tools allow to create XZ files with several blocks as well: 175 | 176 | - [XZ Utils](https://tukaani.org/xz/) needs to be called with flags: 177 | 178 | ```sh 179 | $ xz -T0 file # threading mode 180 | $ xz --block-size 16M file # same size for all blocks 181 | $ xz --block-list 16M,32M,8M,42M file # specific size for each block 182 | ``` 183 | 184 | - [PIXZ](https://github.com/vasi/pixz) creates files with several blocks by default: 185 | 186 | ```sh 187 | $ pixz file 188 | ``` 189 | 190 | ### Python version support 191 | 192 | As a general rule, all Python versions that are both [released and still officially 193 | supported][python-versions] are supported by `python-xz` and tested against (both 194 | CPython and PyPy implementations). 195 | 196 | If you have other use cases or find issues with some Python versions, feel free to 197 | [open a ticket](https://github.com/Rogdham/python-xz/issues/new)! 198 | 199 | [python-versions]: https://devguide.python.org/versions/#versions 200 | -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | # install + dependencies 2 | -e . 
3 | 4 | # typing 5 | mypy 6 | 7 | # tests 8 | coverage 9 | pytest 10 | pytest-cov 11 | 12 | # lint 13 | pylint 14 | 15 | # format 16 | black 17 | isort 18 | 19 | # publish 20 | setuptools_scm 21 | wheel 22 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | # section names refer to the documentation 3 | # https://mypy.readthedocs.io/en/stable/config_file.html 4 | 5 | # Import discovery 6 | files = src 7 | ignore_missing_imports = False 8 | follow_imports = normal 9 | 10 | # Platform configuration 11 | python_version = 3.11 12 | 13 | # Disallow dynamic typing 14 | disallow_any_unimported = True 15 | disallow_any_decorated = True 16 | disallow_any_generics = True 17 | disallow_subclassing_any = True 18 | 19 | # Untyped definitions and calls 20 | disallow_untyped_calls = True 21 | disallow_untyped_defs = True 22 | disallow_incomplete_defs = True 23 | check_untyped_defs = True 24 | disallow_untyped_decorators = True 25 | 26 | # None and Optional handling 27 | no_implicit_optional = True 28 | strict_optional = True 29 | 30 | # Configuring warnings 31 | warn_redundant_casts = True 32 | warn_unused_ignores = True 33 | warn_no_return = True 34 | warn_return_any = True 35 | warn_unreachable = True 36 | 37 | # Suppressing errors 38 | show_none_errors = True 39 | ignore_errors = False 40 | 41 | # Miscellaneous strictness flags 42 | strict_equality = True 43 | 44 | # Configuring error messages 45 | show_error_context = True 46 | show_error_codes = True 47 | 48 | # Miscellaneous 49 | warn_unused_configs = True 50 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = 3 | --cov 4 | --strict-markers 5 | filterwarnings = 6 | error 7 | # issue in standard lib for PyPy < 3.9 8 | ignore:Using or
importing the ABCs from 'collections':DeprecationWarning:_lzma 9 | markers = 10 | generate_integration_files 11 | integration 12 | unit 13 | testpaths = 14 | tests 15 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = python-xz 3 | author = Rogdham 4 | author_email = contact@rogdham.net 5 | description = Pure Python implementation of the XZ file format with random access support 6 | long_description = file: README.md 7 | long_description_content_type = text/markdown 8 | url = https://github.com/rogdham/python-xz 9 | project_urls = 10 | Source = https://github.com/rogdham/python-xz 11 | keywords = xz lzma compression decompression 12 | license = MIT 13 | license_files = LICENSE.txt 14 | platform = any 15 | classifiers = 16 | Development Status :: 3 - Alpha 17 | License :: OSI Approved :: MIT License 18 | Operating System :: OS Independent 19 | Programming Language :: Python 20 | Programming Language :: Python :: 3 21 | Programming Language :: Python :: 3 :: Only 22 | Programming Language :: Python :: 3.7 23 | Programming Language :: Python :: 3.8 24 | Programming Language :: Python :: 3.9 25 | Programming Language :: Python :: 3.10 26 | Programming Language :: Python :: 3.11 27 | Topic :: Utilities 28 | Topic :: System :: Archiving 29 | Topic :: System :: Archiving :: Compression 30 | 31 | [options] 32 | include_package_data = True 33 | package_dir = =src 34 | packages = xz 35 | python_requires = >=3.7 36 | setup_requires = 37 | setuptools_scm 38 | wheel 39 | install_requires = 40 | typing-extensions>=4.5.0;python_version<"3.8" 41 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | 5 | setup( 6 | use_scm_version={ 7 | "write_to": 
"src/xz/_version.py", 8 | "write_to_template": '__version__ = "{version}"\n', 9 | } 10 | ) 11 | -------------------------------------------------------------------------------- /src/xz/__init__.py: -------------------------------------------------------------------------------- 1 | from lzma import ( 2 | CHECK_CRC32, 3 | CHECK_CRC64, 4 | CHECK_ID_MAX, 5 | CHECK_NONE, 6 | CHECK_SHA256, 7 | CHECK_UNKNOWN, 8 | FILTER_ARM, 9 | FILTER_ARMTHUMB, 10 | FILTER_DELTA, 11 | FILTER_IA64, 12 | FILTER_LZMA1, 13 | FILTER_LZMA2, 14 | FILTER_POWERPC, 15 | FILTER_SPARC, 16 | FILTER_X86, 17 | MF_BT2, 18 | MF_BT3, 19 | MF_BT4, 20 | MF_HC3, 21 | MF_HC4, 22 | MODE_FAST, 23 | MODE_NORMAL, 24 | PRESET_DEFAULT, 25 | PRESET_EXTREME, 26 | compress, 27 | decompress, 28 | is_check_supported, 29 | ) 30 | 31 | try: 32 | from xz._version import __version__ 33 | except ImportError: # pragma: no cover 34 | __version__ = "0.0.0.dev0-unknown" 35 | 36 | 37 | from xz.common import XZError 38 | from xz.file import XZFile 39 | from xz.open import xz_open 40 | from xz.strategy import KeepBlockReadStrategy, RollingBlockReadStrategy 41 | 42 | # pylint: disable=redefined-builtin 43 | open = xz_open 44 | # pylint: enable=redefined-builtin 45 | 46 | 47 | __all__ = ( 48 | "__version__", 49 | "KeepBlockReadStrategy", 50 | "RollingBlockReadStrategy", 51 | "XZError", 52 | "XZFile", 53 | "open", 54 | # re-export from lzma for easy access 55 | "CHECK_CRC32", 56 | "CHECK_CRC64", 57 | "CHECK_ID_MAX", 58 | "CHECK_NONE", 59 | "CHECK_SHA256", 60 | "CHECK_UNKNOWN", 61 | "FILTER_ARM", 62 | "FILTER_ARMTHUMB", 63 | "FILTER_DELTA", 64 | "FILTER_IA64", 65 | "FILTER_LZMA1", 66 | "FILTER_LZMA2", 67 | "FILTER_POWERPC", 68 | "FILTER_SPARC", 69 | "FILTER_X86", 70 | "MF_BT2", 71 | "MF_BT3", 72 | "MF_BT4", 73 | "MF_HC3", 74 | "MF_HC4", 75 | "MODE_FAST", 76 | "MODE_NORMAL", 77 | "PRESET_DEFAULT", 78 | "PRESET_EXTREME", 79 | "compress", 80 | "decompress", 81 | "is_check_supported", 82 | ) 83 | 
-------------------------------------------------------------------------------- /src/xz/block.py: -------------------------------------------------------------------------------- 1 | from io import DEFAULT_BUFFER_SIZE, SEEK_SET 2 | from lzma import FORMAT_XZ, LZMACompressor, LZMADecompressor, LZMAError 3 | from typing import Optional, Tuple, Union 4 | 5 | from xz.common import ( 6 | XZError, 7 | create_xz_header, 8 | create_xz_index_footer, 9 | parse_xz_footer, 10 | parse_xz_index, 11 | ) 12 | from xz.io import IOAbstract, IOCombiner, IOStatic 13 | from xz.strategy import KeepBlockReadStrategy 14 | from xz.typing import _BlockReadStrategyType, _LZMAFiltersType, _LZMAPresetType 15 | 16 | 17 | class BlockRead: 18 | read_size = DEFAULT_BUFFER_SIZE 19 | 20 | def __init__( 21 | self, 22 | fileobj: IOAbstract, 23 | check: int, 24 | unpadded_size: int, 25 | uncompressed_size: int, 26 | ) -> None: 27 | self.length = uncompressed_size 28 | self.fileobj = IOCombiner( 29 | IOStatic(create_xz_header(check)), 30 | fileobj, 31 | IOStatic( 32 | create_xz_index_footer(check, [(unpadded_size, uncompressed_size)]) 33 | ), 34 | ) 35 | self.reset() 36 | 37 | def reset(self) -> None: 38 | self.fileobj.seek(0, SEEK_SET) 39 | self.pos = 0 40 | self.decompressor = LZMADecompressor(format=FORMAT_XZ) 41 | 42 | def decompress(self, pos: int, size: int) -> bytes: 43 | if pos < self.pos: 44 | self.reset() 45 | 46 | skip_before = pos - self.pos 47 | 48 | # pylint: disable=using-constant-test 49 | if self.decompressor.eof: 50 | raise XZError("block: decompressor eof") 51 | 52 | if self.decompressor.needs_input: 53 | data_input = self.fileobj.read(self.read_size) 54 | if not data_input: 55 | raise XZError("block: data eof") 56 | else: 57 | data_input = b"" 58 | 59 | data_output = self.decompressor.decompress(data_input, skip_before + size) 60 | self.pos += len(data_output) 61 | 62 | if self.pos == self.length: 63 | # we reached the end of the block 64 | # according to the XZ specification, we 
must check the 65 | # remaining bytes of the block; this is mainly performed by the 66 | # decompressor itself when we consume it 67 | while not self.decompressor.eof: 68 | if self.decompress(self.pos, 1): 69 | raise LZMAError("Corrupt input data") 70 | 71 | return data_output[skip_before:] 72 | 73 | 74 | class BlockWrite: 75 | def __init__( 76 | self, 77 | fileobj: IOAbstract, 78 | check: int, 79 | preset: _LZMAPresetType, 80 | filters: _LZMAFiltersType, 81 | ) -> None: 82 | self.fileobj = fileobj 83 | self.check = check 84 | self.compressor = LZMACompressor(FORMAT_XZ, check, preset, filters) 85 | self.pos = 0 86 | if self.compressor.compress(b"") != create_xz_header(check): 87 | raise XZError("block: compressor header") 88 | 89 | def _write(self, data: bytes) -> None: 90 | if data: 91 | self.fileobj.seek(self.pos) 92 | self.fileobj.write(data) 93 | self.pos += len(data) 94 | 95 | def compress(self, data: bytes) -> None: 96 | self._write(self.compressor.compress(data)) 97 | 98 | def finish(self) -> Tuple[int, int]: 99 | data = self.compressor.flush() 100 | 101 | # footer 102 | check, backward_size = parse_xz_footer(data[-12:]) 103 | if check != self.check: 104 | raise XZError("block: compressor footer check") 105 | 106 | # index 107 | records = parse_xz_index(data[-12 - backward_size : -12]) 108 | if len(records) != 1: 109 | raise XZError("block: compressor index records length") 110 | 111 | # remaining block data 112 | self._write(data[: -12 - backward_size]) 113 | 114 | return records[0] # (unpadded_size, uncompressed_size) 115 | 116 | 117 | class XZBlock(IOAbstract): 118 | def __init__( 119 | self, 120 | fileobj: IOAbstract, 121 | check: int, 122 | unpadded_size: int, 123 | uncompressed_size: int, 124 | preset: _LZMAPresetType = None, 125 | filters: _LZMAFiltersType = None, 126 | block_read_strategy: Optional[_BlockReadStrategyType] = None, 127 | ): 128 | super().__init__(uncompressed_size) 129 | self.fileobj = fileobj 130 | self.check = check 131 | self.preset 
= preset 132 | self.filters = filters 133 | self.block_read_strategy = block_read_strategy or KeepBlockReadStrategy() 134 | self.unpadded_size = unpadded_size 135 | self.operation: Union[BlockRead, BlockWrite, None] = None 136 | 137 | @property 138 | def uncompressed_size(self) -> int: 139 | return self._length 140 | 141 | def _read(self, size: int) -> bytes: 142 | # enforce read mode 143 | if not isinstance(self.operation, BlockRead): 144 | self._write_end() 145 | self.clear() 146 | self.block_read_strategy.on_create(self) 147 | self.operation = BlockRead( 148 | self.fileobj, 149 | self.check, 150 | self.unpadded_size, 151 | self.uncompressed_size, 152 | ) 153 | 154 | # read data 155 | self.block_read_strategy.on_read(self) 156 | try: 157 | data = self.operation.decompress(self._pos, size) 158 | except LZMAError as ex: 159 | raise XZError(f"block: error while decompressing: {ex}") from ex 160 | 161 | if self._pos + len(data) == self._length: 162 | self.clear() 163 | 164 | return data 165 | 166 | def writable(self) -> bool: 167 | return isinstance(self.operation, BlockWrite) or not self._length 168 | 169 | def _write(self, data: bytes) -> int: 170 | # enforce write mode 171 | if not isinstance(self.operation, BlockWrite): 172 | self.clear() 173 | self.operation = BlockWrite( 174 | self.fileobj, 175 | self.check, 176 | self.preset, 177 | self.filters, 178 | ) 179 | 180 | # write data 181 | self.operation.compress(data) 182 | return len(data) 183 | 184 | def _write_after(self) -> None: 185 | if isinstance(self.operation, BlockWrite): 186 | self.unpadded_size, uncompressed_size = self.operation.finish() 187 | if uncompressed_size != self.uncompressed_size: 188 | raise XZError("block: compressor uncompressed size") 189 | self.clear() 190 | 191 | def _truncate(self, size: int) -> None: 192 | # thanks to the writable method, we are sure that length is zero 193 | # so we don't need to handle the case of truncating in middle of the block 194 | self.seek(size) 195 | 
def encode_mbi(value: int) -> bytes:
    """Encode a non-negative integer as a multibyte integer.

    The value is emitted least-significant bits first, seven bits per byte;
    every byte except the last one has its high bit (0x80) set.

    Returns an immutable ``bytes`` object.

    Raises ValueError if ``value`` is negative.
    """
    if value < 0:
        # a negative value cannot be represented; fail with a clear message
        # instead of the bytearray "byte must be in range" error
        raise ValueError("mbi value must be non-negative")
    data = bytearray()
    while value >= 0x80:
        data.append((value & 0x7F) | 0x80)  # low 7 bits + continuation flag
        value >>= 7
    data.append(value)  # final byte, high bit clear
    return bytes(data)  # match the annotated return type
raise XZError("header length") 77 | if header[:6] != HEADER_MAGIC: 78 | raise XZError("header magic") 79 | if crc32(header[6:8]) != header[8:12]: 80 | raise XZError("header crc32") 81 | flag_first_byte, check = cast( 82 | Tuple[int, int], 83 | unpack(" List[Tuple[int, int]]: 91 | if len(index) < 8 or len(index) % 4: 92 | raise XZError("index length") 93 | index = memoryview(index) 94 | if index[0]: 95 | raise XZError("index indicator") 96 | if crc32(index[:-4]) != index[-4:]: 97 | raise XZError("index crc32") 98 | size, nb_records = decode_mbi(index[1:]) 99 | index = index[1 + size : -4] 100 | # records 101 | records = [] 102 | for _ in range(nb_records): 103 | if not index: 104 | raise XZError("index size") 105 | size, unpadded_size = decode_mbi(index) 106 | if not unpadded_size: 107 | raise XZError("index record unpadded size") 108 | index = index[size:] 109 | if not index: 110 | raise XZError("index size") 111 | size, uncompressed_size = decode_mbi(index) 112 | if not uncompressed_size: 113 | raise XZError("index record uncompressed size") 114 | index = index[size:] 115 | records.append((unpadded_size, uncompressed_size)) 116 | # index padding 117 | if any(index): 118 | raise XZError("index padding") 119 | return records 120 | 121 | 122 | def parse_xz_footer(footer: bytes) -> Tuple[int, int]: 123 | if len(footer) != 12: 124 | raise XZError("footer length") 125 | if footer[10:12] != FOOTER_MAGIC: 126 | raise XZError("footer magic") 127 | if crc32(footer[4:10]) != footer[:4]: 128 | raise XZError("footer crc32") 129 | backward_size, flag_first_byte, check = cast( 130 | Tuple[int, int, int], 131 | unpack(" None: 41 | """Open an XZ file in binary mode. 42 | 43 | The filename argument can be either an actual file name 44 | (given as a str, bytes, or PathLike object), 45 | in which case the named file is opened, 46 | or it can be an existing file object to read from or write to. 
47 | 48 | The mode argument can be one of the following: 49 | - "r" for reading (default) 50 | - "w" for writing, truncating the file 51 | - "r+" for reading and writing 52 | - "w+" for reading and writing, truncating the file 53 | - "x" and "x+" are like "w" and "w+", except that an 54 | FileExistsError is raised if the file already exists 55 | 56 | The following arguments are used during writing: 57 | - check: when creating a new stream 58 | - preset: when creating a new block 59 | - filters: when creating a new block 60 | 61 | For more information about the check/preset/filters arguments, 62 | refer to the documentation of the lzma module. 63 | 64 | The block_read_strategy argument allows to specify a strategy 65 | for freeing block readers, and implement a different tradeoff 66 | between memory consumption and read speed when alternating reads 67 | between several blocks. 68 | """ 69 | self._close_fileobj = False 70 | self._close_check_empty = False 71 | 72 | super().__init__() 73 | 74 | self._mode, self._readable, self._writable = parse_mode(mode) 75 | 76 | # create strategy 77 | if block_read_strategy is None: 78 | self.block_read_strategy: _BlockReadStrategyType = ( 79 | RollingBlockReadStrategy() 80 | ) 81 | else: 82 | self.block_read_strategy = block_read_strategy 83 | 84 | # get fileobj 85 | if isinstance(filename, (str, bytes, os.PathLike)): 86 | # pylint: disable=consider-using-with, unspecified-encoding 87 | self.fileobj = cast(BinaryIO, open(filename, self._mode + "b")) 88 | self._close_fileobj = True 89 | elif hasattr(filename, "read"): # weak check but better than nothing 90 | self.fileobj = filename 91 | else: 92 | raise TypeError("filename must be a str, bytes, file or PathLike object") 93 | 94 | # check fileobj 95 | if not self.fileobj.seekable(): 96 | raise ValueError("filename is not seekable") 97 | if self._readable and not self.fileobj.readable(): 98 | raise ValueError("filename is not readable") 99 | if self._writable and not 
def _init_parse(self) -> None:
    """Discover the streams already present in the underlying file.

    The file is walked backwards from its end, four bytes at a time: an
    all-zero word is skipped as stream padding, anything else is handed to
    XZStream.parse, which leaves the position at the start of that stream.
    The streams are then appended in file order.

    Raises XZError if a position that is not a multiple of 4 is reached.
    """
    raw = self.fileobj
    raw.seek(0, SEEK_END)

    found = []  # streams in reverse file order (last stream first)
    while True:
        position = raw.tell()
        if not position:
            break
        if position % 4:
            raise XZError("file: invalid size")
        raw.seek(-4, SEEK_CUR)
        last_word = raw.read(4)
        if any(last_word):
            # position is back at the end of a stream: parse it
            found.append(XZStream.parse(raw, self.block_read_strategy))
        else:
            raw.seek(-4, SEEK_CUR)  # stream padding

    for stream in reversed(found):
        self._append(stream)
20 | # 21 | 22 | 23 | class IOAbstract(IOBase): 24 | def __init__(self, length: int) -> None: 25 | super().__init__() 26 | self._pos = 0 27 | self._length = length 28 | self._modified = False 29 | 30 | def __repr__(self) -> str: 31 | return f"<{self.__class__.__name__} object at {hex(hash(self))}>" 32 | 33 | def __len__(self) -> int: 34 | return self._length 35 | 36 | def _check_not_closed(self) -> None: 37 | # https://github.com/PyCQA/pylint/issues/3484 38 | # pylint: disable=using-constant-test 39 | if self.closed: 40 | raise ValueError("I/O operation on closed file") 41 | 42 | def fileno(self) -> int: 43 | try: 44 | return cast(BinaryIO, self.fileobj).fileno() # type: ignore[attr-defined] 45 | except AttributeError: 46 | raise UnsupportedOperation("fileno") # pylint: disable=raise-missing-from 47 | 48 | def seekable(self) -> bool: 49 | """Return a bool indicating whether object supports random access.""" 50 | return True 51 | 52 | def readable(self) -> bool: 53 | """Return a bool indicating whether object was opened for reading.""" 54 | return True 55 | 56 | def writable(self) -> bool: 57 | """Return a bool indicating whether object was opened for writing.""" 58 | return True 59 | 60 | def seek(self, pos: int, whence: int = SEEK_SET) -> int: 61 | """Change stream position. 62 | 63 | Change the stream position to byte offset pos. Argument pos is 64 | interpreted relative to the position indicated by whence. Values 65 | for whence are ints: 66 | 67 | * 0 -- start of stream (the default); offset should be zero or positive 68 | * 1 -- current stream position; offset may be negative 69 | * 2 -- end of stream; offset should be negative 70 | 71 | Return an int indicating the new absolute position. 
72 | """ 73 | self._check_not_closed() 74 | if not self.seekable(): # just in case seekable is overridden 75 | raise UnsupportedOperation("seek") 76 | if whence == SEEK_SET: 77 | pass 78 | elif whence == SEEK_CUR: 79 | pos += self._pos 80 | elif whence == SEEK_END: 81 | pos += self._length 82 | else: 83 | raise ValueError("unsupported whence value") 84 | if pos >= 0: 85 | self._pos = pos 86 | return self._pos 87 | raise ValueError("invalid seek position") 88 | 89 | def tell(self) -> int: 90 | """Return an int indicating the current stream position.""" 91 | self._check_not_closed() 92 | return self._pos 93 | 94 | def read(self, size: int = -1) -> bytes: 95 | """Read at most size bytes, returned as a bytes object. 96 | 97 | If the size argument is negative, read until EOF is reached. 98 | Return an empty bytes object at or after EOF. 99 | """ 100 | self._check_not_closed() 101 | if not self.readable(): 102 | raise UnsupportedOperation("read") 103 | if size < 0: 104 | size = self._length 105 | size = min(size, self._length - self._pos) 106 | parts = [] 107 | while size > 0: 108 | data = self._read(size) # do not stop if nothing was read 109 | parts.append(data) 110 | size -= len(data) 111 | self._pos += len(data) 112 | return b"".join(parts) 113 | 114 | def _write_start(self) -> None: 115 | if not self._modified: 116 | self._write_before() 117 | self._modified = True 118 | 119 | def _write_end(self) -> None: 120 | if self._modified: 121 | self._write_after() 122 | self._modified = False 123 | 124 | def write(self, data: bytes) -> int: 125 | """Write data, passed as a bytes object. 126 | 127 | Returns the number of bytes written, which is always the length 128 | of the input data in bytes. 
129 | """ 130 | self._check_not_closed() 131 | if not self.writable(): 132 | raise UnsupportedOperation("write") 133 | written_bytes = len(data) 134 | padding_size = self._pos - self._length 135 | if padding_size < 0: 136 | raise ValueError("write is only supported from EOF") 137 | if padding_size > 0: 138 | null_bytes = memoryview(bytearray(DEFAULT_BUFFER_SIZE)) 139 | self._pos = self._length 140 | data = memoryview(data) 141 | while padding_size or data: 142 | self._write_start() 143 | if padding_size > 0: 144 | # pad with null bytes, not counted in written_bytes 145 | padding = null_bytes[:padding_size] 146 | written_len = self._write(padding) # do not stop if nothing was written 147 | padding_size -= written_len 148 | else: 149 | written_len = self._write(data) # do not stop if nothing was written 150 | data = data[written_len:] 151 | self._pos += written_len 152 | self._length = max(self._length, self._pos) 153 | return written_bytes 154 | 155 | def truncate(self, size: Optional[int] = None) -> int: 156 | """Truncate file to size bytes. 157 | Size defaults to the current IO position as reported by tell(). 158 | 159 | The current file position is unchanged. 160 | 161 | Return the new size. 162 | """ 163 | self._check_not_closed() 164 | if not self.writable(): 165 | raise UnsupportedOperation("truncate") 166 | if size is None: 167 | size = self._pos 168 | elif size < 0: 169 | raise ValueError("invalid truncate size") 170 | if size != self._length: 171 | self._write_start() 172 | pos = self._pos 173 | self._truncate(size) 174 | self._length = size 175 | self._pos = pos # make sure position is unchanged 176 | return self._length 177 | 178 | def close(self) -> None: 179 | """Flush and close the stream. 180 | 181 | This method has no effect if it is already closed. 
def _write(self, data: bytes) -> int:  # pragma: no cover
    """Write as many bytes from data as possible, and return the number
    of bytes written.

    data may be greater than the number of bytes between self._pos
    and self._length; self._length will be updated by caller afterwards.

    This method can return an int smaller than the length of data, in which
    case it will be called again. This includes being able to return 0.

    This default implementation always raises UnsupportedOperation.
    """
    raise UnsupportedOperation("write")
class IOProxy(IOAbstract):
    """Expose the ``[start, end)`` window of another file object.

    Positions of this object are relative to ``start``; every operation
    is forwarded to ``fileobj`` after shifting by that offset.
    """

    def __init__(
        self,
        fileobj: Union[BinaryIO, IOBase],  # see typing note on top of this file
        start: int,
        end: int,
    ) -> None:
        length = end - start
        super().__init__(length)
        self.fileobj = fileobj
        self.start = start

    def _read(self, size: int) -> bytes:
        """Read from the wrapped file at the shifted current position."""
        absolute_pos = self.start + self._pos
        self.fileobj.seek(absolute_pos, SEEK_SET)
        # size already restricted by caller
        return self.fileobj.read(size)

    def _write(self, data: bytes) -> int:
        """Write to the wrapped file at the shifted current position."""
        absolute_pos = self.start + self._pos
        self.fileobj.seek(absolute_pos, SEEK_SET)
        return self.fileobj.write(data)

    def _truncate(self, size: int) -> None:
        """Truncate the wrapped file so that this window is ``size`` long."""
        absolute_size = self.start + size
        self.fileobj.truncate(absolute_size)
def _change_fileobj(self) -> None:
    """Start a new fileobj at the end of the combiner.

    The current last fileobj is dropped if it is empty; otherwise, if it
    is writable, its pending write is ended. A fileobj obtained from
    _create_fileobj is then appended.
    """
    if self._fileobjs:
        previous = self._fileobjs.last_item
        if not previous:
            # nothing in it: remove it instead of keeping an empty entry
            del self._fileobjs[self._fileobjs.last_key]
        elif previous.writable():
            previous._write_end()  # pylint: disable=protected-access

    replacement = self._create_fileobj()
    self._append(replacement)
class _XZFileText(TextIOWrapper):
    """Text-mode wrapper around an XZFile.

    The binary XZFile is created from the given arguments (with the "t"
    flag removed from the mode) and kept in ``xz_file``; the XZ-specific
    attributes are proxied to it.
    """

    def __init__(
        self,
        filename: _LZMAFilenameType,
        mode: str,
        *,
        check: int = -1,
        preset: _LZMAPresetType = None,
        filters: _LZMAFiltersType = None,
        block_read_strategy: Optional[_BlockReadStrategyType] = None,
        encoding: Optional[str] = None,
        errors: Optional[str] = None,
        newline: Optional[str] = None,
    ) -> None:
        binary_mode = mode.replace("t", "")
        self.xz_file = XZFile(
            filename,
            binary_mode,
            check=check,
            preset=preset,
            filters=filters,
            block_read_strategy=block_read_strategy,
        )
        super().__init__(
            cast(BinaryIO, self.xz_file),
            encoding=encoding,
            errors=errors,
            newline=newline,
        )

    # attributes forwarded to the underlying XZFile
    check = AttrProxy[int]("xz_file")
    preset = AttrProxy[_LZMAPresetType]("xz_file")
    filters = AttrProxy[_LZMAFiltersType]("xz_file")
    stream_boundaries = AttrProxy[List[int]]("xz_file")
    block_boundaries = AttrProxy[List[int]]("xz_file")
    block_read_strategy = AttrProxy[_BlockReadStrategyType]("xz_file")

    @property
    def mode(self) -> str:
        """Mode of the underlying XZFile followed by the text flag."""
        return self.xz_file.mode + "t"

    @wraps(XZFile.change_stream)
    def change_stream(self) -> None:
        # push buffered text down to the XZFile before switching stream
        self.flush()
        self.xz_file.change_stream()

    @wraps(XZFile.change_block)
    def change_block(self) -> None:
        # push buffered text down to the XZFile before switching block
        self.flush()
        self.xz_file.change_block()
# pragma: no cover 120 | 121 | 122 | def xz_open( 123 | filename: _LZMAFilenameType, 124 | mode: str = "rb", 125 | *, 126 | # XZFile kwargs 127 | check: int = -1, 128 | preset: _LZMAPresetType = None, 129 | filters: _LZMAFiltersType = None, 130 | block_read_strategy: Optional[_BlockReadStrategyType] = None, 131 | # text-mode kwargs 132 | encoding: Optional[str] = None, 133 | errors: Optional[str] = None, 134 | newline: Optional[str] = None, 135 | ) -> Union[XZFile, _XZFileText]: 136 | """Open an XZ file in binary or text mode. 137 | 138 | filename can be either an actual file name (given as a str, bytes, 139 | or PathLike object), in which case the named file is opened, or it 140 | can be an existing file object to read from or write to. 141 | 142 | For binary mode, this function is equivalent to the XZFile 143 | constructor: XZFile(filename, mode, ...). In this case, the 144 | encoding, errors and newline arguments must not be provided. 145 | 146 | For text mode, an XZFile object is created, and wrapped in an 147 | io.TextIOWrapper instance with the specified encoding, error 148 | handling behavior, and line ending(s). 
class RollingBlockReadStrategy:
    """Keep at most ``max_block_read_nb`` block readers alive.

    Each tracked block is associated with the ``time.monotonic()`` value of
    its last use. When registering a new block makes the count exceed the
    limit, the block with the smallest timestamp is cleared.
    """

    def __init__(self, max_block_read_nb: int = 8) -> None:
        self.max_block_read_nb = max_block_read_nb
        # block -> monotonic timestamp of its last use
        self.block_reads: Dict["XZBlock", float] = {}

    def _freshly_used(self, block: "XZBlock") -> None:
        """Record that block has been used right now."""
        self.block_reads[block] = time.monotonic()

    def on_create(self, block: "XZBlock") -> None:
        """Track a new block, clearing the stalest one if over the limit."""
        self._freshly_used(block)
        if len(self.block_reads) <= self.max_block_read_nb:
            return
        stalest = min(self.block_reads, key=self.block_reads.__getitem__)
        stalest.clear()  # will call on_delete

    def on_delete(self, block: "XZBlock") -> None:
        """Stop tracking block (KeyError if it is not tracked)."""
        self.block_reads.pop(block)

    def on_read(self, block: "XZBlock") -> None:
        """Refresh the last-use timestamp of block."""
        self._freshly_used(block)
def _create_fileobj(self) -> XZBlock:
    """Create a new empty block located right after the existing blocks.

    The stream file object is first truncated at the end of the blocks;
    the new block then gets a zero-length window at that offset.
    """
    blocks_end = self._fileobj_blocks_end_pos
    self.fileobj.truncate(blocks_end)
    empty_window = IOProxy(self.fileobj, blocks_end, blocks_end)
    return XZBlock(
        empty_window,
        self.check,
        unpadded_size=0,
        uncompressed_size=0,
        preset=self.preset,
        filters=self.filters,
        block_read_strategy=self.block_read_strategy,
    )
def _write_after(self) -> None:
    """Finish writing the stream by emitting its index and footer.

    After the parent class has finalised the blocks, the stream file object
    is cut at the end of the blocks and the index and footer built from the
    (unpadded_size, uncompressed_size) of every block are written there.
    """
    super()._write_after()

    # drop anything that follows the last block
    self.fileobj.seek(self._fileobj_blocks_end_pos)
    self.fileobj.truncate()

    records = []
    for block in self._fileobjs.values():
        records.append((block.unpadded_size, block.uncompressed_size))
    trailer = create_xz_index_footer(self.check, records)
    self.fileobj.write(trailer)
from bisect import bisect_right, insort_right
import sys
from typing import Any, Dict, Generic, List, Tuple, TypeVar, cast

if sys.version_info >= (3, 9):  # pragma: no cover
    from collections.abc import Iterator, MutableMapping
else:  # pragma: no cover
    from typing import Iterator, MutableMapping


T = TypeVar("T")


class FloorDict(MutableMapping[int, T]):
    """A dict where keys are int, and accessing a key will use the closest lower one.

    Differences from dict:
     - keys must be int
     - obj[key] will return the value whose key is the closest one which is lower or equal to key
     - iteration yields the keys in increasing order
    """

    def __init__(self) -> None:
        self._dict: Dict[int, T] = {}
        self._keys: List[int] = []  # sorted, same content as the keys of _dict

    def __repr__(self) -> str:
        return f"FloorDict<{self._dict!r}>"

    def __iter__(self) -> Iterator[int]:
        return iter(self._keys)

    def __reversed__(self) -> Iterator[int]:
        return reversed(self._keys)

    def __len__(self) -> int:
        return len(self._keys)

    def _key_index(self, key: int) -> int:
        """Return the position in _keys of the greatest stored key <= key.

        Raise KeyError if every stored key is greater than key
        (this includes the empty case).
        """
        index = bisect_right(self._keys, key) - 1
        if index < 0:
            raise KeyError(key)
        return index

    def get_with_index(self, key: int) -> Tuple[int, T]:
        """Return (stored_key, value) for the greatest stored key <= key.

        Raise TypeError if key is not an int,
        and KeyError if there is no stored key lower or equal to key.
        """
        if not isinstance(key, int):
            raise TypeError("Invalid key")
        floor_key = self._keys[self._key_index(key)]
        return (floor_key, self._dict[floor_key])

    def __getitem__(self, key: int) -> T:
        return self.get_with_index(key)[1]

    def __setitem__(self, key: int, value: T) -> None:
        if not isinstance(key, int):
            raise TypeError("Invalid key")
        if key not in self._dict:  # prevent duplicates in _keys
            insort_right(self._keys, key)
        self._dict[key] = value

    def __delitem__(self, key: int) -> None:
        # deletion is exact: no floor lookup here
        del self._dict[key]
        # the key is an exact index (otherwise KeyError raised on last line)
        self._keys.pop(self._key_index(key))

    @property
    def last_key(self) -> int:
        """Greatest stored key; raise KeyError if the dict is empty."""
        if not self._keys:
            raise KeyError("dictionary is empty")
        return self._keys[-1]

    @property
    def last_item(self) -> T:
        """Value stored under last_key; raise KeyError if the dict is empty."""
        return self._dict[self.last_key]


def parse_mode(mode: str) -> Tuple[str, bool, bool]:
    """Parse a mode used in open.

    Order is not considered at all.
    Binary flag (b) is ignored.
    Valid modes are: r, r+, w, w+, x, x+.

    Return a tuple (normalized, is_read, is_write).

    Raise ValueError if a character is repeated, if there is not exactly one
    of r/w/x, or if any other character is present.
    """
    mode_set = set(mode)
    if len(mode_set) != len(mode):  # repeated character
        raise ValueError(f"invalid mode: {mode}")
    mode_plus = "+" in mode_set
    mode_set -= {"b", "+"}
    # exactly one character must remain, and it must be r, w or x
    mode_base = mode_set.pop() if mode_set else "invalid"
    if mode_set or mode_base not in ("r", "w", "x"):
        raise ValueError(f"invalid mode: {mode}")
    if mode_plus:
        return (f"{mode_base}+", True, True)
    return (mode_base, mode_base == "r", mode_base != "r")


class AttrProxy(Generic[T]):
    """Create a descriptor that is a proxy to the same attribute of an attribute.

    Example:

        class Foo:
            proxy = Something()
            bar = AttrProxy("proxy")

        foo = Foo()

    then foo.bar would be proxied to foo.proxy.bar

    If the proxy value is None, then use a local value instead,
    which acts as a temporary storage in the meanwhile.

    The local value is kept in the __dict__ of the instance, so that several
    instances of the same class do not overwrite the value of each other.
    As a consequence, the owner instances need a __dict__ for that fallback.
    """

    # Typing note
    #
    # There is no typing enforced to make sure that the proxy attribute
    # on the attribute exists and is of type T.
    # We just trust that the user-provided T is right.
    #
    # This explains the use of Any everywhere
    #

    attribute: str

    def __init__(self, proxy: str) -> None:
        self.proxy = proxy

    def __set_name__(self, klass: Any, name: str) -> None:
        self.attribute = name
        # key of the temporary value in the instance __dict__
        self._storage_key = f"_{name}_not_proxied_value"

    def __get__(self, instance: Any, klass: Any) -> T:
        dest = getattr(instance, self.proxy)
        if dest is not None:
            return cast(T, getattr(dest, self.attribute))
        try:
            return cast(T, vars(instance)[self._storage_key])
        except KeyError as ex:
            # message kept as is: callers may rely on its exact wording
            raise AttributeError(
                f"'{klass.__name__}' object has not attribute '{self.attribute}'"
                f" until its attribute '{self.proxy}' is defined"
            ) from ex

    def __set__(self, instance: Any, value: T) -> None:
        dest = getattr(instance, self.proxy)
        if dest is None:
            # per instance storage, never on the (shared) descriptor
            vars(instance)[self._storage_key] = value
        else:
            setattr(dest, self.attribute, value)
@pytest.fixture(
    # rglob yields paths in an arbitrary order: sort them so that the
    # parameters (hence the test ids and their order) are reproducible
    params=sorted((Path(__file__).parent / "files").rglob("*.json")),
    ids=lambda path: cast(Path, path).name,
)
def integration_case(request: "_Request") -> _IntegrationCase:
    """Provide one integration case per JSON file found under ``files``.

    Return a tuple made of the path of the ``.xz`` file sharing the name of
    the JSON file, and the metadata loaded from that JSON file.
    """
    json_path = request.param
    # explicit encoding: do not depend on the locale of the machine
    with json_path.open(encoding="utf-8") as json_file:
        metadata = cast(Dict[str, Any], json.load(json_file))
    return (json_path.with_suffix(".xz"), metadata)
-------------------------------------------------------------------------------- 1 | { 2 | "generate": [ 3 | { 4 | "cmd": "xz -C crc32", 5 | "length": 20280 6 | } 7 | ], 8 | "streams": [ 9 | { 10 | "check": 1, 11 | "blocks": [ 12 | { 13 | "length": 20280 14 | } 15 | ] 16 | } 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /tests/integration/files/check-crc32.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/check-crc32.xz -------------------------------------------------------------------------------- /tests/integration/files/check-crc64.json: -------------------------------------------------------------------------------- 1 | { 2 | "generate": [ 3 | { 4 | "cmd": "xz -C crc64", 5 | "length": 20280 6 | } 7 | ], 8 | "streams": [ 9 | { 10 | "check": 4, 11 | "blocks": [ 12 | { 13 | "length": 20280 14 | } 15 | ] 16 | } 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /tests/integration/files/check-crc64.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/check-crc64.xz -------------------------------------------------------------------------------- /tests/integration/files/check-none.json: -------------------------------------------------------------------------------- 1 | { 2 | "generate": [ 3 | { 4 | "cmd": "xz -C none", 5 | "length": 20280 6 | } 7 | ], 8 | "streams": [ 9 | { 10 | "check": 0, 11 | "blocks": [ 12 | { 13 | "length": 20280 14 | } 15 | ] 16 | } 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /tests/integration/files/check-none.xz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/check-none.xz -------------------------------------------------------------------------------- /tests/integration/files/check-sha256.json: -------------------------------------------------------------------------------- 1 | { 2 | "generate": [ 3 | { 4 | "cmd": "xz -C sha256", 5 | "length": 20280 6 | } 7 | ], 8 | "streams": [ 9 | { 10 | "check": 10, 11 | "blocks": [ 12 | { 13 | "length": 20280 14 | } 15 | ] 16 | } 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /tests/integration/files/check-sha256.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/check-sha256.xz -------------------------------------------------------------------------------- /tests/integration/files/example.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/example.xz -------------------------------------------------------------------------------- /tests/integration/files/few-blocks.json: -------------------------------------------------------------------------------- 1 | { 2 | "generate": [ 3 | { 4 | "cmd": "xz --block-size 10000", 5 | "length": 20280 6 | } 7 | ], 8 | "streams": [ 9 | { 10 | "check": 4, 11 | "blocks": [ 12 | { 13 | "length": 10000 14 | }, 15 | { 16 | "length": 10000 17 | }, 18 | { 19 | "length": 280 20 | } 21 | ] 22 | } 23 | ] 24 | } 25 | -------------------------------------------------------------------------------- /tests/integration/files/few-blocks.xz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/few-blocks.xz -------------------------------------------------------------------------------- /tests/integration/files/many-blocks.json: -------------------------------------------------------------------------------- 1 | { 2 | "generate": [ 3 | { 4 | "cmd": "xz --block-size 100", 5 | "length": 20280 6 | } 7 | ], 8 | "streams": [ 9 | { 10 | "check": 4, 11 | "blocks": [ 12 | { 13 | "length": 100 14 | }, 15 | { 16 | "length": 100 17 | }, 18 | { 19 | "length": 100 20 | }, 21 | { 22 | "length": 100 23 | }, 24 | { 25 | "length": 100 26 | }, 27 | { 28 | "length": 100 29 | }, 30 | { 31 | "length": 100 32 | }, 33 | { 34 | "length": 100 35 | }, 36 | { 37 | "length": 100 38 | }, 39 | { 40 | "length": 100 41 | }, 42 | { 43 | "length": 100 44 | }, 45 | { 46 | "length": 100 47 | }, 48 | { 49 | "length": 100 50 | }, 51 | { 52 | "length": 100 53 | }, 54 | { 55 | "length": 100 56 | }, 57 | { 58 | "length": 100 59 | }, 60 | { 61 | "length": 100 62 | }, 63 | { 64 | "length": 100 65 | }, 66 | { 67 | "length": 100 68 | }, 69 | { 70 | "length": 100 71 | }, 72 | { 73 | "length": 100 74 | }, 75 | { 76 | "length": 100 77 | }, 78 | { 79 | "length": 100 80 | }, 81 | { 82 | "length": 100 83 | }, 84 | { 85 | "length": 100 86 | }, 87 | { 88 | "length": 100 89 | }, 90 | { 91 | "length": 100 92 | }, 93 | { 94 | "length": 100 95 | }, 96 | { 97 | "length": 100 98 | }, 99 | { 100 | "length": 100 101 | }, 102 | { 103 | "length": 100 104 | }, 105 | { 106 | "length": 100 107 | }, 108 | { 109 | "length": 100 110 | }, 111 | { 112 | "length": 100 113 | }, 114 | { 115 | "length": 100 116 | }, 117 | { 118 | "length": 100 119 | }, 120 | { 121 | "length": 100 122 | }, 123 | { 124 | "length": 100 125 | }, 126 | { 127 | "length": 100 128 | }, 129 | { 130 | "length": 100 131 | }, 132 | { 133 | "length": 100 134 | }, 135 | { 136 | "length": 100 137 | }, 138 | { 139 | "length": 100 140 | }, 141 | 
{ 142 | "length": 100 143 | }, 144 | { 145 | "length": 100 146 | }, 147 | { 148 | "length": 100 149 | }, 150 | { 151 | "length": 100 152 | }, 153 | { 154 | "length": 100 155 | }, 156 | { 157 | "length": 100 158 | }, 159 | { 160 | "length": 100 161 | }, 162 | { 163 | "length": 100 164 | }, 165 | { 166 | "length": 100 167 | }, 168 | { 169 | "length": 100 170 | }, 171 | { 172 | "length": 100 173 | }, 174 | { 175 | "length": 100 176 | }, 177 | { 178 | "length": 100 179 | }, 180 | { 181 | "length": 100 182 | }, 183 | { 184 | "length": 100 185 | }, 186 | { 187 | "length": 100 188 | }, 189 | { 190 | "length": 100 191 | }, 192 | { 193 | "length": 100 194 | }, 195 | { 196 | "length": 100 197 | }, 198 | { 199 | "length": 100 200 | }, 201 | { 202 | "length": 100 203 | }, 204 | { 205 | "length": 100 206 | }, 207 | { 208 | "length": 100 209 | }, 210 | { 211 | "length": 100 212 | }, 213 | { 214 | "length": 100 215 | }, 216 | { 217 | "length": 100 218 | }, 219 | { 220 | "length": 100 221 | }, 222 | { 223 | "length": 100 224 | }, 225 | { 226 | "length": 100 227 | }, 228 | { 229 | "length": 100 230 | }, 231 | { 232 | "length": 100 233 | }, 234 | { 235 | "length": 100 236 | }, 237 | { 238 | "length": 100 239 | }, 240 | { 241 | "length": 100 242 | }, 243 | { 244 | "length": 100 245 | }, 246 | { 247 | "length": 100 248 | }, 249 | { 250 | "length": 100 251 | }, 252 | { 253 | "length": 100 254 | }, 255 | { 256 | "length": 100 257 | }, 258 | { 259 | "length": 100 260 | }, 261 | { 262 | "length": 100 263 | }, 264 | { 265 | "length": 100 266 | }, 267 | { 268 | "length": 100 269 | }, 270 | { 271 | "length": 100 272 | }, 273 | { 274 | "length": 100 275 | }, 276 | { 277 | "length": 100 278 | }, 279 | { 280 | "length": 100 281 | }, 282 | { 283 | "length": 100 284 | }, 285 | { 286 | "length": 100 287 | }, 288 | { 289 | "length": 100 290 | }, 291 | { 292 | "length": 100 293 | }, 294 | { 295 | "length": 100 296 | }, 297 | { 298 | "length": 100 299 | }, 300 | { 301 | "length": 100 302 | }, 303 | { 
304 | "length": 100 305 | }, 306 | { 307 | "length": 100 308 | }, 309 | { 310 | "length": 100 311 | }, 312 | { 313 | "length": 100 314 | }, 315 | { 316 | "length": 100 317 | }, 318 | { 319 | "length": 100 320 | }, 321 | { 322 | "length": 100 323 | }, 324 | { 325 | "length": 100 326 | }, 327 | { 328 | "length": 100 329 | }, 330 | { 331 | "length": 100 332 | }, 333 | { 334 | "length": 100 335 | }, 336 | { 337 | "length": 100 338 | }, 339 | { 340 | "length": 100 341 | }, 342 | { 343 | "length": 100 344 | }, 345 | { 346 | "length": 100 347 | }, 348 | { 349 | "length": 100 350 | }, 351 | { 352 | "length": 100 353 | }, 354 | { 355 | "length": 100 356 | }, 357 | { 358 | "length": 100 359 | }, 360 | { 361 | "length": 100 362 | }, 363 | { 364 | "length": 100 365 | }, 366 | { 367 | "length": 100 368 | }, 369 | { 370 | "length": 100 371 | }, 372 | { 373 | "length": 100 374 | }, 375 | { 376 | "length": 100 377 | }, 378 | { 379 | "length": 100 380 | }, 381 | { 382 | "length": 100 383 | }, 384 | { 385 | "length": 100 386 | }, 387 | { 388 | "length": 100 389 | }, 390 | { 391 | "length": 100 392 | }, 393 | { 394 | "length": 100 395 | }, 396 | { 397 | "length": 100 398 | }, 399 | { 400 | "length": 100 401 | }, 402 | { 403 | "length": 100 404 | }, 405 | { 406 | "length": 100 407 | }, 408 | { 409 | "length": 100 410 | }, 411 | { 412 | "length": 100 413 | }, 414 | { 415 | "length": 100 416 | }, 417 | { 418 | "length": 100 419 | }, 420 | { 421 | "length": 100 422 | }, 423 | { 424 | "length": 100 425 | }, 426 | { 427 | "length": 100 428 | }, 429 | { 430 | "length": 100 431 | }, 432 | { 433 | "length": 100 434 | }, 435 | { 436 | "length": 100 437 | }, 438 | { 439 | "length": 100 440 | }, 441 | { 442 | "length": 100 443 | }, 444 | { 445 | "length": 100 446 | }, 447 | { 448 | "length": 100 449 | }, 450 | { 451 | "length": 100 452 | }, 453 | { 454 | "length": 100 455 | }, 456 | { 457 | "length": 100 458 | }, 459 | { 460 | "length": 100 461 | }, 462 | { 463 | "length": 100 464 | }, 465 | { 
466 | "length": 100 467 | }, 468 | { 469 | "length": 100 470 | }, 471 | { 472 | "length": 100 473 | }, 474 | { 475 | "length": 100 476 | }, 477 | { 478 | "length": 100 479 | }, 480 | { 481 | "length": 100 482 | }, 483 | { 484 | "length": 100 485 | }, 486 | { 487 | "length": 100 488 | }, 489 | { 490 | "length": 100 491 | }, 492 | { 493 | "length": 100 494 | }, 495 | { 496 | "length": 100 497 | }, 498 | { 499 | "length": 100 500 | }, 501 | { 502 | "length": 100 503 | }, 504 | { 505 | "length": 100 506 | }, 507 | { 508 | "length": 100 509 | }, 510 | { 511 | "length": 100 512 | }, 513 | { 514 | "length": 100 515 | }, 516 | { 517 | "length": 100 518 | }, 519 | { 520 | "length": 100 521 | }, 522 | { 523 | "length": 100 524 | }, 525 | { 526 | "length": 100 527 | }, 528 | { 529 | "length": 100 530 | }, 531 | { 532 | "length": 100 533 | }, 534 | { 535 | "length": 100 536 | }, 537 | { 538 | "length": 100 539 | }, 540 | { 541 | "length": 100 542 | }, 543 | { 544 | "length": 100 545 | }, 546 | { 547 | "length": 100 548 | }, 549 | { 550 | "length": 100 551 | }, 552 | { 553 | "length": 100 554 | }, 555 | { 556 | "length": 100 557 | }, 558 | { 559 | "length": 100 560 | }, 561 | { 562 | "length": 100 563 | }, 564 | { 565 | "length": 100 566 | }, 567 | { 568 | "length": 100 569 | }, 570 | { 571 | "length": 100 572 | }, 573 | { 574 | "length": 100 575 | }, 576 | { 577 | "length": 100 578 | }, 579 | { 580 | "length": 100 581 | }, 582 | { 583 | "length": 100 584 | }, 585 | { 586 | "length": 100 587 | }, 588 | { 589 | "length": 100 590 | }, 591 | { 592 | "length": 100 593 | }, 594 | { 595 | "length": 100 596 | }, 597 | { 598 | "length": 100 599 | }, 600 | { 601 | "length": 100 602 | }, 603 | { 604 | "length": 100 605 | }, 606 | { 607 | "length": 100 608 | }, 609 | { 610 | "length": 100 611 | }, 612 | { 613 | "length": 100 614 | }, 615 | { 616 | "length": 100 617 | }, 618 | { 619 | "length": 80 620 | } 621 | ] 622 | } 623 | ] 624 | } 625 | 
-------------------------------------------------------------------------------- /tests/integration/files/many-blocks.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/many-blocks.xz -------------------------------------------------------------------------------- /tests/integration/files/one-stream-with-padding.json: -------------------------------------------------------------------------------- 1 | { 2 | "generate": [ 3 | { 4 | "cmd": "xz --block-size 15000", 5 | "length": 20280 6 | }, 7 | { 8 | "cmd": "head -c 100 /dev/zero" 9 | } 10 | ], 11 | "streams": [ 12 | { 13 | "check": 4, 14 | "blocks": [ 15 | { 16 | "length": 15000 17 | }, 18 | { 19 | "length": 5280 20 | } 21 | ] 22 | } 23 | ] 24 | } 25 | -------------------------------------------------------------------------------- /tests/integration/files/one-stream-with-padding.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/one-stream-with-padding.xz -------------------------------------------------------------------------------- /tests/integration/files/several-filters.json: -------------------------------------------------------------------------------- 1 | { 2 | "generate": [ 3 | { 4 | "cmd": "xz --x86=start=42 --delta=dist=3 --powerpc --lzma2", 5 | "length": 20280 6 | } 7 | ], 8 | "streams": [ 9 | { 10 | "check": 4, 11 | "blocks": [ 12 | { 13 | "filters": [ 14 | { 15 | "id": 4, 16 | "start_offset": 42 17 | }, 18 | { 19 | "id": 3, 20 | "dist": 3 21 | }, 22 | { 23 | "id": 5 24 | }, 25 | { 26 | "id": 33 27 | } 28 | ], 29 | "length": 20280 30 | } 31 | ] 32 | } 33 | ] 34 | } 35 | -------------------------------------------------------------------------------- /tests/integration/files/several-filters.xz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/several-filters.xz -------------------------------------------------------------------------------- /tests/integration/files/several-streams-with-padding.json: -------------------------------------------------------------------------------- 1 | { 2 | "generate": [ 3 | { 4 | "cmd": "xz --block-size 1000", 5 | "length": 1500 6 | }, 7 | { 8 | "cmd": "head -c 100 /dev/zero" 9 | }, 10 | { 11 | "cmd": "xz", 12 | "length": 2000 13 | }, 14 | { 15 | "cmd": "head -c 200 /dev/zero" 16 | }, 17 | { 18 | "cmd": "xz --block-size 4000", 19 | "length": 16780 20 | }, 21 | { 22 | "cmd": "head -c 400 /dev/zero" 23 | } 24 | ], 25 | "streams": [ 26 | { 27 | "check": 4, 28 | "blocks": [ 29 | { 30 | "length": 1000 31 | }, 32 | { 33 | "length": 500 34 | } 35 | ] 36 | }, 37 | { 38 | "check": 4, 39 | "blocks": [ 40 | { 41 | "length": 2000 42 | } 43 | ] 44 | }, 45 | { 46 | "check": 4, 47 | "blocks": [ 48 | { 49 | "length": 4000 50 | }, 51 | { 52 | "length": 4000 53 | }, 54 | { 55 | "length": 4000 56 | }, 57 | { 58 | "length": 4000 59 | }, 60 | { 61 | "length": 780 62 | } 63 | ] 64 | } 65 | ] 66 | } 67 | -------------------------------------------------------------------------------- /tests/integration/files/several-streams-with-padding.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/several-streams-with-padding.xz -------------------------------------------------------------------------------- /tests/integration/files/several-streams.json: -------------------------------------------------------------------------------- 1 | { 2 | "generate": [ 3 | { 4 | "cmd": "xz --block-size 5000", 5 | "length": 17000 6 | }, 7 | { 8 | "cmd": "xz --block-size 
1000", 9 | "length": 2100 10 | }, 11 | { 12 | "cmd": "xz", 13 | "length": 1180 14 | } 15 | ], 16 | "streams": [ 17 | { 18 | "check": 4, 19 | "blocks": [ 20 | { 21 | "length": 5000 22 | }, 23 | { 24 | "length": 5000 25 | }, 26 | { 27 | "length": 5000 28 | }, 29 | { 30 | "length": 2000 31 | } 32 | ] 33 | }, 34 | { 35 | "check": 4, 36 | "blocks": [ 37 | { 38 | "length": 1000 39 | }, 40 | { 41 | "length": 1000 42 | }, 43 | { 44 | "length": 100 45 | } 46 | ] 47 | }, 48 | { 49 | "check": 4, 50 | "blocks": [ 51 | { 52 | "length": 1180 53 | } 54 | ] 55 | } 56 | ] 57 | } 58 | -------------------------------------------------------------------------------- /tests/integration/files/several-streams.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/several-streams.xz -------------------------------------------------------------------------------- /tests/integration/files/various-block-sizes.json: -------------------------------------------------------------------------------- 1 | { 2 | "generate": [ 3 | { 4 | "cmd": "xz --block-list 1234,567,8901,234,5678,90,0", 5 | "length": 20280 6 | } 7 | ], 8 | "streams": [ 9 | { 10 | "check": 4, 11 | "blocks": [ 12 | { 13 | "length": 1234 14 | }, 15 | { 16 | "length": 567 17 | }, 18 | { 19 | "length": 8901 20 | }, 21 | { 22 | "length": 234 23 | }, 24 | { 25 | "length": 5678 26 | }, 27 | { 28 | "length": 90 29 | }, 30 | { 31 | "length": 3576 32 | } 33 | ] 34 | } 35 | ] 36 | } 37 | -------------------------------------------------------------------------------- /tests/integration/files/various-block-sizes.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/various-block-sizes.xz 
-------------------------------------------------------------------------------- /tests/integration/files/various-stream-checks-stream-padding-and-block-sizes.json: -------------------------------------------------------------------------------- 1 | { 2 | "generate": [ 3 | { 4 | "cmd": "xz -C none --block-list 100,1000,200,2000,0", 5 | "length": 3600 6 | }, 7 | { 8 | "cmd": "head -c 100 /dev/zero" 9 | }, 10 | { 11 | "cmd": "xz -C crc32", 12 | "length": 10000 13 | }, 14 | { 15 | "cmd": "head -c 800 /dev/zero" 16 | }, 17 | { 18 | "cmd": "xz -C crc64 --block-list 3000,300,0", 19 | "length": 3333 20 | }, 21 | { 22 | "cmd": "xz -C sha256 --block-size 600", 23 | "length": 3347 24 | }, 25 | { 26 | "cmd": "head -c 400 /dev/zero" 27 | } 28 | ], 29 | "streams": [ 30 | { 31 | "check": 0, 32 | "blocks": [ 33 | { 34 | "length": 100 35 | }, 36 | { 37 | "length": 1000 38 | }, 39 | { 40 | "length": 200 41 | }, 42 | { 43 | "length": 2000 44 | }, 45 | { 46 | "length": 300 47 | } 48 | ] 49 | }, 50 | { 51 | "check": 1, 52 | "blocks": [ 53 | { 54 | "length": 10000 55 | } 56 | ] 57 | }, 58 | { 59 | "check": 4, 60 | "blocks": [ 61 | { 62 | "length": 3000 63 | }, 64 | { 65 | "length": 300 66 | }, 67 | { 68 | "length": 33 69 | } 70 | ] 71 | }, 72 | { 73 | "check": 10, 74 | "blocks": [ 75 | { 76 | "length": 600 77 | }, 78 | { 79 | "length": 600 80 | }, 81 | { 82 | "length": 600 83 | }, 84 | { 85 | "length": 600 86 | }, 87 | { 88 | "length": 600 89 | }, 90 | { 91 | "length": 347 92 | } 93 | ] 94 | } 95 | ] 96 | } 97 | -------------------------------------------------------------------------------- /tests/integration/files/various-stream-checks-stream-padding-and-block-sizes.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/various-stream-checks-stream-padding-and-block-sizes.xz 
def test_read_all(integration_case: _IntegrationCase, data_pattern: bytes) -> None:
    """Check streams, blocks, boundaries and content against the metadata."""
    xz_path, metadata = integration_case
    expected_streams = metadata["streams"]
    with XZFile(xz_path) as xzfile:
        streams = list(xzfile._fileobjs.items())  # pylint: disable=protected-access
        assert len(streams) == len(expected_streams)

        pos = 0  # expected position in the uncompressed data
        stream_boundaries = []
        block_boundaries = []
        for (stream_pos, stream), expected_stream in zip(streams, expected_streams):
            stream_boundaries.append(pos)
            assert stream_pos == pos
            assert stream.check == expected_stream["check"]

            expected_blocks = expected_stream["blocks"]
            blocks = list(stream._fileobjs.items())  # pylint: disable=protected-access
            assert len(blocks) == len(expected_blocks)
            for (block_pos, block), expected_block in zip(blocks, expected_blocks):
                block_boundaries.append(pos)
                # block positions are relative to the start of their stream
                assert block_pos == pos - stream_pos
                assert len(block) == expected_block["length"]
                pos += expected_block["length"]

            assert len(stream) == pos - stream_pos

        assert xzfile.stream_boundaries == stream_boundaries
        assert xzfile.block_boundaries == block_boundaries
        assert xzfile.read() == data_pattern


def test_read_reversed(integration_case: _IntegrationCase, data_pattern: bytes) -> None:
    """Read the file chunk by chunk, from the last chunk to the first one."""
    xz_path, _ = integration_case
    with XZFile(xz_path) as xzfile:
        # we are testing the worst possible case (lots of negative seeking)
        # limit the time to test by reading in chunks instead of 1 byte at a time
        chunk_size = 37
        chunk_starts = range(0, len(data_pattern), chunk_size)
        for start in reversed(chunk_starts):
            expected = data_pattern[start : start + chunk_size]
            xzfile.seek(start)
            assert xzfile.read(chunk_size) == expected
| 18 | if "padding" in xz_path.name: 19 | pytest.skip("Write mode does not support stream padding yet") 20 | 21 | generated_path = tmp_path / "archive.xz" 22 | 23 | with xz.open(generated_path, "w") as xzfile: 24 | for stream in metadata["streams"]: 25 | xzfile.check = stream["check"] 26 | xzfile.change_stream() 27 | for block in stream["blocks"]: 28 | xzfile.filters = block.get("filters") 29 | xzfile.change_block() 30 | xzfile.write(data[: block["length"]]) 31 | data = data[block["length"] :] 32 | 33 | assert not data 34 | 35 | expected_hash = sha256(xz_path.read_bytes()) 36 | generated_hash = sha256(generated_path.read_bytes()) 37 | 38 | assert generated_hash.hexdigest() == expected_hash.hexdigest() 39 | -------------------------------------------------------------------------------- /tests/integration/test_generate_files.py: -------------------------------------------------------------------------------- 1 | from hashlib import sha256 2 | from pathlib import Path 3 | import subprocess 4 | from typing import Any, Dict, Tuple 5 | 6 | import pytest 7 | 8 | _IntegrationCase = Tuple[Path, Dict[str, Any]] 9 | 10 | 11 | @pytest.mark.generate_integration_files 12 | def test(integration_case: _IntegrationCase, data_pattern: bytes) -> None: 13 | xz_path, metadata = integration_case 14 | 15 | expected_hash = sha256(xz_path.read_bytes()) 16 | 17 | # note that we override current xz file 18 | # this allows to create new integration files from json metadata 19 | data = memoryview(data_pattern) 20 | with xz_path.open("wb") as fout: 21 | for step in metadata["generate"]: 22 | step_data_len = step.get("length", 0) 23 | step_data = data[:step_data_len] 24 | data = data[step_data_len:] 25 | fout.write( 26 | subprocess.run( 27 | step["cmd"].split(" "), 28 | input=step_data, 29 | stdout=subprocess.PIPE, 30 | check=True, 31 | ).stdout 32 | ) 33 | assert not data 34 | 35 | generated_hash = sha256(xz_path.read_bytes()) 36 | 37 | assert generated_hash.hexdigest() == 
expected_hash.hexdigest() 38 | -------------------------------------------------------------------------------- /tests/integration/test_ram_usage.py: -------------------------------------------------------------------------------- 1 | from io import DEFAULT_BUFFER_SIZE 2 | from lzma import compress 3 | from pathlib import Path 4 | from random import seed 5 | import sys 6 | from typing import BinaryIO, Optional, cast 7 | 8 | import pytest 9 | 10 | from xz import XZFile 11 | from xz.common import create_xz_index_footer, parse_xz_footer, parse_xz_index 12 | from xz.io import IOCombiner, IOStatic 13 | 14 | if sys.version_info >= (3, 9): 15 | from collections.abc import Callable, Iterator 16 | from random import randbytes 17 | else: 18 | from random import getrandbits 19 | from typing import Callable, Iterator 20 | 21 | def randbytes(length: int) -> bytes: 22 | return getrandbits(length * 8).to_bytes(length, "little") 23 | 24 | 25 | @pytest.fixture 26 | def ram_usage() -> Iterator[Callable[[], int]]: 27 | try: 28 | import tracemalloc # pylint: disable=import-outside-toplevel 29 | except ImportError: # e.g. 
PyPy 30 | pytest.skip("tracemalloc module not available") 31 | 32 | try: 33 | tracemalloc.start() 34 | yield lambda: tracemalloc.get_traced_memory()[1] 35 | finally: 36 | tracemalloc.stop() 37 | 38 | 39 | BLOCK_SIZE = 1_000_000 40 | 41 | 42 | @pytest.fixture 43 | def fileobj() -> BinaryIO: 44 | # create xz raw data composed of many identical blocks 45 | nb_blocks = 50 46 | 47 | seed(0) 48 | data = compress(randbytes(BLOCK_SIZE)) 49 | header = data[:12] 50 | footer = data[-12:] 51 | check, backward_size = parse_xz_footer(footer) 52 | block = data[12 : -12 - backward_size] 53 | records = parse_xz_index(data[-12 - backward_size : -12]) 54 | index_footer = create_xz_index_footer(check, records * nb_blocks) 55 | 56 | return cast( 57 | BinaryIO, 58 | IOCombiner( 59 | IOStatic(header), 60 | *[IOStatic(block)] * nb_blocks, 61 | IOStatic(index_footer), 62 | ), 63 | ) 64 | 65 | 66 | def test_read_linear( 67 | # pylint: disable=redefined-outer-name 68 | fileobj: BinaryIO, 69 | ram_usage: Callable[[], int], 70 | ) -> None: 71 | with XZFile(fileobj) as xz_file: 72 | # read almost one block 73 | xz_file.read(BLOCK_SIZE - 1) 74 | one_block_memory = ram_usage() 75 | 76 | # read all the file 77 | while xz_file.read(DEFAULT_BUFFER_SIZE): 78 | assert ( 79 | # should not use much more memory, take 2 as error margin 80 | ram_usage() 81 | < one_block_memory * 2 82 | ), f"Consumes too much RAM (at {100 * xz_file.tell() / len(xz_file):.0f}%)" 83 | 84 | 85 | def test_partial_read_each_block( 86 | # pylint: disable=redefined-outer-name 87 | fileobj: BinaryIO, 88 | ram_usage: Callable[[], int], 89 | ) -> None: 90 | one_block_memory: Optional[int] = None 91 | 92 | with XZFile(fileobj) as xz_file: 93 | for pos in xz_file.block_boundaries[1:]: 94 | # read second-to last byte of each block 95 | xz_file.seek(pos - 2) 96 | xz_file.read(1) 97 | if one_block_memory is None: 98 | one_block_memory = ram_usage() 99 | else: 100 | assert ( 101 | # default strategy is max 8 blocks, take 10 as error margin 
102 | ram_usage() 103 | < one_block_memory * 10 104 | ), f"Consumes too much RAM (at {100 * xz_file.tell() / len(xz_file):.0f}%)" 105 | 106 | 107 | def test_write( 108 | tmp_path: Path, 109 | # pylint: disable=redefined-outer-name 110 | ram_usage: Callable[[], int], 111 | ) -> None: 112 | nb_blocks = 10 113 | 114 | seed(0) 115 | 116 | one_block_memory: Optional[int] = None 117 | 118 | with XZFile(tmp_path / "archive.xz", "w") as xz_file: 119 | for i in range(nb_blocks): 120 | xz_file.change_block() 121 | xz_file.write(randbytes(BLOCK_SIZE)) 122 | 123 | if one_block_memory is None: 124 | one_block_memory = ram_usage() 125 | else: 126 | assert ( 127 | # should not use much more memory, take 2 as error margin 128 | ram_usage() 129 | < one_block_memory * 2 130 | ), f"Consumes too much RAM (at {i / nb_blocks:.0f}%)" 131 | -------------------------------------------------------------------------------- /tests/integration/test_readme.py: -------------------------------------------------------------------------------- 1 | import doctest 2 | import os 3 | from pathlib import Path 4 | import shutil 5 | import sys 6 | from typing import List, Optional, Tuple 7 | 8 | import pytest 9 | 10 | import xz 11 | 12 | if sys.version_info >= (3, 9): # pragma: no cover 13 | from collections.abc import Iterator 14 | else: # pragma: no cover 15 | from typing import Iterator 16 | 17 | 18 | @pytest.fixture(autouse=True) 19 | def change_dir(tmp_path: Path) -> Iterator[None]: 20 | old_dir = os.getcwd() 21 | shutil.copy(Path(__file__).parent / "files" / "example.xz", tmp_path) 22 | os.chdir(tmp_path) 23 | yield 24 | os.chdir(old_dir) 25 | 26 | 27 | def _parse_readme() -> List[Tuple[int, str]]: 28 | code_blocks = [] 29 | current_code_block = "" 30 | current_code_block_line: Optional[int] = None 31 | with (Path(__file__).parent.parent.parent / "README.md").open() as fin: 32 | for line_no, line in enumerate(fin): 33 | if line.startswith("```"): 34 | if current_code_block_line is None: 35 | if 
"python" in line: 36 | current_code_block_line = line_no + 1 37 | else: 38 | code_blocks.append((current_code_block_line, current_code_block)) 39 | current_code_block = "" 40 | current_code_block_line = None 41 | elif current_code_block_line is not None: 42 | current_code_block += line 43 | return code_blocks 44 | 45 | 46 | _README_CODE_BLOCKS = _parse_readme() 47 | 48 | 49 | @pytest.mark.parametrize( 50 | "code_block", 51 | [ 52 | pytest.param(code_block, id=f"line_{line_no}") 53 | for line_no, code_block in _README_CODE_BLOCKS 54 | ], 55 | ) 56 | def test_readme( 57 | code_block: str, tmp_path: Path 58 | ) -> None: # pylint: disable=redefined-outer-name 59 | path = tmp_path / "block.txt" 60 | path.write_text(code_block) 61 | failure_count, test_count = doctest.testfile( 62 | str(path), 63 | module_relative=False, 64 | extraglobs={"xz": xz}, 65 | ) 66 | assert failure_count == 0 67 | assert test_count 68 | -------------------------------------------------------------------------------- /tests/unit/test_attr_proxy.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import pytest 4 | 5 | from xz.utils import AttrProxy 6 | 7 | 8 | class Dest: 9 | abc = "012" 10 | 11 | 12 | class Src: 13 | proxy: Optional[Dest] = None 14 | abc = AttrProxy[str]("proxy") 15 | 16 | 17 | def test_direct() -> None: 18 | dest = Dest() 19 | src = Src() 20 | 21 | # not proxied 22 | with pytest.raises(AttributeError) as exc_info: 23 | src.abc # pylint: disable=pointless-statement 24 | assert ( 25 | str(exc_info.value) 26 | == "'Src' object has not attribute 'abc' until its attribute 'proxy' is defined" 27 | ) 28 | 29 | src.abc = "345" 30 | assert src.abc == "345" 31 | assert dest.abc == "012" # unchanged 32 | 33 | # proxied 34 | src.proxy = dest 35 | 36 | assert src.abc == "012" # get initial value back from proxy 37 | 38 | src.abc = "678" 39 | assert src.abc == "678" 40 | assert dest.abc == "678" # changed 41 | 
# /tests/unit/test_block.py
"""Unit tests for ``XZBlock``: read, seek, writable, write and truncate."""

from io import SEEK_SET, BytesIO, UnsupportedOperation
import sys
from typing import Tuple, cast
from unittest.mock import Mock, call

import pytest

import xz.block as block_module
from xz.block import BlockRead, XZBlock
from xz.common import XZError, create_xz_header, create_xz_index_footer
from xz.io import IOAbstract, IOStatic

if sys.version_info >= (3, 9):  # pragma: no cover
    from collections.abc import Callable, Iterator
else:  # pragma: no cover
    from typing import Callable, Iterator


BLOCK_BYTES = bytes.fromhex(
    "0200210116000000742fe5a3e0006300415d00209842100431d01ab285328305"
    "7ddb5924a128599cc9911a7fcff8d59c1f6f887bcee97b1f83f1808f005de273"
    "e1a6e99a7eac4f8f632b7e43bbf1da311dce5c0000000000e7c35efa"
)

# read size forced on BlockRead for every test of this module
_READ_SIZE = 17

# message asserted by every "corrupt input" read test below
_CORRUPT_INPUT = "block: error while decompressing: Corrupt input data"

# signature of the ``data_pattern_locate`` fixture
_Locate = Callable[[bytes], Tuple[int, int]]


def create_fileobj(data: bytes) -> Mock:
    """Return a ``Mock`` wrapping an in-memory file that holds ``data``.

    Calls are forwarded to the wrapped ``BytesIO`` and recorded by the mock,
    so tests can assert on ``method_calls``.
    """
    buffer = BytesIO(data)
    proxy = Mock(wraps=buffer)
    proxy.__class__ = cast(Mock, IOAbstract)  # needs to be subclass of IOAbstract

    def _length(_self: object) -> int:
        return len(buffer.getvalue())

    proxy.__len__ = _length
    return proxy


@pytest.fixture
def fileobj() -> Iterator[Mock]:
    """Mocked file object containing ``BLOCK_BYTES``."""
    yield create_fileobj(BLOCK_BYTES)


@pytest.fixture
def fileobj_empty() -> Iterator[Mock]:
    """Mocked file object containing no data."""
    yield create_fileobj(b"")


@pytest.fixture(autouse=True)
def patch_buffer_size(monkeypatch: pytest.MonkeyPatch) -> None:
    """Force a small ``BlockRead.read_size`` so that reads span several chunks."""
    monkeypatch.setattr(BlockRead, "read_size", _READ_SIZE)


@pytest.fixture
def compressor(monkeypatch: pytest.MonkeyPatch) -> Iterator[Mock]:
    """Replace ``LZMACompressor`` in the block module; yield the fake instance."""
    factory = Mock()
    monkeypatch.setattr(block_module, "LZMACompressor", factory)
    yield factory.return_value


# pylint: disable=redefined-outer-name


def _expected_reads(*offsets: int, from_start: bool = False) -> Iterator[object]:
    """Yield the ``seek``/``read`` calls expected on the underlying file.

    With ``from_start``, the sequence begins with the initial short read at
    offset 0; then one full-size read is expected at each of ``offsets``.
    """
    if from_start:
        yield call.seek(0, SEEK_SET)
        yield call.read(5)  # xz padding is 12 bytes
    for offset in offsets:
        yield call.seek(offset, SEEK_SET)
        yield call.read(_READ_SIZE)


def _first_write_calls() -> Iterator[object]:
    """Yield the calls expected once a block starts writing.

    The 12 bytes written are the same as the first 12 bytes of ``BLOCK_BYTES``.
    """
    yield call.seek(0)
    yield call.write(b"\x02\x00!\x01\x16\x00\x00\x00t/\xe5\xa3")


def _assert_corrupt_after(block: XZBlock, locate: _Locate, readable: int) -> None:
    """Read ``readable`` valid bytes, then expect the next byte to be corrupt."""
    assert locate(block.read(readable)) == (0, readable)

    with pytest.raises(XZError) as exc_info:
        block.read(1)
    assert str(exc_info.value) == _CORRUPT_INPUT


def _error_when_closing_written_block(target: Mock) -> str:
    """Write to a new block on ``target``; return the ``XZError`` message raised."""
    with pytest.raises(XZError) as exc_info:
        with XZBlock(target, 1, 0, 0) as block:
            block.write(b"Hello, world!\n")
    return str(exc_info.value)


#
# read
#


def test_read_all(fileobj: Mock, data_pattern_locate: _Locate) -> None:
    """Reading the whole block goes through the file chunk by chunk, once."""
    block = XZBlock(fileobj, 1, 89, 100)
    assert block.tell() == 0
    assert data_pattern_locate(block.read()) == (0, 100)

    assert fileobj.method_calls == [
        *_expected_reads(5, 22, 39, 56, 73, from_start=True),
        # below is not needed to get the data
        # but needed to perform various checks
        # see other tests
        *_expected_reads(90),
    ]
    fileobj.method_calls.clear()

    # everything has been consumed: nothing more to read, no more file access
    assert block.read() == b""
    assert not fileobj.method_calls


def test_read_seek_forward(fileobj: Mock, data_pattern_locate: _Locate) -> None:
    """Seeking forward only reads the chunks that were not read yet."""
    block = XZBlock(fileobj, 1, 89, 100)
    assert block.tell() == 0

    # (position to seek to, file calls expected while reading 4 bytes there)
    steps = (
        (0, list(_expected_reads(5, 22, from_start=True))),
        (10, []),  # no file access
        (30, list(_expected_reads(39))),
        (60, list(_expected_reads(56))),
    )
    for pos, expected_calls in steps:
        block.seek(pos)
        assert block.tell() == pos
        assert not fileobj.method_calls  # no file access
        assert data_pattern_locate(block.read(4)) == (pos, 4)
        assert block.tell() == pos + 4
        assert fileobj.method_calls == expected_calls
        fileobj.method_calls.clear()


def test_read_seek_backward(fileobj: Mock, data_pattern_locate: _Locate) -> None:
    """Reading after a backward seek reads the file again from offset 0."""
    block = XZBlock(fileobj, 1, 89, 100)
    assert block.tell() == 0

    block.seek(60)
    assert block.tell() == 60
    assert not fileobj.method_calls  # no file access

    for pos in (40, 20):
        block.seek(pos)
        assert block.tell() == pos
        assert not fileobj.method_calls  # no file access
        assert data_pattern_locate(block.read(4)) == (pos, 4)
        assert block.tell() == pos + 4
        assert fileobj.method_calls == list(
            _expected_reads(5, 22, 39, from_start=True)
        )
        fileobj.method_calls.clear()
        assert not fileobj.method_calls  # no file access


def test_read_wrong_uncompressed_size_too_small(
    fileobj: Mock, data_pattern_locate: _Locate
) -> None:
    """A declared uncompressed size of 99 (data is 100) fails on the last byte."""
    block = XZBlock(fileobj, 1, 89, 99)

    # read all but last byte, then the last byte
    _assert_corrupt_after(block, data_pattern_locate, 98)


def test_read_wrong_uncompressed_size_too_big(
    fileobj: Mock, data_pattern_locate: _Locate
) -> None:
    """A declared uncompressed size of 101 (data is 100) fails on the last byte."""
    block = XZBlock(fileobj, 1, 89, 101)

    # read all but last byte, then the last byte
    _assert_corrupt_after(block, data_pattern_locate, 100)


def test_read_wrong_block_padding(data_pattern_locate: _Locate) -> None:
    """A non-zero byte just before the last 4 bytes fails on the last byte."""
    damaged = IOStatic(BLOCK_BYTES[:-5] + b"\xff" + BLOCK_BYTES[-4:])
    block = XZBlock(damaged, 1, 89, 100)

    # read all but last byte, then the last byte
    _assert_corrupt_after(block, data_pattern_locate, 99)


def test_read_wrong_check(data_pattern_locate: _Locate) -> None:
    """Overwriting the last 4 bytes of the block fails on the last byte."""
    damaged = IOStatic(BLOCK_BYTES[:-4] + b"\xff" * 4)

    block = XZBlock(damaged, 1, 89, 100)

    # read all but last byte, then the last byte
    _assert_corrupt_after(block, data_pattern_locate, 99)


def test_read_truncated_data() -> None:
    """Running out of file data before the end of the block is an error."""
    truncated = create_fileobj(
        bytes.fromhex(
            # header
            "fd377a585a0000016922de36"
            # one block (truncated)
            "0200210116000000742fe5a301000941"
        )
    )

    block = XZBlock(truncated, 1, 89, 100)

    with pytest.raises(XZError) as exc_info:
        block.read()
    assert str(exc_info.value) == "block: data eof"


def test_read_decompressor_eof(data_pattern_locate: _Locate) -> None:
    """Asking for more data than the block really holds is an error."""
    stream = IOStatic(
        bytes.fromhex(
            # one block
            "0200210116000000742fe5a301000941"
            "6130416131416132410000004e4aa467"
            # index
            "00011e0aea6312149042990d0100"
            # stream footer
            "00000001595a"
        )
    )

    # real uncompressed size is 10, not 11
    # it is changed to trigger the error case we are testing here
    block = XZBlock(stream, 1, 30, 11)

    # read all but last byte
    assert data_pattern_locate(block.read(10)) == (0, 10)

    # read last byte
    with pytest.raises(XZError) as exc_info:
        block.read(1)
    assert str(exc_info.value) == "block: decompressor eof"


#
# writable
#


def test_writable(fileobj: Mock) -> None:
    """A block created with existing content is not writable."""
    assert not XZBlock(fileobj, 1, 89, 100).writable()


def test_writable_empty(fileobj_empty: Mock) -> None:
    """A block created empty is writable."""
    assert XZBlock(fileobj_empty, 1, 0, 0).writable()


#
# write
#


def test_write_once(fileobj_empty: Mock) -> None:
    """A single write emits the first 12 bytes; the rest is written on exit."""
    with XZBlock(fileobj_empty, 1, 0, 0) as block:
        block.write(b"Hello, world!\n")
        assert block.tell() == 14
        assert fileobj_empty.method_calls == list(_first_write_calls())
        fileobj_empty.reset_mock()

    assert block.unpadded_size == 34
    assert block.uncompressed_size == 14

    assert fileobj_empty.method_calls == [
        call.seek(12),
        call.write(b"\x01\x00\rHello, world!\n\x00\x00\x00\x18\xa7U{"),
    ]


def test_write_multiple(fileobj_empty: Mock) -> None:
    """Small writes are buffered; a large one and the exit reach the file."""
    with XZBlock(fileobj_empty, 1, 0, 0) as block:
        block.write(b"Hello,")
        assert block.tell() == 6
        assert fileobj_empty.method_calls == list(_first_write_calls())
        fileobj_empty.reset_mock()

        block.write(b" world!\n")
        assert block.tell() == 14
        assert not fileobj_empty.method_calls  # buffered

        block.write(b"A" * 3_000_000)
        assert block.tell() == 3_000_014
        assert fileobj_empty.method_calls  # not buffered

    assert block.unpadded_size == 540
    assert block.uncompressed_size == 3_000_014

    assert fileobj_empty.method_calls  # flushing compressor


@pytest.mark.parametrize("pos", [0, 42, 100, 200])
def test_write_existing(fileobj: Mock, pos: int) -> None:
    """Writing to a non-empty block is refused, whatever the position."""
    block = XZBlock(fileobj, 1, 89, 100)
    block.seek(pos)
    with pytest.raises(UnsupportedOperation):
        # block is not empty, so not writable
        block.write(b"a")


def test_write_compressor_error_0(fileobj_empty: Mock, compressor: Mock) -> None:
    """``compress`` returning the header for check 0 fails at write time."""
    compressor.compress.return_value = create_xz_header(0)
    with XZBlock(fileobj_empty, 1, 0, 0) as block:
        with pytest.raises(XZError) as exc_info:
            block.write(b"Hello, world!\n")
        assert str(exc_info.value) == "block: compressor header"


def test_write_compressor_error_1(fileobj_empty: Mock, compressor: Mock) -> None:
    """``flush`` returning a footer built with check 0 is rejected."""
    compressor.compress.return_value = create_xz_header(1)
    compressor.flush.return_value = create_xz_index_footer(0, [(13, 37), (4, 2)])
    message = _error_when_closing_written_block(fileobj_empty)
    assert message == "block: compressor footer check"


def test_write_compressor_error_2(fileobj_empty: Mock, compressor: Mock) -> None:
    """``flush`` returning an index with two records is rejected."""
    compressor.compress.return_value = create_xz_header(1)
    compressor.flush.return_value = create_xz_index_footer(1, [(13, 37), (4, 2)])
    message = _error_when_closing_written_block(fileobj_empty)
    assert message == "block: compressor index records length"


def test_write_compressor_error_3(fileobj_empty: Mock, compressor: Mock) -> None:
    """``flush`` returning a record with uncompressed size 1337 is rejected."""
    compressor.compress.return_value = create_xz_header(1)
    compressor.flush.return_value = create_xz_index_footer(1, [(34, 1337)])
    message = _error_when_closing_written_block(fileobj_empty)
    assert message == "block: compressor uncompressed size"


#
# truncate
#


def test_truncate_empty_zero(fileobj_empty: Mock) -> None:
    """Truncating an empty block to 0 never touches the file."""
    with XZBlock(fileobj_empty, 1, 0, 0) as block:
        block.truncate(0)
        assert block.tell() == 0
        assert not fileobj_empty.method_calls

    assert block.unpadded_size == 0
    assert block.uncompressed_size == 0

    assert not fileobj_empty.method_calls


def test_truncate_empty_fill(fileobj_empty: Mock) -> None:
    """Truncating an empty block to 42 writes data but keeps the position at 0."""
    with XZBlock(fileobj_empty, 1, 0, 0) as block:
        block.truncate(42)
        assert block.tell() == 0
        assert fileobj_empty.method_calls == list(_first_write_calls())
        fileobj_empty.reset_mock()

    assert block.unpadded_size == 30
    assert block.uncompressed_size == 42

    assert fileobj_empty.method_calls == [
        call.seek(12),
        call.write(b"\xe0\x00)\x00\x06]\x00\x00n,GH\x00\x00\x00\x00\xfb(o\xe4"),
    ]


@pytest.mark.parametrize("size", [0, 42, 100, 200]) 441 | def test_truncate_existing(fileobj: Mock, size: int) -> None: 442 | block = XZBlock(fileobj, 1, 89, 100) 443 | with pytest.raises(UnsupportedOperation): 444 | # block is not empty, so not writable 445 | block.truncate(size) 446 | -------------------------------------------------------------------------------- /tests/unit/test_common.py: -------------------------------------------------------------------------------- 1 | from lzma import CHECK_CRC32, CHECK_CRC64, CHECK_NONE, CHECK_SHA256, is_check_supported 2 | from typing import List, Tuple 3 | 4 | import pytest 5 | 6 | from xz.common import ( 7 | DEFAULT_CHECK, 8 | XZError, 9 | create_xz_header, 10 | create_xz_index_footer, 11 | decode_mbi, 12 | encode_mbi, 13 | pad, 14 | parse_xz_footer, 15 | parse_xz_header, 16 | parse_xz_index, 17 | round_up, 18 | ) 19 | 20 | MBI_CASE = tuple( 21 | pytest.param(value, data, id=hex(value)) 22 | for value, data in ( 23 | (0, "00"), 24 | (1, "01"), 25 | ((1 << 7) - 1, "7f"), 26 | (1 << 7, "8001"), 27 | ((1 << 7 * 2) - 1, "ff7f"), 28 | (1 << 7 * 2, "808001"), 29 | ((1 << 7 * 3) - 1, "ffff7f"), 30 | (1 << 7 * 3, "80808001"), 31 | ((1 << 7 * 10) - 1, "ffffffffffffffffff7f"), 32 | (1 << 7 * 10, "8080808080808080808001"), 33 | (9, "09"), 34 | (99, "63"), 35 | (999, "e707"), 36 | (9999, "8f4e"), 37 | (99999, "9f8d06"), 38 | (999999, "bf843d"), 39 | (9999999, "fface204"), 40 | (99999999, "ffc1d72f"), 41 | (999999999, "ff93ebdc03"), 42 | ) 43 | ) 44 | 45 | 46 | @pytest.mark.parametrize("value, data", MBI_CASE) 47 | def test_encode_mbi(value: int, data: str) -> None: 48 | assert encode_mbi(value) == bytes.fromhex(data) 49 | 50 | 51 | @pytest.mark.parametrize("value, data", MBI_CASE) 52 | def test_decode_mbi(value: int, data: str) -> None: 53 | assert decode_mbi(bytes.fromhex(data) + b"\xff\x00" * 10) == (len(data) // 2, value) 54 | 55 | 56 | @pytest.mark.parametrize("data", ("", "81828384"), ids=("empty", "truncated")) 57 | def 
test_decode_mbi_invalid(data: str) -> None: 58 | with pytest.raises(XZError) as exc_info: 59 | decode_mbi(bytes.fromhex(data)) 60 | assert str(exc_info.value) == "invalid mbi" 61 | 62 | 63 | @pytest.mark.parametrize( 64 | "value, expected", 65 | ((0, 0), (1, 4), (2, 4), (3, 4), (4, 4), (5, 8), (6, 8), (7, 8), (8, 8)), 66 | ) 67 | def test_round_up(value: int, expected: int) -> None: 68 | assert round_up(value) == expected 69 | 70 | 71 | @pytest.mark.parametrize( 72 | "value, padding", 73 | ( 74 | (0, ""), 75 | (1, "000000"), 76 | (2, "0000"), 77 | (3, "00"), 78 | (4, ""), 79 | (5, "000000"), 80 | (6, "0000"), 81 | (7, "00"), 82 | (8, ""), 83 | ), 84 | ) 85 | def test_pad(value: int, padding: str) -> None: 86 | assert pad(value) == bytes.fromhex(padding) 87 | data = b"B" * value 88 | data += bytes.fromhex(padding) 89 | assert not len(data) % 4 90 | 91 | 92 | XZ_HEADER_CASES = ( 93 | pytest.param(CHECK_NONE, "fd377a585a000000ff12d941", id="check_none"), 94 | pytest.param(CHECK_CRC32, "fd377a585a0000016922de36", id="check_crc32"), 95 | pytest.param(CHECK_CRC64, "fd377a585a000004e6d6b446", id="check_crc64"), 96 | pytest.param(CHECK_SHA256, "fd377a585a00000ae1fb0ca1", id="check_sha256"), 97 | ) 98 | 99 | 100 | @pytest.mark.parametrize("check, data", XZ_HEADER_CASES) 101 | def test_create_xz_header(check: int, data: str) -> None: 102 | assert create_xz_header(check) == bytes.fromhex(data) 103 | 104 | 105 | def test_create_xz_header_invalid_check() -> None: 106 | with pytest.raises(XZError) as exc_info: 107 | create_xz_header(17) 108 | assert str(exc_info.value) == "header check" 109 | 110 | 111 | @pytest.mark.parametrize("check, data", XZ_HEADER_CASES) 112 | def test_parse_xz_header(check: int, data: str) -> None: 113 | assert parse_xz_header(bytes.fromhex(data)) == check 114 | 115 | 116 | @pytest.mark.parametrize( 117 | "data, message", 118 | ( 119 | ("fd377a585a0000016922de3600", "header length"), 120 | ("f1377a585a000000ff12d941", "header magic"), 121 | 
("fd377a585a0000016942de36", "header crc32"), 122 | ("fd377a585a0000110d32692b", "header flags"), 123 | ("fd377a585a0001012813c52f", "header flags"), 124 | ("fd377a585a00100138301c7c", "header flags"), 125 | ), 126 | ) 127 | def test_parse_xz_header_invalid(data: str, message: str) -> None: 128 | with pytest.raises(XZError) as exc_info: 129 | parse_xz_header(bytes.fromhex(data)) 130 | assert str(exc_info.value) == message 131 | 132 | 133 | XZ_INDEX_CASES = ( 134 | # all have check=1 135 | pytest.param([], "000000001cdf4421", id="empty"), 136 | pytest.param([(24, 4)], "000118046be9f0a5", id="one-small-block"), 137 | pytest.param([(2062, 20280)], "00018e10b89e010039f45fb1", id="one-big-block"), 138 | pytest.param( 139 | [(73, 60), (73, 60), (73, 60), (56, 30)], 140 | "0004493c493c493c381e0000b6ec1657", 141 | id="several-small-blocks", 142 | ), 143 | pytest.param( 144 | [(1, 2), (11, 2222), (1111, 22222222), (11111111, 2222222222222222)], 145 | "000401020bae11d7088eabcc0ac795a6058ec7abf196a3f903000000c9647142", 146 | id="several-blocks-various-sizes", 147 | ), 148 | ) 149 | 150 | 151 | @pytest.mark.parametrize("records, data", XZ_INDEX_CASES) 152 | def test_create_xz_index(records: List[Tuple[int, int]], data: str) -> None: 153 | assert create_xz_index_footer(1, records)[:-12] == bytes.fromhex(data) 154 | 155 | 156 | def test_create_xz_index_invalid() -> None: 157 | with pytest.raises(XZError) as exc_info: 158 | create_xz_index_footer(1, [(73, 60), (0, 12), (56, 30)]) 159 | assert str(exc_info.value) == "index record unpadded size" 160 | 161 | 162 | @pytest.mark.parametrize("records, data", XZ_INDEX_CASES) 163 | def test_parse_xz_index(records: List[Tuple[int, int]], data: str) -> None: 164 | assert parse_xz_index(bytes.fromhex(data)) == records 165 | 166 | 167 | @pytest.mark.parametrize( 168 | "data, message", 169 | ( 170 | ("0000001cdf4421", "index length"), 171 | ("420000001cdf4421", "index indicator"), 172 | ("000000001cdf4221", "index crc32"), 173 | 
("000218043257b6a7", "index size"), 174 | ("000100043271eb27", "index record unpadded size"), 175 | ("000188047163b1d4", "index size"), 176 | ("000104002f70ea44", "index record uncompressed size"), 177 | ("000180180400420096a658c0", "index padding"), 178 | ), 179 | ) 180 | def test_parse_xz_index_invalid(data: str, message: str) -> None: 181 | with pytest.raises(XZError) as exc_info: 182 | parse_xz_index(bytes.fromhex(data)) 183 | assert str(exc_info.value) == message 184 | 185 | 186 | XZ_FOOTER_CASES = ( 187 | # all have backward_size=8 (i.e. no blocks) 188 | pytest.param(CHECK_NONE, "06729e7a010000000000595a", id="check_none"), 189 | pytest.param(CHECK_CRC32, "9042990d010000000001595a", id="check_crc32"), 190 | pytest.param(CHECK_CRC64, "1fb6f37d010000000004595a", id="check_crc64"), 191 | pytest.param(CHECK_SHA256, "189b4b9a01000000000a595a", id="check_sha256"), 192 | ) 193 | 194 | 195 | @pytest.mark.parametrize("check, data", XZ_FOOTER_CASES) 196 | def test_create_xz_footer(check: int, data: str) -> None: 197 | assert create_xz_index_footer(check, [])[-12:] == bytes.fromhex(data) 198 | 199 | 200 | def test_create_xz_footer_invalid_check() -> None: 201 | with pytest.raises(XZError) as exc_info: 202 | create_xz_index_footer(17, []) 203 | assert str(exc_info.value) == "footer check" 204 | 205 | 206 | @pytest.mark.parametrize("check, data", XZ_FOOTER_CASES) 207 | def test_parse_xz_footer(check: int, data: str) -> None: 208 | assert parse_xz_footer(bytes.fromhex(data)) == (check, 8) 209 | 210 | 211 | @pytest.mark.parametrize( 212 | "data, message", 213 | ( 214 | ("009042990d010000000001595a", "footer length"), 215 | ("9042990d0100000000015959", "footer magic"), 216 | ("9042090d010000000001595a", "footer crc32"), 217 | ("f4522e10010000000011595a", "footer flags"), 218 | ("d1738214010000000101595a", "footer flags"), 219 | ("c1505b47010000001001595a", "footer flags"), 220 | ), 221 | ) 222 | def test_parse_xz_footer_invalid(data: str, message: str) -> None: 223 | with 
pytest.raises(XZError) as exc_info: 224 | parse_xz_footer(bytes.fromhex(data)) 225 | assert str(exc_info.value) == message 226 | 227 | 228 | def test_default_check_supported() -> None: 229 | assert is_check_supported(DEFAULT_CHECK) 230 | -------------------------------------------------------------------------------- /tests/unit/test_floordict.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | import pytest 4 | 5 | from xz.utils import FloorDict 6 | 7 | 8 | def expect_floor_dict(floordict: FloorDict[str], items: Dict[int, str]) -> None: 9 | sorted_keys = sorted(items) 10 | assert len(floordict) == len(items) 11 | assert list(floordict) == sorted_keys 12 | assert list(floordict.keys()) == sorted_keys 13 | assert list(floordict.values()) == [items[key] for key in sorted_keys] 14 | assert list(floordict.items()) == [(key, items[key]) for key in sorted_keys] 15 | # pylint: disable=protected-access 16 | assert floordict._keys == sorted_keys 17 | assert floordict._dict == items 18 | 19 | 20 | def test_empty() -> None: 21 | floordict = FloorDict[str]() 22 | 23 | expect_floor_dict(floordict, {}) 24 | 25 | with pytest.raises(KeyError): 26 | floordict[0] # pylint: disable=pointless-statement 27 | with pytest.raises(KeyError): 28 | floordict[42] # pylint: disable=pointless-statement 29 | with pytest.raises(KeyError): 30 | floordict.last_key # pylint: disable=pointless-statement 31 | with pytest.raises(KeyError): 32 | floordict.last_item # pylint: disable=pointless-statement 33 | 34 | 35 | def test_normal() -> None: 36 | floordict = FloorDict[str]() 37 | floordict[10] = "ten" 38 | floordict[50] = "fifty" 39 | with pytest.raises(TypeError): 40 | floordict["wrong type"] = "wrong type" # type: ignore[index] 41 | 42 | expect_floor_dict(floordict, {10: "ten", 50: "fifty"}) 43 | 44 | assert floordict[10] == "ten" 45 | assert floordict.last_key == 50 46 | assert floordict.last_item == "fifty" 47 | 48 | assert 
floordict[42] == "ten" 49 | assert floordict.get_with_index(42) == (10, "ten") 50 | 51 | assert floordict[50] == "fifty" 52 | assert floordict[1337] == "fifty" 53 | assert floordict.get(0) is None 54 | with pytest.raises(KeyError): 55 | floordict[0] # pylint: disable=pointless-statement 56 | assert floordict.get(7) is None 57 | with pytest.raises(KeyError): 58 | floordict[7] # pylint: disable=pointless-statement 59 | with pytest.raises(KeyError): 60 | floordict[-42] # pylint: disable=pointless-statement 61 | with pytest.raises(TypeError): 62 | # pylint: disable=pointless-statement 63 | floordict["wrong type"] # type: ignore[index] 64 | 65 | 66 | def test_override() -> None: 67 | floordict = FloorDict[str]() 68 | floordict[10] = "ten" 69 | floordict[20] = "twenty" 70 | floordict[30] = "thirty" 71 | 72 | expect_floor_dict(floordict, {10: "ten", 20: "twenty", 30: "thirty"}) 73 | 74 | floordict[20] = "two-ten" 75 | assert floordict[15] == "ten" 76 | assert floordict[20] == "two-ten" 77 | assert floordict[25] == "two-ten" 78 | assert floordict[50] == "thirty" 79 | 80 | expect_floor_dict(floordict, {10: "ten", 20: "two-ten", 30: "thirty"}) 81 | 82 | 83 | def test_del() -> None: 84 | floordict = FloorDict[str]() 85 | floordict[10] = "ten" 86 | floordict[20] = "twenty" 87 | floordict[30] = "thirty" 88 | assert floordict[20] == "twenty" 89 | assert floordict[22] == "twenty" 90 | expect_floor_dict(floordict, {10: "ten", 20: "twenty", 30: "thirty"}) 91 | 92 | del floordict[20] 93 | assert floordict[20] == "ten" 94 | assert floordict[22] == "ten" 95 | expect_floor_dict(floordict, {10: "ten", 30: "thirty"}) 96 | 97 | with pytest.raises(KeyError): 98 | del floordict[20] 99 | with pytest.raises(KeyError): 100 | del floordict[40] 101 | 102 | 103 | def test_pop() -> None: 104 | floordict = FloorDict[str]() 105 | floordict[10] = "ten" 106 | floordict[20] = "twenty" 107 | floordict[30] = "thirty" 108 | assert floordict[25] == "twenty" 109 | expect_floor_dict(floordict, {10: "ten", 
20: "twenty", 30: "thirty"}) 110 | 111 | with pytest.raises(KeyError): 112 | floordict.pop(25) 113 | 114 | assert floordict.pop(20) == "twenty" 115 | expect_floor_dict(floordict, {10: "ten", 30: "thirty"}) 116 | assert floordict[25] == "ten" 117 | 118 | 119 | def test_values() -> None: 120 | floordict = FloorDict[str]() 121 | expected = {} 122 | for i in range(50): 123 | floordict[i * 2] = str(i * 2) 124 | expected[i * 2] = str(i * 2) 125 | expect_floor_dict(floordict, expected) 126 | for j in range(100): 127 | value = min(i * 2, j - (j % 2)) 128 | assert floordict[j] == str(value) 129 | assert floordict.get_with_index(j) == (value, str(value)) 130 | -------------------------------------------------------------------------------- /tests/unit/test_ioabstract.py: -------------------------------------------------------------------------------- 1 | from io import DEFAULT_BUFFER_SIZE, UnsupportedOperation 2 | from pathlib import Path 3 | from typing import BinaryIO 4 | from unittest.mock import Mock, call 5 | 6 | import pytest 7 | 8 | from xz.io import IOAbstract 9 | 10 | # 11 | # len 12 | # 13 | 14 | 15 | def test_len() -> None: 16 | obj = IOAbstract(10) 17 | assert len(obj) == 10 18 | 19 | 20 | # 21 | # fileno 22 | # 23 | 24 | 25 | def test_fileno(tmp_path: Path) -> None: 26 | file_path = tmp_path / "file" 27 | file_path.write_bytes(b"abcd") 28 | 29 | class Impl(IOAbstract): 30 | def __init__(self, fileobj: BinaryIO) -> None: 31 | super().__init__(10) 32 | self.fileobj = fileobj 33 | 34 | with file_path.open("rb") as fin: 35 | obj = Impl(fin) 36 | assert obj.fileno() == fin.fileno() 37 | 38 | 39 | def test_fileno_ko() -> None: 40 | obj = IOAbstract(10) 41 | with pytest.raises(UnsupportedOperation): 42 | obj.fileno() 43 | 44 | 45 | # 46 | # tell / seek 47 | # 48 | 49 | 50 | def test_seek_not_seekable() -> None: 51 | class Impl(IOAbstract): 52 | def __init__(self) -> None: 53 | super().__init__(10) 54 | 55 | def seekable(self) -> bool: 56 | return False 57 | 58 | obj = 
Impl() 59 | assert obj.seekable() is False 60 | with pytest.raises(UnsupportedOperation) as exc_info: 61 | obj.seek(1) 62 | assert str(exc_info.value) == "seek" 63 | 64 | 65 | def test_tell_seek() -> None: 66 | obj = IOAbstract(10) 67 | assert obj.seekable() is True 68 | assert obj.tell() == 0 69 | 70 | # absolute (no whence) 71 | assert obj.seek(1) == 1 72 | assert obj.tell() == 1 73 | assert obj.seek(3) == 3 74 | assert obj.tell() == 3 75 | assert obj.seek(10) == 10 76 | assert obj.tell() == 10 77 | with pytest.raises(ValueError) as exc_info: 78 | obj.seek(-1) 79 | assert str(exc_info.value) == "invalid seek position" 80 | assert obj.seek(42) == 42 81 | assert obj.tell() == 42 82 | 83 | # absolute (with whence) 84 | assert obj.seek(5, 0) == 5 85 | assert obj.tell() == 5 86 | assert obj.seek(10, 0) == 10 87 | assert obj.tell() == 10 88 | with pytest.raises(ValueError) as exc_info: 89 | obj.seek(-1, 0) 90 | assert str(exc_info.value) == "invalid seek position" 91 | assert obj.seek(42, 0) == 42 92 | assert obj.tell() == 42 93 | 94 | # relative 95 | assert obj.seek(10) == 10 96 | assert obj.seek(-7, 1) == 3 97 | assert obj.tell() == 3 98 | assert obj.seek(2, 1) == 5 99 | assert obj.tell() == 5 100 | with pytest.raises(ValueError) as exc_info: 101 | obj.seek(-6, 1) 102 | assert str(exc_info.value) == "invalid seek position" 103 | assert obj.tell() == 5 104 | assert obj.seek(37, 1) == 42 105 | assert obj.tell() == 42 106 | 107 | # from end 108 | assert obj.seek(0, 2) == 10 109 | assert obj.tell() == 10 110 | assert obj.seek(-4, 2) == 6 111 | assert obj.tell() == 6 112 | assert obj.seek(-10, 2) == 0 113 | assert obj.tell() == 0 114 | assert obj.seek(32, 2) == 42 115 | assert obj.tell() == 42 116 | with pytest.raises(ValueError) as exc_info: 117 | obj.seek(-11, 2) 118 | assert str(exc_info.value) == "invalid seek position" 119 | 120 | # from error 121 | with pytest.raises(ValueError) as exc_info: 122 | obj.seek(42, 3) 123 | assert str(exc_info.value) == "unsupported 
whence value" 124 | 125 | # seek after close 126 | obj.close() 127 | with pytest.raises(ValueError) as exc_info: 128 | obj.seek(0) 129 | assert str(exc_info.value) == "I/O operation on closed file" 130 | 131 | 132 | # 133 | # read 134 | # 135 | 136 | 137 | def test_read_non_readable() -> None: 138 | class Impl(IOAbstract): 139 | def __init__(self) -> None: 140 | super().__init__(10) 141 | 142 | def readable(self) -> bool: 143 | return False 144 | 145 | obj = Impl() 146 | assert obj.readable() is False 147 | with pytest.raises(UnsupportedOperation) as exc_info: 148 | obj.read(1) 149 | assert str(exc_info.value) == "read" 150 | 151 | 152 | def test_tell_read() -> None: 153 | class Impl(IOAbstract): 154 | def __init__(self) -> None: 155 | super().__init__(10) 156 | 157 | def _read(self, size: int) -> bytes: 158 | # for tests, does not rely on position 159 | return b"xyz"[:size] 160 | 161 | def _write_after(self) -> None: 162 | raise RuntimeError("should not be called") 163 | 164 | obj = Impl() 165 | assert obj.tell() == 0 166 | 167 | # read all 168 | assert obj.read() == b"xyzxyzxyzx" 169 | obj.seek(5) 170 | assert obj.read() == b"xyzxy" 171 | 172 | # read from pos 173 | obj.seek(3) 174 | assert obj.read(2) == b"xy" 175 | assert obj.read(2) == b"xy" 176 | assert obj.read(2) == b"xy" 177 | assert obj.read(2) == b"x" 178 | assert obj.read(2) == b"" 179 | assert obj.read(2) == b"" 180 | 181 | # read from after EOF 182 | obj.seek(11) 183 | assert obj.read(2) == b"" 184 | 185 | # read after close 186 | obj.close() 187 | with pytest.raises(ValueError) as exc_info: 188 | obj.read(1) 189 | assert str(exc_info.value) == "I/O operation on closed file" 190 | 191 | 192 | def test_tell_read_empty() -> None: 193 | class Impl(IOAbstract): 194 | def __init__(self) -> None: 195 | super().__init__(10) 196 | self.empty_reads = 100 197 | 198 | def _read(self, size: int) -> bytes: 199 | self.empty_reads -= 1 200 | if self.empty_reads > 0: 201 | return b"" 202 | return b"a" 203 | 204 | obj 
= Impl() 205 | assert obj.tell() == 0 206 | assert obj.read() == b"aaaaaaaaaa" 207 | 208 | 209 | # 210 | # write 211 | # 212 | 213 | 214 | def test_write_non_writeable() -> None: 215 | class Impl(IOAbstract): 216 | def __init__(self) -> None: 217 | super().__init__(10) 218 | 219 | def writable(self) -> bool: 220 | return False 221 | 222 | with Impl() as obj: 223 | assert obj.writable() is False 224 | with pytest.raises(UnsupportedOperation) as exc_info: 225 | obj.write(b"hello") 226 | assert str(exc_info.value) == "write" 227 | 228 | 229 | @pytest.mark.parametrize("write_partial", (True, False)) 230 | def test_write_full(write_partial: bool) -> None: 231 | class Impl(IOAbstract): 232 | def __init__(self) -> None: 233 | super().__init__(10) 234 | self.mock = Mock() 235 | 236 | def _write_before(self) -> None: 237 | self.mock.write_start() 238 | 239 | def _write_after(self) -> None: 240 | self.mock.write_finish() 241 | 242 | def _write(self, data: bytes) -> int: 243 | self.mock.write(bytes(data)) 244 | if write_partial: 245 | return min(2, len(data)) 246 | return len(data) 247 | 248 | with Impl() as obj: 249 | # write before end 250 | obj.seek(5) 251 | with pytest.raises(ValueError) as exc_info: 252 | obj.write(b"abcdef") 253 | assert str(exc_info.value) == "write is only supported from EOF" 254 | assert not obj.mock.called 255 | 256 | # write at end 257 | obj.seek(10) 258 | assert obj.write(b"") == 0 259 | assert obj.tell() == 10 260 | assert not obj.mock.called 261 | assert obj.write(b"ghijkl") == 6 262 | assert obj.tell() == 16 263 | if write_partial: 264 | assert obj.mock.method_calls == [ 265 | call.write_start(), 266 | call.write(b"ghijkl"), 267 | call.write(b"ijkl"), 268 | call.write(b"kl"), 269 | ] 270 | else: 271 | assert obj.mock.method_calls == [ 272 | call.write_start(), 273 | call.write(b"ghijkl"), 274 | ] 275 | obj.mock.reset_mock() 276 | 277 | # write after end 278 | obj.seek(20) 279 | assert obj.write(b"mnopq") == 5 280 | assert obj.tell() == 25 281 | 
if write_partial: 282 | assert obj.mock.method_calls == [ 283 | call.write(b"\x00\x00\x00\x00"), 284 | call.write(b"\x00\x00"), 285 | call.write(b"mnopq"), 286 | call.write(b"opq"), 287 | call.write(b"q"), 288 | ] 289 | else: 290 | assert obj.mock.method_calls == [ 291 | call.write(b"\x00\x00\x00\x00"), 292 | call.write(b"mnopq"), 293 | ] 294 | obj.mock.reset_mock() 295 | 296 | # (big) write nothing after end (used e.g. by truncate) 297 | limit = 30 if write_partial else int(DEFAULT_BUFFER_SIZE * 3.7) 298 | obj.seek(limit) 299 | assert obj.write(b"") == 0 300 | assert obj.tell() == limit 301 | if write_partial: 302 | assert obj.mock.method_calls == [ 303 | call.write(b"\x00\x00\x00\x00\x00"), 304 | call.write(b"\x00\x00\x00"), 305 | call.write(b"\x00"), 306 | ] 307 | else: 308 | assert obj.mock.method_calls == [ 309 | call.write(b"\x00" * DEFAULT_BUFFER_SIZE), 310 | call.write(b"\x00" * DEFAULT_BUFFER_SIZE), 311 | call.write(b"\x00" * DEFAULT_BUFFER_SIZE), 312 | call.write(b"\x00" * (limit - 3 * DEFAULT_BUFFER_SIZE - 25)), 313 | ] 314 | obj.mock.reset_mock() 315 | 316 | # close calls write_finish once 317 | obj.close() 318 | assert obj.mock.method_calls == [call.write_finish()] 319 | obj.mock.reset_mock() 320 | obj.close() 321 | assert not obj.mock.method_calls 322 | obj.close() 323 | 324 | # write after close 325 | with pytest.raises(ValueError) as exc_info: 326 | obj.write(b"xyz") 327 | assert str(exc_info.value) == "I/O operation on closed file" 328 | 329 | 330 | # 331 | # truncate 332 | # 333 | 334 | 335 | def test_truncate_non_writeable() -> None: 336 | class Impl(IOAbstract): 337 | def __init__(self) -> None: 338 | super().__init__(10) 339 | 340 | def writable(self) -> bool: 341 | return False 342 | 343 | with Impl() as obj: 344 | assert obj.writable() is False 345 | with pytest.raises(UnsupportedOperation) as exc_info: 346 | obj.truncate(4) 347 | assert str(exc_info.value) == "truncate" 348 | 349 | 350 | @pytest.mark.parametrize("with_size", (True, False)) 
def test_truncate_with_size(with_size: bool) -> None:
    """Check ``truncate`` on an ``IOAbstract`` subclass that records its hooks.

    When ``with_size`` is true the target size is handed to ``truncate``
    directly; otherwise the test first seeks to the target and then calls
    ``truncate()`` without an argument.
    """

    class Recorder(IOAbstract):
        """Length-10 ``IOAbstract`` whose hooks log themselves on a ``Mock``."""

        def __init__(self) -> None:
            super().__init__(10)
            self.mock = Mock()

        def _write_before(self) -> None:
            self.mock.write_start()

        def _write_after(self) -> None:
            self.mock.write_finish()

        def _write(self, data: bytes) -> int:
            # Nothing in this test is expected to reach the write hook.
            raise RuntimeError("should not be called")

        def _truncate(self, size: int) -> None:
            self.mock.truncate(size)

    with Recorder() as stream:
        stream.seek(7)
        assert not stream.mock.method_calls

        def resize(target: int) -> int:
            # Size-less variant: move to the target first, then truncate there.
            if not with_size:
                stream.seek(target)
                return stream.truncate()
            return stream.truncate(target)

        # truncate before start
        with pytest.raises(ValueError) as raised:
            stream.truncate(-1)
        assert str(raised.value) == "invalid truncate size"
        assert not stream.mock.method_calls

        # truncate before end
        assert resize(5) == 5
        if with_size:
            # an explicit size must leave the position where seek(7) put it
            assert stream.tell() == 7
        assert len(stream) == 5
        assert stream.mock.method_calls == [call.write_start(), call.truncate(5)]
        stream.mock.reset_mock()

        # truncate at end
        assert resize(5) == 5
        if with_size:
            assert stream.tell() == 7
        assert len(stream) == 5
        assert not stream.mock.method_calls
        stream.mock.reset_mock()

        # truncate after end
        assert resize(20) == 20
        if with_size:
            assert stream.tell() == 7
        assert len(stream) == 20
        assert stream.mock.method_calls == [call.truncate(20)]
        stream.mock.reset_mock()

        # close calls write_finish once
        stream.close()
        assert stream.mock.method_calls == [call.write_finish()]
        stream.mock.reset_mock()
        stream.close()
        assert not stream.mock.method_calls

        # truncate after close
        with pytest.raises(ValueError) as raised:
            stream.truncate(5)
        assert str(raised.value) == "I/O operation on closed file"
417 | -------------------------------------------------------------------------------- /tests/unit/test_iocombiner.py: -------------------------------------------------------------------------------- 1 | from io import SEEK_SET, BytesIO 2 | from typing import List, cast 3 | from unittest.mock import Mock, call 4 | 5 | import pytest 6 | 7 | from xz.io import IOAbstract, IOCombiner, IOProxy 8 | 9 | 10 | def generate_mock(length: int) -> Mock: 11 | mock = Mock() 12 | mock.__class__ = cast(Mock, IOAbstract) # needs to be subclass of IOAbstract 13 | mock._length = length # pylint: disable=protected-access 14 | mock.__len__ = lambda s: s._length # pylint: disable=protected-access 15 | 16 | def write(data: bytes) -> int: 17 | mock._length += len(data) 18 | return len(data) 19 | 20 | mock.write.side_effect = write 21 | mock.writable.return_value = True 22 | return mock 23 | 24 | 25 | # 26 | # tell / seek 27 | # 28 | 29 | 30 | def test_seek() -> None: 31 | originals = [ 32 | generate_mock(2), 33 | generate_mock(0), 34 | generate_mock(8), 35 | ] 36 | combiner = IOCombiner(*originals) 37 | 38 | assert combiner.tell() == 0 39 | assert combiner.seek(7) == 7 40 | assert combiner.tell() == 7 41 | 42 | for original in originals: 43 | assert not original.method_calls # did not touch original 44 | 45 | 46 | # 47 | # read 48 | # 49 | 50 | 51 | def test_read() -> None: 52 | originals: List[IOAbstract] = [ 53 | IOProxy(BytesIO(b"abc"), 0, 3), 54 | generate_mock(0), # size 0, will be never used 55 | IOProxy(BytesIO(b"defghij"), 0, 7), 56 | ] 57 | combiner = IOCombiner(*originals) 58 | 59 | # read all 60 | originals[0].seek(2) 61 | originals[2].seek(4) 62 | combiner.seek(0) 63 | assert combiner.read() == b"abcdefghij" 64 | assert originals[0].tell() == 3 65 | assert originals[2].tell() == 7 66 | combiner.seek(4) 67 | assert combiner.read() == b"efghij" 68 | assert originals[0].tell() == 3 69 | assert originals[2].tell() == 7 70 | 71 | # read partial 72 | originals[0].seek(2) 73 | 
originals[2].seek(4) 74 | combiner.seek(1) 75 | assert combiner.read(6) == b"bcdefg" 76 | assert originals[0].tell() == 3 77 | assert originals[2].tell() == 4 78 | assert combiner.read(6) == b"hij" 79 | assert originals[0].tell() == 3 80 | assert originals[2].tell() == 7 81 | assert combiner.read(6) == b"" 82 | assert originals[0].tell() == 3 83 | assert originals[2].tell() == 7 84 | assert combiner.read(6) == b"" 85 | assert originals[0].tell() == 3 86 | assert originals[2].tell() == 7 87 | 88 | # with original seek 89 | combiner.seek(1) 90 | originals[0].seek(2) 91 | originals[2].seek(4) 92 | assert combiner.read(5) == b"bcdef" 93 | assert originals[0].tell() == 3 94 | assert originals[2].tell() == 3 95 | 96 | # never used at all 97 | assert not cast(Mock, originals[1]).method_calls 98 | 99 | 100 | # 101 | # write 102 | # 103 | 104 | 105 | def test_write() -> None: 106 | parts = [] 107 | 108 | class Combiner(IOCombiner[IOAbstract]): 109 | def _create_fileobj(self) -> IOAbstract: 110 | fileobj = generate_mock(0) 111 | parts.append(fileobj) 112 | return fileobj 113 | 114 | with Combiner() as combiner: 115 | assert combiner.writable() 116 | assert len(parts) == 0 117 | 118 | # create new from scratch 119 | combiner.write(b"abc") 120 | assert len(parts) == 1 121 | assert parts[0].method_calls == [ 122 | call.seek(0, SEEK_SET), 123 | call.write(memoryview(b"abc")), 124 | ] 125 | parts[0].method_calls.clear() 126 | 127 | combiner.write(b"def") 128 | assert len(parts) == 1 129 | assert parts[0].method_calls == [ 130 | call.seek(3, SEEK_SET), 131 | call.writable(), 132 | call.write(memoryview(b"def")), 133 | ] 134 | parts[0].method_calls.clear() 135 | 136 | combiner.seek(8) 137 | combiner.write(b"ghi") 138 | assert len(parts) == 1 139 | assert parts[0].method_calls == [ 140 | call.seek(6, SEEK_SET), 141 | call.writable(), 142 | call.write(memoryview(b"\x00\x00")), 143 | call.seek(8, SEEK_SET), 144 | call.writable(), 145 | call.write(memoryview(b"ghi")), 146 | ] 147 | 
parts[0].method_calls.clear() 148 | 149 | # not writable anymore -> create new fileobj 150 | parts[0].writable.return_value = False 151 | combiner.write(b"jkl") 152 | assert len(parts) == 2 153 | assert parts[0].method_calls == [ 154 | call.seek(11, SEEK_SET), 155 | call.writable(), 156 | call.writable(), 157 | ] 158 | assert parts[1].method_calls == [ 159 | call.seek(0, SEEK_SET), 160 | call.write(memoryview(b"jkl")), 161 | ] 162 | parts[0].method_calls.clear() 163 | parts[1].method_calls.clear() 164 | 165 | combiner.write(b"mno") 166 | assert len(parts) == 2 167 | assert not parts[0].method_calls 168 | assert parts[1].method_calls == [ 169 | call.seek(3, SEEK_SET), 170 | call.writable(), 171 | call.write(memoryview(b"mno")), 172 | ] 173 | parts[1].method_calls.clear() 174 | 175 | # force change fileobj 176 | combiner._change_fileobj() # pylint: disable=protected-access 177 | assert len(parts) == 3 178 | assert not parts[0].method_calls 179 | assert parts[1].method_calls == [ 180 | call.writable(), 181 | call._write_end(), # pylint: disable=protected-access 182 | ] 183 | assert not parts[2].method_calls 184 | parts[1].method_calls.clear() 185 | 186 | # force change fileobj again 187 | combiner._change_fileobj() # pylint: disable=protected-access 188 | assert len(parts) == 4 189 | assert not parts[0].method_calls 190 | assert not parts[1].method_calls 191 | assert not parts[2].method_calls # no call to _write_end 192 | assert not parts[3].method_calls 193 | parts[1].method_calls.clear() 194 | 195 | combiner.write(b"pqr") 196 | assert len(parts) == 4 197 | assert not parts[0].method_calls 198 | assert not parts[1].method_calls 199 | assert not parts[2].method_calls 200 | assert parts[3].method_calls == [ 201 | call.seek(0, SEEK_SET), 202 | call.writable(), 203 | call.write(memoryview(b"pqr")), 204 | ] 205 | parts[3].method_calls.clear() 206 | 207 | # don't create fileobj if write nothing 208 | parts[1].writable.return_value = False 209 | combiner.write(b"") 210 | 
assert len(parts) == 4 211 | assert not parts[0].method_calls 212 | assert not parts[1].method_calls 213 | assert not parts[2].method_calls 214 | assert not parts[3].method_calls 215 | 216 | # check write_finish 217 | assert not parts[0].method_calls 218 | assert not parts[1].method_calls 219 | assert not parts[2].method_calls 220 | assert parts[3].method_calls == [ 221 | call._write_end(), # pylint: disable=protected-access 222 | ] 223 | 224 | # check if last fileobj is empty no calls to _write_end 225 | with Combiner() as combiner: 226 | combiner.write(b"abc") 227 | combiner._change_fileobj() # pylint: disable=protected-access 228 | parts[0].method_calls.clear() 229 | assert not parts[1].method_calls 230 | assert not parts[0].method_calls 231 | assert not parts[1].method_calls # no calls to _write_end 232 | 233 | 234 | # 235 | # truncate 236 | # 237 | 238 | 239 | def test_truncate() -> None: 240 | # pylint: disable=protected-access 241 | originals = [ 242 | generate_mock(2), 243 | generate_mock(0), 244 | generate_mock(8), 245 | generate_mock(10), 246 | generate_mock(20), 247 | ] 248 | 249 | with IOCombiner(*originals) as combiner: 250 | # truncate between two boundaries 251 | combiner.truncate(17) 252 | assert originals[3].method_calls == [call.truncate(7)] 253 | assert not originals[4].method_calls 254 | assert dict(combiner._fileobjs) == { 255 | 0: originals[0], 256 | 2: originals[2], 257 | 10: originals[3], 258 | } 259 | originals[3].reset_mock() 260 | 261 | # truncate after length 262 | combiner.truncate(42) 263 | assert originals[3].method_calls == [call.truncate(32)] 264 | assert dict(combiner._fileobjs) == { 265 | 0: originals[0], 266 | 2: originals[2], 267 | 10: originals[3], 268 | } 269 | originals[3].reset_mock() 270 | 271 | # truncate at boundary 272 | combiner.truncate(10) 273 | assert dict(combiner._fileobjs) == { 274 | 0: originals[0], 275 | 2: originals[2], 276 | } 277 | assert not originals[2].method_calls 278 | assert not 
originals[3].method_calls 279 | 280 | # truncate at boundary 281 | combiner.truncate(2) 282 | assert dict(combiner._fileobjs) == { 283 | 0: originals[0], 284 | } 285 | assert not originals[0].method_calls 286 | assert not originals[1].method_calls 287 | assert not originals[2].method_calls 288 | 289 | # truncate at start 290 | combiner.truncate(0) 291 | assert not dict(combiner._fileobjs) 292 | assert not originals[0].method_calls 293 | 294 | 295 | # 296 | # append 297 | # 298 | 299 | 300 | def test_append() -> None: 301 | combiner = IOCombiner[IOAbstract](generate_mock(13), generate_mock(37)) 302 | assert len(combiner) == 50 303 | combiner._append( # pylint: disable=protected-access 304 | IOProxy(BytesIO(b"abcdefghij"), 0, 10) 305 | ) 306 | assert len(combiner) == 60 307 | combiner.seek(54) 308 | assert combiner.read(4) == b"efgh" 309 | 310 | 311 | def test_append_invalid() -> None: 312 | combiner = IOCombiner[IOAbstract](generate_mock(13), generate_mock(37)) 313 | assert len(combiner) == 50 314 | with pytest.raises(TypeError): 315 | # pylint: disable=protected-access 316 | combiner._append(BytesIO(b"abcdefghij")) # type: ignore[arg-type] 317 | -------------------------------------------------------------------------------- /tests/unit/test_ioproxy.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | from pathlib import Path 3 | from unittest.mock import Mock, call 4 | 5 | from xz.io import IOProxy 6 | 7 | 8 | def test_fileno(tmp_path: Path) -> None: 9 | file_path = tmp_path / "file" 10 | file_path.write_bytes(b"abcd") 11 | 12 | with file_path.open("rb") as fin: 13 | obj = IOProxy(fin, 1, 3) 14 | assert obj.fileno() == fin.fileno() 15 | 16 | 17 | def test_seek() -> None: 18 | original = Mock() 19 | proxy = IOProxy(original, 4, 14) 20 | 21 | assert proxy.tell() == 0 22 | assert proxy.seek(7) == 7 23 | assert proxy.tell() == 7 24 | 25 | assert not original.method_calls # did not touch original 26 | 27 | 
28 | def test_read() -> None: 29 | original = BytesIO(b"xxxxabcdefghijyyyyy") 30 | proxy = IOProxy(original, 4, 14) 31 | 32 | # read all 33 | original.seek(2) 34 | proxy.seek(0) 35 | assert proxy.read() == b"abcdefghij" 36 | assert original.tell() == 14 37 | proxy.seek(4) 38 | assert proxy.read() == b"efghij" 39 | assert original.tell() == 14 40 | 41 | # read partial 42 | original.seek(2) 43 | proxy.seek(6) 44 | assert proxy.read(3) == b"ghi" 45 | assert original.tell() == 13 46 | assert proxy.read(3) == b"j" 47 | assert original.tell() == 14 48 | assert proxy.read(3) == b"" 49 | assert original.tell() == 14 50 | assert proxy.read(3) == b"" 51 | assert original.tell() == 14 52 | 53 | # with original seek 54 | original.seek(2) 55 | proxy.seek(4) 56 | original.seek(1) 57 | assert proxy.read() == b"efghij" 58 | assert original.tell() == 14 59 | 60 | 61 | def test_write() -> None: 62 | original = BytesIO(b"xxxxabcdefghijyyyyy") 63 | with IOProxy(original, 4, 14) as proxy: 64 | proxy.seek(10) 65 | 66 | assert proxy.write(b"uvw") == 3 67 | assert original.getvalue() == b"xxxxabcdefghijuvwyy" 68 | 69 | assert proxy.write(b"UVWXYZ") == 6 70 | assert original.getvalue() == b"xxxxabcdefghijuvwUVWXYZ" 71 | 72 | 73 | def test_truncate() -> None: 74 | original = Mock() 75 | with IOProxy(original, 4, 14) as proxy: 76 | assert proxy.truncate(5) == 5 77 | assert original.method_calls == [call.truncate(9)] 78 | original.reset_mock() 79 | 80 | assert proxy.truncate(20) == 20 81 | assert original.method_calls == [call.truncate(24)] 82 | -------------------------------------------------------------------------------- /tests/unit/test_iostatic.py: -------------------------------------------------------------------------------- 1 | from io import UnsupportedOperation 2 | 3 | import pytest 4 | 5 | from xz.io import IOStatic 6 | 7 | 8 | def test_read() -> None: 9 | static = IOStatic(b"abcdefghij") 10 | 11 | # read all 12 | static.seek(0) 13 | assert static.read() == b"abcdefghij" 14 | 
static.seek(4) 15 | assert static.read() == b"efghij" 16 | 17 | # read partial 18 | static.seek(6) 19 | assert static.read(3) == b"ghi" 20 | assert static.read(3) == b"j" 21 | assert static.read(3) == b"" 22 | assert static.read(3) == b"" 23 | 24 | 25 | def test_write() -> None: 26 | with IOStatic(b"abc") as static: 27 | assert static.writable() is False 28 | static.seek(3) 29 | with pytest.raises(UnsupportedOperation): 30 | static.write(b"def") 31 | 32 | 33 | def test_truncate() -> None: 34 | with IOStatic(b"abc") as static: 35 | assert static.writable() is False 36 | with pytest.raises(UnsupportedOperation): 37 | static.truncate() 38 | -------------------------------------------------------------------------------- /tests/unit/test_open.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | import lzma 3 | from pathlib import Path 4 | from typing import List, Optional 5 | from unittest.mock import Mock 6 | 7 | import pytest 8 | 9 | from xz.open import xz_open 10 | from xz.strategy import RollingBlockReadStrategy 11 | 12 | # a stream with two blocks (lengths: 10, 3) 13 | # one UTF8 character is between the two blocks 14 | STREAM_BYTES = bytes.fromhex( 15 | "fd377a585a000004e6d6b446" 16 | "0200210116000000742fe5a3010009e299a5207574663820e2000000404506004bafe33d" 17 | "0200210116000000742fe5a301000299a50a0000c6687a2b8dbda0cf" 18 | "0002220a1b0300001b1c3777" 19 | "b1c467fb020000000004595a" 20 | ) 21 | 22 | 23 | # 24 | # read 25 | # 26 | 27 | 28 | def test_mode_rb() -> None: 29 | fileobj = BytesIO(STREAM_BYTES) 30 | 31 | with xz_open(fileobj, "rb") as xzfile: 32 | assert xzfile.mode == "r" 33 | assert len(xzfile) == 13 34 | assert xzfile.stream_boundaries == [0] 35 | assert xzfile.block_boundaries == [0, 10] 36 | 37 | assert xzfile.read() == b"\xe2\x99\xa5 utf8 \xe2\x99\xa5\n" 38 | 39 | assert xzfile.seek(9) == 9 40 | assert xzfile.read() == b"\xe2\x99\xa5\n" 41 | 42 | 43 | def test_mode_rt() -> None: 44 | 
fileobj = BytesIO(STREAM_BYTES) 45 | 46 | with xz_open(fileobj, "rt") as xzfile: 47 | assert xzfile.mode == "rt" 48 | assert xzfile.stream_boundaries == [0] 49 | assert xzfile.block_boundaries == [0, 10] 50 | 51 | assert xzfile.read() == "♥ utf8 ♥\n" 52 | 53 | assert xzfile.seek(9) == 9 54 | assert xzfile.read() == "♥\n" 55 | 56 | 57 | def test_mode_rt_file(tmp_path: Path) -> None: 58 | file_path = tmp_path / "file.xz" 59 | file_path.write_bytes(STREAM_BYTES) 60 | 61 | with file_path.open("rb") as fin: 62 | with xz_open(fin, "rt") as xzfile: 63 | assert xzfile.stream_boundaries == [0] 64 | assert xzfile.block_boundaries == [0, 10] 65 | assert xzfile.fileno() == fin.fileno() 66 | 67 | assert xzfile.read() == "♥ utf8 ♥\n" 68 | 69 | assert xzfile.seek(9) == 9 70 | assert xzfile.read() == "♥\n" 71 | 72 | 73 | @pytest.mark.parametrize( 74 | "encoding, expected", 75 | ( 76 | pytest.param("utf8", "еñϲоԺε", id="utf8"), 77 | pytest.param("latin1", "еñϲоԺε", id="latin1"), 78 | ), 79 | ) 80 | def test_mode_rt_encoding(encoding: str, expected: str) -> None: 81 | fileobj = BytesIO( 82 | bytes.fromhex( 83 | "fd377a585a000000ff12d9410200210116000000742fe5a301000bd0b5c3b1cf" 84 | "b2d0bed4baceb50000011c0c5da447cf06729e7a010000000000595a" 85 | ) 86 | ) 87 | with xz_open(fileobj, "rt", encoding=encoding) as xzfile: 88 | assert xzfile.read() == expected 89 | 90 | 91 | @pytest.mark.parametrize( 92 | "errors, expected", 93 | ( 94 | pytest.param(None, None, id="None"), 95 | pytest.param("strict", None, id="strict"), 96 | pytest.param("ignore", "encoding", id="ignore"), 97 | pytest.param("replace", "en�co�di�ng", id="replace"), 98 | pytest.param( 99 | "backslashreplace", r"en\x99co\x98di\x97ng", id="backslashreplace" 100 | ), 101 | ), 102 | ) 103 | def test_mode_rt_encoding_errors( 104 | errors: Optional[str], expected: Optional[str] 105 | ) -> None: 106 | fileobj = BytesIO( 107 | bytes.fromhex( 108 | "fd377a585a000000ff12d9410200210116000000742fe5a301000a656e99636f" 109 | 
"986469976e67000000011b0b39a7621e06729e7a010000000000595a"
        )
    )

    with xz_open(fileobj, "rt", errors=errors) as xzfile:
        if expected is None:
            with pytest.raises(ValueError):
                xzfile.read()
        else:
            assert xzfile.read() == expected


@pytest.mark.parametrize(
    "newline, expected",
    (
        pytest.param(None, ["a\n", "b\n", "c\n", "d"], id="None"),
        pytest.param("", ["a\n", "b\r", "c\r\n", "d"], id="''"),
        pytest.param("\n", ["a\n", "b\rc\r\n", "d"], id="'\n'"),
        pytest.param("\r", ["a\nb\r", "c\r", "\nd"], id="'\r'"),
        pytest.param("\r\n", ["a\nb\rc\r\n", "d"], id="'\r\n'"),
    ),
)
def test_mode_rt_newline(newline: Optional[str], expected: List[str]) -> None:
    """Check how ``readlines()`` splits text depending on ``newline``.

    The same compressed fixture is read with every ``newline`` value; only
    the way the text is cut into lines (and, for ``None``, translated) is
    expected to differ.
    """
    # single xz stream; the expectations above imply a payload that mixes
    # LF, CR and CRLF line endings
    fileobj = BytesIO(
        bytes.fromhex(
            "fd377a585a000000ff12d9410200210116000000742fe5a3010007610a620d63"
            "0d0a64000001180840a546ac06729e7a010000000000595a"
        )
    )

    with xz_open(fileobj, "rt", newline=newline) as xzfile:
        assert xzfile.readlines() == expected


def test_mode_rb_encoding() -> None:
    """Passing ``encoding`` together with binary mode raises ``ValueError``."""
    fileobj = BytesIO(STREAM_BYTES)
    with pytest.raises(ValueError):
        xz_open(fileobj, "rb", encoding="latin1")


def test_mode_rb_encoding_errors() -> None:
    """Passing ``errors`` together with binary mode raises ``ValueError``."""
    fileobj = BytesIO(STREAM_BYTES)
    with pytest.raises(ValueError):
        xz_open(fileobj, "rb", errors="ignore")


def test_mode_rb_newline() -> None:
    """Passing ``newline`` together with binary mode raises ``ValueError``."""
    fileobj = BytesIO(STREAM_BYTES)
    with pytest.raises(ValueError):
        xz_open(fileobj, "rb", newline="\n")


#
# write
#

# Expected file content for test_mode_wb_check and test_mode_wt_check:
# four one-block streams, the last two written after ``check`` was changed.
TEST_MODE_W_CHECK_BYTES = bytes.fromhex(
    # stream 1
    "fd377a585a0000016922de36"
    "0200210116000000742fe5a3010001ceb1000000256bc6a8"
    "00011602d06110d2"
    "9042990d010000000001595a"
    # stream 2
    "fd377a585a0000016922de36"
    "0200210116000000742fe5a3010001ceb20000009f3acf31"
    "00011602d06110d2"
    "9042990d010000000001595a"
    # stream 3 (changed check)
    "fd377a585a000004e6d6b446"
    "0200210116000000742fe5a3010001ceb3000000ab6cffc6b19a1d23"
    "00011a02dc2ea57e"
    "1fb6f37d010000000004595a"
    # stream 4 (changed check)
    "fd377a585a000004e6d6b446"
    "0200210116000000742fe5a3010001ceb4000000accd9792dc23671f"
    "00011a02dc2ea57e"
    "1fb6f37d010000000004595a"
)


def test_mode_wb_check() -> None:
    """Write four streams in binary mode, changing ``check`` along the way.

    ``check`` is set to 4 right after the first ``change_stream()``; per the
    expected bytes, the stream written immediately afterwards still uses the
    initial check and only streams 3 and 4 carry the new one.
    """
    fileobj = BytesIO()

    with xz_open(fileobj, "wb", check=1) as xzfile:
        assert xzfile.mode == "w"
        xzfile.write(b"\xce\xb1")
        xzfile.change_stream()
        xzfile.check = 4
        xzfile.write(b"\xce\xb2")
        xzfile.change_stream()
        xzfile.write(b"\xce\xb3")
        xzfile.change_stream()
        xzfile.write(b"\xce\xb4")

    # compared only after the context manager has closed the file
    assert fileobj.getvalue() == TEST_MODE_W_CHECK_BYTES


def test_mode_wt_check() -> None:
    """Text-mode twin of ``test_mode_wb_check``.

    Writes the same sequence as ``str`` and expects exactly the same bytes.
    """
    fileobj = BytesIO()

    with xz_open(fileobj, "wt", check=1) as xzfile:
        assert xzfile.mode == "wt"
        xzfile.write("α")
        xzfile.change_stream()
        xzfile.check = 4
        xzfile.write("β")
        xzfile.change_stream()
        xzfile.write("γ")
        xzfile.change_stream()
        xzfile.write("δ")

    assert fileobj.getvalue() == TEST_MODE_W_CHECK_BYTES


# Expected file content for test_mode_wb_filters and test_mode_wt_filters:
# three streams (4 + 2 + 2 blocks); every block from stream 1 / block 3
# onwards is written with the changed filter chain.
TEST_MODE_W_FILTERS_BYTES = bytes.fromhex(
    ## stream 1
    # header
    "fd377a585a0000016922de36"
    # block 1
    "0200210116000000742fe5a3010001ceb1000000256bc6a8"
    # block 2
    "0200210116000000742fe5a3010001ceb20000009f3acf31"
    # block 3 (changed filters)
    "02010301002101167920c4ee010001cee5000000090ac846"
    # block 4 (changed filters)
    "02010301002101167920c4ee010001cee6000000aa9facd8"
    # index
    "0004160216021602160200008a2bb83b"
    # footer
    "9be35140030000000001595a"
    ## stream 2
    # header
    "fd377a585a0000016922de36"
    # block 1 (changed filters)
    "02010301002101167920c4ee010001cee70000003cafabaf"
    # block 2 (changed filters)
    "02010301002101167920c4ee010001cee800000086fea236"
    # index
    "00021602160200008ba0042b"
    # footer
    "3e300d8b020000000001595a"
    ## stream 3
    # header
    "fd377a585a0000016922de36"
    # block 1 (changed filters)
    "02010301002101167920c4ee010001cee900000010cea541"
    # block 2 (changed filters)
    "02010301002101167920c4ee010001ceea00000081d31ad1"
    # index
    "00021602160200008ba0042b"
    # footer
    "3e300d8b020000000001595a"
)


def test_mode_wb_filters() -> None:
    """Write several blocks and streams in binary mode, changing ``filters``.

    ``filters`` is replaced right after the first ``change_block()``; per the
    expected bytes, the block written immediately afterwards keeps the
    original filters and the new chain is used from the third block on,
    including in the following streams.
    """
    fileobj = BytesIO()

    with xz_open(fileobj, "wb", check=1) as xzfile:
        xzfile.write(b"\xce\xb1")
        xzfile.change_block()
        xzfile.filters = [{"id": 3, "dist": 1}, {"id": 33}]
        xzfile.write(b"\xce\xb2")
        xzfile.change_block()
        xzfile.write(b"\xce\xb3")
        xzfile.change_block()
        xzfile.write(b"\xce\xb4")
        xzfile.change_stream()
        xzfile.write(b"\xce\xb5")
        xzfile.change_block()
        xzfile.write(b"\xce\xb6")
        xzfile.change_stream()
        xzfile.write(b"\xce\xb7")
        xzfile.change_block()
        xzfile.write(b"\xce\xb8")

    assert fileobj.getvalue() == TEST_MODE_W_FILTERS_BYTES


def test_mode_wt_filters() -> None:
    """Text-mode twin of ``test_mode_wb_filters``.

    Writes the same sequence as ``str`` and expects exactly the same bytes.
    """
    fileobj = BytesIO()

    with xz_open(fileobj, "wt", check=1) as xzfile:
        xzfile.write("α")
        xzfile.change_block()
        xzfile.filters = [{"id": 3, "dist": 1}, {"id": 33}]
        xzfile.write("β")
        xzfile.change_block()
        xzfile.write("γ")
        xzfile.change_block()
        xzfile.write("δ")
        xzfile.change_stream()
        xzfile.write("ε")
        xzfile.change_block()
        xzfile.write("ζ")
        xzfile.change_stream()
        xzfile.write("η")
        xzfile.change_block()
        xzfile.write("θ")

    assert fileobj.getvalue() == TEST_MODE_W_FILTERS_BYTES


312 | TEST_MODE_W_PRESET_BYTES = bytes.fromhex( 313 | ## stream 1 314 | # header 315 | "fd377a585a0000016922de36" 316 | # block 1 317 | "0200210116000000742fe5a3010001ceb1000000256bc6a8" 318 | # block 2 319 | "0200210116000000742fe5a3010001ceb20000009f3acf31" 320 | # block 3 (changed preset) 321 | "020021011c00000010cf58cc010001ceb3000000090ac846" 322 | # block 4 (changed preset) 323 | "020021011c00000010cf58cc010001ceb4000000aa9facd8" 324 | # index 325 | "0004160216021602160200008a2bb83b" 326 | # footer 327 | "9be35140030000000001595a" 328 | ## stream 2 329 | # header 330 | "fd377a585a0000016922de36" 331 | # block 1 (changed preset) 332 | "020021011c00000010cf58cc010001ceb50000003cafabaf" 333 | # block 2 (changed preset) 334 | "020021011c00000010cf58cc010001ceb600000086fea236" 335 | # index 336 | "00021602160200008ba0042b" 337 | # footer 338 | "3e300d8b020000000001595a" 339 | ## stream 3 340 | # header 341 | "fd377a585a0000016922de36" 342 | # block 1 (changed preset) 343 | "020021011c00000010cf58cc010001ceb700000010cea541" 344 | # block 2 (changed preset) 345 | "020021011c00000010cf58cc010001ceb800000081d31ad1" 346 | # index 347 | "00021602160200008ba0042b" 348 | # footer 349 | "3e300d8b020000000001595a" 350 | ) 351 | 352 | 353 | def test_mode_wb_preset() -> None: 354 | fileobj = BytesIO() 355 | 356 | with xz_open(fileobj, "wb", check=1) as xzfile: 357 | xzfile.write(b"\xce\xb1") 358 | xzfile.change_block() 359 | xzfile.preset = 9 360 | xzfile.write(b"\xce\xb2") 361 | xzfile.change_block() 362 | xzfile.write(b"\xce\xb3") 363 | xzfile.change_block() 364 | xzfile.write(b"\xce\xb4") 365 | xzfile.change_stream() 366 | xzfile.write(b"\xce\xb5") 367 | xzfile.change_block() 368 | xzfile.write(b"\xce\xb6") 369 | xzfile.change_stream() 370 | xzfile.write(b"\xce\xb7") 371 | xzfile.change_block() 372 | xzfile.write(b"\xce\xb8") 373 | 374 | assert fileobj.getvalue() == TEST_MODE_W_PRESET_BYTES 375 | 376 | 377 | def test_mode_wt_preset() -> None: 378 | fileobj = BytesIO() 379 
| 380 | with xz_open(fileobj, "wt", check=1) as xzfile: 381 | xzfile.write("α") 382 | xzfile.change_block() 383 | xzfile.preset = 9 384 | xzfile.write("β") 385 | xzfile.change_block() 386 | xzfile.write("γ") 387 | xzfile.change_block() 388 | xzfile.write("δ") 389 | xzfile.change_stream() 390 | xzfile.write("ε") 391 | xzfile.change_block() 392 | xzfile.write("ζ") 393 | xzfile.change_stream() 394 | xzfile.write("η") 395 | xzfile.change_block() 396 | xzfile.write("θ") 397 | 398 | assert fileobj.getvalue() == TEST_MODE_W_PRESET_BYTES 399 | 400 | 401 | @pytest.mark.parametrize( 402 | "encoding, data", 403 | ( 404 | pytest.param("utf8", "еñϲоԺε", id="utf8"), 405 | pytest.param("latin1", "еñϲоԺε", id="latin1"), 406 | ), 407 | ) 408 | def test_mode_wt_encoding(encoding: str, data: str) -> None: 409 | fileobj = BytesIO() 410 | with xz_open(fileobj, "wt", check=0, encoding=encoding) as xzfile: 411 | xzfile.write(data) 412 | 413 | assert fileobj.getvalue() == bytes.fromhex( 414 | "fd377a585a000000ff12d9410200210116000000742fe5a301000bd0b5c3b1cf" 415 | "b2d0bed4baceb50000011c0c5da447cf06729e7a010000000000595a" 416 | ) 417 | 418 | 419 | @pytest.mark.parametrize( 420 | "errors, data", 421 | ( 422 | pytest.param(None, None, id="None"), 423 | pytest.param("strict", None, id="strict"), 424 | pytest.param( 425 | "ignore", 426 | b"encoding", 427 | id="ignore", 428 | ), 429 | pytest.param( 430 | "replace", 431 | b"en?co?di?ng", 432 | id="replace", 433 | ), 434 | pytest.param( 435 | "backslashreplace", 436 | rb"en\udc01co\udc02di\udc03ng", 437 | id="backslashreplace", 438 | ), 439 | ), 440 | ) 441 | def test_mode_wt_encoding_errors(errors: Optional[str], data: Optional[bytes]) -> None: 442 | fileobj = BytesIO() 443 | 444 | with xz_open(fileobj, "wt", errors=errors) as xzfile: 445 | if data is None: 446 | xzfile.write("X") # to avoid having an empty file 447 | with pytest.raises(ValueError): 448 | xzfile.write("en\udc01co\udc0di\udc03ng") 449 | else: 450 | 
xzfile.write("en\udc01co\udc02di\udc03ng") 451 | 452 | if data is not None: 453 | assert lzma.decompress(fileobj.getvalue()) == data 454 | 455 | 456 | @pytest.mark.parametrize( 457 | "newline, data", 458 | ( 459 | pytest.param(None, b"a\nb\n", id="None"), 460 | pytest.param("", b"a\nb\n", id="''"), 461 | pytest.param("\n", b"a\nb\n", id="'\n'"), 462 | pytest.param("\r", b"a\rb\r", id="'\r'"), 463 | pytest.param("\r\n", b"a\r\nb\r\n", id="'\r\n'"), 464 | ), 465 | ) 466 | def test_mode_wt_newline(newline: Optional[str], data: bytes) -> None: 467 | fileobj = BytesIO() 468 | 469 | with xz_open(fileobj, "wt", newline=newline) as xzfile: 470 | xzfile.writelines(["a\n", "b\n"]) 471 | 472 | assert lzma.decompress(fileobj.getvalue()) == data 473 | 474 | 475 | # 476 | # misc 477 | # 478 | 479 | 480 | @pytest.mark.parametrize("mode", ("rtb", "rbt", "wtb", "wbt")) 481 | def test_mode_invalid(mode: str) -> None: 482 | fileobj = BytesIO(STREAM_BYTES) 483 | 484 | with pytest.raises(ValueError) as exc_info: 485 | xz_open(fileobj, mode) 486 | assert str(exc_info.value) == f"Invalid mode: {mode}" 487 | 488 | 489 | @pytest.mark.parametrize("mode", ("r", "rt")) 490 | def test_default_strategy(mode: str) -> None: 491 | fileobj = BytesIO(STREAM_BYTES) 492 | 493 | with xz_open(fileobj, mode) as xzfile: 494 | assert isinstance(xzfile.block_read_strategy, RollingBlockReadStrategy) 495 | 496 | 497 | @pytest.mark.parametrize("mode", ("r", "rt")) 498 | def test_custom_strategy(mode: str) -> None: 499 | fileobj = BytesIO(STREAM_BYTES) 500 | strategy = Mock() 501 | 502 | with xz_open(fileobj, mode, block_read_strategy=strategy) as xzfile: 503 | assert xzfile.block_read_strategy == strategy 504 | -------------------------------------------------------------------------------- /tests/unit/test_parse_mode.py: -------------------------------------------------------------------------------- 1 | from itertools import permutations, product 2 | import sys 3 | from typing import Tuple 4 | 5 | try: 6 | 
from typing import get_args 7 | except ImportError: 8 | pass 9 | 10 | import pytest 11 | 12 | from xz.typing import _XZModesBinaryType, _XZModesTextType 13 | from xz.utils import parse_mode 14 | 15 | VALID_MODES = { 16 | "".join(sorted(case[0] + extra)): case 17 | for case in [ 18 | ("r", True, False), 19 | ("r+", True, True), 20 | ("w", False, True), 21 | ("w+", True, True), 22 | ("x", False, True), 23 | ("x+", True, True), 24 | ] 25 | for extra in ("", "b") 26 | } 27 | 28 | 29 | @pytest.mark.skipif( 30 | sys.version_info < (3, 9), 31 | reason="Literal or get_args not supported", 32 | ) 33 | def test_known_valid_modes_binary() -> None: 34 | assert sorted( 35 | "".join(sorted(mode)) for mode in get_args(_XZModesBinaryType) 36 | ) == sorted(VALID_MODES) 37 | 38 | 39 | @pytest.mark.skipif( 40 | sys.version_info < (3, 9), 41 | reason="Literal or get_args not supported", 42 | ) 43 | def test_known_valid_modes_text() -> None: 44 | assert sorted( 45 | "".join(sorted(mode.replace("t", ""))) for mode in get_args(_XZModesTextType) 46 | ) == sorted(mode for mode in VALID_MODES if "b" not in mode) 47 | 48 | 49 | @pytest.mark.parametrize( 50 | "mode, expected", 51 | [pytest.param(mode, expected, id=mode) for mode, expected in VALID_MODES.items()], 52 | ) 53 | def test_parse_mode_valid(mode: str, expected: Tuple[str, bool, bool]) -> None: 54 | for parts in permutations(mode): 55 | mode_permuted = "".join(parts) 56 | assert parse_mode(mode_permuted) == expected, mode_permuted 57 | 58 | 59 | @pytest.mark.parametrize( 60 | "mode", 61 | [ 62 | "".join(mode_parts) 63 | for mode_parts in product(*((c, "") for c in "arwx+tb")) 64 | if "".join(sorted(mode_parts)) not in VALID_MODES 65 | ] 66 | + [mode * 2 for mode in VALID_MODES], 67 | ) 68 | def test_parse_mode_invalid(mode: str) -> None: 69 | for parts in permutations(mode): 70 | mode_permuted = "".join(parts) 71 | with pytest.raises(ValueError): 72 | parse_mode(mode_permuted) 73 | 
--------------------------------------------------------------------------------
/tests/unit/test_stream.py:
--------------------------------------------------------------------------------
from io import SEEK_CUR, SEEK_END, BytesIO
import sys
from typing import Tuple, cast
from unittest.mock import Mock, call

import pytest

from xz.common import XZError
from xz.io import IOProxy
from xz.stream import XZStream

if sys.version_info >= (3, 9):  # pragma: no cover
    from collections.abc import Callable
else:  # pragma: no cover
    from typing import Callable


# a stream with two blocks (lengths: 100, 90)
STREAM_BYTES = bytes.fromhex(
    "fd377a585a0000016922de360200210116000000742fe5a3e0006300415d0020"
    "9842100431d01ab2853283057ddb5924a128599cc9911a7fcff8d59c1f6f887b"
    "cee97b1f83f1808f005de273e1a6e99a7eac4f8f632b7e43bbf1da311dce5c00"
    "00000000e7c35efa0200210116000000742fe5a3e00059003d5d00320cc42641"
    "c8b91ac7908be7e635b8e7d681d74b683cde914399f8de5460dc672363f1e067"
    "5a3ebac9f427ecbebcb94552c0dba85b26950f0ac98b22390000000031f4ee87"
    "00025964555a0000041276283e300d8b020000000001595a"
)

# a stream with no blocks
STREAM_BYTES_EMPTY = bytes.fromhex(
    "fd377a585a0000016922de36000000001cdf44219042990d010000000001595a"
)


def test_parse(data_pattern_locate: Callable[[bytes], Tuple[int, int]]) -> None:
    """Parse a stream embedded between unrelated bytes, then read from it.

    The file object is positioned right after the stream before parsing.
    The test checks the metadata found, the exact seek/read calls made
    while parsing, and reads at various offsets afterwards.
    """
    # Mock(wraps=...) records every call while still delegating to BytesIO
    fileobj = Mock(wraps=BytesIO(b"\xff" * 1000 + STREAM_BYTES + b"\xee" * 1000))
    fileobj.seek(-1000, SEEK_END)
    fileobj.method_calls.clear()

    # parse stream
    stream = XZStream.parse(fileobj)
    assert stream.check == 1
    assert len(stream) == 190
    assert stream.block_boundaries == [0, 100]

    # make sure we don't read the blocks
    assert fileobj.method_calls == [
        call.seek(-12, SEEK_CUR),
        call.read(12),
        call.seek(-24, SEEK_CUR),
        call.read(12),
        call.seek(-204, SEEK_CUR),  # blocks are skipped over here
        call.read(12),
        call.seek(-12, SEEK_CUR),
    ]

    # fileobj should be at the beginning of the stream
    assert fileobj.tell() == 1000

    # read from start
    assert data_pattern_locate(stream.read(20)) == (0, 20)

    # read from middle of a block
    stream.seek(40)
    assert data_pattern_locate(stream.read(20)) == (40, 20)

    # read across two blocks
    stream.seek(90)
    assert data_pattern_locate(stream.read(20)) == (90, 20)

    # read middle of another block
    stream.seek(160)
    assert data_pattern_locate(stream.read(20)) == (160, 20)

    # go backward and read
    stream.seek(130)
    assert data_pattern_locate(stream.read(20)) == (130, 20)

    # read in previous block (going backward from last read in that block)
    stream.seek(60)
    assert data_pattern_locate(stream.read(20)) == (60, 20)

    # read until end
    stream.seek(170)
    assert data_pattern_locate(stream.read()) == (170, 20)


def test_parse_invalid_stream_flags_missmatch() -> None:
    """Parsing this fixture fails with "stream: inconsistent check value"."""
    fileobj = BytesIO(
        bytes.fromhex(
            "fd377a585a000004e6d6b446000000001cdf44219042990d010000000001595a"
        )
    )
    fileobj.seek(0, SEEK_END)
    with pytest.raises(XZError) as exc_info:
        XZStream.parse(fileobj)
    assert str(exc_info.value) == "stream: inconsistent check value"


def test_parse_empty_block() -> None:
    """Parsing this fixture fails with "index record uncompressed size"."""
    fileobj = BytesIO(
        bytes.fromhex(
            "fd377a585a0000016922de360200210116000000742fe5a30000000000000000"
            "000111003b965f739042990d010000000001595a"
        )
    )
    fileobj.seek(0, SEEK_END)
    with pytest.raises(XZError) as exc_info:
        XZStream.parse(fileobj)
    assert str(exc_info.value) == "index record uncompressed size"


def test_parse_empty_stream() -> None:
    """A stream without blocks parses to length 0 and no block boundaries."""
    fileobj = BytesIO(STREAM_BYTES_EMPTY)
    fileobj.seek(0, SEEK_END)
    stream = XZStream.parse(fileobj)
    assert len(stream) == 0
    assert stream.block_boundaries == []


def test_write(data_pattern: bytes) -> None:
    """Write two blocks into a new stream and compare with STREAM_BYTES.

    Also tracks ``block_boundaries`` after every write / ``change_block()``.
    """
    # init with more size than what will be written at the end
    init_size = 1024
    assert len(STREAM_BYTES) < init_size

    fileobj = BytesIO(b"A" * init_size)

    with XZStream(cast(IOProxy, fileobj), 1) as stream:
        # creating the stream alone must not touch the underlying file
        assert fileobj.getvalue() == b"A" * init_size

        assert stream.block_boundaries == []

        # changing block before anything was written adds no boundary
        stream.change_block()
        assert stream.block_boundaries == []

        stream.write(data_pattern[:100])
        assert stream.block_boundaries == [0]

        stream.change_block()
        assert stream.block_boundaries == [0, 100]

        stream.write(data_pattern[100:190])
        assert stream.block_boundaries == [0, 100]

    # the final content is exactly the stream: none of the initial filler is left
    assert fileobj.getvalue() == STREAM_BYTES


def test_write_from_existing_stream(data_pattern: bytes) -> None:
    """Append a second block to a parsed one-block stream.

    The result must be byte-identical to STREAM_BYTES.
    """
    fileobj = BytesIO(
        bytes.fromhex(
            "fd377a585a0000016922de360200210116000000742fe5a3e0006300415d0020"
            "9842100431d01ab2853283057ddb5924a128599cc9911a7fcff8d59c1f6f887b"
            "cee97b1f83f1808f005de273e1a6e99a7eac4f8f632b7e43bbf1da311dce5c00"
            "00000000e7c35efa0001596477f620019042990d010000000001595a"
        )
    )
    fileobj.seek(0, SEEK_END)
    with XZStream.parse(fileobj) as stream:
        assert stream.read() == data_pattern[:100]
        assert stream.block_boundaries == [0]

        stream.write(data_pattern[100:190])
        assert stream.block_boundaries == [0, 100]

    assert fileobj.getvalue() == STREAM_BYTES


def test_truncate_and_write(data_pattern: bytes) -> None:
    """Drop the second block of a parsed stream, then write a new one.

    The fixture's second block decodes to ``b"ZZZ"``; after truncating at the
    block boundary and writing the regular data, the file must equal
    STREAM_BYTES.
    """
    fileobj = BytesIO(
        bytes.fromhex(
            "fd377a585a0000016922de360200210116000000742fe5a3e0006300415d0020"
            "9842100431d01ab2853283057ddb5924a128599cc9911a7fcff8d59c1f6f887b"
            "cee97b1f83f1808f005de273e1a6e99a7eac4f8f632b7e43bbf1da311dce5c00"
            "00000000e7c35efa0200210116000000742fe5a30100025a5a5a0000407f8055"
            "00025964170300008d97067a3e300d8b020000000001595a"
        )
    )
    fileobj.seek(0, SEEK_END)
    with XZStream.parse(fileobj) as stream:
        assert stream.read() == data_pattern[:100] + b"ZZZ"
        assert stream.block_boundaries == [0, 100]

        # position 100 is the start of the second block
        stream.seek(100)
        stream.truncate()
        assert stream.block_boundaries == [0]

        stream.write(data_pattern[100:190])
        assert stream.block_boundaries == [0, 100]

    assert fileobj.getvalue() == STREAM_BYTES


def test_truncate_middle_block() -> None:
    """Truncating at position 80, inside the first block, raises ``ValueError``."""
    fileobj = BytesIO(STREAM_BYTES)
    fileobj.seek(0, SEEK_END)
    with pytest.raises(ValueError) as exc_info:
        with XZStream.parse(fileobj) as stream:
            stream.truncate(80)
    assert str(exc_info.value) == "truncate"


def test_read_only_check() -> None:
    """Assigning ``check`` on a stream raises ``AttributeError``."""
    fileobj = BytesIO()

    with XZStream(cast(IOProxy, fileobj), 1) as stream:
        with pytest.raises(AttributeError):
            stream.check = 4  # type: ignore[misc]


def test_change_filters() -> None:
    """Change ``filters`` between blocks of a single stream.

    ``filters`` is replaced right after the first ``change_block()``; per the
    expected bytes, the block written immediately afterwards keeps the
    original filters and the new chain is used from the third block on.
    """
    fileobj = BytesIO()

    with XZStream(cast(IOProxy, fileobj), 1) as stream:
        stream.write(b"aa")
        stream.change_block()
        stream.filters = [{"id": 3, "dist": 1}, {"id": 33}]
        stream.write(b"bb")
        stream.change_block()
        stream.write(b"cc")
        stream.change_block()
        stream.write(b"dd")

    assert fileobj.getvalue() == bytes.fromhex(
        # header
        "fd377a585a0000016922de36"
        # block 1
        "0200210116000000742fe5a30100016161000000d7198a07"
        # block 2
        "0200210116000000742fe5a30100016262000000ae1baeb5"
        # block 3 (changed filters)
        "02010301002101167920c4ee0100016300000000791ab2db"
        # block 4 (changed filters)
        "02010301002101167920c4ee01000164000000001d19970a"
        # index
        "0004160216021602160200008a2bb83b"
        # footer
        "9be35140030000000001595a"
    )


def test_change_preset() -> None:
    """Change ``preset`` between blocks of a single stream.

    ``preset`` is set to 9 right after the first ``change_block()``; per the
    expected bytes, the block written immediately afterwards keeps the
    original settings and the new preset is used from the third block on.
    """
    fileobj = BytesIO()

    with XZStream(cast(IOProxy, fileobj), 1) as stream:
        stream.write(b"aa")
        stream.change_block()
        stream.preset = 9
        stream.write(b"bb")
        stream.change_block()
        stream.write(b"cc")
        stream.change_block()
        stream.write(b"dd")

    assert fileobj.getvalue() == bytes.fromhex(
        # header
        "fd377a585a0000016922de36"
        # block 1
        "0200210116000000742fe5a30100016161000000d7198a07"
        # block 2
        "0200210116000000742fe5a30100016262000000ae1baeb5"
        # block 3 (changed preset)
        "020021011c00000010cf58cc0100016363000000791ab2db"
        # block 4 (changed preset)
        "020021011c00000010cf58cc01000164640000001d19970a"
        # index
        "0004160216021602160200008a2bb83b"
        # footer
        "9be35140030000000001595a"
    )

--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
[tox]
envlist =
    py, py37, py38, py39, py310, py311, pypy3
    generate-integration-files, type, lint, format

[testenv]
deps =
    pytest
    pytest-cov
passenv = PY_COLORS
setenv =
    COVERAGE_FILE = {toxworkdir}/{envname}/.coverage
    PYTHONDEVMODE = 1
commands =
    pytest {posargs:-vv --cov-fail-under=100}
    py: -coverage html

[testenv:generate-integration-files]
deps =
    pytest
    pytest-cov
passenv = PY_COLORS
setenv =
    PYTEST_ADDOPTS = --no-cov
    PYTHONDEVMODE = 1
commands = pytest -vv -m generate_integration_files --generate-integration-files

[testenv:type]
deps =
    mypy
    pytest # for typing
commands =
    mypy
    mypy --namespace-packages --explicit-package-bases tests

[testenv:lint]
deps =
    pylint
    pytest # to avoid import errors
commands =
    pylint src
    pylint -d duplicate-code,too-many-statements,use-implicit-booleaness-not-comparison tests

[testenv:format]
skip_install = true
deps =
    black
    isort
commands =
    black {posargs:--check --diff} src tests
    isort {posargs:--check --diff} src tests

--------------------------------------------------------------------------------