├── .coveragerc
├── .github
    └── workflows
    │   └── build.yml
├── .gitignore
├── .isort.cfg
├── .prettierrc.yaml
├── .pylintrc
├── .vscode
    ├── env
    ├── launch.json
    └── settings.json
├── CHANGELOG.md
├── LICENSE.txt
├── README.md
├── dev-requirements.txt
├── mypy.ini
├── pytest.ini
├── setup.cfg
├── setup.py
├── src
    └── xz
    │   ├── __init__.py
    │   ├── block.py
    │   ├── common.py
    │   ├── file.py
    │   ├── io.py
    │   ├── open.py
    │   ├── py.typed
    │   ├── strategy.py
    │   ├── stream.py
    │   ├── typing.py
    │   └── utils.py
├── tests
    ├── conftest.py
    ├── integration
    │   ├── conftest.py
    │   ├── files
    │   │   ├── check-crc32.json
    │   │   ├── check-crc32.xz
    │   │   ├── check-crc64.json
    │   │   ├── check-crc64.xz
    │   │   ├── check-none.json
    │   │   ├── check-none.xz
    │   │   ├── check-sha256.json
    │   │   ├── check-sha256.xz
    │   │   ├── example.xz
    │   │   ├── few-blocks.json
    │   │   ├── few-blocks.xz
    │   │   ├── many-blocks.json
    │   │   ├── many-blocks.xz
    │   │   ├── one-stream-with-padding.json
    │   │   ├── one-stream-with-padding.xz
    │   │   ├── several-filters.json
    │   │   ├── several-filters.xz
    │   │   ├── several-streams-with-padding.json
    │   │   ├── several-streams-with-padding.xz
    │   │   ├── several-streams.json
    │   │   ├── several-streams.xz
    │   │   ├── various-block-sizes.json
    │   │   ├── various-block-sizes.xz
    │   │   ├── various-stream-checks-stream-padding-and-block-sizes.json
    │   │   ├── various-stream-checks-stream-padding-and-block-sizes.xz
    │   │   ├── various-stream-checks.json
    │   │   └── various-stream-checks.xz
    │   ├── test_file_read.py
    │   ├── test_file_write.py
    │   ├── test_generate_files.py
    │   ├── test_ram_usage.py
    │   └── test_readme.py
    └── unit
    │   ├── test_attr_proxy.py
    │   ├── test_block.py
    │   ├── test_common.py
    │   ├── test_file.py
    │   ├── test_floordict.py
    │   ├── test_ioabstract.py
    │   ├── test_iocombiner.py
    │   ├── test_ioproxy.py
    │   ├── test_iostatic.py
    │   ├── test_open.py
    │   ├── test_parse_mode.py
    │   └── test_stream.py
└── tox.ini


/.coveragerc:
--------------------------------------------------------------------------------
 1 | [html]
 2 | directory = coverage
 3 | 
 4 | [paths]
 5 | source =
 6 |     src/xz/
 7 |     .tox/py*/lib/python*/site-packages/xz/
 8 |     .tox/py*/site-packages/xz/
 9 | 
10 | [report]
11 | exclude_lines =
12 |     pragma: no cover
13 |     def __repr__
14 |     def __str__
15 |     if __name__ == "__main__":
16 | show_missing = True
17 | 
18 | [run]
19 | branch = True
20 | source = xz
21 | 


--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | name: build
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | env:
 6 |   PY_COLORS: 1
 7 | 
 8 | jobs:
 9 |   tests-py:
10 |     runs-on: ubuntu-latest
11 |     strategy:
12 |       matrix:
13 |         python:
14 |           - "3.7"
15 |           - "3.8"
16 |           - "3.9"
17 |           - "3.10"
18 |           - "3.11"
19 |           - "pypy-3.7"
20 |           - "pypy-3.8"
21 |           - "pypy-3.9"
22 |     steps:
23 |       - uses: actions/checkout@v3
24 |       - name: Setup Python ${{ matrix.python }}
25 |         uses: actions/setup-python@v4
26 |         with:
27 |           python-version: ${{ matrix.python }}
28 |       - name: Install dependencies
29 |         run: pip install tox
30 |       - name: Run tests
31 |         run: tox -e py
32 | 
33 |   tests-misc:
34 |     runs-on: ubuntu-latest
35 |     strategy:
36 |       matrix:
37 |         env: [generate-integration-files, type, lint, format]
38 |     steps:
39 |       - uses: actions/checkout@v3
40 |       - name: Setup Python
41 |         uses: actions/setup-python@v4
42 |         with:
43 |           python-version: "3.11"
44 |       - name: Install dependencies
45 |         run: pip install tox
46 |       - name: Run ${{ matrix.env }}
47 |         run: tox -e ${{ matrix.env }}
48 | 
49 |   build:
50 |     needs: [tests-py, tests-misc]
51 |     runs-on: ubuntu-latest
52 |     steps:
53 |       - uses: actions/checkout@v3
54 |         with:
55 |           # fetch all commits for setuptools_scm
56 |           fetch-depth: 0
57 |       - name: Setup Python
58 |         uses: actions/setup-python@v4
59 |         with:
60 |           python-version: "3.11"
61 |       - name: Build
62 |         run: python setup.py sdist bdist_wheel
63 |       - name: Save build artifacts
64 |         uses: actions/upload-artifact@v3
65 |         with:
66 |           name: dist
67 |           path: dist
68 | 
69 |   publish:
70 |     if: startsWith(github.ref, 'refs/tags')
71 |     needs: build
72 |     runs-on: ubuntu-latest
73 |     steps:
74 |       - name: Restore build artifacts
75 |         uses: actions/download-artifact@v3
76 |         with:
77 |           name: dist
78 |           path: dist
79 |       - name: Publish to PyPI
80 |         uses: pypa/gh-action-pypi-publish@release/v1
81 |         with:
82 |           password: ${{ secrets.pypi_password }}
83 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | /env
 2 | __pycache__
 3 | 
 4 | /build
 5 | /dist
 6 | *.egg-info
 7 | /.eggs
 8 | 
 9 | /.coverage
10 | /coverage
11 | /.pytest_cache
12 | /.tox
13 | 
14 | /src/xz/_version.py
15 | 


--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | force_sort_within_sections = True
3 | profile = black
4 | src_paths = src
5 | 


--------------------------------------------------------------------------------
/.prettierrc.yaml:
--------------------------------------------------------------------------------
1 | printWidth: 88
2 | proseWrap: always
3 | tabWidth: 2
4 | 


--------------------------------------------------------------------------------
/.pylintrc:
--------------------------------------------------------------------------------
 1 | [BASIC]
 2 | good-names =
 3 |     i,
 4 |     j,
 5 |     k,
 6 |     ex,
 7 |     _,
 8 |     T,
 9 | 
10 | [MESSAGES CONTROL]
11 | disable =
12 |     missing-class-docstring,
13 |     missing-function-docstring,
14 |     missing-module-docstring,
15 |     too-few-public-methods,
16 |     too-many-arguments,
17 |     too-many-branches,
18 |     too-many-instance-attributes,
19 |     too-many-locals,
20 | 
21 | [SIMILARITIES]
22 | ignore-imports=yes
23 | 


--------------------------------------------------------------------------------
/.vscode/env:
--------------------------------------------------------------------------------
1 | # for vscode + extensions import when outside of src (e.g. in tests)
2 | PYTHONPATH=./src
3 | 


--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "version": "0.2.0",
 3 |   "configurations": [
 4 |     {
 5 |       "name": "Python: Current File (Integrated Terminal)",
 6 |       "type": "python",
 7 |       "request": "launch",
 8 |       "program": "${file}",
 9 |       "console": "integratedTerminal"
10 |     }
11 |   ]
12 | }
13 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "files.exclude": {
 3 |     "env": true,
 4 |     "**/__pycache__": true,
 5 |     ".eggs": true,
 6 |     "**/*.egg-info": true,
 7 |     ".coverage": true,
 8 |     ".pytest_cache": true,
 9 |     ".tox": true
10 |   },
11 |   "editor.detectIndentation": false,
12 |   "editor.formatOnSave": true,
13 |   "editor.insertSpaces": true,
14 |   "editor.tabSize": 4,
15 |   "files.insertFinalNewline": true,
16 |   "[json]": {
17 |     "editor.defaultFormatter": "esbenp.prettier-vscode"
18 |   },
19 |   "[markdown]": {
20 |     "editor.defaultFormatter": "esbenp.prettier-vscode",
21 |     "editor.rulers": [88],
22 |     "editor.wordWrap": "on",
23 |     "editor.wordWrapColumn": 88
24 |   },
25 |   "[python]": {
26 |     "editor.codeActionsOnSave": {
27 |       "source.organizeImports": true
28 |     }
29 |   },
30 |   "[yaml]": {
31 |     "editor.defaultFormatter": "esbenp.prettier-vscode",
32 |     "editor.tabSize": 2
33 |   },
34 |   "python.envFile": "${workspaceFolder}/.vscode/env",
35 |   "python.formatting.provider": "black",
36 |   "python.linting.pylintEnabled": true,
37 |   "python.testing.pytestEnabled": true,
38 |   "python.sortImports.args": ["-sp .isort.cfg"]
39 | }
40 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
  1 | # Changelog
  2 | 
  3 | All notable changes to this project will be documented in this file.
  4 | 
  5 | The format is based on [Keep a Changelog](https://keepachangelog.com/), and this project
  6 | adheres to [Semantic Versioning](https://semver.org/).
  7 | 
  8 | ## [0.5.0] - 2023-02-27
  9 | 
 10 | [0.5.0]: https://github.com/rogdham/python-xz/releases/tag/v0.5.0
 11 | 
 12 | ### :boom: Breaking changes
 13 | 
 14 | - End of Python 3.6 support
 15 | 
 16 | ### :house: Internal
 17 | 
 18 | - Necessary code changes following dev dependency update: black, pylint, pytest
 19 | - Refactor a descriptor following PEP 487
 20 | - Add tests for CPython 3.11 and PyPy 3.9
 21 | - Use CPython 3.11 for misc. tests
 22 | - Update GitHub Actions dependencies
 23 | - Import typing modules impacted by PEP 585 based on Python version
 24 | 
 25 | ## [0.4.0] - 2022-01-09
 26 | 
 27 | [0.4.0]: https://github.com/rogdham/python-xz/releases/tag/v0.4.0
 28 | 
 29 | ### :rocket: Added
 30 | 
 31 | - Advanced users may use the new `block_read_strategy` argument of `XZFile`/`xz.open` to
 32 |   customize the strategy for freeing block readers, and implement a different tradeoff
 33 |   between memory consumption and read speed when alternating reads between several
 34 |   blocks; the following strategies are provided: `RollingBlockReadStrategy` and
 35 |   `KeepBlockReadStrategy`
 36 | 
 37 | ### :bug: Fixes
 38 | 
 39 | - Free memory after a block is fully read
 40 | - Free memory of LZMA decompressors when many blocks are partially read; this is a
 41 |   tradeoff defaulting to keeping the last 8 LZMA decompressors used
 42 | - Typing: use `BinaryIO` instead of `IO[bytes]`
 43 | 
 44 | ### :house: Internal
 45 | 
 46 | - Specify the Python versions required in package metadata
 47 | - Test the `mode` attribute of objects returned by `xz.open`/`XZFile`
 48 | - Minor improvements in some docstrings
 49 | 
 50 | ## [0.3.1] - 2021-12-26
 51 | 
 52 | [0.3.1]: https://github.com/rogdham/python-xz/releases/tag/v0.3.1
 53 | 
 54 | ### :house: Internal
 55 | 
 56 | - Add tests for CPython 3.10 and PyPy 3.8
 57 | - Use CPython 3.10 for misc. tests
 58 | - Clarify which Python versions are supported in readme
 59 | - Fix some linting issues found by latest versions of mypy/pylint
 60 | 
 61 | ## [0.3.0] - 2021-11-07
 62 | 
 63 | [0.3.0]: https://github.com/rogdham/python-xz/releases/tag/v0.3.0
 64 | 
 65 | ### :boom: Breaking changes
 66 | 
 67 | - The `filename` argument of `XZFile` is now mandatory; this change should have very
 68 |   limited impact as not providing it makes no sense and would have raised a `TypeError`,
 69 |   plus it was already mandatory on `xz.open`
 70 | 
 71 | ### :rocket: Added
 72 | 
 73 | - Type hints
 74 | 
 75 | ### :house: Internal
 76 | 
 77 | - Type validation with mypy
 78 | - Distribute `py.typed` file in conformance with [PEP 561]
 79 | 
 80 | [pep 561]: https://www.python.org/dev/peps/pep-0561/
 81 | 
 82 | ## [0.2.0] - 2021-10-23
 83 | 
 84 | [0.2.0]: https://github.com/rogdham/python-xz/releases/tag/v0.2.0
 85 | 
 86 | ### :rocket: Added
 87 | 
 88 | - Write modes (`w`, `x`, `r+`, `w+`, `x+`) :tada:
 89 | - Allow to `seek` past the end of the fileobj
 90 | - Calling `len` on a fileobj gives its length, and `bool` tells if it is empty
 91 | - Export useful constants and functions from `lzma` for easy access: checks, filters,
 92 |   etc.
 93 | 
 94 | ### :house: Internal
 95 | 
 96 | - Test that no warnings are generated
 97 | - Change development status to Alpha
 98 | 
 99 | ## [0.1.2] - 2021-09-19
100 | 
101 | [0.1.2]: https://github.com/rogdham/python-xz/releases/tag/v0.1.2
102 | 
103 | ### :rocket: Added
104 | 
105 | - Add `__version__` attribute to module, despite [PEP 396] being rejected
106 | 
107 | [pep 396]: https://www.python.org/dev/peps/pep-0396/
108 | 
109 | ## [0.1.1] - 2021-05-14
110 | 
111 | [0.1.1]: https://github.com/rogdham/python-xz/releases/tag/v0.1.1
112 | 
113 | ### :rocket: Added
114 | 
115 | - Implementation of the `fileno` method
116 | 
117 | ## [0.1.0] - 2021-05-13
118 | 
119 | [0.1.0]: https://github.com/rogdham/python-xz/releases/tag/v0.1.0
120 | 
121 | ### :rocket: Added
122 | 
123 | - Initial public release :tada:
124 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2020 Rogdham
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <div align="center">
  2 | 
  3 | # python-xz
  4 | 
  5 | Pure Python implementation of the XZ file format with random access support
  6 | 
  7 | _Leveraging the lzma module for fast (de)compression_
  8 | 
  9 | [![GitHub build status](https://img.shields.io/github/actions/workflow/status/rogdham/python-xz/build.yml?branch=master)](https://github.com/rogdham/python-xz/actions?query=branch:master)
 10 | [![Release on PyPI](https://img.shields.io/pypi/v/python-xz)](https://pypi.org/project/python-xz/)
 11 | [![Code coverage](https://img.shields.io/badge/coverage-100%25-brightgreen)](https://github.com/rogdham/python-xz/search?q=fail+under&type=Code)
 12 | [![Mypy type checker](https://img.shields.io/badge/type_checker-mypy-informational)](https://mypy.readthedocs.io/)
 13 | [![MIT License](https://img.shields.io/pypi/l/python-xz)](https://github.com/Rogdham/python-xz/blob/master/LICENSE.txt)
 14 | 
 15 | ---
 16 | 
 17 | [📖 Documentation](https://github.com/rogdham/python-xz/#usage)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[📃 Changelog](./CHANGELOG.md)
 18 | 
 19 | </div>
 20 | 
 21 | ---
 22 | 
 23 | An XZ file can be composed of several streams and blocks. This allows for fast random
 24 | access when reading, but this is not supported by Python's builtin `lzma` module (which
 25 | would read all previous blocks for nothing).
 26 | 
 27 | <div align="center">
 28 | 
 29 | |                   |      [lzma]       |      [lzmaffi]       |      python-xz       |
 30 | | :---------------: | :---------------: | :------------------: | :------------------: |
 31 | |    module type    |      builtin      |  cffi (C extension)  |     pure Python      |
 32 | |    📄 **read**    |                   |                      |                      |
 33 | |   random access   | ❌ no<sup>1</sup> |  ✔️ yes<sup>2</sup>  |  ✔️ yes<sup>2</sup>  |
 34 | |  several blocks   |      ✔️ yes       | ✔️✔️ yes<sup>3</sup> | ✔️✔️ yes<sup>3</sup> |
 35 | |  several streams  |      ✔️ yes       |        ✔️ yes        | ✔️✔️ yes<sup>4</sup> |
 36 | |  stream padding   | ❌ no<sup>5</sup> |        ✔️ yes        |        ✔️ yes        |
 37 | |   📝 **write**    |                   |                      |                      |
 38 | |     `w` mode      |      ✔️ yes       |        ✔️ yes        |        ✔️ yes        |
 39 | |     `x` mode      |      ✔️ yes       |        ❌ no         |        ✔️ yes        |
 40 | |     `a` mode      |   ✔️ new stream   |    ✔️ new stream     |      ⏳ planned      |
 41 | | `r+`/`w+`/… modes |       ❌ no       |        ❌ no         |        ✔️ yes        |
 42 | |  several blocks   |       ❌ no       |        ❌ no         |        ✔️ yes        |
 43 | |  several streams  | ❌ no<sup>6</sup> |  ❌ no<sup>6</sup>   |        ✔️ yes        |
 44 | |  stream padding   |       ❌ no       |        ❌ no         |      ⏳ planned      |
 45 | 
 46 | </div>
 47 | 
 48 | <details>
 49 | <summary>Notes</summary>
 50 | 
 51 | 1. Reading from a position will read the file from the very beginning
 52 | 2. Reading from a position will read the file from the beginning of the block
 53 | 3. Block positions available with the `block_boundaries` attribute
 54 | 4. Stream positions available with the `stream_boundaries` attribute
 55 | 5. Related [issue](https://github.com/python/cpython/issues/88300)
 56 | 6. Possible by manually closing and re-opening in append mode
 57 | 
 58 | </details>
 59 | 
 60 | [lzma]: https://docs.python.org/3/library/lzma.html
 61 | [lzmaffi]: https://github.com/r3m0t/backports.lzma
 62 | 
 63 | ---
 64 | 
 65 | ## Install
 66 | 
 67 | Install `python-xz` with pip:
 68 | 
 69 | ```sh
 70 | $ python -m pip install python-xz
 71 | ```
 72 | 
 73 | _An unofficial package for conda is [also available][conda package], see [issue #5][#5]
 74 | for more information._
 75 | 
 76 | [conda package]: https://anaconda.org/conda-forge/python-xz
 77 | [#5]: https://github.com/Rogdham/python-xz/issues/5
 78 | 
 79 | ## Usage
 80 | 
 81 | The API is similar to [lzma]: you can use either `xz.open` or `xz.XZFile`.
 82 | 
 83 | ### Read mode
 84 | 
 85 | ```python
 86 | >>> with xz.open('example.xz') as fin:
 87 | ...     fin.read(18)
 88 | ...     fin.stream_boundaries  # 2 streams
 89 | ...     fin.block_boundaries   # 4 blocks in first stream, 2 blocks in second stream
 90 | ...     fin.seek(1000)
 91 | ...     fin.read(31)
 92 | ...
 93 | b'Hello, world! \xf0\x9f\x91\x8b'
 94 | [0, 2000]
 95 | [0, 500, 1000, 1500, 2000, 3000]
 96 | 1000
 97 | b'\xe2\x9c\xa8 Random access is fast! \xf0\x9f\x9a\x80'
 98 | ```
 99 | 
100 | Opening in text mode works as well, but notice that seek arguments as well as boundaries
101 | are still in bytes (just like with `lzma.open`).
102 | 
103 | ```python
104 | >>> with xz.open('example.xz', 'rt') as fin:
105 | ...     fin.read(15)
106 | ...     fin.stream_boundaries
107 | ...     fin.block_boundaries
108 | ...     fin.seek(1000)
109 | ...     fin.read(26)
110 | ...
111 | 'Hello, world! 👋'
112 | [0, 2000]
113 | [0, 500, 1000, 1500, 2000, 3000]
114 | 1000
115 | '✨ Random access is fast! 🚀'
116 | ```
117 | 
118 | ### Write mode
119 | 
120 | Writing is only supported from the end of file. It is however possible to truncate the
121 | file first. Note that truncating is only supported on block boundaries.
122 | 
123 | ```python
124 | >>> with xz.open('test.xz', 'w') as fout:
125 | ...     fout.write(b'Hello, world!\n')
126 | ...     fout.write(b'This sentence is still in the previous block\n')
127 | ...     fout.change_block()
128 | ...     fout.write(b'But this one is in its own!\n')
129 | ...
130 | 14
131 | 45
132 | 28
133 | ```
134 | 
135 | Advanced usage:
136 | 
137 | - Modes like `r+`/`w+`/`x+` allow opening a file for both reading and writing at the
138 |   same time; however, in the current implementation, a block with writing in progress
139 |   is automatically closed when reading data from it.
140 | - The `check`, `preset` and `filters` arguments to `xz.open` and `xz.XZFile` allow
141 |   configuring the default values for new streams and blocks.
142 | - Change block with the `change_block` method (the `preset` and `filters` attributes can
143 |   be changed beforehand to apply to the new block).
144 | - Change stream with the `change_stream` method (the `check` attribute can be changed
145 |   beforehand to apply to the new stream).
146 | 
147 | ---
148 | 
149 | ## FAQ
150 | 
151 | ### How does random access work?
152 | 
153 | XZ files are made of a number of streams, and each stream is composed of a number of
154 | blocks. This can be seen with `xz --list`:
155 | 
156 | ```sh
157 | $ xz --list file.xz
158 | Strms  Blocks   Compressed Uncompressed  Ratio  Check   Filename
159 |     1      13     16.8 MiB    297.9 MiB  0.056  CRC64   file.xz
160 | ```
161 | 
162 | To read data from the middle of the 10th block, we will decompress the 10th block from
163 | its start until we reach the middle (and drop that decompressed data), then return the
164 | decompressed data from that point.
165 | 
166 | Choosing a good block size is a tradeoff between seeking time during random access and
167 | compression ratio.
168 | 
169 | ### How can I create XZ files optimized for random-access?
170 | 
171 | You can open the file for writing and use the `change_block` method to create several
172 | blocks.
173 | 
174 | Other tools allow creating XZ files with several blocks as well:
175 | 
176 | - [XZ Utils](https://tukaani.org/xz/) needs to be called with flags:
177 | 
178 | ```sh
179 | $ xz -T0 file                          # threading mode
180 | $ xz --block-size 16M file             # same size for all blocks
181 | $ xz --block-list 16M,32M,8M,42M file  # specific size for each block
182 | ```
183 | 
184 | - [PIXZ](https://github.com/vasi/pixz) creates files with several blocks by default:
185 | 
186 | ```sh
187 | $ pixz file
188 | ```
189 | 
190 | ### Python version support
191 | 
192 | As a general rule, all Python versions that are both [released and still officially
193 | supported][python-versions] are supported by `python-xz` and tested against (both
194 | CPython and PyPy implementations).
195 | 
196 | If you have other use cases or find issues with some Python versions, feel free to
197 | [open a ticket](https://github.com/Rogdham/python-xz/issues/new)!
198 | 
199 | [python-versions]: https://devguide.python.org/versions/#versions
200 | 


--------------------------------------------------------------------------------
/dev-requirements.txt:
--------------------------------------------------------------------------------
 1 | # install + dependencies
 2 | -e .
 3 | 
 4 | # typing
 5 | mypy
 6 | 
 7 | # tests
 8 | coverage
 9 | pytest
10 | pytest-cov
11 | 
12 | # lint
13 | pylint
14 | 
15 | # format
16 | black
17 | isort
18 | 
19 | # publish
20 | setuptools_scm
21 | wheel
22 | 


--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
 1 | [mypy]
 2 | # section names refer to the documentation
 3 | # https://mypy.readthedocs.io/en/stable/config_file.html
 4 | 
 5 | # Import discovery
 6 | files = src
 7 | ignore_missing_imports = False
 8 | follow_imports = normal
 9 | 
10 | # Platform configuration
11 | python_version = 3.11
12 | 
13 | # Disallow dynamic typing
14 | disallow_any_unimported = True
15 | disallow_any_decorated = True
16 | disallow_any_generics = True
17 | disallow_subclassing_any = True
18 | 
19 | # Untyped definitions and calls
20 | disallow_untyped_calls = True
21 | disallow_untyped_defs = True
22 | disallow_incomplete_defs = True
23 | check_untyped_defs = True
24 | disallow_untyped_decorators = True
25 | 
26 | # None and Optional handling
27 | no_implicit_optional = True
28 | strict_optional = True
29 | 
30 | # Configuring warning
31 | warn_redundant_casts = True
32 | warn_unused_ignores = True
33 | warn_no_return = True
34 | warn_return_any = True
35 | warn_unreachable = True
36 | 
37 | # Suppressing errors
38 | show_none_errors = True
39 | ignore_errors = False
40 | 
41 | # Miscellaneous strictness flags
42 | strict_equality = True
43 | 
44 | # Configuring error messages
45 | show_error_context = True
46 | show_error_codes = True
47 | 
48 | # Miscellaneous
49 | warn_unused_configs = True
50 | 


--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
 1 | [pytest]
 2 | addopts =
 3 |     --cov
 4 |     --strict-markers
 5 | filterwarnings =
 6 |     error
 7 |     # issue in standard lib for PyPy < 3.9
 8 |     ignore:Using or importing the ABCs from 'collections':DeprecationWarning:_lzma
 9 | markers =
10 |     generate_integration_files
11 |     integration
12 |     unit
13 | testpaths =
14 |     tests
15 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [metadata]
 2 | name = python-xz
 3 | author = Rogdham
 4 | author_email = contact@rogdham.net
 5 | description = Pure Python implementation of the XZ file format with random access support
 6 | long_description = file: README.md
 7 | long_description_content_type = text/markdown
 8 | url = https://github.com/rogdham/python-xz
 9 | project_urls =
10 |     Source = https://github.com/rogdham/python-xz
11 | keywords = xz lzma compression decompression
12 | license = MIT
13 | license_files = LICENSE.txt
14 | platform = any
15 | classifiers =
16 |     Development Status :: 3 - Alpha
17 |     License :: OSI Approved :: MIT License
18 |     Operating System :: OS Independent
19 |     Programming Language :: Python
20 |     Programming Language :: Python :: 3
21 |     Programming Language :: Python :: 3 :: Only
22 |     Programming Language :: Python :: 3.7
23 |     Programming Language :: Python :: 3.8
24 |     Programming Language :: Python :: 3.9
25 |     Programming Language :: Python :: 3.10
26 |     Programming Language :: Python :: 3.11
27 |     Topic :: Utilities
28 |     Topic :: System :: Archiving
29 |     Topic :: System :: Archiving :: Compression
30 | 
31 | [options]
32 | include_package_data = True
33 | package_dir = =src
34 | packages = xz
35 | python_requires = >=3.7
36 | setup_requires =
37 |     setuptools_scm
38 |     wheel
39 | install_requires =
40 |     typing-extensions>=4.5.0;python_version<"3.8"
41 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from setuptools import setup
 4 | 
 5 | setup(  # the remaining package metadata is declared in setup.cfg
 6 |     use_scm_version={  # setuptools_scm option (listed in setup_requires)
 7 |         "write_to": "src/xz/_version.py",  # module imported by xz/__init__.py
 8 |         "write_to_template": '__version__ = "{version}"\n',
 9 |     }
10 | )
11 | 


--------------------------------------------------------------------------------
/src/xz/__init__.py:
--------------------------------------------------------------------------------
 1 | from lzma import (
 2 |     CHECK_CRC32,
 3 |     CHECK_CRC64,
 4 |     CHECK_ID_MAX,
 5 |     CHECK_NONE,
 6 |     CHECK_SHA256,
 7 |     CHECK_UNKNOWN,
 8 |     FILTER_ARM,
 9 |     FILTER_ARMTHUMB,
10 |     FILTER_DELTA,
11 |     FILTER_IA64,
12 |     FILTER_LZMA1,
13 |     FILTER_LZMA2,
14 |     FILTER_POWERPC,
15 |     FILTER_SPARC,
16 |     FILTER_X86,
17 |     MF_BT2,
18 |     MF_BT3,
19 |     MF_BT4,
20 |     MF_HC3,
21 |     MF_HC4,
22 |     MODE_FAST,
23 |     MODE_NORMAL,
24 |     PRESET_DEFAULT,
25 |     PRESET_EXTREME,
26 |     compress,
27 |     decompress,
28 |     is_check_supported,
29 | )
30 | 
31 | try:
32 |     from xz._version import __version__  # file named by "write_to" in setup.py
33 | except ImportError:  # pragma: no cover
34 |     __version__ = "0.0.0.dev0-unknown"  # placeholder when _version.py is absent
35 | 
36 | 
37 | from xz.common import XZError
38 | from xz.file import XZFile
39 | from xz.open import xz_open
40 | from xz.strategy import KeepBlockReadStrategy, RollingBlockReadStrategy
41 | 
42 | # pylint: disable=redefined-builtin
43 | open = xz_open  # public alias, so that callers can write xz.open(...)
44 | # pylint: enable=redefined-builtin
45 | 
46 | 
47 | __all__ = (
48 |     "__version__",
49 |     "KeepBlockReadStrategy",
50 |     "RollingBlockReadStrategy",
51 |     "XZError",
52 |     "XZFile",
53 |     "open",
54 |     # re-export from lzma for easy access
55 |     "CHECK_CRC32",
56 |     "CHECK_CRC64",
57 |     "CHECK_ID_MAX",
58 |     "CHECK_NONE",
59 |     "CHECK_SHA256",
60 |     "CHECK_UNKNOWN",
61 |     "FILTER_ARM",
62 |     "FILTER_ARMTHUMB",
63 |     "FILTER_DELTA",
64 |     "FILTER_IA64",
65 |     "FILTER_LZMA1",
66 |     "FILTER_LZMA2",
67 |     "FILTER_POWERPC",
68 |     "FILTER_SPARC",
69 |     "FILTER_X86",
70 |     "MF_BT2",
71 |     "MF_BT3",
72 |     "MF_BT4",
73 |     "MF_HC3",
74 |     "MF_HC4",
75 |     "MODE_FAST",
76 |     "MODE_NORMAL",
77 |     "PRESET_DEFAULT",
78 |     "PRESET_EXTREME",
79 |     "compress",
80 |     "decompress",
81 |     "is_check_supported",
82 | )
83 | 


--------------------------------------------------------------------------------
/src/xz/block.py:
--------------------------------------------------------------------------------
  1 | from io import DEFAULT_BUFFER_SIZE, SEEK_SET
  2 | from lzma import FORMAT_XZ, LZMACompressor, LZMADecompressor, LZMAError
  3 | from typing import Optional, Tuple, Union
  4 | 
  5 | from xz.common import (
  6 |     XZError,
  7 |     create_xz_header,
  8 |     create_xz_index_footer,
  9 |     parse_xz_footer,
 10 |     parse_xz_index,
 11 | )
 12 | from xz.io import IOAbstract, IOCombiner, IOStatic
 13 | from xz.strategy import KeepBlockReadStrategy
 14 | from xz.typing import _BlockReadStrategyType, _LZMAFiltersType, _LZMAPresetType
 15 | 
 16 | 
 17 | class BlockRead:
 18 |     read_size = DEFAULT_BUFFER_SIZE
 19 | 
 20 |     def __init__(
 21 |         self,
 22 |         fileobj: IOAbstract,
 23 |         check: int,
 24 |         unpadded_size: int,
 25 |         uncompressed_size: int,
 26 |     ) -> None:
 27 |         self.length = uncompressed_size
 28 |         self.fileobj = IOCombiner(
 29 |             IOStatic(create_xz_header(check)),
 30 |             fileobj,
 31 |             IOStatic(
 32 |                 create_xz_index_footer(check, [(unpadded_size, uncompressed_size)])
 33 |             ),
 34 |         )
 35 |         self.reset()
 36 | 
 37 |     def reset(self) -> None:
 38 |         self.fileobj.seek(0, SEEK_SET)
 39 |         self.pos = 0
 40 |         self.decompressor = LZMADecompressor(format=FORMAT_XZ)
 41 | 
 42 |     def decompress(self, pos: int, size: int) -> bytes:
 43 |         if pos < self.pos:
 44 |             self.reset()
 45 | 
 46 |         skip_before = pos - self.pos
 47 | 
 48 |         # pylint: disable=using-constant-test
 49 |         if self.decompressor.eof:
 50 |             raise XZError("block: decompressor eof")
 51 | 
 52 |         if self.decompressor.needs_input:
 53 |             data_input = self.fileobj.read(self.read_size)
 54 |             if not data_input:
 55 |                 raise XZError("block: data eof")
 56 |         else:
 57 |             data_input = b""
 58 | 
 59 |         data_output = self.decompressor.decompress(data_input, skip_before + size)
 60 |         self.pos += len(data_output)
 61 | 
 62 |         if self.pos == self.length:
 63 |             # we reached the end of the block
 64 |             # according to the XZ specification, we must check the
 65 |             # remaining bytes of the block; this is mainly performed by the
 66 |             # decompressor itself when we consume it
 67 |             while not self.decompressor.eof:
 68 |                 if self.decompress(self.pos, 1):
 69 |                     raise LZMAError("Corrupt input data")
 70 | 
 71 |         return data_output[skip_before:]
 72 | 
 73 | 
 74 | class BlockWrite:
 75 |     def __init__(
 76 |         self,
 77 |         fileobj: IOAbstract,
 78 |         check: int,
 79 |         preset: _LZMAPresetType,
 80 |         filters: _LZMAFiltersType,
 81 |     ) -> None:
 82 |         self.fileobj = fileobj
 83 |         self.check = check
 84 |         self.compressor = LZMACompressor(FORMAT_XZ, check, preset, filters)
 85 |         self.pos = 0
 86 |         if self.compressor.compress(b"") != create_xz_header(check):
 87 |             raise XZError("block: compressor header")
 88 | 
 89 |     def _write(self, data: bytes) -> None:
 90 |         if data:
 91 |             self.fileobj.seek(self.pos)
 92 |             self.fileobj.write(data)
 93 |             self.pos += len(data)
 94 | 
 95 |     def compress(self, data: bytes) -> None:
 96 |         self._write(self.compressor.compress(data))
 97 | 
 98 |     def finish(self) -> Tuple[int, int]:
 99 |         data = self.compressor.flush()
100 | 
101 |         # footer
102 |         check, backward_size = parse_xz_footer(data[-12:])
103 |         if check != self.check:
104 |             raise XZError("block: compressor footer check")
105 | 
106 |         # index
107 |         records = parse_xz_index(data[-12 - backward_size : -12])
108 |         if len(records) != 1:
109 |             raise XZError("block: compressor index records length")
110 | 
111 |         # remaining block data
112 |         self._write(data[: -12 - backward_size])
113 | 
114 |         return records[0]  # (unpadded_size, uncompressed_size)
115 | 
116 | 
class XZBlock(IOAbstract):
    """IO object exposing the uncompressed content of one XZ block.

    At any time the block holds at most one operation: a BlockRead while
    reading, a BlockWrite while writing, or None when idle.
    """

    def __init__(
        self,
        fileobj: IOAbstract,
        check: int,
        unpadded_size: int,
        uncompressed_size: int,
        preset: _LZMAPresetType = None,
        filters: _LZMAFiltersType = None,
        block_read_strategy: Optional[_BlockReadStrategyType] = None,
    ):
        super().__init__(uncompressed_size)
        self.fileobj = fileobj
        self.check = check
        self.preset = preset
        self.filters = filters
        self.block_read_strategy = block_read_strategy or KeepBlockReadStrategy()
        self.unpadded_size = unpadded_size
        self.operation: Union[BlockRead, BlockWrite, None] = None

    @property
    def uncompressed_size(self) -> int:
        """Length of the uncompressed data, i.e. the length of this IO object."""
        return self._length

    def _read(self, size: int) -> bytes:
        # enforce read mode
        if not isinstance(self.operation, BlockRead):
            self._write_end()
            self.clear()
            self.block_read_strategy.on_create(self)
            reader = BlockRead(
                self.fileobj,
                self.check,
                self.unpadded_size,
                self.uncompressed_size,
            )
            self.operation = reader

        # read data
        self.block_read_strategy.on_read(self)
        try:
            data = self.operation.decompress(self._pos, size)
        except LZMAError as ex:
            raise XZError(f"block: error while decompressing: {ex}") from ex

        # the reader is released as soon as the end of the block is reached
        reached_end = self._pos + len(data) == self._length
        if reached_end:
            self.clear()

        return data

    def writable(self) -> bool:
        """A block is writable while being written, or while still empty."""
        if isinstance(self.operation, BlockWrite):
            return True
        return not self._length

    def _write(self, data: bytes) -> int:
        # enforce write mode
        if not isinstance(self.operation, BlockWrite):
            self.clear()
            writer = BlockWrite(
                self.fileobj,
                self.check,
                self.preset,
                self.filters,
            )
            self.operation = writer

        # write data
        self.operation.compress(data)
        return len(data)

    def _write_after(self) -> None:
        if not isinstance(self.operation, BlockWrite):
            return
        self.unpadded_size, uncompressed_size = self.operation.finish()
        if uncompressed_size != self.uncompressed_size:
            raise XZError("block: compressor uncompressed size")
        self.clear()

    def _truncate(self, size: int) -> None:
        # writable() guarantees that the length is zero at this point, so
        # truncating in the middle of the block never has to be handled:
        # seeking to size and writing nothing pads the block with null bytes
        self.seek(size)
        self.write(b"")

    def clear(self) -> None:
        """Drop the current operation, notifying the strategy for readers."""
        if isinstance(self.operation, BlockRead):
            self.block_read_strategy.on_delete(self)
        self.operation = None  # free memory
201 | 


--------------------------------------------------------------------------------
/src/xz/common.py:
--------------------------------------------------------------------------------
  1 | from binascii import crc32 as crc32int
  2 | import lzma
  3 | from struct import pack, unpack
  4 | from typing import List, Tuple, cast
  5 | 
# magic bytes placed at the start of a stream header (see create_xz_header)
HEADER_MAGIC = b"\xfd7zXZ\x00"
# magic bytes placed at the end of a stream footer (see create_xz_index_footer)
FOOTER_MAGIC = b"YZ"
  8 | 
  9 | 
 10 | class XZError(Exception):
 11 |     pass
 12 | 
 13 | 
 14 | def encode_mbi(value: int) -> bytes:
 15 |     data = bytearray()
 16 |     while value >= 0x80:
 17 |         data.append((value & 0x7F) | 0x80)
 18 |         value >>= 7
 19 |     data.append(value)
 20 |     return data
 21 | 
 22 | 
 23 | def decode_mbi(data: bytes) -> Tuple[int, int]:
 24 |     value = 0
 25 |     for size, byte in enumerate(data):
 26 |         value |= (byte & 0x7F) << (size * 7)
 27 |         if not byte & 0x80:
 28 |             return (size + 1, value)
 29 |     raise XZError("invalid mbi")
 30 | 
 31 | 
 32 | def crc32(data: bytes) -> bytes:
 33 |     return pack("<I", crc32int(data))
 34 | 
 35 | 
 36 | def round_up(value: int) -> int:
 37 |     remainder = value % 4
 38 |     if remainder:
 39 |         return value - remainder + 4
 40 |     return value
 41 | 
 42 | 
 43 | def pad(value: int) -> bytes:
 44 |     return b"\x00" * (round_up(value) - value)
 45 | 
 46 | 
 47 | def create_xz_header(check: int) -> bytes:
 48 |     if not 0 <= check <= 0xF:
 49 |         raise XZError("header check")
 50 |     # stream header
 51 |     flags = pack("<BB", 0, check)
 52 |     return HEADER_MAGIC + flags + crc32(flags)
 53 | 
 54 | 
 55 | def create_xz_index_footer(check: int, records: List[Tuple[int, int]]) -> bytes:
 56 |     if not 0 <= check <= 0xF:
 57 |         raise XZError("footer check")
 58 |     # index
 59 |     index = b"\x00"
 60 |     index += encode_mbi(len(records))
 61 |     for unpadded_size, uncompressed_size in records:
 62 |         if not unpadded_size:
 63 |             raise XZError("index record unpadded size")
 64 |         index += encode_mbi(unpadded_size)
 65 |         index += encode_mbi(uncompressed_size)
 66 |     index += pad(len(index))
 67 |     index += crc32(index)
 68 |     # stream footer
 69 |     footer = pack("<IBB", (len(index) // 4) - 1, 0, check)
 70 |     footer = crc32(footer) + footer + FOOTER_MAGIC
 71 |     return index + footer
 72 | 
 73 | 
 74 | def parse_xz_header(header: bytes) -> int:
 75 |     if len(header) != 12:
 76 |         raise XZError("header length")
 77 |     if header[:6] != HEADER_MAGIC:
 78 |         raise XZError("header magic")
 79 |     if crc32(header[6:8]) != header[8:12]:
 80 |         raise XZError("header crc32")
 81 |     flag_first_byte, check = cast(
 82 |         Tuple[int, int],
 83 |         unpack("<BB", header[6:8]),
 84 |     )
 85 |     if flag_first_byte or not 0 <= check <= 0xF:
 86 |         raise XZError("header flags")
 87 |     return check
 88 | 
 89 | 
 90 | def parse_xz_index(index: bytes) -> List[Tuple[int, int]]:
 91 |     if len(index) < 8 or len(index) % 4:
 92 |         raise XZError("index length")
 93 |     index = memoryview(index)
 94 |     if index[0]:
 95 |         raise XZError("index indicator")
 96 |     if crc32(index[:-4]) != index[-4:]:
 97 |         raise XZError("index crc32")
 98 |     size, nb_records = decode_mbi(index[1:])
 99 |     index = index[1 + size : -4]
100 |     # records
101 |     records = []
102 |     for _ in range(nb_records):
103 |         if not index:
104 |             raise XZError("index size")
105 |         size, unpadded_size = decode_mbi(index)
106 |         if not unpadded_size:
107 |             raise XZError("index record unpadded size")
108 |         index = index[size:]
109 |         if not index:
110 |             raise XZError("index size")
111 |         size, uncompressed_size = decode_mbi(index)
112 |         if not uncompressed_size:
113 |             raise XZError("index record uncompressed size")
114 |         index = index[size:]
115 |         records.append((unpadded_size, uncompressed_size))
116 |     # index padding
117 |     if any(index):
118 |         raise XZError("index padding")
119 |     return records
120 | 
121 | 
def parse_xz_footer(footer: bytes) -> Tuple[int, int]:
    """Validate a 12-byte stream footer.

    Returns ``(check, backward_size)``, where ``backward_size`` is converted
    from the stored field to a size in bytes.

    Raises XZError on a wrong length, magic, CRC32 or flags.
    """
    if len(footer) != 12:
        raise XZError("footer length")

    checksum, fields, magic = footer[:4], footer[4:10], footer[10:12]
    if magic != FOOTER_MAGIC:
        raise XZError("footer magic")
    if crc32(fields) != checksum:
        raise XZError("footer crc32")

    stored_backward_size, flag_first_byte, check = cast(
        Tuple[int, int, int],
        unpack("<IBB", fields),
    )
    backward_size = (stored_backward_size + 1) * 4
    # the first flag byte must be null, and the check fits in 4 bits
    if flag_first_byte or not 0 <= check <= 0xF:
        raise XZError("footer flags")
    return (check, backward_size)
137 | 
138 | 
# find the default value for check implicitly used by lzma:
# compress empty data and parse the check out of the 12-byte stream header
DEFAULT_CHECK = parse_xz_header(lzma.compress(b"")[:12])
141 | 


--------------------------------------------------------------------------------
/src/xz/file.py:
--------------------------------------------------------------------------------
  1 | from io import SEEK_CUR, SEEK_END
  2 | import os
  3 | import sys
  4 | from typing import BinaryIO, List, Optional, cast
  5 | import warnings
  6 | 
  7 | from xz.common import DEFAULT_CHECK, XZError
  8 | from xz.io import IOCombiner, IOProxy
  9 | from xz.strategy import RollingBlockReadStrategy
 10 | from xz.stream import XZStream
 11 | from xz.typing import (
 12 |     _BlockReadStrategyType,
 13 |     _LZMAFilenameType,
 14 |     _LZMAFiltersType,
 15 |     _LZMAPresetType,
 16 | )
 17 | from xz.utils import AttrProxy, parse_mode
 18 | 
 19 | 
 20 | class XZFile(IOCombiner[XZStream]):
 21 |     """A file object providing transparent XZ (de)compression.
 22 | 
 23 |     An XZFile can act as a wrapper for an existing file object, or
 24 |     refer directly to a named file on disk.
 25 | 
 26 |     Note that XZFile provides a *binary* file interface - data read
 27 |     is returned as bytes, and data to be written must be given as bytes.
 28 |     Use xz.open if you want a *text* file interface.
 29 |     """
 30 | 
 31 |     def __init__(
 32 |         self,
 33 |         filename: _LZMAFilenameType,
 34 |         mode: str = "r",
 35 |         *,
 36 |         check: int = -1,
 37 |         preset: _LZMAPresetType = None,
 38 |         filters: _LZMAFiltersType = None,
 39 |         block_read_strategy: Optional[_BlockReadStrategyType] = None,
 40 |     ) -> None:
 41 |         """Open an XZ file in binary mode.
 42 | 
 43 |         The filename argument can be either an actual file name
 44 |         (given as a str, bytes, or PathLike object),
 45 |         in which case the named file is opened,
 46 |         or it can be an existing file object to read from or write to.
 47 | 
 48 |         The mode argument can be one of the following:
 49 |          - "r" for reading (default)
 50 |          - "w" for writing, truncating the file
 51 |          - "r+" for reading and writing
 52 |          - "w+" for reading and writing, truncating the file
 53 |          - "x" and "x+" are like "w" and "w+", except that an
 54 |            FileExistsError is raised if the file already exists
 55 | 
 56 |         The following arguments are used during writing:
 57 |          - check: when creating a new stream
 58 |          - preset: when creating a new block
 59 |          - filters: when creating a new block
 60 | 
 61 |         For more information about the check/preset/filters arguments,
 62 |         refer to the documentation of the lzma module.
 63 | 
 64 |         The block_read_strategy argument allows to specify a strategy
 65 |         for freeing block readers, and implement a different tradeoff
 66 |         between memory consumption and read speed when alternating reads
 67 |         between several blocks.
 68 |         """
 69 |         self._close_fileobj = False
 70 |         self._close_check_empty = False
 71 | 
 72 |         super().__init__()
 73 | 
 74 |         self._mode, self._readable, self._writable = parse_mode(mode)
 75 | 
 76 |         # create strategy
 77 |         if block_read_strategy is None:
 78 |             self.block_read_strategy: _BlockReadStrategyType = (
 79 |                 RollingBlockReadStrategy()
 80 |             )
 81 |         else:
 82 |             self.block_read_strategy = block_read_strategy
 83 | 
 84 |         # get fileobj
 85 |         if isinstance(filename, (str, bytes, os.PathLike)):
 86 |             # pylint: disable=consider-using-with, unspecified-encoding
 87 |             self.fileobj = cast(BinaryIO, open(filename, self._mode + "b"))
 88 |             self._close_fileobj = True
 89 |         elif hasattr(filename, "read"):  # weak check but better than nothing
 90 |             self.fileobj = filename
 91 |         else:
 92 |             raise TypeError("filename must be a str, bytes, file or PathLike object")
 93 | 
 94 |         # check fileobj
 95 |         if not self.fileobj.seekable():
 96 |             raise ValueError("filename is not seekable")
 97 |         if self._readable and not self.fileobj.readable():
 98 |             raise ValueError("filename is not readable")
 99 |         if self._writable and not self.fileobj.writable():
100 |             raise ValueError("filename is not writable")
101 | 
102 |         # init
103 |         if self._mode[0] in "wx":
104 |             self.fileobj.truncate(0)
105 |         if self._readable:
106 |             self._init_parse()
107 |         if self._mode[0] == "r" and not self._fileobjs:
108 |             raise XZError("file: no streams")
109 | 
110 |         self.check = check if check != -1 else DEFAULT_CHECK
111 |         self.preset = preset
112 |         self.filters = filters
113 | 
114 |         self._close_check_empty = self._mode[0] != "r"
115 | 
116 |     @property
117 |     def _last_stream(self) -> Optional[XZStream]:
118 |         try:
119 |             return self._fileobjs.last_item
120 |         except KeyError:
121 |             return None
122 | 
123 |     preset = AttrProxy[_LZMAPresetType]("_last_stream")
124 |     filters = AttrProxy[_LZMAFiltersType]("_last_stream")
125 | 
126 |     @property
127 |     def mode(self) -> str:
128 |         return self._mode
129 | 
130 |     def readable(self) -> bool:
131 |         return self._readable
132 | 
133 |     def writable(self) -> bool:
134 |         return self._writable
135 | 
136 |     def close(self) -> None:
137 |         try:
138 |             super().close()
139 |             if self._close_check_empty and not self:
140 |                 warnings.warn(
141 |                     "Empty XZFile: nothing was written, "
142 |                     "so output is empty (and not a valid xz file).",
143 |                     RuntimeWarning,
144 |                 )
145 |         finally:
146 |             if self._close_fileobj:
147 |                 self.fileobj.close()  # self.fileobj exists at this point
148 |             if sys.version_info < (3, 10):  # pragma: no cover
149 |                 # fix coverage issue on some Python versions
150 |                 # see https://github.com/nedbat/coveragepy/issues/1480
151 |                 pass
152 | 
153 |     @property
154 |     def stream_boundaries(self) -> List[int]:
155 |         return list(self._fileobjs)
156 | 
157 |     @property
158 |     def block_boundaries(self) -> List[int]:
159 |         return [
160 |             stream_pos + block_boundary
161 |             for stream_pos, stream in self._fileobjs.items()
162 |             for block_boundary in stream.block_boundaries
163 |         ]
164 | 
165 |     def _init_parse(self) -> None:
166 |         self.fileobj.seek(0, SEEK_END)
167 | 
168 |         streams = []
169 | 
170 |         while self.fileobj.tell():
171 |             if self.fileobj.tell() % 4:
172 |                 raise XZError("file: invalid size")
173 |             self.fileobj.seek(-4, SEEK_CUR)
174 |             if any(self.fileobj.read(4)):
175 |                 streams.append(XZStream.parse(self.fileobj, self.block_read_strategy))
176 |             else:
177 |                 self.fileobj.seek(-4, SEEK_CUR)  # stream padding
178 | 
179 |         while streams:
180 |             self._append(streams.pop())
181 | 
182 |     def _create_fileobj(self) -> XZStream:
183 |         stream_pos = sum(len(stream.fileobj) for stream in self._fileobjs.values())
184 |         return XZStream(
185 |             IOProxy(
186 |                 self.fileobj,
187 |                 stream_pos,
188 |                 stream_pos,
189 |             ),
190 |             self.check,
191 |             self.preset,
192 |             self.filters,
193 |             self.block_read_strategy,
194 |         )
195 | 
196 |     def change_stream(self) -> None:
197 |         """
198 |         Create a new stream.
199 | 
200 |         If the current stream is empty, replace it instead."""
201 |         if self._fileobjs:
202 |             self._change_fileobj()
203 | 
204 |     def change_block(self) -> None:
205 |         """
206 |         Create a new block.
207 | 
208 |         If the current block is empty, replace it instead."""
209 |         last_stream = self._last_stream
210 |         if last_stream:
211 |             last_stream.change_block()
212 | 


--------------------------------------------------------------------------------
/src/xz/io.py:
--------------------------------------------------------------------------------
  1 | from io import (
  2 |     DEFAULT_BUFFER_SIZE,
  3 |     SEEK_CUR,
  4 |     SEEK_END,
  5 |     SEEK_SET,
  6 |     IOBase,
  7 |     UnsupportedOperation,
  8 | )
  9 | from typing import BinaryIO, Generic, Optional, TypeVar, Union, cast
 10 | 
 11 | from xz.utils import FloorDict
 12 | 
 13 | #
 14 | # Typing note
 15 | #
 16 | # The consensus seems to favour IO instead of IOBase for typing.
 17 | # However we cannot subclass BinaryIO in IOAbstract as it conflicts with IOBase.
 18 | #
 19 | # As a result, some casting or unions between the two types may be required internally.
 20 | #
 21 | 
 22 | 
 23 | class IOAbstract(IOBase):
 24 |     def __init__(self, length: int) -> None:
 25 |         super().__init__()
 26 |         self._pos = 0
 27 |         self._length = length
 28 |         self._modified = False
 29 | 
 30 |     def __repr__(self) -> str:
 31 |         return f"<{self.__class__.__name__} object at {hex(hash(self))}>"
 32 | 
 33 |     def __len__(self) -> int:
 34 |         return self._length
 35 | 
 36 |     def _check_not_closed(self) -> None:
 37 |         # https://github.com/PyCQA/pylint/issues/3484
 38 |         # pylint: disable=using-constant-test
 39 |         if self.closed:
 40 |             raise ValueError("I/O operation on closed file")
 41 | 
 42 |     def fileno(self) -> int:
 43 |         try:
 44 |             return cast(BinaryIO, self.fileobj).fileno()  # type: ignore[attr-defined]
 45 |         except AttributeError:
 46 |             raise UnsupportedOperation("fileno")  # pylint: disable=raise-missing-from
 47 | 
 48 |     def seekable(self) -> bool:
 49 |         """Return a bool indicating whether object supports random access."""
 50 |         return True
 51 | 
 52 |     def readable(self) -> bool:
 53 |         """Return a bool indicating whether object was opened for reading."""
 54 |         return True
 55 | 
 56 |     def writable(self) -> bool:
 57 |         """Return a bool indicating whether object was opened for writing."""
 58 |         return True
 59 | 
 60 |     def seek(self, pos: int, whence: int = SEEK_SET) -> int:
 61 |         """Change stream position.
 62 | 
 63 |         Change the stream position to byte offset pos. Argument pos is
 64 |         interpreted relative to the position indicated by whence. Values
 65 |         for whence are ints:
 66 | 
 67 |         * 0 -- start of stream (the default); offset should be zero or positive
 68 |         * 1 -- current stream position; offset may be negative
 69 |         * 2 -- end of stream; offset should be negative
 70 | 
 71 |         Return an int indicating the new absolute position.
 72 |         """
 73 |         self._check_not_closed()
 74 |         if not self.seekable():  # just in case seekable is overridden
 75 |             raise UnsupportedOperation("seek")
 76 |         if whence == SEEK_SET:
 77 |             pass
 78 |         elif whence == SEEK_CUR:
 79 |             pos += self._pos
 80 |         elif whence == SEEK_END:
 81 |             pos += self._length
 82 |         else:
 83 |             raise ValueError("unsupported whence value")
 84 |         if pos >= 0:
 85 |             self._pos = pos
 86 |             return self._pos
 87 |         raise ValueError("invalid seek position")
 88 | 
 89 |     def tell(self) -> int:
 90 |         """Return an int indicating the current stream position."""
 91 |         self._check_not_closed()
 92 |         return self._pos
 93 | 
 94 |     def read(self, size: int = -1) -> bytes:
 95 |         """Read at most size bytes, returned as a bytes object.
 96 | 
 97 |         If the size argument is negative, read until EOF is reached.
 98 |         Return an empty bytes object at or after EOF.
 99 |         """
100 |         self._check_not_closed()
101 |         if not self.readable():
102 |             raise UnsupportedOperation("read")
103 |         if size < 0:
104 |             size = self._length
105 |         size = min(size, self._length - self._pos)
106 |         parts = []
107 |         while size > 0:
108 |             data = self._read(size)  # do not stop if nothing was read
109 |             parts.append(data)
110 |             size -= len(data)
111 |             self._pos += len(data)
112 |         return b"".join(parts)
113 | 
114 |     def _write_start(self) -> None:
115 |         if not self._modified:
116 |             self._write_before()
117 |             self._modified = True
118 | 
119 |     def _write_end(self) -> None:
120 |         if self._modified:
121 |             self._write_after()
122 |             self._modified = False
123 | 
124 |     def write(self, data: bytes) -> int:
125 |         """Write data, passed as a bytes object.
126 | 
127 |         Returns the number of bytes written, which is always the length
128 |         of the input data in bytes.
129 |         """
130 |         self._check_not_closed()
131 |         if not self.writable():
132 |             raise UnsupportedOperation("write")
133 |         written_bytes = len(data)
134 |         padding_size = self._pos - self._length
135 |         if padding_size < 0:
136 |             raise ValueError("write is only supported from EOF")
137 |         if padding_size > 0:
138 |             null_bytes = memoryview(bytearray(DEFAULT_BUFFER_SIZE))
139 |             self._pos = self._length
140 |         data = memoryview(data)
141 |         while padding_size or data:
142 |             self._write_start()
143 |             if padding_size > 0:
144 |                 # pad with null bytes, not counted in written_bytes
145 |                 padding = null_bytes[:padding_size]
146 |                 written_len = self._write(padding)  # do not stop if nothing was written
147 |                 padding_size -= written_len
148 |             else:
149 |                 written_len = self._write(data)  # do not stop if nothing was written
150 |                 data = data[written_len:]
151 |             self._pos += written_len
152 |             self._length = max(self._length, self._pos)
153 |         return written_bytes
154 | 
155 |     def truncate(self, size: Optional[int] = None) -> int:
156 |         """Truncate file to size bytes.
157 |         Size defaults to the current IO position as reported by tell().
158 | 
159 |         The current file position is unchanged.
160 | 
161 |         Return the new size.
162 |         """
163 |         self._check_not_closed()
164 |         if not self.writable():
165 |             raise UnsupportedOperation("truncate")
166 |         if size is None:
167 |             size = self._pos
168 |         elif size < 0:
169 |             raise ValueError("invalid truncate size")
170 |         if size != self._length:
171 |             self._write_start()
172 |             pos = self._pos
173 |             self._truncate(size)
174 |             self._length = size
175 |             self._pos = pos  # make sure position is unchanged
176 |         return self._length
177 | 
178 |     def close(self) -> None:
179 |         """Flush and close the stream.
180 | 
181 |         This method has no effect if it is already closed.
182 |         """
183 |         try:
184 |             if not self.closed:
185 |                 self._write_end()
186 |         finally:
187 |             super().close()
188 | 
189 |     # the methods below are expected to be implemented by subclasses
190 | 
191 |     def _read(self, size: int) -> bytes:  # pragma: no cover
192 |         """Read and return up to size bytes, where size is an int.
193 | 
194 |         The size will not exceed the number of bytes between self._pos and
195 |         self._length. This should prevent to deal with EOF.
196 | 
197 |         This method can return less bytes than size, in which case it will be
198 |         called again. This includes being able to return an empty bytes object.
199 |         """
200 |         raise UnsupportedOperation("read")
201 | 
202 |     def _write_before(self) -> None:
203 |         """This method is called before the first write operation."""
204 | 
205 |     def _write_after(self) -> None:
206 |         """This method is called after the last write operation (usually on file close)."""
207 | 
208 |     def _write(self, data: bytes) -> int:  # pragma: no cover
209 |         """Writes as many bytes from data as possible, and return the number
210 |         of bytes written.
211 | 
212 |         data may be greater than the number of bytes between self._pos
213 |         and self._length; self._length will be updated by caller afterwards.
214 | 
215 |         This method can return and int smaller than the length of data, in which
216 |         case it will be called again. This includes being able to return 0.
217 |         """
218 |         raise UnsupportedOperation("write")
219 | 
220 |     def _truncate(self, size: int) -> None:  # pragma: no cover
221 |         """Truncate the file to the given size.
222 |         This resizing can extend or reduce the current file size.
223 | 
224 |         The current file position may be changed by this method,
225 |         but is restored by caller.
226 | 
227 |         Returns None.
228 |         """
229 |         raise UnsupportedOperation("truncate")
230 | 
231 | 
class IOStatic(IOAbstract):
    """Read-only IO object serving a private copy of ``data``."""

    def __init__(self, data: bytes) -> None:
        content = bytearray(data)
        self.data = content
        super().__init__(len(content))

    def writable(self) -> bool:
        """Static content is never writable."""
        return False

    def _read(self, size: int) -> bytes:
        begin = self._pos
        end = begin + size
        return self.data[begin:end]
242 | 
243 | 
class IOProxy(IOAbstract):
    """Window on ``fileobj``, beginning at offset ``start`` of that file.

    Positions of the proxy are shifted by ``start`` to obtain positions in
    the underlying file object; the initial length is ``end - start``.
    """

    def __init__(
        self,
        fileobj: Union[BinaryIO, IOBase],  # see typing note on top of this file
        start: int,
        end: int,
    ) -> None:
        length = end - start
        super().__init__(length)
        self.fileobj = fileobj
        self.start = start

    def _read(self, size: int) -> bytes:
        absolute_pos = self.start + self._pos
        self.fileobj.seek(absolute_pos, SEEK_SET)
        return self.fileobj.read(size)  # size already restricted by caller

    def _write(self, data: bytes) -> int:
        absolute_pos = self.start + self._pos
        self.fileobj.seek(absolute_pos, SEEK_SET)
        return self.fileobj.write(data)

    def _truncate(self, size: int) -> None:
        absolute_size = self.start + size
        self.fileobj.truncate(absolute_size)
265 | 
266 | 
267 | T = TypeVar("T", bound=IOAbstract)
268 | 
269 | 
270 | class IOCombiner(IOAbstract, Generic[T]):
271 |     def __init__(self, *fileobjs: T) -> None:
272 |         super().__init__(0)
273 |         self._fileobjs: FloorDict[T] = FloorDict()
274 |         for fileobj in fileobjs:
275 |             self._append(fileobj)
276 | 
277 |     def _get_fileobj(self) -> T:
278 |         start, fileobj = self._fileobjs.get_with_index(self._pos)
279 |         fileobj.seek(self._pos - start, SEEK_SET)
280 |         return fileobj
281 | 
282 |     def _read(self, size: int) -> bytes:
283 |         return self._get_fileobj().read(size)
284 | 
285 |     def _write_after(self) -> None:
286 |         if self._fileobjs:
287 |             last_fileobj = self._fileobjs.last_item
288 |             if last_fileobj:
289 |                 last_fileobj._write_end()  # pylint: disable=protected-access
290 |             else:
291 |                 del self._fileobjs[self._fileobjs.last_key]
292 | 
293 |     def _write(self, data: bytes) -> int:
294 |         if self._fileobjs:
295 |             fileobj: Optional[T] = self._get_fileobj()
296 |         else:
297 |             fileobj = None
298 | 
299 |         if fileobj is None or not fileobj.writable():
300 |             self._change_fileobj()
301 |             fileobj = self._get_fileobj()
302 | 
303 |         # newly created fileobj should be writable
304 |         # otherwire this will raise UnsupportedOperation
305 |         return fileobj.write(data)
306 | 
307 |     def _truncate(self, size: int) -> None:
308 |         start, fileobj = self._fileobjs.get_with_index(size)
309 |         if start != size:
310 |             fileobj.truncate(size - start)
311 |         for key in reversed(self._fileobjs):
312 |             if key < size:
313 |                 break
314 |             del self._fileobjs[key]
315 | 
316 |     def _append(self, fileobj: T) -> None:
317 |         if not isinstance(fileobj, IOAbstract):
318 |             raise TypeError
319 |         self._fileobjs[self._length] = fileobj  # override empty streams
320 |         self._length += len(fileobj)
321 | 
322 |     def _change_fileobj(self) -> None:
323 |         """Create and append a new fileobj.
324 | 
325 |         If the last fileobj was empty, delete it.
326 |         """
327 |         # end write on last fileobj
328 |         if self._fileobjs:
329 |             last_fileobj = self._fileobjs.last_item
330 |             if last_fileobj:
331 |                 if last_fileobj.writable():
332 |                     last_fileobj._write_end()  # pylint: disable=protected-access
333 |             else:
334 |                 del self._fileobjs[self._fileobjs.last_key]
335 | 
336 |         # create and append new fileobj
337 |         self._append(self._create_fileobj())
338 | 
339 |     def _create_fileobj(self) -> T:  # pragma: no cover
340 |         """
341 |         Create the next fileobj to be concatenated (called by _change_fileobj).
342 | 
343 |         Subclasses must override this; the returned fileobj must be writable.
344 |         """
345 |         raise NotImplementedError
346 | 


--------------------------------------------------------------------------------
/src/xz/open.py:
--------------------------------------------------------------------------------
  1 | from functools import wraps
  2 | from io import TextIOWrapper
  3 | from typing import BinaryIO, List, Optional, Union, cast, overload
  4 | 
  5 | from xz.file import XZFile
  6 | from xz.typing import (
  7 |     _BlockReadStrategyType,
  8 |     _LZMAFilenameType,
  9 |     _LZMAFiltersType,
 10 |     _LZMAPresetType,
 11 |     _XZModesBinaryType,
 12 |     _XZModesTextType,
 13 | )
 14 | from xz.utils import AttrProxy
 15 | 
 16 | 
 17 | class _XZFileText(TextIOWrapper):
 18 |     def __init__(
 19 |         self,
 20 |         filename: _LZMAFilenameType,
 21 |         mode: str,
 22 |         *,
 23 |         check: int = -1,
 24 |         preset: _LZMAPresetType = None,
 25 |         filters: _LZMAFiltersType = None,
 26 |         block_read_strategy: Optional[_BlockReadStrategyType] = None,
 27 |         encoding: Optional[str] = None,
 28 |         errors: Optional[str] = None,
 29 |         newline: Optional[str] = None,
 30 |     ) -> None:
 31 |         self.xz_file = XZFile(
 32 |             filename,
 33 |             mode.replace("t", ""),
 34 |             check=check,
 35 |             preset=preset,
 36 |             filters=filters,
 37 |             block_read_strategy=block_read_strategy,
 38 |         )
 39 |         super().__init__(
 40 |             cast(BinaryIO, self.xz_file),
 41 |             encoding,
 42 |             errors,
 43 |             newline,
 44 |         )
 45 | 
 46 |     check = AttrProxy[int]("xz_file")
 47 |     preset = AttrProxy[_LZMAPresetType]("xz_file")
 48 |     filters = AttrProxy[_LZMAFiltersType]("xz_file")
 49 |     stream_boundaries = AttrProxy[List[int]]("xz_file")
 50 |     block_boundaries = AttrProxy[List[int]]("xz_file")
 51 |     block_read_strategy = AttrProxy[_BlockReadStrategyType]("xz_file")
 52 | 
 53 |     @property
 54 |     def mode(self) -> str:
 55 |         return f"{self.xz_file.mode}t"
 56 | 
 57 |     @wraps(XZFile.change_stream)
 58 |     def change_stream(self) -> None:
 59 |         self.flush()
 60 |         self.xz_file.change_stream()
 61 | 
 62 |     @wraps(XZFile.change_block)
 63 |     def change_block(self) -> None:
 64 |         self.flush()
 65 |         self.xz_file.change_block()
 66 | 
 67 | 
 68 | @overload
 69 | def xz_open(
 70 |     filename: _LZMAFilenameType,
 71 |     mode: _XZModesBinaryType = "rb",
 72 |     *,
 73 |     # forwarded to XZFile
 74 |     check: int = -1,
 75 |     preset: _LZMAPresetType = None,
 76 |     filters: _LZMAFiltersType = None,
 77 |     block_read_strategy: Optional[_BlockReadStrategyType] = None,
 78 |     # text-mode kwargs (must be left to None in binary mode)
 79 |     encoding: Optional[str] = None,
 80 |     errors: Optional[str] = None,
 81 |     newline: Optional[str] = None,
 82 | ) -> XZFile:
 83 |     ...  # pragma: no cover
 84 | 
 85 | 
 86 | @overload
 87 | def xz_open(
 88 |     filename: _LZMAFilenameType,
 89 |     mode: _XZModesTextType,
 90 |     *,
 91 |     # forwarded to XZFile
 92 |     check: int = -1,
 93 |     preset: _LZMAPresetType = None,
 94 |     filters: _LZMAFiltersType = None,
 95 |     block_read_strategy: Optional[_BlockReadStrategyType] = None,
 96 |     # text-mode kwargs, handed to the io.TextIOWrapper layer
 97 |     encoding: Optional[str] = None,
 98 |     errors: Optional[str] = None,
 99 |     newline: Optional[str] = None,
100 | ) -> _XZFileText:
101 |     ...  # pragma: no cover
102 | 
103 | 
104 | @overload
105 | def xz_open(
106 |     filename: _LZMAFilenameType,
107 |     mode: str,
108 |     *,
109 |     # forwarded to XZFile
110 |     check: int = -1,
111 |     preset: _LZMAPresetType = None,
112 |     filters: _LZMAFiltersType = None,
113 |     block_read_strategy: Optional[_BlockReadStrategyType] = None,
114 |     # text-mode kwargs (only accepted when mode contains "t")
115 |     encoding: Optional[str] = None,
116 |     errors: Optional[str] = None,
117 |     newline: Optional[str] = None,
118 | ) -> Union[XZFile, _XZFileText]:
119 |     ...  # pragma: no cover
120 | 
121 | 
122 | def xz_open(
123 |     filename: _LZMAFilenameType,
124 |     mode: str = "rb",
125 |     *,
126 |     # XZFile kwargs
127 |     check: int = -1,
128 |     preset: _LZMAPresetType = None,
129 |     filters: _LZMAFiltersType = None,
130 |     block_read_strategy: Optional[_BlockReadStrategyType] = None,
131 |     # text-mode kwargs
132 |     encoding: Optional[str] = None,
133 |     errors: Optional[str] = None,
134 |     newline: Optional[str] = None,
135 | ) -> Union[XZFile, _XZFileText]:
136 |     """Open an XZ file in binary or text mode.
137 | 
138 |     filename can be either an actual file name (given as a str, bytes,
139 |     or PathLike object), in which case the named file is opened, or it
140 |     can be an existing file object to read from or write to.
141 | 
142 |     For binary mode, this function is equivalent to the XZFile
143 |     constructor: XZFile(filename, mode, ...). In this case, the
144 |     encoding, errors and newline arguments must not be provided.
145 | 
146 |     For text mode, an XZFile object is created, and wrapped in an
147 |     io.TextIOWrapper instance with the specified encoding, error
148 |     handling behavior, and line ending(s).
149 |     """
150 |     if "t" in mode:
151 |         if "b" in mode:
152 |             raise ValueError(f"Invalid mode: {mode}")
153 | 
154 |         return _XZFileText(
155 |             filename,
156 |             mode,
157 |             check=check,
158 |             preset=preset,
159 |             filters=filters,
160 |             block_read_strategy=block_read_strategy,
161 |             encoding=encoding,
162 |             errors=errors,
163 |             newline=newline,
164 |         )
165 | 
166 |     if encoding is not None:
167 |         raise ValueError("Argument 'encoding' not supported in binary mode")
168 |     if errors is not None:
169 |         raise ValueError("Argument 'errors' not supported in binary mode")
170 |     if newline is not None:
171 |         raise ValueError("Argument 'newline' not supported in binary mode")
172 | 
173 |     return XZFile(
174 |         filename,
175 |         mode,
176 |         check=check,
177 |         preset=preset,
178 |         filters=filters,
179 |         block_read_strategy=block_read_strategy,
180 |     )
181 | 


--------------------------------------------------------------------------------
/src/xz/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/src/xz/py.typed


--------------------------------------------------------------------------------
/src/xz/strategy.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | from typing import TYPE_CHECKING, Dict
 3 | 
 4 | if TYPE_CHECKING:  # pragma: no cover
 5 |     # avoid circular dependency
 6 |     from xz.block import XZBlock
 7 | 
 8 | 
 9 | class KeepBlockReadStrategy:
10 |     def on_create(self, block: "XZBlock") -> None:
11 |         pass  # do nothing
12 | 
13 |     def on_delete(self, block: "XZBlock") -> None:
14 |         pass  # do nothing
15 | 
16 |     def on_read(self, block: "XZBlock") -> None:
17 |         pass  # do nothing
18 | 
19 | 
20 | class RollingBlockReadStrategy:
21 |     def __init__(self, max_block_read_nb: int = 8) -> None:
22 |         self.block_reads: Dict["XZBlock", float] = {}
23 |         self.max_block_read_nb = max_block_read_nb
24 | 
25 |     def _freshly_used(self, block: "XZBlock") -> None:
26 |         self.block_reads[block] = time.monotonic()
27 | 
28 |     def on_create(self, block: "XZBlock") -> None:
29 |         self._freshly_used(block)
30 |         if len(self.block_reads) > self.max_block_read_nb:
31 |             to_clear = min(
32 |                 self.block_reads.items(),
33 |                 key=lambda item: item[1],
34 |             )[0]
35 |             to_clear.clear()  # will call on_delete
36 | 
37 |     def on_delete(self, block: "XZBlock") -> None:
38 |         del self.block_reads[block]
39 | 
40 |     def on_read(self, block: "XZBlock") -> None:
41 |         self._freshly_used(block)
42 | 


--------------------------------------------------------------------------------
/src/xz/stream.py:
--------------------------------------------------------------------------------
  1 | from io import SEEK_CUR
  2 | from typing import BinaryIO, List, Optional
  3 | 
  4 | from xz.block import XZBlock
  5 | from xz.common import (
  6 |     XZError,
  7 |     create_xz_header,
  8 |     create_xz_index_footer,
  9 |     parse_xz_footer,
 10 |     parse_xz_header,
 11 |     parse_xz_index,
 12 |     round_up,
 13 | )
 14 | from xz.io import IOCombiner, IOProxy
 15 | from xz.typing import _BlockReadStrategyType, _LZMAFiltersType, _LZMAPresetType
 16 | 
 17 | 
 18 | class XZStream(IOCombiner[XZBlock]):
 19 |     def __init__(
 20 |         self,
 21 |         fileobj: IOProxy,
 22 |         check: int,
 23 |         preset: _LZMAPresetType = None,
 24 |         filters: _LZMAFiltersType = None,
 25 |         block_read_strategy: Optional[_BlockReadStrategyType] = None,
 26 |     ) -> None:
 27 |         super().__init__()
 28 |         self.fileobj = fileobj
 29 |         self._check = check
 30 |         self.preset = preset
 31 |         self.filters = filters
 32 |         self.block_read_strategy = block_read_strategy
 33 | 
 34 |     @property
 35 |     def check(self) -> int:
 36 |         return self._check
 37 | 
 38 |     @property
 39 |     def block_boundaries(self) -> List[int]:
 40 |         return list(self._fileobjs)
 41 | 
 42 |     @property
 43 |     def _fileobj_blocks_end_pos(self) -> int:
 44 |         return 12 + sum(
 45 |             round_up(block.unpadded_size) for block in self._fileobjs.values()
 46 |         )
 47 | 
 48 |     @classmethod
 49 |     def parse(
 50 |         cls,
 51 |         fileobj: BinaryIO,
 52 |         block_read_strategy: Optional[_BlockReadStrategyType] = None,
 53 |     ) -> "XZStream":
 54 |         """Parse one XZ stream from a fileobj.
 55 | 
 56 |         fileobj position should be right at the end of the stream when calling
 57 |         and will be moved right at the start of the stream
 58 |         """
 59 |         # footer
 60 |         footer_end_pos = fileobj.seek(-12, SEEK_CUR) + 12
 61 |         footer = fileobj.read(12)
 62 |         check, backward_size = parse_xz_footer(footer)
 63 | 
 64 |         # index
 65 |         block_start = fileobj.seek(-12 - backward_size, SEEK_CUR)
 66 |         index = fileobj.read(backward_size)
 67 |         records = parse_xz_index(index)
 68 |         blocks_len = sum(round_up(unpadded_size) for unpadded_size, _ in records)
 69 |         block_start -= blocks_len
 70 |         blocks = []
 71 |         for unpadded_size, uncompressed_size in records:
 72 |             block_end = block_start + round_up(unpadded_size)
 73 |             blocks.append(
 74 |                 XZBlock(
 75 |                     IOProxy(fileobj, block_start, block_end),
 76 |                     check,
 77 |                     unpadded_size,
 78 |                     uncompressed_size,
 79 |                     block_read_strategy=block_read_strategy,
 80 |                 )
 81 |             )
 82 |             block_start = block_end
 83 | 
 84 |         # header
 85 |         fileobj.seek(-12 - blocks_len - backward_size, SEEK_CUR)
 86 |         header = fileobj.read(12)
 87 |         header_check = parse_xz_header(header)
 88 |         if header_check != check:
 89 |             raise XZError("stream: inconsistent check value")
 90 | 
 91 |         header_start_pos = fileobj.seek(-12, SEEK_CUR)
 92 | 
 93 |         stream_fileobj = IOProxy(fileobj, header_start_pos, footer_end_pos)
 94 |         stream = cls(stream_fileobj, check, block_read_strategy=block_read_strategy)
 95 |         for block in blocks:
 96 |             stream._append(block)
 97 |         return stream
 98 | 
 99 |     def _create_fileobj(self) -> XZBlock:
100 |         self.fileobj.truncate(self._fileobj_blocks_end_pos)
101 |         return XZBlock(
102 |             IOProxy(
103 |                 self.fileobj,
104 |                 self._fileobj_blocks_end_pos,
105 |                 self._fileobj_blocks_end_pos,
106 |             ),
107 |             self.check,
108 |             0,
109 |             0,
110 |             self.preset,
111 |             self.filters,
112 |             self.block_read_strategy,
113 |         )
114 | 
115 |     def _write_before(self) -> None:
116 |         if not self:
117 |             self.fileobj.seek(0)
118 |             self.fileobj.truncate()
119 |             self.fileobj.write(create_xz_header(self.check))
120 | 
121 |     def _write_after(self) -> None:
122 |         super()._write_after()
123 |         self.fileobj.seek(self._fileobj_blocks_end_pos)
124 |         self.fileobj.truncate()
125 |         self.fileobj.write(
126 |             create_xz_index_footer(
127 |                 self.check,
128 |                 [
129 |                     (block.unpadded_size, block.uncompressed_size)
130 |                     for block in self._fileobjs.values()
131 |                 ],
132 |             )
133 |         )
134 | 
135 |     def change_block(self) -> None:
136 |         """
137 |         End the current block, and create a new one.
138 | 
139 |         If the current block is empty, replace it instead."""
140 |         if self._fileobjs:
141 |             self._change_fileobj()
142 | 


--------------------------------------------------------------------------------
/src/xz/typing.py:
--------------------------------------------------------------------------------
 1 | from os import PathLike
 2 | import sys
 3 | from typing import TYPE_CHECKING, Any, BinaryIO, Optional, Union
 4 | 
 5 | if sys.version_info >= (3, 9):  # pragma: no cover
 6 |     from collections.abc import Mapping, Sequence
 7 | 
 8 |     _LZMAFilenameType = Union[str, bytes, PathLike[str], PathLike[bytes], BinaryIO]
 9 | else:  # pragma: no cover
10 |     from typing import Mapping, Sequence
11 | 
12 |     _LZMAFilenameType = Union[str, bytes, PathLike, BinaryIO]
13 | 
14 | 
15 | if sys.version_info >= (3, 8):  # pragma: no cover
16 |     from typing import Literal, Protocol
17 | else:  # pragma: no cover
18 |     from typing_extensions import Literal, Protocol
19 | 
20 | 
21 | if TYPE_CHECKING:  # pragma: no cover
22 |     # avoid circular dependency
23 |     from xz.block import XZBlock
24 | 
25 | 
26 | _LZMAPresetType = Optional[int]
27 | _LZMAFiltersType = Optional[Sequence[Mapping[str, Any]]]
28 | 
29 | 
30 | # every valid mode, written with one fixed character order and no repetition
31 | # (see utils.parse_mode for more details)
32 | # test_parse_mode unit-tests these values to make sure that none is missing
33 | _XZModesBinaryType = Literal[
34 |     "r", "r+", "w", "w+", "x", "x+", "rb", "rb+", "wb", "wb+", "xb", "xb+"
35 | ]
36 | _XZModesTextType = Literal["rt", "rt+", "wt", "wt+", "xt", "xt+"]
37 | 
38 | 
39 | class _BlockReadStrategyType(Protocol):  # the three hooks a block read strategy provides
40 |     def on_create(self, block: "XZBlock") -> None:
41 |         ...  # pragma: no cover
42 | 
43 |     def on_delete(self, block: "XZBlock") -> None:
44 |         ...  # pragma: no cover
45 | 
46 |     def on_read(self, block: "XZBlock") -> None:
47 |         ...  # pragma: no cover
48 | 


--------------------------------------------------------------------------------
/src/xz/utils.py:
--------------------------------------------------------------------------------
  1 | from bisect import bisect_right, insort_right
  2 | import sys
  3 | from typing import Any, Dict, Generic, List, Tuple, TypeVar, cast
  4 | 
  5 | if sys.version_info >= (3, 9):  # pragma: no cover
  6 |     from collections.abc import Iterator, MutableMapping
  7 | else:  # pragma: no cover
  8 |     from typing import Iterator, MutableMapping
  9 | 
 10 | 
 11 | T = TypeVar("T")
 12 | 
 13 | 
 14 | class FloorDict(MutableMapping[int, T]):
 15 |     """A dict where keys are int, and accessing a key will use the closest lower one.
 16 | 
 17 |     Differences from dict:
 18 |      - keys must be int
 19 |      - obj[key] will return the value whose key is the closest one which is lower or equal to key
 20 |     """
 21 | 
 22 |     def __init__(self) -> None:
 23 |         self._dict: Dict[int, T] = {}
 24 |         self._keys: List[int] = []  # sorted
 25 | 
 26 |     def __repr__(self) -> str:
 27 |         return f"FloorDict<{self._dict!r}>"
 28 | 
 29 |     def __iter__(self) -> Iterator[int]:
 30 |         return iter(self._keys)
 31 | 
 32 |     def __reversed__(self) -> Iterator[int]:
 33 |         return reversed(self._keys)
 34 | 
 35 |     def __len__(self) -> int:
 36 |         return len(self._keys)
 37 | 
 38 |     def _key_index(self, key: int) -> int:
 39 |         index = bisect_right(self._keys, key) - 1
 40 |         if index < 0:
 41 |             raise KeyError(key)
 42 |         return index
 43 | 
 44 |     def get_with_index(self, key: int) -> Tuple[int, T]:
 45 |         if not isinstance(key, int):
 46 |             raise TypeError("Invalid key")
 47 |         index = self._keys[self._key_index(key)]
 48 |         value = self._dict[index]
 49 |         return (index, value)
 50 | 
 51 |     def __getitem__(self, key: int) -> T:
 52 |         return self.get_with_index(key)[1]
 53 | 
 54 |     def __setitem__(self, key: int, value: T) -> None:
 55 |         if not isinstance(key, int):
 56 |             raise TypeError("Invalid key")
 57 |         if key not in self._dict:  # prevent duplicates in _keys
 58 |             insort_right(self._keys, key)
 59 |         self._dict[key] = value
 60 | 
 61 |     def __delitem__(self, key: int) -> None:
 62 |         del self._dict[key]
 63 |         # the key is an exact index (otherwise KeyError raised on last line)
 64 |         self._keys.pop(self._key_index(key))
 65 | 
 66 |     @property
 67 |     def last_key(self) -> int:
 68 |         if not self._keys:
 69 |             raise KeyError("dictionary is empty")
 70 |         return self._keys[-1]
 71 | 
 72 |     @property
 73 |     def last_item(self) -> T:
 74 |         return self._dict[self.last_key]
 75 | 
 76 | 
 77 | def parse_mode(mode: str) -> Tuple[str, bool, bool]:
 78 |     """Parse a mode used in open.
 79 | 
 80 |     Order is not considered at all.
 81 |     Binary flag (b) is ignored.
 82 |     Valid modes are: r, r+, w, w+, x, x+.
 83 | 
 84 |     Return a tuple (nomalized, is_read, is_write).
 85 |     """
 86 |     mode_set = set(mode)
 87 |     if len(mode_set) != len(mode):
 88 |         raise ValueError(f"invalid mode: {mode}")
 89 |     mode_plus = "+" in mode_set
 90 |     mode_set -= {"b", "+"}
 91 |     mode_base = mode_set.pop() if mode_set else "invalid"
 92 |     if mode_set or mode_base not in "rwx":
 93 |         raise ValueError(f"invalid mode: {mode}")
 94 |     if mode_plus:
 95 |         return (f"{mode_base}+", True, True)
 96 |     return (mode_base, mode_base == "r", mode_base != "r")
 97 | 
 98 | 
 99 | class AttrProxy(Generic[T]):
100 |     """Create a descriptor that is a proxy to the same attribute of an attribute.
101 | 
102 |     Example:
103 | 
104 |         class Foo:
105 |             proxy = Something()
106 |             bar = AttrProxy("proxy")
107 | 
108 |         foo = Foo()
109 | 
110 |         then foo.bar would be proxied to foo.proxy.bar
111 | 
112 |     If the proxy value is None, then use a local value instead,
113 |     which acts as a temporary storage in the meanwhile.
114 |     """
115 | 
116 |     # Typing note
117 |     #
118 |     # There is no typing enforced to make sure that the proxy attribute
119 |     # on the attribute exists and is of type T.
120 |     # We just trust that the user-provided T is right.
121 |     #
122 |     # This explains the use of Any everywhere
123 |     #
124 | 
125 |     attribute: str
126 |     not_proxied_value: T
127 | 
128 |     def __init__(self, proxy: str) -> None:
129 |         self.proxy = proxy
130 | 
131 |     def __set_name__(self, klass: Any, name: str) -> None:
132 |         self.attribute = name
133 | 
134 |     def __get__(self, instance: Any, klass: Any) -> T:
135 |         dest = getattr(instance, self.proxy)
136 |         if dest is None:
137 |             try:
138 |                 return self.not_proxied_value
139 |             except AttributeError as ex:
140 |                 raise AttributeError(
141 |                     f"'{klass.__name__}' object has not attribute '{self.attribute}'"
142 |                     f" until its attribute '{self.proxy}' is defined"
143 |                 ) from ex
144 |         return cast(T, getattr(dest, self.attribute))
145 | 
146 |     def __set__(self, instance: Any, value: T) -> None:
147 |         dest = getattr(instance, self.proxy)
148 |         if dest is None:
149 |             self.not_proxied_value = value
150 |         else:
151 |             setattr(dest, self.attribute, value)
152 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | from itertools import chain, product
 2 | from pathlib import Path
 3 | import sys
 4 | from typing import List, Tuple
 5 | 
 6 | import pytest
 7 | 
 8 | if sys.version_info >= (3, 9):  # pragma: no cover
 9 |     from collections.abc import Callable, Iterator
10 | else:  # pragma: no cover
11 |     from typing import Callable, Iterator
12 | 
13 | 
14 | def pytest_addoption(parser: pytest.Parser) -> None:
15 |     parser.addoption(
16 |         "--generate-integration-files",
17 |         action="store_true",
18 |         default=False,  # off unless the flag is given on the command line
19 |         help="Test the generation of the integration files",
20 |     )
21 | 
22 | 
23 | def pytest_collection_modifyitems(
24 |     config: pytest.Config, items: List[pytest.Item]
25 | ) -> None:
26 |     root = Path(__file__).parent.parent
27 |     for item in items:
28 |         if item.fspath:
29 |             relative = Path(item.fspath).parent.relative_to(root)
30 |             mark = relative.name
31 |             item.add_marker(getattr(pytest.mark, mark))
32 |     if not config.getoption("--generate-integration-files"):
33 |         skip_mark = pytest.mark.skip(
34 |             reason="need --generate-integration-files option to run"
35 |         )
36 |         for item in items:
37 |             if "generate_integration_files" in item.keywords:
38 |                 item.add_marker(skip_mark)
39 | 
40 | 
41 | # every run of 3 consecutive bytes occurs only once in _DATA_PATTERN
42 | _DATA_PATTERN = bytes(
43 |     chain(
44 |         *product(
45 |             range(65, 91),  # uppercase
46 |             range(97, 123),  # lowercase
47 |             range(48, 58),  # digit
48 |         )
49 |     )
50 | )
51 | 
52 | 
53 | @pytest.fixture(scope="session")
54 | def data_pattern() -> bytes:
55 |     return _DATA_PATTERN  # the module-level pattern, handed out as is
56 | 
57 | 
58 | @pytest.fixture(scope="session")
59 | def data_pattern_locate() -> Iterator[Callable[[bytes], Tuple[int, int]]]:
60 |     def locate(data: bytes) -> Tuple[int, int]:
61 |         if len(data) < 3:
62 |             raise ValueError("data to short")
63 |         return (_DATA_PATTERN.index(data), len(data))
64 | 
65 |     yield locate
66 | 


--------------------------------------------------------------------------------
/tests/integration/conftest.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from pathlib import Path
 3 | from typing import TYPE_CHECKING, Any, Dict, Tuple, cast
 4 | 
 5 | import pytest
 6 | 
 7 | if TYPE_CHECKING:
 8 |     # typing-only view of the fixture request: its param is a Path
 9 |     class _Request(pytest.FixtureRequest):
10 |         param: Path
11 | 
12 | 
13 | _IntegrationCase = Tuple[Path, Dict[str, Any]]
14 | 
15 | 
16 | @pytest.fixture(
17 |     params=(Path(__file__).parent / "files").rglob("*.json"),
18 |     ids=lambda path: cast(Path, path).name,
19 | )
20 | def integration_case(request: "_Request") -> _IntegrationCase:
21 |     json_path = request.param
22 |     with json_path.open() as json_file:
23 |         metadata = cast(Dict[str, Any], json.load(json_file))
24 |     return (json_path.with_suffix(".xz"), metadata)
25 | 


--------------------------------------------------------------------------------
/tests/integration/files/check-crc32.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "generate": [
 3 |     {
 4 |       "cmd": "xz -C crc32",
 5 |       "length": 20280
 6 |     }
 7 |   ],
 8 |   "streams": [
 9 |     {
10 |       "check": 1,
11 |       "blocks": [
12 |         {
13 |           "length": 20280
14 |         }
15 |       ]
16 |     }
17 |   ]
18 | }
19 | 


--------------------------------------------------------------------------------
/tests/integration/files/check-crc32.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/check-crc32.xz


--------------------------------------------------------------------------------
/tests/integration/files/check-crc64.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "generate": [
 3 |     {
 4 |       "cmd": "xz -C crc64",
 5 |       "length": 20280
 6 |     }
 7 |   ],
 8 |   "streams": [
 9 |     {
10 |       "check": 4,
11 |       "blocks": [
12 |         {
13 |           "length": 20280
14 |         }
15 |       ]
16 |     }
17 |   ]
18 | }
19 | 


--------------------------------------------------------------------------------
/tests/integration/files/check-crc64.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/check-crc64.xz


--------------------------------------------------------------------------------
/tests/integration/files/check-none.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "generate": [
 3 |     {
 4 |       "cmd": "xz -C none",
 5 |       "length": 20280
 6 |     }
 7 |   ],
 8 |   "streams": [
 9 |     {
10 |       "check": 0,
11 |       "blocks": [
12 |         {
13 |           "length": 20280
14 |         }
15 |       ]
16 |     }
17 |   ]
18 | }
19 | 


--------------------------------------------------------------------------------
/tests/integration/files/check-none.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/check-none.xz


--------------------------------------------------------------------------------
/tests/integration/files/check-sha256.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "generate": [
 3 |     {
 4 |       "cmd": "xz -C sha256",
 5 |       "length": 20280
 6 |     }
 7 |   ],
 8 |   "streams": [
 9 |     {
10 |       "check": 10,
11 |       "blocks": [
12 |         {
13 |           "length": 20280
14 |         }
15 |       ]
16 |     }
17 |   ]
18 | }
19 | 


--------------------------------------------------------------------------------
/tests/integration/files/check-sha256.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/check-sha256.xz


--------------------------------------------------------------------------------
/tests/integration/files/example.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/example.xz


--------------------------------------------------------------------------------
/tests/integration/files/few-blocks.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "generate": [
 3 |     {
 4 |       "cmd": "xz --block-size 10000",
 5 |       "length": 20280
 6 |     }
 7 |   ],
 8 |   "streams": [
 9 |     {
10 |       "check": 4,
11 |       "blocks": [
12 |         {
13 |           "length": 10000
14 |         },
15 |         {
16 |           "length": 10000
17 |         },
18 |         {
19 |           "length": 280
20 |         }
21 |       ]
22 |     }
23 |   ]
24 | }
25 | 


--------------------------------------------------------------------------------
/tests/integration/files/few-blocks.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/few-blocks.xz


--------------------------------------------------------------------------------
/tests/integration/files/many-blocks.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "generate": [
  3 |     {
  4 |       "cmd": "xz --block-size 100",
  5 |       "length": 20280
  6 |     }
  7 |   ],
  8 |   "streams": [
  9 |     {
 10 |       "check": 4,
 11 |       "blocks": [
 12 |         {
 13 |           "length": 100
 14 |         },
 15 |         {
 16 |           "length": 100
 17 |         },
 18 |         {
 19 |           "length": 100
 20 |         },
 21 |         {
 22 |           "length": 100
 23 |         },
 24 |         {
 25 |           "length": 100
 26 |         },
 27 |         {
 28 |           "length": 100
 29 |         },
 30 |         {
 31 |           "length": 100
 32 |         },
 33 |         {
 34 |           "length": 100
 35 |         },
 36 |         {
 37 |           "length": 100
 38 |         },
 39 |         {
 40 |           "length": 100
 41 |         },
 42 |         {
 43 |           "length": 100
 44 |         },
 45 |         {
 46 |           "length": 100
 47 |         },
 48 |         {
 49 |           "length": 100
 50 |         },
 51 |         {
 52 |           "length": 100
 53 |         },
 54 |         {
 55 |           "length": 100
 56 |         },
 57 |         {
 58 |           "length": 100
 59 |         },
 60 |         {
 61 |           "length": 100
 62 |         },
 63 |         {
 64 |           "length": 100
 65 |         },
 66 |         {
 67 |           "length": 100
 68 |         },
 69 |         {
 70 |           "length": 100
 71 |         },
 72 |         {
 73 |           "length": 100
 74 |         },
 75 |         {
 76 |           "length": 100
 77 |         },
 78 |         {
 79 |           "length": 100
 80 |         },
 81 |         {
 82 |           "length": 100
 83 |         },
 84 |         {
 85 |           "length": 100
 86 |         },
 87 |         {
 88 |           "length": 100
 89 |         },
 90 |         {
 91 |           "length": 100
 92 |         },
 93 |         {
 94 |           "length": 100
 95 |         },
 96 |         {
 97 |           "length": 100
 98 |         },
 99 |         {
100 |           "length": 100
101 |         },
102 |         {
103 |           "length": 100
104 |         },
105 |         {
106 |           "length": 100
107 |         },
108 |         {
109 |           "length": 100
110 |         },
111 |         {
112 |           "length": 100
113 |         },
114 |         {
115 |           "length": 100
116 |         },
117 |         {
118 |           "length": 100
119 |         },
120 |         {
121 |           "length": 100
122 |         },
123 |         {
124 |           "length": 100
125 |         },
126 |         {
127 |           "length": 100
128 |         },
129 |         {
130 |           "length": 100
131 |         },
132 |         {
133 |           "length": 100
134 |         },
135 |         {
136 |           "length": 100
137 |         },
138 |         {
139 |           "length": 100
140 |         },
141 |         {
142 |           "length": 100
143 |         },
144 |         {
145 |           "length": 100
146 |         },
147 |         {
148 |           "length": 100
149 |         },
150 |         {
151 |           "length": 100
152 |         },
153 |         {
154 |           "length": 100
155 |         },
156 |         {
157 |           "length": 100
158 |         },
159 |         {
160 |           "length": 100
161 |         },
162 |         {
163 |           "length": 100
164 |         },
165 |         {
166 |           "length": 100
167 |         },
168 |         {
169 |           "length": 100
170 |         },
171 |         {
172 |           "length": 100
173 |         },
174 |         {
175 |           "length": 100
176 |         },
177 |         {
178 |           "length": 100
179 |         },
180 |         {
181 |           "length": 100
182 |         },
183 |         {
184 |           "length": 100
185 |         },
186 |         {
187 |           "length": 100
188 |         },
189 |         {
190 |           "length": 100
191 |         },
192 |         {
193 |           "length": 100
194 |         },
195 |         {
196 |           "length": 100
197 |         },
198 |         {
199 |           "length": 100
200 |         },
201 |         {
202 |           "length": 100
203 |         },
204 |         {
205 |           "length": 100
206 |         },
207 |         {
208 |           "length": 100
209 |         },
210 |         {
211 |           "length": 100
212 |         },
213 |         {
214 |           "length": 100
215 |         },
216 |         {
217 |           "length": 100
218 |         },
219 |         {
220 |           "length": 100
221 |         },
222 |         {
223 |           "length": 100
224 |         },
225 |         {
226 |           "length": 100
227 |         },
228 |         {
229 |           "length": 100
230 |         },
231 |         {
232 |           "length": 100
233 |         },
234 |         {
235 |           "length": 100
236 |         },
237 |         {
238 |           "length": 100
239 |         },
240 |         {
241 |           "length": 100
242 |         },
243 |         {
244 |           "length": 100
245 |         },
246 |         {
247 |           "length": 100
248 |         },
249 |         {
250 |           "length": 100
251 |         },
252 |         {
253 |           "length": 100
254 |         },
255 |         {
256 |           "length": 100
257 |         },
258 |         {
259 |           "length": 100
260 |         },
261 |         {
262 |           "length": 100
263 |         },
264 |         {
265 |           "length": 100
266 |         },
267 |         {
268 |           "length": 100
269 |         },
270 |         {
271 |           "length": 100
272 |         },
273 |         {
274 |           "length": 100
275 |         },
276 |         {
277 |           "length": 100
278 |         },
279 |         {
280 |           "length": 100
281 |         },
282 |         {
283 |           "length": 100
284 |         },
285 |         {
286 |           "length": 100
287 |         },
288 |         {
289 |           "length": 100
290 |         },
291 |         {
292 |           "length": 100
293 |         },
294 |         {
295 |           "length": 100
296 |         },
297 |         {
298 |           "length": 100
299 |         },
300 |         {
301 |           "length": 100
302 |         },
303 |         {
304 |           "length": 100
305 |         },
306 |         {
307 |           "length": 100
308 |         },
309 |         {
310 |           "length": 100
311 |         },
312 |         {
313 |           "length": 100
314 |         },
315 |         {
316 |           "length": 100
317 |         },
318 |         {
319 |           "length": 100
320 |         },
321 |         {
322 |           "length": 100
323 |         },
324 |         {
325 |           "length": 100
326 |         },
327 |         {
328 |           "length": 100
329 |         },
330 |         {
331 |           "length": 100
332 |         },
333 |         {
334 |           "length": 100
335 |         },
336 |         {
337 |           "length": 100
338 |         },
339 |         {
340 |           "length": 100
341 |         },
342 |         {
343 |           "length": 100
344 |         },
345 |         {
346 |           "length": 100
347 |         },
348 |         {
349 |           "length": 100
350 |         },
351 |         {
352 |           "length": 100
353 |         },
354 |         {
355 |           "length": 100
356 |         },
357 |         {
358 |           "length": 100
359 |         },
360 |         {
361 |           "length": 100
362 |         },
363 |         {
364 |           "length": 100
365 |         },
366 |         {
367 |           "length": 100
368 |         },
369 |         {
370 |           "length": 100
371 |         },
372 |         {
373 |           "length": 100
374 |         },
375 |         {
376 |           "length": 100
377 |         },
378 |         {
379 |           "length": 100
380 |         },
381 |         {
382 |           "length": 100
383 |         },
384 |         {
385 |           "length": 100
386 |         },
387 |         {
388 |           "length": 100
389 |         },
390 |         {
391 |           "length": 100
392 |         },
393 |         {
394 |           "length": 100
395 |         },
396 |         {
397 |           "length": 100
398 |         },
399 |         {
400 |           "length": 100
401 |         },
402 |         {
403 |           "length": 100
404 |         },
405 |         {
406 |           "length": 100
407 |         },
408 |         {
409 |           "length": 100
410 |         },
411 |         {
412 |           "length": 100
413 |         },
414 |         {
415 |           "length": 100
416 |         },
417 |         {
418 |           "length": 100
419 |         },
420 |         {
421 |           "length": 100
422 |         },
423 |         {
424 |           "length": 100
425 |         },
426 |         {
427 |           "length": 100
428 |         },
429 |         {
430 |           "length": 100
431 |         },
432 |         {
433 |           "length": 100
434 |         },
435 |         {
436 |           "length": 100
437 |         },
438 |         {
439 |           "length": 100
440 |         },
441 |         {
442 |           "length": 100
443 |         },
444 |         {
445 |           "length": 100
446 |         },
447 |         {
448 |           "length": 100
449 |         },
450 |         {
451 |           "length": 100
452 |         },
453 |         {
454 |           "length": 100
455 |         },
456 |         {
457 |           "length": 100
458 |         },
459 |         {
460 |           "length": 100
461 |         },
462 |         {
463 |           "length": 100
464 |         },
465 |         {
466 |           "length": 100
467 |         },
468 |         {
469 |           "length": 100
470 |         },
471 |         {
472 |           "length": 100
473 |         },
474 |         {
475 |           "length": 100
476 |         },
477 |         {
478 |           "length": 100
479 |         },
480 |         {
481 |           "length": 100
482 |         },
483 |         {
484 |           "length": 100
485 |         },
486 |         {
487 |           "length": 100
488 |         },
489 |         {
490 |           "length": 100
491 |         },
492 |         {
493 |           "length": 100
494 |         },
495 |         {
496 |           "length": 100
497 |         },
498 |         {
499 |           "length": 100
500 |         },
501 |         {
502 |           "length": 100
503 |         },
504 |         {
505 |           "length": 100
506 |         },
507 |         {
508 |           "length": 100
509 |         },
510 |         {
511 |           "length": 100
512 |         },
513 |         {
514 |           "length": 100
515 |         },
516 |         {
517 |           "length": 100
518 |         },
519 |         {
520 |           "length": 100
521 |         },
522 |         {
523 |           "length": 100
524 |         },
525 |         {
526 |           "length": 100
527 |         },
528 |         {
529 |           "length": 100
530 |         },
531 |         {
532 |           "length": 100
533 |         },
534 |         {
535 |           "length": 100
536 |         },
537 |         {
538 |           "length": 100
539 |         },
540 |         {
541 |           "length": 100
542 |         },
543 |         {
544 |           "length": 100
545 |         },
546 |         {
547 |           "length": 100
548 |         },
549 |         {
550 |           "length": 100
551 |         },
552 |         {
553 |           "length": 100
554 |         },
555 |         {
556 |           "length": 100
557 |         },
558 |         {
559 |           "length": 100
560 |         },
561 |         {
562 |           "length": 100
563 |         },
564 |         {
565 |           "length": 100
566 |         },
567 |         {
568 |           "length": 100
569 |         },
570 |         {
571 |           "length": 100
572 |         },
573 |         {
574 |           "length": 100
575 |         },
576 |         {
577 |           "length": 100
578 |         },
579 |         {
580 |           "length": 100
581 |         },
582 |         {
583 |           "length": 100
584 |         },
585 |         {
586 |           "length": 100
587 |         },
588 |         {
589 |           "length": 100
590 |         },
591 |         {
592 |           "length": 100
593 |         },
594 |         {
595 |           "length": 100
596 |         },
597 |         {
598 |           "length": 100
599 |         },
600 |         {
601 |           "length": 100
602 |         },
603 |         {
604 |           "length": 100
605 |         },
606 |         {
607 |           "length": 100
608 |         },
609 |         {
610 |           "length": 100
611 |         },
612 |         {
613 |           "length": 100
614 |         },
615 |         {
616 |           "length": 100
617 |         },
618 |         {
619 |           "length": 80
620 |         }
621 |       ]
622 |     }
623 |   ]
624 | }
625 | 


--------------------------------------------------------------------------------
/tests/integration/files/many-blocks.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/many-blocks.xz


--------------------------------------------------------------------------------
/tests/integration/files/one-stream-with-padding.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "generate": [
 3 |     {
 4 |       "cmd": "xz --block-size 15000",
 5 |       "length": 20280
 6 |     },
 7 |     {
 8 |       "cmd": "head -c 100 /dev/zero"
 9 |     }
10 |   ],
11 |   "streams": [
12 |     {
13 |       "check": 4,
14 |       "blocks": [
15 |         {
16 |           "length": 15000
17 |         },
18 |         {
19 |           "length": 5280
20 |         }
21 |       ]
22 |     }
23 |   ]
24 | }
25 | 


--------------------------------------------------------------------------------
/tests/integration/files/one-stream-with-padding.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/one-stream-with-padding.xz


--------------------------------------------------------------------------------
/tests/integration/files/several-filters.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "generate": [
 3 |     {
 4 |       "cmd": "xz --x86=start=42 --delta=dist=3 --powerpc --lzma2",
 5 |       "length": 20280
 6 |     }
 7 |   ],
 8 |   "streams": [
 9 |     {
10 |       "check": 4,
11 |       "blocks": [
12 |         {
13 |           "filters": [
14 |             {
15 |               "id": 4,
16 |               "start_offset": 42
17 |             },
18 |             {
19 |               "id": 3,
20 |               "dist": 3
21 |             },
22 |             {
23 |               "id": 5
24 |             },
25 |             {
26 |               "id": 33
27 |             }
28 |           ],
29 |           "length": 20280
30 |         }
31 |       ]
32 |     }
33 |   ]
34 | }
35 | 


--------------------------------------------------------------------------------
/tests/integration/files/several-filters.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/several-filters.xz


--------------------------------------------------------------------------------
/tests/integration/files/several-streams-with-padding.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "generate": [
 3 |     {
 4 |       "cmd": "xz --block-size 1000",
 5 |       "length": 1500
 6 |     },
 7 |     {
 8 |       "cmd": "head -c 100 /dev/zero"
 9 |     },
10 |     {
11 |       "cmd": "xz",
12 |       "length": 2000
13 |     },
14 |     {
15 |       "cmd": "head -c 200 /dev/zero"
16 |     },
17 |     {
18 |       "cmd": "xz --block-size 4000",
19 |       "length": 16780
20 |     },
21 |     {
22 |       "cmd": "head -c 400 /dev/zero"
23 |     }
24 |   ],
25 |   "streams": [
26 |     {
27 |       "check": 4,
28 |       "blocks": [
29 |         {
30 |           "length": 1000
31 |         },
32 |         {
33 |           "length": 500
34 |         }
35 |       ]
36 |     },
37 |     {
38 |       "check": 4,
39 |       "blocks": [
40 |         {
41 |           "length": 2000
42 |         }
43 |       ]
44 |     },
45 |     {
46 |       "check": 4,
47 |       "blocks": [
48 |         {
49 |           "length": 4000
50 |         },
51 |         {
52 |           "length": 4000
53 |         },
54 |         {
55 |           "length": 4000
56 |         },
57 |         {
58 |           "length": 4000
59 |         },
60 |         {
61 |           "length": 780
62 |         }
63 |       ]
64 |     }
65 |   ]
66 | }
67 | 


--------------------------------------------------------------------------------
/tests/integration/files/several-streams-with-padding.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/several-streams-with-padding.xz


--------------------------------------------------------------------------------
/tests/integration/files/several-streams.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "generate": [
 3 |     {
 4 |       "cmd": "xz --block-size 5000",
 5 |       "length": 17000
 6 |     },
 7 |     {
 8 |       "cmd": "xz --block-size 1000",
 9 |       "length": 2100
10 |     },
11 |     {
12 |       "cmd": "xz",
13 |       "length": 1180
14 |     }
15 |   ],
16 |   "streams": [
17 |     {
18 |       "check": 4,
19 |       "blocks": [
20 |         {
21 |           "length": 5000
22 |         },
23 |         {
24 |           "length": 5000
25 |         },
26 |         {
27 |           "length": 5000
28 |         },
29 |         {
30 |           "length": 2000
31 |         }
32 |       ]
33 |     },
34 |     {
35 |       "check": 4,
36 |       "blocks": [
37 |         {
38 |           "length": 1000
39 |         },
40 |         {
41 |           "length": 1000
42 |         },
43 |         {
44 |           "length": 100
45 |         }
46 |       ]
47 |     },
48 |     {
49 |       "check": 4,
50 |       "blocks": [
51 |         {
52 |           "length": 1180
53 |         }
54 |       ]
55 |     }
56 |   ]
57 | }
58 | 


--------------------------------------------------------------------------------
/tests/integration/files/several-streams.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/several-streams.xz


--------------------------------------------------------------------------------
/tests/integration/files/various-block-sizes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "generate": [
 3 |     {
 4 |       "cmd": "xz --block-list 1234,567,8901,234,5678,90,0",
 5 |       "length": 20280
 6 |     }
 7 |   ],
 8 |   "streams": [
 9 |     {
10 |       "check": 4,
11 |       "blocks": [
12 |         {
13 |           "length": 1234
14 |         },
15 |         {
16 |           "length": 567
17 |         },
18 |         {
19 |           "length": 8901
20 |         },
21 |         {
22 |           "length": 234
23 |         },
24 |         {
25 |           "length": 5678
26 |         },
27 |         {
28 |           "length": 90
29 |         },
30 |         {
31 |           "length": 3576
32 |         }
33 |       ]
34 |     }
35 |   ]
36 | }
37 | 


--------------------------------------------------------------------------------
/tests/integration/files/various-block-sizes.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/various-block-sizes.xz


--------------------------------------------------------------------------------
/tests/integration/files/various-stream-checks-stream-padding-and-block-sizes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "generate": [
 3 |     {
 4 |       "cmd": "xz -C none --block-list 100,1000,200,2000,0",
 5 |       "length": 3600
 6 |     },
 7 |     {
 8 |       "cmd": "head -c 100 /dev/zero"
 9 |     },
10 |     {
11 |       "cmd": "xz -C crc32",
12 |       "length": 10000
13 |     },
14 |     {
15 |       "cmd": "head -c 800 /dev/zero"
16 |     },
17 |     {
18 |       "cmd": "xz -C crc64 --block-list 3000,300,0",
19 |       "length": 3333
20 |     },
21 |     {
22 |       "cmd": "xz -C sha256 --block-size 600",
23 |       "length": 3347
24 |     },
25 |     {
26 |       "cmd": "head -c 400 /dev/zero"
27 |     }
28 |   ],
29 |   "streams": [
30 |     {
31 |       "check": 0,
32 |       "blocks": [
33 |         {
34 |           "length": 100
35 |         },
36 |         {
37 |           "length": 1000
38 |         },
39 |         {
40 |           "length": 200
41 |         },
42 |         {
43 |           "length": 2000
44 |         },
45 |         {
46 |           "length": 300
47 |         }
48 |       ]
49 |     },
50 |     {
51 |       "check": 1,
52 |       "blocks": [
53 |         {
54 |           "length": 10000
55 |         }
56 |       ]
57 |     },
58 |     {
59 |       "check": 4,
60 |       "blocks": [
61 |         {
62 |           "length": 3000
63 |         },
64 |         {
65 |           "length": 300
66 |         },
67 |         {
68 |           "length": 33
69 |         }
70 |       ]
71 |     },
72 |     {
73 |       "check": 10,
74 |       "blocks": [
75 |         {
76 |           "length": 600
77 |         },
78 |         {
79 |           "length": 600
80 |         },
81 |         {
82 |           "length": 600
83 |         },
84 |         {
85 |           "length": 600
86 |         },
87 |         {
88 |           "length": 600
89 |         },
90 |         {
91 |           "length": 347
92 |         }
93 |       ]
94 |     }
95 |   ]
96 | }
97 | 


--------------------------------------------------------------------------------
/tests/integration/files/various-stream-checks-stream-padding-and-block-sizes.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/various-stream-checks-stream-padding-and-block-sizes.xz


--------------------------------------------------------------------------------
/tests/integration/files/various-stream-checks.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "generate": [
 3 |     {
 4 |       "cmd": "xz -C none",
 5 |       "length": 5070
 6 |     },
 7 |     {
 8 |       "cmd": "xz -C crc32",
 9 |       "length": 5070
10 |     },
11 |     {
12 |       "cmd": "xz -C crc64",
13 |       "length": 5070
14 |     },
15 |     {
16 |       "cmd": "xz -C sha256",
17 |       "length": 5070
18 |     }
19 |   ],
20 |   "streams": [
21 |     {
22 |       "check": 0,
23 |       "blocks": [
24 |         {
25 |           "length": 5070
26 |         }
27 |       ]
28 |     },
29 |     {
30 |       "check": 1,
31 |       "blocks": [
32 |         {
33 |           "length": 5070
34 |         }
35 |       ]
36 |     },
37 |     {
38 |       "check": 4,
39 |       "blocks": [
40 |         {
41 |           "length": 5070
42 |         }
43 |       ]
44 |     },
45 |     {
46 |       "check": 10,
47 |       "blocks": [
48 |         {
49 |           "length": 5070
50 |         }
51 |       ]
52 |     }
53 |   ]
54 | }
55 | 


--------------------------------------------------------------------------------
/tests/integration/files/various-stream-checks.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/various-stream-checks.xz


--------------------------------------------------------------------------------
/tests/integration/test_file_read.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | from typing import Any, Dict, Tuple
 3 | 
 4 | from xz import XZFile
 5 | 
 6 | _IntegrationCase = Tuple[Path, Dict[str, Any]]
 7 | 
 8 | 
 9 | def test_read_all(integration_case: _IntegrationCase, data_pattern: bytes) -> None:
10 |     xz_path, metadata = integration_case
11 |     with XZFile(xz_path) as xzfile:
12 |         streams_items = list(
13 |             xzfile._fileobjs.items()  # pylint: disable=protected-access
14 |         )
15 |         assert len(streams_items) == len(metadata["streams"])
16 |         pos = 0
17 |         stream_boundaries = []
18 |         block_boundaries = []
19 |         for stream_item, metadata_stream in zip(streams_items, metadata["streams"]):
20 |             stream_boundaries.append(pos)
21 |             stream_pos, stream = stream_item
22 |             assert stream_pos == pos
23 |             assert stream.check == metadata_stream["check"]
24 |             block_items = list(
25 |                 stream._fileobjs.items()  # pylint: disable=protected-access
26 |             )
27 |             assert len(block_items) == len(metadata_stream["blocks"])
28 |             for block_item, metadata_block in zip(
29 |                 block_items, metadata_stream["blocks"]
30 |             ):
31 |                 block_boundaries.append(pos)
32 |                 block_pos, block = block_item
33 |                 assert block_pos == pos - stream_pos
34 |                 assert len(block) == metadata_block["length"]
35 |                 pos += metadata_block["length"]
36 |             assert len(stream) == pos - stream_pos
37 |         assert xzfile.stream_boundaries == stream_boundaries
38 |         assert xzfile.block_boundaries == block_boundaries
39 |         assert xzfile.read() == data_pattern
40 | 
41 | 
42 | def test_read_reversed(integration_case: _IntegrationCase, data_pattern: bytes) -> None:
43 |     xz_path, _ = integration_case
44 |     with XZFile(xz_path) as xzfile:
45 |         # we are testing the worst possible case (lots of negative seeking)
46 |         # limit the time to test by reading in chunks instead of 1 byte at a time
47 |         read_size = 37
48 |         for pos in reversed(range(0, len(data_pattern), read_size)):
49 |             xzfile.seek(pos)
50 |             assert xzfile.read(read_size) == data_pattern[pos : pos + read_size]
51 | 


--------------------------------------------------------------------------------
/tests/integration/test_file_write.py:
--------------------------------------------------------------------------------
 1 | from hashlib import sha256
 2 | from pathlib import Path
 3 | from typing import Any, Dict, Tuple
 4 | 
 5 | import pytest
 6 | 
 7 | import xz
 8 | 
 9 | _IntegrationCase = Tuple[Path, Dict[str, Any]]
10 | 
11 | 
12 | def test(
13 |     integration_case: _IntegrationCase, data_pattern: bytes, tmp_path: Path
14 | ) -> None:
15 |     xz_path, metadata = integration_case
16 |     data = memoryview(data_pattern)
17 | 
18 |     if "padding" in xz_path.name:
19 |         pytest.skip("Write mode does not support stream padding yet")
20 | 
21 |     generated_path = tmp_path / "archive.xz"
22 | 
23 |     with xz.open(generated_path, "w") as xzfile:
24 |         for stream in metadata["streams"]:
25 |             xzfile.check = stream["check"]
26 |             xzfile.change_stream()
27 |             for block in stream["blocks"]:
28 |                 xzfile.filters = block.get("filters")
29 |                 xzfile.change_block()
30 |                 xzfile.write(data[: block["length"]])
31 |                 data = data[block["length"] :]
32 | 
33 |     assert not data
34 | 
35 |     expected_hash = sha256(xz_path.read_bytes())
36 |     generated_hash = sha256(generated_path.read_bytes())
37 | 
38 |     assert generated_hash.hexdigest() == expected_hash.hexdigest()
39 | 


--------------------------------------------------------------------------------
/tests/integration/test_generate_files.py:
--------------------------------------------------------------------------------
 1 | from hashlib import sha256
 2 | from pathlib import Path
 3 | import subprocess
 4 | from typing import Any, Dict, Tuple
 5 | 
 6 | import pytest
 7 | 
 8 | _IntegrationCase = Tuple[Path, Dict[str, Any]]
 9 | 
10 | 
11 | @pytest.mark.generate_integration_files
12 | def test(integration_case: _IntegrationCase, data_pattern: bytes) -> None:
13 |     xz_path, metadata = integration_case
14 | 
15 |     expected_hash = sha256(xz_path.read_bytes())
16 | 
17 |     # note that we override current xz file
18 |     # this allows to create new integration files from json metadata
19 |     data = memoryview(data_pattern)
20 |     with xz_path.open("wb") as fout:
21 |         for step in metadata["generate"]:
22 |             step_data_len = step.get("length", 0)
23 |             step_data = data[:step_data_len]
24 |             data = data[step_data_len:]
25 |             fout.write(
26 |                 subprocess.run(
27 |                     step["cmd"].split(" "),
28 |                     input=step_data,
29 |                     stdout=subprocess.PIPE,
30 |                     check=True,
31 |                 ).stdout
32 |             )
33 |     assert not data
34 | 
35 |     generated_hash = sha256(xz_path.read_bytes())
36 | 
37 |     assert generated_hash.hexdigest() == expected_hash.hexdigest()
38 | 


--------------------------------------------------------------------------------
/tests/integration/test_ram_usage.py:
--------------------------------------------------------------------------------
  1 | from io import DEFAULT_BUFFER_SIZE
  2 | from lzma import compress
  3 | from pathlib import Path
  4 | from random import seed
  5 | import sys
  6 | from typing import BinaryIO, Optional, cast
  7 | 
  8 | import pytest
  9 | 
 10 | from xz import XZFile
 11 | from xz.common import create_xz_index_footer, parse_xz_footer, parse_xz_index
 12 | from xz.io import IOCombiner, IOStatic
 13 | 
 14 | if sys.version_info >= (3, 9):
 15 |     from collections.abc import Callable, Iterator
 16 |     from random import randbytes
 17 | else:
 18 |     from random import getrandbits
 19 |     from typing import Callable, Iterator
 20 | 
 21 |     def randbytes(length: int) -> bytes:
 22 |         return getrandbits(length * 8).to_bytes(length, "little")
 23 | 
 24 | 
 25 | @pytest.fixture
 26 | def ram_usage() -> Iterator[Callable[[], int]]:
 27 |     try:
 28 |         import tracemalloc  # pylint: disable=import-outside-toplevel
 29 |     except ImportError:  # e.g. PyPy
 30 |         pytest.skip("tracemalloc module not available")
 31 | 
 32 |     try:
 33 |         tracemalloc.start()
 34 |         yield lambda: tracemalloc.get_traced_memory()[1]
 35 |     finally:
 36 |         tracemalloc.stop()
 37 | 
 38 | 
 39 | BLOCK_SIZE = 1_000_000
 40 | 
 41 | 
 42 | @pytest.fixture
 43 | def fileobj() -> BinaryIO:
 44 |     # create xz raw data composed of many identical blocks
 45 |     nb_blocks = 50
 46 | 
 47 |     seed(0)
 48 |     data = compress(randbytes(BLOCK_SIZE))
 49 |     header = data[:12]
 50 |     footer = data[-12:]
 51 |     check, backward_size = parse_xz_footer(footer)
 52 |     block = data[12 : -12 - backward_size]
 53 |     records = parse_xz_index(data[-12 - backward_size : -12])
 54 |     index_footer = create_xz_index_footer(check, records * nb_blocks)
 55 | 
 56 |     return cast(
 57 |         BinaryIO,
 58 |         IOCombiner(
 59 |             IOStatic(header),
 60 |             *[IOStatic(block)] * nb_blocks,
 61 |             IOStatic(index_footer),
 62 |         ),
 63 |     )
 64 | 
 65 | 
 66 | def test_read_linear(
 67 |     # pylint: disable=redefined-outer-name
 68 |     fileobj: BinaryIO,
 69 |     ram_usage: Callable[[], int],
 70 | ) -> None:
 71 |     with XZFile(fileobj) as xz_file:
 72 |         # read almost one block
 73 |         xz_file.read(BLOCK_SIZE - 1)
 74 |         one_block_memory = ram_usage()
 75 | 
 76 |         # read all the file
 77 |         while xz_file.read(DEFAULT_BUFFER_SIZE):
 78 |             assert (
 79 |                 # should not use much more memory, take 2 as error margin
 80 |                 ram_usage()
 81 |                 < one_block_memory * 2
 82 |             ), f"Consumes too much RAM (at {100 * xz_file.tell() / len(xz_file):.0f}%)"
 83 | 
 84 | 
 85 | def test_partial_read_each_block(
 86 |     # pylint: disable=redefined-outer-name
 87 |     fileobj: BinaryIO,
 88 |     ram_usage: Callable[[], int],
 89 | ) -> None:
 90 |     one_block_memory: Optional[int] = None
 91 | 
 92 |     with XZFile(fileobj) as xz_file:
 93 |         for pos in xz_file.block_boundaries[1:]:
 94 |             # read second-to last byte of each block
 95 |             xz_file.seek(pos - 2)
 96 |             xz_file.read(1)
 97 |             if one_block_memory is None:
 98 |                 one_block_memory = ram_usage()
 99 |             else:
100 |                 assert (
101 |                     # default strategy is max 8 blocks, take 10 as error margin
102 |                     ram_usage()
103 |                     < one_block_memory * 10
104 |                 ), f"Consumes too much RAM (at {100 * xz_file.tell() / len(xz_file):.0f}%)"
105 | 
106 | 
107 | def test_write(
108 |     tmp_path: Path,
109 |     # pylint: disable=redefined-outer-name
110 |     ram_usage: Callable[[], int],
111 | ) -> None:
112 |     nb_blocks = 10
113 | 
114 |     seed(0)
115 | 
116 |     one_block_memory: Optional[int] = None
117 | 
118 |     with XZFile(tmp_path / "archive.xz", "w") as xz_file:
119 |         for i in range(nb_blocks):
120 |             xz_file.change_block()
121 |             xz_file.write(randbytes(BLOCK_SIZE))
122 | 
123 |             if one_block_memory is None:
124 |                 one_block_memory = ram_usage()
125 |             else:
126 |                 assert (
127 |                     # should not use much more memory, take 2 as error margin
128 |                     ram_usage()
129 |                     < one_block_memory * 2
130 |                 ), f"Consumes too much RAM (at {i / nb_blocks:.0f}%)"
131 | 


--------------------------------------------------------------------------------
/tests/integration/test_readme.py:
--------------------------------------------------------------------------------
 1 | import doctest
 2 | import os
 3 | from pathlib import Path
 4 | import shutil
 5 | import sys
 6 | from typing import List, Optional, Tuple
 7 | 
 8 | import pytest
 9 | 
10 | import xz
11 | 
12 | if sys.version_info >= (3, 9):  # pragma: no cover
13 |     from collections.abc import Iterator
14 | else:  # pragma: no cover
15 |     from typing import Iterator
16 | 
17 | 
18 | @pytest.fixture(autouse=True)
19 | def change_dir(tmp_path: Path) -> Iterator[None]:  # applies to every test here
20 |     old_dir = os.getcwd()  # remembered so the working directory can be restored
21 |     shutil.copy(Path(__file__).parent / "files" / "example.xz", tmp_path)
22 |     os.chdir(tmp_path)  # the test runs with the copied example.xz in its cwd
23 |     yield
24 |     os.chdir(old_dir)  # teardown: go back to the original working directory
25 | 
26 | 
27 | def _parse_readme() -> List[Tuple[int, str]]:
28 |     code_blocks = []
29 |     current_code_block = ""
30 |     current_code_block_line: Optional[int] = None
31 |     with (Path(__file__).parent.parent.parent / "README.md").open() as fin:
32 |         for line_no, line in enumerate(fin):
33 |             if line.startswith("```"):
34 |                 if current_code_block_line is None:
35 |                     if "python" in line:
36 |                         current_code_block_line = line_no + 1
37 |                 else:
38 |                     code_blocks.append((current_code_block_line, current_code_block))
39 |                     current_code_block = ""
40 |                     current_code_block_line = None
41 |             elif current_code_block_line is not None:
42 |                 current_code_block += line
43 |     return code_blocks
44 | 
45 | 
46 | _README_CODE_BLOCKS = _parse_readme()  # parsed at import time for parametrize below
47 | 
48 | 
49 | @pytest.mark.parametrize(
50 |     "code_block",
51 |     [
52 |         pytest.param(code_block, id=f"line_{line_no}")
53 |         for line_no, code_block in _README_CODE_BLOCKS
54 |     ],
55 | )
56 | def test_readme(
57 |     code_block: str, tmp_path: Path
58 | ) -> None:  # pylint: disable=redefined-outer-name
59 |     path = tmp_path / "block.txt"
60 |     path.write_text(code_block)
61 |     failure_count, test_count = doctest.testfile(
62 |         str(path),
63 |         module_relative=False,
64 |         extraglobs={"xz": xz},
65 |     )
66 |     assert failure_count == 0
67 |     assert test_count
68 | 


--------------------------------------------------------------------------------
/tests/unit/test_attr_proxy.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional
 2 | 
 3 | import pytest
 4 | 
 5 | from xz.utils import AttrProxy
 6 | 
 7 | 
 8 | class Dest:  # object that Src's proxied attribute is forwarded to in the test
 9 |     abc = "012"
10 | 
11 | 
12 | class Src:  # object under test: "abc" is an AttrProxy going through "proxy"
13 |     proxy: Optional[Dest] = None  # no destination until the test assigns one
14 |     abc = AttrProxy[str]("proxy")  # argument names the attribute holding the target
15 | 
16 | 
17 | def test_direct() -> None:
18 |     dest = Dest()
19 |     src = Src()
20 | 
21 |     # not proxied
22 |     with pytest.raises(AttributeError) as exc_info:
23 |         src.abc  # pylint: disable=pointless-statement
24 |     assert (
25 |         str(exc_info.value)
26 |         == "'Src' object has not attribute 'abc' until its attribute 'proxy' is defined"
27 |     )
28 | 
29 |     src.abc = "345"
30 |     assert src.abc == "345"
31 |     assert dest.abc == "012"  # unchanged
32 | 
33 |     # proxied
34 |     src.proxy = dest
35 | 
36 |     assert src.abc == "012"  # get initial value back from proxy
37 | 
38 |     src.abc = "678"
39 |     assert src.abc == "678"
40 |     assert dest.abc == "678"  # changed
41 | 


--------------------------------------------------------------------------------
/tests/unit/test_block.py:
--------------------------------------------------------------------------------
  1 | from io import SEEK_SET, BytesIO, UnsupportedOperation
  2 | import sys
  3 | from typing import Tuple, cast
  4 | from unittest.mock import Mock, call
  5 | 
  6 | import pytest
  7 | 
  8 | import xz.block as block_module
  9 | from xz.block import BlockRead, XZBlock
 10 | from xz.common import XZError, create_xz_header, create_xz_index_footer
 11 | from xz.io import IOAbstract, IOStatic
 12 | 
 13 | if sys.version_info >= (3, 9):  # pragma: no cover
 14 |     from collections.abc import Callable, Iterator
 15 | else:  # pragma: no cover
 16 |     from typing import Callable, Iterator
 17 | 
 18 | 
 19 | BLOCK_BYTES = bytes.fromhex(  # 92 bytes; the tests read 100 bytes of data from it
 20 |     "0200210116000000742fe5a3e0006300415d00209842100431d01ab285328305"
 21 |     "7ddb5924a128599cc9911a7fcff8d59c1f6f887bcee97b1f83f1808f005de273"
 22 |     "e1a6e99a7eac4f8f632b7e43bbf1da311dce5c0000000000e7c35efa"
 23 | )
 24 | 
 25 | 
 26 | def create_fileobj(data: bytes) -> Mock:  # Mock file object backed by a BytesIO
 27 |     raw = BytesIO(data)
 28 |     mock = Mock(wraps=raw)  # calls are forwarded to raw and recorded on the mock
 29 |     mock.__class__ = cast(Mock, IOAbstract)  # needs to be subclass of IOAbstract
 30 |     mock.__len__ = lambda _: len(raw.getvalue())  # len() is the current data size
 31 |     return mock
 32 | 
 33 | 
 34 | @pytest.fixture
 35 | def fileobj() -> Iterator[Mock]:  # mocked file holding BLOCK_BYTES
 36 |     yield create_fileobj(BLOCK_BYTES)
 37 | 
 38 | 
 39 | @pytest.fixture
 40 | def fileobj_empty() -> Iterator[Mock]:  # mocked file with no data, for write tests
 41 |     yield create_fileobj(b"")
 42 | 
 43 | 
 44 | @pytest.fixture(autouse=True)
 45 | def patch_buffer_size(monkeypatch: pytest.MonkeyPatch) -> None:
 46 |     monkeypatch.setattr(BlockRead, "read_size", 17)  # hence the read(17) calls below
 47 | 
 48 | 
 49 | @pytest.fixture
 50 | def compressor(monkeypatch: pytest.MonkeyPatch) -> Iterator[Mock]:
 51 |     mock = Mock()
 52 |     monkeypatch.setattr(block_module, "LZMACompressor", mock)  # patched in xz.block
 53 |     yield mock.return_value  # what calling the patched LZMACompressor returns
 54 | 
 55 | 
 56 | # pylint: disable=redefined-outer-name
 57 | 
 58 | 
 59 | #
 60 | # read
 61 | #
 62 | 
 63 | 
 64 | def test_read_all(
 65 |     fileobj: Mock, data_pattern_locate: Callable[[bytes], Tuple[int, int]]
 66 | ) -> None:
 67 |     block = XZBlock(fileobj, 1, 89, 100)
 68 |     assert block.tell() == 0
 69 |     assert data_pattern_locate(block.read()) == (0, 100)
 70 |     # each file read is preceded by an absolute seek; chunks are read_size (17) long
 71 |     assert fileobj.method_calls == [
 72 |         call.seek(0, SEEK_SET),
 73 |         call.read(5),  # xz padding is 12 bytes
 74 |         call.seek(5, SEEK_SET),
 75 |         call.read(17),
 76 |         call.seek(22, SEEK_SET),
 77 |         call.read(17),
 78 |         call.seek(39, SEEK_SET),
 79 |         call.read(17),
 80 |         call.seek(56, SEEK_SET),
 81 |         call.read(17),
 82 |         call.seek(73, SEEK_SET),
 83 |         call.read(17),
 84 |         # below is not needed to get the data
 85 |         # but needed to perform various checks
 86 |         # see other tests
 87 |         call.seek(90, SEEK_SET),
 88 |         call.read(17),
 89 |     ]
 90 |     fileobj.method_calls.clear()
 91 |     # at the end of the block: nothing more to return and no further file access
 92 |     assert block.read() == b""
 93 |     assert not fileobj.method_calls
 94 | 
 95 | 
 96 | def test_read_seek_forward(
 97 |     fileobj: Mock, data_pattern_locate: Callable[[bytes], Tuple[int, int]]
 98 | ) -> None:
 99 |     block = XZBlock(fileobj, 1, 89, 100)
100 |     assert block.tell() == 0
101 |     # a seek alone never touches the file; only the following read may
102 |     block.seek(0)
103 |     assert block.tell() == 0
104 |     assert not fileobj.method_calls  # no file access
105 |     assert data_pattern_locate(block.read(4)) == (0, 4)
106 |     assert block.tell() == 4
107 |     assert fileobj.method_calls == [
108 |         call.seek(0, SEEK_SET),
109 |         call.read(5),  # xz padding is 12 bytes
110 |         call.seek(5, SEEK_SET),
111 |         call.read(17),
112 |         call.seek(22, SEEK_SET),
113 |         call.read(17),
114 |     ]
115 |     fileobj.method_calls.clear()
116 |     # reading 4 bytes at position 10 needs no extra file access
117 |     block.seek(10)
118 |     assert block.tell() == 10
119 |     assert not fileobj.method_calls  # no file access
120 |     assert data_pattern_locate(block.read(4)) == (10, 4)
121 |     assert block.tell() == 14
122 |     assert not fileobj.method_calls  # no file access
123 |     # further ahead: one more 17-byte chunk is read from the file
124 |     block.seek(30)
125 |     assert block.tell() == 30
126 |     assert not fileobj.method_calls  # no file access
127 |     assert data_pattern_locate(block.read(4)) == (30, 4)
128 |     assert block.tell() == 34
129 |     assert fileobj.method_calls == [
130 |         call.seek(39, SEEK_SET),
131 |         call.read(17),
132 |     ]
133 |     fileobj.method_calls.clear()
134 |     # position 60: again a single extra chunk, starting at file offset 56
135 |     block.seek(60)
136 |     assert block.tell() == 60
137 |     assert not fileobj.method_calls  # no file access
138 |     assert data_pattern_locate(block.read(4)) == (60, 4)
139 |     assert block.tell() == 64
140 |     assert fileobj.method_calls == [
141 |         call.seek(56, SEEK_SET),
142 |         call.read(17),
143 |     ]
144 |     fileobj.method_calls.clear()
145 | 
146 | 
147 | def test_read_seek_backward(
148 |     fileobj: Mock, data_pattern_locate: Callable[[bytes], Tuple[int, int]]
149 | ) -> None:
150 |     block = XZBlock(fileobj, 1, 89, 100)
151 |     assert block.tell() == 0
152 |     # seeking forward then backward before any read costs no file access
153 |     block.seek(60)
154 |     assert block.tell() == 60
155 |     assert not fileobj.method_calls  # no file access
156 | 
157 |     block.seek(40)
158 |     assert block.tell() == 40
159 |     assert not fileobj.method_calls  # no file access
160 |     assert data_pattern_locate(block.read(4)) == (40, 4)
161 |     assert block.tell() == 44
162 |     assert fileobj.method_calls == [
163 |         call.seek(0, SEEK_SET),
164 |         call.read(5),  # xz padding is 12 bytes
165 |         call.seek(5, SEEK_SET),
166 |         call.read(17),
167 |         call.seek(22, SEEK_SET),
168 |         call.read(17),
169 |         call.seek(39, SEEK_SET),
170 |         call.read(17),
171 |     ]
172 |     fileobj.method_calls.clear()
173 |     assert not fileobj.method_calls  # no file access
174 |     # going backward after a read: the file is read again from offset 0
175 |     block.seek(20)
176 |     assert block.tell() == 20
177 |     assert not fileobj.method_calls  # no file access
178 |     assert data_pattern_locate(block.read(4)) == (20, 4)
179 |     assert block.tell() == 24
180 |     assert fileobj.method_calls == [
181 |         call.seek(0, SEEK_SET),
182 |         call.read(5),  # xz padding is 12 bytes
183 |         call.seek(5, SEEK_SET),
184 |         call.read(17),
185 |         call.seek(22, SEEK_SET),
186 |         call.read(17),
187 |         call.seek(39, SEEK_SET),
188 |         call.read(17),
189 |     ]
190 |     fileobj.method_calls.clear()
191 | 
192 | 
193 | def test_read_wrong_uncompressed_size_too_small(
194 |     fileobj: Mock, data_pattern_locate: Callable[[bytes], Tuple[int, int]]
195 | ) -> None:
196 |     block = XZBlock(fileobj, 1, 89, 99)
197 | 
198 |     # read all but last byte
199 |     assert data_pattern_locate(block.read(98)) == (0, 98)
200 | 
201 |     # read last byte
202 |     with pytest.raises(XZError) as exc_info:
203 |         block.read(1)
204 |     assert str(exc_info.value) == "block: error while decompressing: Corrupt input data"
205 | 
206 | 
207 | def test_read_wrong_uncompressed_size_too_big(
208 |     fileobj: Mock, data_pattern_locate: Callable[[bytes], Tuple[int, int]]
209 | ) -> None:
210 |     block = XZBlock(fileobj, 1, 89, 101)
211 | 
212 |     # read all but last byte
213 |     assert data_pattern_locate(block.read(100)) == (0, 100)
214 | 
215 |     # read last byte
216 |     with pytest.raises(XZError) as exc_info:
217 |         block.read(1)
218 |     assert str(exc_info.value) == "block: error while decompressing: Corrupt input data"
219 | 
220 | 
221 | def test_read_wrong_block_padding(
222 |     data_pattern_locate: Callable[[bytes], Tuple[int, int]]
223 | ) -> None:
224 |     fileobj = IOStatic(BLOCK_BYTES[:-5] + b"\xff" + BLOCK_BYTES[-4:])
225 |     block = XZBlock(fileobj, 1, 89, 100)
226 | 
227 |     # read all but last byte
228 |     assert data_pattern_locate(block.read(99)) == (0, 99)
229 | 
230 |     # read last byte
231 |     with pytest.raises(XZError) as exc_info:
232 |         block.read(1)
233 |     assert str(exc_info.value) == "block: error while decompressing: Corrupt input data"
234 | 
235 | 
236 | def test_read_wrong_check(
237 |     data_pattern_locate: Callable[[bytes], Tuple[int, int]]
238 | ) -> None:
239 |     fileobj = IOStatic(BLOCK_BYTES[:-4] + b"\xff" * 4)
240 | 
241 |     block = XZBlock(fileobj, 1, 89, 100)
242 | 
243 |     # read all but last byte
244 |     assert data_pattern_locate(block.read(99)) == (0, 99)
245 | 
246 |     # read last byte
247 |     with pytest.raises(XZError) as exc_info:
248 |         block.read(1)
249 |     assert str(exc_info.value) == "block: error while decompressing: Corrupt input data"
250 | 
251 | 
252 | def test_read_truncated_data() -> None:
253 |     fileobj = create_fileobj(
254 |         bytes.fromhex(
255 |             # header
256 |             "fd377a585a0000016922de36"
257 |             # one block (truncated)
258 |             "0200210116000000742fe5a301000941"
259 |         )
260 |     )
261 |     # the file holds only 28 bytes, fewer than the 89 passed to XZBlock
262 |     block = XZBlock(fileobj, 1, 89, 100)
263 | 
264 |     with pytest.raises(XZError) as exc_info:
265 |         block.read()
266 |     assert str(exc_info.value) == "block: data eof"
267 | 
268 | 
269 | def test_read_decompressor_eof(
270 |     data_pattern_locate: Callable[[bytes], Tuple[int, int]]
271 | ) -> None:
272 |     fileobj = IOStatic(
273 |         bytes.fromhex(
274 |             # one block
275 |             "0200210116000000742fe5a301000941"
276 |             "6130416131416132410000004e4aa467"
277 |             # index
278 |             "00011e0aea6312149042990d0100"
279 |             # stream footer
280 |             "00000001595a"
281 |         )
282 |     )
283 | 
284 |     # real uncompressed size is 10, not 11
285 |     # it is changed to trigger the error case we are testing here
286 |     block = XZBlock(fileobj, 1, 30, 11)
287 | 
288 |     # read all but last byte
289 |     assert data_pattern_locate(block.read(10)) == (0, 10)
290 | 
291 |     # read last byte
292 |     with pytest.raises(XZError) as exc_info:
293 |         block.read(1)  # the 11th byte does not exist
294 |     assert str(exc_info.value) == "block: decompressor eof"
295 | 
296 | 
297 | #
298 | # writable
299 | #
300 | 
301 | 
302 | def test_writable(fileobj: Mock) -> None:
303 |     block = XZBlock(fileobj, 1, 89, 100)
304 |     assert not block.writable()  # a block created with non-zero sizes is read-only
305 | 
306 | 
307 | def test_writable_empty(fileobj_empty: Mock) -> None:
308 |     block = XZBlock(fileobj_empty, 1, 0, 0)
309 |     assert block.writable()  # an empty block (both sizes 0) accepts writes
310 | 
311 | 
312 | #
313 | # write
314 | #
315 | 
316 | 
317 | def test_write_once(fileobj_empty: Mock) -> None:
318 |     with XZBlock(fileobj_empty, 1, 0, 0) as block:
319 |         block.write(b"Hello, world!\n")
320 |         assert block.tell() == 14
321 |         assert fileobj_empty.method_calls == [
322 |             call.seek(0),
323 |             call.write(b"\x02\x00!\x01\x16\x00\x00\x00t/\xe5\xa3"),  # first 12 bytes
324 |         ]
325 |         fileobj_empty.reset_mock()
326 |     # leaving the context writes the rest of the block and sets the final sizes
327 |     assert block.unpadded_size == 34
328 |     assert block.uncompressed_size == 14
329 | 
330 |     assert fileobj_empty.method_calls == [
331 |         call.seek(12),
332 |         call.write(b"\x01\x00\rHello, world!\n\x00\x00\x00\x18\xa7U{"),
333 |     ]
334 | 
335 | 
336 | def test_write_multiple(fileobj_empty: Mock) -> None:
337 |     with XZBlock(fileobj_empty, 1, 0, 0) as block:
338 |         block.write(b"Hello,")
339 |         assert block.tell() == 6
340 |         assert fileobj_empty.method_calls == [
341 |             call.seek(0),
342 |             call.write(b"\x02\x00!\x01\x16\x00\x00\x00t/\xe5\xa3"),
343 |         ]
344 |         fileobj_empty.reset_mock()
345 |         # a second small write does not reach the file
346 |         block.write(b" world!\n")
347 |         assert block.tell() == 14
348 |         assert not fileobj_empty.method_calls  # buffered
349 |         # a 3 MB write does
350 |         block.write(b"A" * 3_000_000)
351 |         assert block.tell() == 3_000_014
352 |         assert fileobj_empty.method_calls  # not buffered
353 |     # after leaving the context the block reports its final sizes
354 |     assert block.unpadded_size == 540
355 |     assert block.uncompressed_size == 3_000_014
356 | 
357 |     assert fileobj_empty.method_calls  # flushing compressor
358 | 
359 | 
360 | @pytest.mark.parametrize("pos", [0, 42, 100, 200])
361 | def test_write_existing(fileobj: Mock, pos: int) -> None:
362 |     block = XZBlock(fileobj, 1, 89, 100)
363 |     block.seek(pos)  # the write is refused whatever the position
364 |     with pytest.raises(UnsupportedOperation):
365 |         # block is not empty, so not writable
366 |         block.write(b"a")
367 | 
368 | 
369 | def test_write_compressor_error_0(fileobj_empty: Mock, compressor: Mock) -> None:
370 |     compressor.compress.return_value = create_xz_header(0)  # block is created with 1
371 |     with XZBlock(fileobj_empty, 1, 0, 0) as block:
372 |         with pytest.raises(XZError) as exc_info:
373 |             block.write(b"Hello, world!\n")
374 |     assert str(exc_info.value) == "block: compressor header"
375 | 
376 | 
377 | def test_write_compressor_error_1(fileobj_empty: Mock, compressor: Mock) -> None:
378 |     compressor.compress.return_value = create_xz_header(1)
379 |     compressor.flush.return_value = create_xz_index_footer(0, [(13, 37), (4, 2)])
380 |     with pytest.raises(XZError) as exc_info:  # raised when the context is left
381 |         with XZBlock(fileobj_empty, 1, 0, 0) as block:
382 |             block.write(b"Hello, world!\n")
383 |     assert str(exc_info.value) == "block: compressor footer check"
384 | 
385 | 
386 | def test_write_compressor_error_2(fileobj_empty: Mock, compressor: Mock) -> None:
387 |     compressor.compress.return_value = create_xz_header(1)
388 |     compressor.flush.return_value = create_xz_index_footer(1, [(13, 37), (4, 2)])
389 |     with pytest.raises(XZError) as exc_info:  # the fake index holds two records
390 |         with XZBlock(fileobj_empty, 1, 0, 0) as block:
391 |             block.write(b"Hello, world!\n")
392 |     assert str(exc_info.value) == "block: compressor index records length"
393 | 
394 | 
395 | def test_write_compressor_error_3(fileobj_empty: Mock, compressor: Mock) -> None:
396 |     compressor.compress.return_value = create_xz_header(1)
397 |     compressor.flush.return_value = create_xz_index_footer(1, [(34, 1337)])
398 |     with pytest.raises(XZError) as exc_info:  # 1337 differs from the 14 bytes written
399 |         with XZBlock(fileobj_empty, 1, 0, 0) as block:
400 |             block.write(b"Hello, world!\n")
401 |     assert str(exc_info.value) == "block: compressor uncompressed size"
402 | 
403 | 
404 | #
405 | # truncate
406 | #
407 | 
408 | 
409 | def test_truncate_empty_zero(fileobj_empty: Mock) -> None:
410 |     with XZBlock(fileobj_empty, 1, 0, 0) as block:
411 |         block.truncate(0)
412 |         assert block.tell() == 0
413 |         assert not fileobj_empty.method_calls
414 |     # truncating an empty block to 0 leaves it empty, even after the context exits
415 |     assert block.unpadded_size == 0
416 |     assert block.uncompressed_size == 0
417 | 
418 |     assert not fileobj_empty.method_calls
419 | 
420 | 
421 | def test_truncate_empty_fill(fileobj_empty: Mock) -> None:
422 |     with XZBlock(fileobj_empty, 1, 0, 0) as block:
423 |         block.truncate(42)
424 |         assert block.tell() == 0
425 |         assert fileobj_empty.method_calls == [
426 |             call.seek(0),
427 |             call.write(b"\x02\x00!\x01\x16\x00\x00\x00t/\xe5\xa3"),
428 |         ]
429 |         fileobj_empty.reset_mock()
430 |     # truncate(42) on an empty block grows it to 42 uncompressed bytes
431 |     assert block.unpadded_size == 30
432 |     assert block.uncompressed_size == 42
433 | 
434 |     assert fileobj_empty.method_calls == [
435 |         call.seek(12),
436 |         call.write(b"\xe0\x00)\x00\x06]\x00\x00n,GH\x00\x00\x00\x00\xfb(o\xe4"),
437 |     ]
438 | 
439 | 
440 | @pytest.mark.parametrize("size", [0, 42, 100, 200])
441 | def test_truncate_existing(fileobj: Mock, size: int) -> None:
442 |     block = XZBlock(fileobj, 1, 89, 100)  # truncate is refused whatever the size
443 |     with pytest.raises(UnsupportedOperation):
444 |         # block is not empty, so not writable
445 |         block.truncate(size)
446 | 


--------------------------------------------------------------------------------
/tests/unit/test_common.py:
--------------------------------------------------------------------------------
  1 | from lzma import CHECK_CRC32, CHECK_CRC64, CHECK_NONE, CHECK_SHA256, is_check_supported
  2 | from typing import List, Tuple
  3 | 
  4 | import pytest
  5 | 
  6 | from xz.common import (
  7 |     DEFAULT_CHECK,
  8 |     XZError,
  9 |     create_xz_header,
 10 |     create_xz_index_footer,
 11 |     decode_mbi,
 12 |     encode_mbi,
 13 |     pad,
 14 |     parse_xz_footer,
 15 |     parse_xz_header,
 16 |     parse_xz_index,
 17 |     round_up,
 18 | )
 19 | 
 20 | MBI_CASE = tuple(  # (integer, hex of its encoding), shared by encode/decode tests
 21 |     pytest.param(value, data, id=hex(value))
 22 |     for value, data in (
 23 |         (0, "00"),
 24 |         (1, "01"),
 25 |         ((1 << 7) - 1, "7f"),
 26 |         (1 << 7, "8001"),
 27 |         ((1 << 7 * 2) - 1, "ff7f"),
 28 |         (1 << 7 * 2, "808001"),
 29 |         ((1 << 7 * 3) - 1, "ffff7f"),
 30 |         (1 << 7 * 3, "80808001"),
 31 |         ((1 << 7 * 10) - 1, "ffffffffffffffffff7f"),
 32 |         (1 << 7 * 10, "8080808080808080808001"),
 33 |         (9, "09"),
 34 |         (99, "63"),
 35 |         (999, "e707"),
 36 |         (9999, "8f4e"),
 37 |         (99999, "9f8d06"),
 38 |         (999999, "bf843d"),
 39 |         (9999999, "fface204"),
 40 |         (99999999, "ffc1d72f"),
 41 |         (999999999, "ff93ebdc03"),
 42 |     )
 43 | )
 44 | 
 45 | 
 46 | @pytest.mark.parametrize("value, data", MBI_CASE)
 47 | def test_encode_mbi(value: int, data: str) -> None:  # data is the expected hex
 48 |     assert encode_mbi(value) == bytes.fromhex(data)
 49 | 
 50 | 
 51 | @pytest.mark.parametrize("value, data", MBI_CASE)
 52 | def test_decode_mbi(value: int, data: str) -> None:  # expects (byte count, value)
 53 |     assert decode_mbi(bytes.fromhex(data) + b"\xff\x00" * 10) == (len(data) // 2, value)
 54 | 
 55 | 
 56 | @pytest.mark.parametrize("data", ("", "81828384"), ids=("empty", "truncated"))
 57 | def test_decode_mbi_invalid(data: str) -> None:
 58 |     with pytest.raises(XZError) as exc_info:
 59 |         decode_mbi(bytes.fromhex(data))  # no final byte with the high bit clear
 60 |     assert str(exc_info.value) == "invalid mbi"
 61 | 
 62 | 
 63 | @pytest.mark.parametrize(
 64 |     "value, expected",
 65 |     ((0, 0), (1, 4), (2, 4), (3, 4), (4, 4), (5, 8), (6, 8), (7, 8), (8, 8)),
 66 | )
 67 | def test_round_up(value: int, expected: int) -> None:
 68 |     assert round_up(value) == expected  # expected is the next multiple of 4
 69 | 
 70 | 
 71 | @pytest.mark.parametrize(
 72 |     "value, padding",
 73 |     (
 74 |         (0, ""),
 75 |         (1, "000000"),
 76 |         (2, "0000"),
 77 |         (3, "00"),
 78 |         (4, ""),
 79 |         (5, "000000"),
 80 |         (6, "0000"),
 81 |         (7, "00"),
 82 |         (8, ""),
 83 |     ),
 84 | )
 85 | def test_pad(value: int, padding: str) -> None:
 86 |     assert pad(value) == bytes.fromhex(padding)
 87 |     data = b"B" * value  # sanity check of the table itself:
 88 |     data += bytes.fromhex(padding)
 89 |     assert not len(data) % 4  # data plus padding is a multiple of 4 bytes long
 90 | 
 91 | 
 92 | XZ_HEADER_CASES = (  # (check id, hex of the matching 12-byte header)
 93 |     pytest.param(CHECK_NONE, "fd377a585a000000ff12d941", id="check_none"),
 94 |     pytest.param(CHECK_CRC32, "fd377a585a0000016922de36", id="check_crc32"),
 95 |     pytest.param(CHECK_CRC64, "fd377a585a000004e6d6b446", id="check_crc64"),
 96 |     pytest.param(CHECK_SHA256, "fd377a585a00000ae1fb0ca1", id="check_sha256"),
 97 | )
 98 | 
 99 | 
100 | @pytest.mark.parametrize("check, data", XZ_HEADER_CASES)
101 | def test_create_xz_header(check: int, data: str) -> None:  # data is the expected hex
102 |     assert create_xz_header(check) == bytes.fromhex(data)
103 | 
104 | 
105 | def test_create_xz_header_invalid_check() -> None:
106 |     with pytest.raises(XZError) as exc_info:
107 |         create_xz_header(17)  # 17 is rejected as a check value
108 |     assert str(exc_info.value) == "header check"
109 | 
110 | 
111 | @pytest.mark.parametrize("check, data", XZ_HEADER_CASES)
112 | def test_parse_xz_header(check: int, data: str) -> None:  # inverse of creation
113 |     assert parse_xz_header(bytes.fromhex(data)) == check
114 | 
115 | 
116 | @pytest.mark.parametrize(
117 |     "data, message",
118 |     (
119 |         ("fd377a585a0000016922de3600", "header length"),  # 13 bytes
120 |         ("f1377a585a000000ff12d941", "header magic"),  # first byte altered
121 |         ("fd377a585a0000016942de36", "header crc32"),
122 |         ("fd377a585a0000110d32692b", "header flags"),
123 |         ("fd377a585a0001012813c52f", "header flags"),
124 |         ("fd377a585a00100138301c7c", "header flags"),
125 |     ),
126 | )
127 | def test_parse_xz_header_invalid(data: str, message: str) -> None:
128 |     with pytest.raises(XZError) as exc_info:
129 |         parse_xz_header(bytes.fromhex(data))
130 |     assert str(exc_info.value) == message
131 | 
132 | 
133 | XZ_INDEX_CASES = (  # (list of index records, hex of the matching index)
134 |     # all have check=1
135 |     pytest.param([], "000000001cdf4421", id="empty"),
136 |     pytest.param([(24, 4)], "000118046be9f0a5", id="one-small-block"),
137 |     pytest.param([(2062, 20280)], "00018e10b89e010039f45fb1", id="one-big-block"),
138 |     pytest.param(
139 |         [(73, 60), (73, 60), (73, 60), (56, 30)],
140 |         "0004493c493c493c381e0000b6ec1657",
141 |         id="several-small-blocks",
142 |     ),
143 |     pytest.param(
144 |         [(1, 2), (11, 2222), (1111, 22222222), (11111111, 2222222222222222)],
145 |         "000401020bae11d7088eabcc0ac795a6058ec7abf196a3f903000000c9647142",
146 |         id="several-blocks-various-sizes",
147 |     ),
148 | )
149 | 
150 | 
151 | @pytest.mark.parametrize("records, data", XZ_INDEX_CASES)
152 | def test_create_xz_index(records: List[Tuple[int, int]], data: str) -> None:
153 |     assert create_xz_index_footer(1, records)[:-12] == bytes.fromhex(data)  # no footer
154 | 
155 | 
156 | def test_create_xz_index_invalid() -> None:
157 |     with pytest.raises(XZError) as exc_info:
158 |         create_xz_index_footer(1, [(73, 60), (0, 12), (56, 30)])  # record (0, 12)
159 |     assert str(exc_info.value) == "index record unpadded size"
160 | 
161 | 
162 | @pytest.mark.parametrize("records, data", XZ_INDEX_CASES)
163 | def test_parse_xz_index(records: List[Tuple[int, int]], data: str) -> None:
164 |     assert parse_xz_index(bytes.fromhex(data)) == records  # inverse of creation
165 | 
166 | 
167 | @pytest.mark.parametrize(
168 |     "data, message",
169 |     (
170 |         ("0000001cdf4421", "index length"),  # 7 bytes
171 |         ("420000001cdf4421", "index indicator"),  # first byte is not 00
172 |         ("000000001cdf4221", "index crc32"),
173 |         ("000218043257b6a7", "index size"),
174 |         ("000100043271eb27", "index record unpadded size"),
175 |         ("000188047163b1d4", "index size"),
176 |         ("000104002f70ea44", "index record uncompressed size"),
177 |         ("000180180400420096a658c0", "index padding"),
178 |     ),
179 | )
180 | def test_parse_xz_index_invalid(data: str, message: str) -> None:
181 |     with pytest.raises(XZError) as exc_info:
182 |         parse_xz_index(bytes.fromhex(data))
183 |     assert str(exc_info.value) == message
184 | 
185 | 
186 | XZ_FOOTER_CASES = (  # (check id, hex of the matching 12-byte footer)
187 |     # all have backward_size=8 (i.e. no blocks)
188 |     pytest.param(CHECK_NONE, "06729e7a010000000000595a", id="check_none"),
189 |     pytest.param(CHECK_CRC32, "9042990d010000000001595a", id="check_crc32"),
190 |     pytest.param(CHECK_CRC64, "1fb6f37d010000000004595a", id="check_crc64"),
191 |     pytest.param(CHECK_SHA256, "189b4b9a01000000000a595a", id="check_sha256"),
192 | )
193 | 
194 | 
195 | @pytest.mark.parametrize("check, data", XZ_FOOTER_CASES)
196 | def test_create_xz_footer(check: int, data: str) -> None:  # last 12 bytes only
197 |     assert create_xz_index_footer(check, [])[-12:] == bytes.fromhex(data)
198 | 
199 | 
200 | def test_create_xz_footer_invalid_check() -> None:
201 |     with pytest.raises(XZError) as exc_info:
202 |         create_xz_index_footer(17, [])  # 17 is rejected as a check value
203 |     assert str(exc_info.value) == "footer check"
204 | 
205 | 
206 | @pytest.mark.parametrize("check, data", XZ_FOOTER_CASES)
207 | def test_parse_xz_footer(check: int, data: str) -> None:
208 |     assert parse_xz_footer(bytes.fromhex(data)) == (check, 8)  # 8 is backward_size
209 | 
210 | 
211 | @pytest.mark.parametrize(
212 |     "data, message",
213 |     (
214 |         ("009042990d010000000001595a", "footer length"),  # 13 bytes
215 |         ("9042990d0100000000015959", "footer magic"),  # last byte altered
216 |         ("9042090d010000000001595a", "footer crc32"),
217 |         ("f4522e10010000000011595a", "footer flags"),
218 |         ("d1738214010000000101595a", "footer flags"),
219 |         ("c1505b47010000001001595a", "footer flags"),
220 |     ),
221 | )
222 | def test_parse_xz_footer_invalid(data: str, message: str) -> None:
223 |     with pytest.raises(XZError) as exc_info:
224 |         parse_xz_footer(bytes.fromhex(data))
225 |     assert str(exc_info.value) == message
226 | 
227 | 
228 | def test_default_check_supported() -> None:
229 |     assert is_check_supported(DEFAULT_CHECK)  # checked against the local lzma module
230 | 


--------------------------------------------------------------------------------
/tests/unit/test_floordict.py:
--------------------------------------------------------------------------------
  1 | from typing import Dict
  2 | 
  3 | import pytest
  4 | 
  5 | from xz.utils import FloorDict
  6 | 
  7 | 
  8 | def expect_floor_dict(floordict: FloorDict[str], items: Dict[int, str]) -> None:
  9 |     sorted_keys = sorted(items)  # every view is expected in ascending key order
 10 |     assert len(floordict) == len(items)
 11 |     assert list(floordict) == sorted_keys
 12 |     assert list(floordict.keys()) == sorted_keys
 13 |     assert list(floordict.values()) == [items[key] for key in sorted_keys]
 14 |     assert list(floordict.items()) == [(key, items[key]) for key in sorted_keys]
 15 |     # pylint: disable=protected-access
 16 |     assert floordict._keys == sorted_keys
 17 |     assert floordict._dict == items
 18 | 
 19 | 
 20 | def test_empty() -> None:
 21 |     floordict = FloorDict[str]()
 22 | 
 23 |     expect_floor_dict(floordict, {})
 24 | 
 25 |     with pytest.raises(KeyError):
 26 |         floordict[0]  # pylint: disable=pointless-statement
 27 |     with pytest.raises(KeyError):
 28 |         floordict[42]  # pylint: disable=pointless-statement
 29 |     with pytest.raises(KeyError):
 30 |         floordict.last_key  # pylint: disable=pointless-statement
 31 |     with pytest.raises(KeyError):
 32 |         floordict.last_item  # pylint: disable=pointless-statement
 33 | 
 34 | 
 35 | def test_normal() -> None:
 36 |     floordict = FloorDict[str]()
 37 |     floordict[10] = "ten"
 38 |     floordict[50] = "fifty"
 39 |     with pytest.raises(TypeError):
 40 |         floordict["wrong type"] = "wrong type"  # type: ignore[index]
 41 | 
 42 |     expect_floor_dict(floordict, {10: "ten", 50: "fifty"})
 43 |     # a lookup returns the value stored at the greatest key <= the requested one
 44 |     assert floordict[10] == "ten"
 45 |     assert floordict.last_key == 50
 46 |     assert floordict.last_item == "fifty"
 47 | 
 48 |     assert floordict[42] == "ten"
 49 |     assert floordict.get_with_index(42) == (10, "ten")
 50 | 
 51 |     assert floordict[50] == "fifty"
 52 |     assert floordict[1337] == "fifty"
 53 |     assert floordict.get(0) is None  # below the smallest key: nothing to return
 54 |     with pytest.raises(KeyError):
 55 |         floordict[0]  # pylint: disable=pointless-statement
 56 |     assert floordict.get(7) is None
 57 |     with pytest.raises(KeyError):
 58 |         floordict[7]  # pylint: disable=pointless-statement
 59 |     with pytest.raises(KeyError):
 60 |         floordict[-42]  # pylint: disable=pointless-statement
 61 |     with pytest.raises(TypeError):
 62 |         # pylint: disable=pointless-statement
 63 |         floordict["wrong type"]  # type: ignore[index]
 64 | 
 65 | 
 66 | def test_override() -> None:
 67 |     floordict = FloorDict[str]()
 68 |     floordict[10] = "ten"
 69 |     floordict[20] = "twenty"
 70 |     floordict[30] = "thirty"
 71 | 
 72 |     expect_floor_dict(floordict, {10: "ten", 20: "twenty", 30: "thirty"})
 73 | 
 74 |     floordict[20] = "two-ten"
 75 |     assert floordict[15] == "ten"
 76 |     assert floordict[20] == "two-ten"
 77 |     assert floordict[25] == "two-ten"
 78 |     assert floordict[50] == "thirty"
 79 | 
 80 |     expect_floor_dict(floordict, {10: "ten", 20: "two-ten", 30: "thirty"})
 81 | 
 82 | 
 83 | def test_del() -> None:
 84 |     floordict = FloorDict[str]()
 85 |     floordict[10] = "ten"
 86 |     floordict[20] = "twenty"
 87 |     floordict[30] = "thirty"
 88 |     assert floordict[20] == "twenty"
 89 |     assert floordict[22] == "twenty"
 90 |     expect_floor_dict(floordict, {10: "ten", 20: "twenty", 30: "thirty"})
 91 | 
 92 |     del floordict[20]
 93 |     assert floordict[20] == "ten"
 94 |     assert floordict[22] == "ten"
 95 |     expect_floor_dict(floordict, {10: "ten", 30: "thirty"})
 96 | 
 97 |     with pytest.raises(KeyError):
 98 |         del floordict[20]
 99 |     with pytest.raises(KeyError):
100 |         del floordict[40]
101 | 
102 | 
103 | def test_pop() -> None:
104 |     floordict = FloorDict[str]()
105 |     floordict[10] = "ten"
106 |     floordict[20] = "twenty"
107 |     floordict[30] = "thirty"
108 |     assert floordict[25] == "twenty"
109 |     expect_floor_dict(floordict, {10: "ten", 20: "twenty", 30: "thirty"})
110 | 
111 |     with pytest.raises(KeyError):
112 |         floordict.pop(25)
113 | 
114 |     assert floordict.pop(20) == "twenty"
115 |     expect_floor_dict(floordict, {10: "ten", 30: "thirty"})
116 |     assert floordict[25] == "ten"
117 | 
118 | 
119 | def test_values() -> None:
120 |     floordict = FloorDict[str]()
121 |     expected = {}
122 |     for i in range(50):
123 |         floordict[i * 2] = str(i * 2)
124 |         expected[i * 2] = str(i * 2)
125 |         expect_floor_dict(floordict, expected)
126 |         for j in range(100):
127 |             value = min(i * 2, j - (j % 2))
128 |             assert floordict[j] == str(value)
129 |             assert floordict.get_with_index(j) == (value, str(value))
130 | 


--------------------------------------------------------------------------------
/tests/unit/test_ioabstract.py:
--------------------------------------------------------------------------------
  1 | from io import DEFAULT_BUFFER_SIZE, UnsupportedOperation
  2 | from pathlib import Path
  3 | from typing import BinaryIO
  4 | from unittest.mock import Mock, call
  5 | 
  6 | import pytest
  7 | 
  8 | from xz.io import IOAbstract
  9 | 
 10 | #
 11 | # len
 12 | #
 13 | 
 14 | 
 15 | def test_len() -> None:
 16 |     obj = IOAbstract(10)
 17 |     assert len(obj) == 10
 18 | 
 19 | 
 20 | #
 21 | # fileno
 22 | #
 23 | 
 24 | 
 25 | def test_fileno(tmp_path: Path) -> None:
 26 |     file_path = tmp_path / "file"
 27 |     file_path.write_bytes(b"abcd")
 28 | 
 29 |     class Impl(IOAbstract):
 30 |         def __init__(self, fileobj: BinaryIO) -> None:
 31 |             super().__init__(10)
 32 |             self.fileobj = fileobj
 33 | 
 34 |     with file_path.open("rb") as fin:
 35 |         obj = Impl(fin)
 36 |         assert obj.fileno() == fin.fileno()
 37 | 
 38 | 
 39 | def test_fileno_ko() -> None:
 40 |     obj = IOAbstract(10)
 41 |     with pytest.raises(UnsupportedOperation):
 42 |         obj.fileno()
 43 | 
 44 | 
 45 | #
 46 | # tell / seek
 47 | #
 48 | 
 49 | 
 50 | def test_seek_not_seekable() -> None:
 51 |     class Impl(IOAbstract):
 52 |         def __init__(self) -> None:
 53 |             super().__init__(10)
 54 | 
 55 |         def seekable(self) -> bool:
 56 |             return False
 57 | 
 58 |     obj = Impl()
 59 |     assert obj.seekable() is False
 60 |     with pytest.raises(UnsupportedOperation) as exc_info:
 61 |         obj.seek(1)
 62 |     assert str(exc_info.value) == "seek"
 63 | 
 64 | 
 65 | def test_tell_seek() -> None:
 66 |     obj = IOAbstract(10)
 67 |     assert obj.seekable() is True
 68 |     assert obj.tell() == 0
 69 | 
 70 |     # absolute (no whence)
 71 |     assert obj.seek(1) == 1
 72 |     assert obj.tell() == 1
 73 |     assert obj.seek(3) == 3
 74 |     assert obj.tell() == 3
 75 |     assert obj.seek(10) == 10
 76 |     assert obj.tell() == 10
 77 |     with pytest.raises(ValueError) as exc_info:
 78 |         obj.seek(-1)
 79 |     assert str(exc_info.value) == "invalid seek position"
 80 |     assert obj.seek(42) == 42
 81 |     assert obj.tell() == 42
 82 | 
 83 |     # absolute (with whence)
 84 |     assert obj.seek(5, 0) == 5
 85 |     assert obj.tell() == 5
 86 |     assert obj.seek(10, 0) == 10
 87 |     assert obj.tell() == 10
 88 |     with pytest.raises(ValueError) as exc_info:
 89 |         obj.seek(-1, 0)
 90 |     assert str(exc_info.value) == "invalid seek position"
 91 |     assert obj.seek(42, 0) == 42
 92 |     assert obj.tell() == 42
 93 | 
 94 |     # relative
 95 |     assert obj.seek(10) == 10
 96 |     assert obj.seek(-7, 1) == 3
 97 |     assert obj.tell() == 3
 98 |     assert obj.seek(2, 1) == 5
 99 |     assert obj.tell() == 5
100 |     with pytest.raises(ValueError) as exc_info:
101 |         obj.seek(-6, 1)
102 |     assert str(exc_info.value) == "invalid seek position"
103 |     assert obj.tell() == 5
104 |     assert obj.seek(37, 1) == 42
105 |     assert obj.tell() == 42
106 | 
107 |     # from end
108 |     assert obj.seek(0, 2) == 10
109 |     assert obj.tell() == 10
110 |     assert obj.seek(-4, 2) == 6
111 |     assert obj.tell() == 6
112 |     assert obj.seek(-10, 2) == 0
113 |     assert obj.tell() == 0
114 |     assert obj.seek(32, 2) == 42
115 |     assert obj.tell() == 42
116 |     with pytest.raises(ValueError) as exc_info:
117 |         obj.seek(-11, 2)
118 |     assert str(exc_info.value) == "invalid seek position"
119 | 
120 |     # from error
121 |     with pytest.raises(ValueError) as exc_info:
122 |         obj.seek(42, 3)
123 |     assert str(exc_info.value) == "unsupported whence value"
124 | 
125 |     # seek after close
126 |     obj.close()
127 |     with pytest.raises(ValueError) as exc_info:
128 |         obj.seek(0)
129 |     assert str(exc_info.value) == "I/O operation on closed file"
130 | 
131 | 
132 | #
133 | # read
134 | #
135 | 
136 | 
137 | def test_read_non_readable() -> None:
138 |     class Impl(IOAbstract):
139 |         def __init__(self) -> None:
140 |             super().__init__(10)
141 | 
142 |         def readable(self) -> bool:
143 |             return False
144 | 
145 |     obj = Impl()
146 |     assert obj.readable() is False
147 |     with pytest.raises(UnsupportedOperation) as exc_info:
148 |         obj.read(1)
149 |     assert str(exc_info.value) == "read"
150 | 
151 | 
152 | def test_tell_read() -> None:
153 |     class Impl(IOAbstract):
154 |         def __init__(self) -> None:
155 |             super().__init__(10)
156 | 
157 |         def _read(self, size: int) -> bytes:
158 |             # for tests, does not rely on position
159 |             return b"xyz"[:size]
160 | 
161 |         def _write_after(self) -> None:
162 |             raise RuntimeError("should not be called")
163 | 
164 |     obj = Impl()
165 |     assert obj.tell() == 0
166 | 
167 |     # read all
168 |     assert obj.read() == b"xyzxyzxyzx"
169 |     obj.seek(5)
170 |     assert obj.read() == b"xyzxy"
171 | 
172 |     # read from pos
173 |     obj.seek(3)
174 |     assert obj.read(2) == b"xy"
175 |     assert obj.read(2) == b"xy"
176 |     assert obj.read(2) == b"xy"
177 |     assert obj.read(2) == b"x"
178 |     assert obj.read(2) == b""
179 |     assert obj.read(2) == b""
180 | 
181 |     # read from after EOF
182 |     obj.seek(11)
183 |     assert obj.read(2) == b""
184 | 
185 |     # read after close
186 |     obj.close()
187 |     with pytest.raises(ValueError) as exc_info:
188 |         obj.read(1)
189 |     assert str(exc_info.value) == "I/O operation on closed file"
190 | 
191 | 
192 | def test_tell_read_empty() -> None:
193 |     class Impl(IOAbstract):
194 |         def __init__(self) -> None:
195 |             super().__init__(10)
196 |             self.empty_reads = 100
197 | 
198 |         def _read(self, size: int) -> bytes:
199 |             self.empty_reads -= 1
200 |             if self.empty_reads > 0:
201 |                 return b""
202 |             return b"a"
203 | 
204 |     obj = Impl()
205 |     assert obj.tell() == 0
206 |     assert obj.read() == b"aaaaaaaaaa"
207 | 
208 | 
209 | #
210 | # write
211 | #
212 | 
213 | 
214 | def test_write_non_writeable() -> None:
215 |     class Impl(IOAbstract):
216 |         def __init__(self) -> None:
217 |             super().__init__(10)
218 | 
219 |         def writable(self) -> bool:
220 |             return False
221 | 
222 |     with Impl() as obj:
223 |         assert obj.writable() is False
224 |         with pytest.raises(UnsupportedOperation) as exc_info:
225 |             obj.write(b"hello")
226 |         assert str(exc_info.value) == "write"
227 | 
228 | 
229 | @pytest.mark.parametrize("write_partial", (True, False))
230 | def test_write_full(write_partial: bool) -> None:
231 |     class Impl(IOAbstract):
232 |         def __init__(self) -> None:
233 |             super().__init__(10)
234 |             self.mock = Mock()
235 | 
236 |         def _write_before(self) -> None:
237 |             self.mock.write_start()
238 | 
239 |         def _write_after(self) -> None:
240 |             self.mock.write_finish()
241 | 
242 |         def _write(self, data: bytes) -> int:
243 |             self.mock.write(bytes(data))
244 |             if write_partial:
245 |                 return min(2, len(data))
246 |             return len(data)
247 | 
248 |     with Impl() as obj:
249 |         # write before end
250 |         obj.seek(5)
251 |         with pytest.raises(ValueError) as exc_info:
252 |             obj.write(b"abcdef")
253 |         assert str(exc_info.value) == "write is only supported from EOF"
254 |         assert not obj.mock.called
255 | 
256 |         # write at end
257 |         obj.seek(10)
258 |         assert obj.write(b"") == 0
259 |         assert obj.tell() == 10
260 |         assert not obj.mock.called
261 |         assert obj.write(b"ghijkl") == 6
262 |         assert obj.tell() == 16
263 |         if write_partial:
264 |             assert obj.mock.method_calls == [
265 |                 call.write_start(),
266 |                 call.write(b"ghijkl"),
267 |                 call.write(b"ijkl"),
268 |                 call.write(b"kl"),
269 |             ]
270 |         else:
271 |             assert obj.mock.method_calls == [
272 |                 call.write_start(),
273 |                 call.write(b"ghijkl"),
274 |             ]
275 |         obj.mock.reset_mock()
276 | 
277 |         # write after end
278 |         obj.seek(20)
279 |         assert obj.write(b"mnopq") == 5
280 |         assert obj.tell() == 25
281 |         if write_partial:
282 |             assert obj.mock.method_calls == [
283 |                 call.write(b"\x00\x00\x00\x00"),
284 |                 call.write(b"\x00\x00"),
285 |                 call.write(b"mnopq"),
286 |                 call.write(b"opq"),
287 |                 call.write(b"q"),
288 |             ]
289 |         else:
290 |             assert obj.mock.method_calls == [
291 |                 call.write(b"\x00\x00\x00\x00"),
292 |                 call.write(b"mnopq"),
293 |             ]
294 |         obj.mock.reset_mock()
295 | 
296 |         # (big) write nothing after end (used e.g. by tuncate)
297 |         limit = 30 if write_partial else int(DEFAULT_BUFFER_SIZE * 3.7)
298 |         obj.seek(limit)
299 |         assert obj.write(b"") == 0
300 |         assert obj.tell() == limit
301 |         if write_partial:
302 |             assert obj.mock.method_calls == [
303 |                 call.write(b"\x00\x00\x00\x00\x00"),
304 |                 call.write(b"\x00\x00\x00"),
305 |                 call.write(b"\x00"),
306 |             ]
307 |         else:
308 |             assert obj.mock.method_calls == [
309 |                 call.write(b"\x00" * DEFAULT_BUFFER_SIZE),
310 |                 call.write(b"\x00" * DEFAULT_BUFFER_SIZE),
311 |                 call.write(b"\x00" * DEFAULT_BUFFER_SIZE),
312 |                 call.write(b"\x00" * (limit - 3 * DEFAULT_BUFFER_SIZE - 25)),
313 |             ]
314 |         obj.mock.reset_mock()
315 | 
316 |         # close calls write_finish once
317 |         obj.close()
318 |         assert obj.mock.method_calls == [call.write_finish()]
319 |         obj.mock.reset_mock()
320 |         obj.close()
321 |         assert not obj.mock.method_calls
322 |         obj.close()
323 | 
324 |         # write after close
325 |         with pytest.raises(ValueError) as exc_info:
326 |             obj.write(b"xyz")
327 |         assert str(exc_info.value) == "I/O operation on closed file"
328 | 
329 | 
330 | #
331 | # truncate
332 | #
333 | 
334 | 
335 | def test_truncate_non_writeable() -> None:
336 |     class Impl(IOAbstract):
337 |         def __init__(self) -> None:
338 |             super().__init__(10)
339 | 
340 |         def writable(self) -> bool:
341 |             return False
342 | 
343 |     with Impl() as obj:
344 |         assert obj.writable() is False
345 |         with pytest.raises(UnsupportedOperation) as exc_info:
346 |             obj.truncate(4)
347 |         assert str(exc_info.value) == "truncate"
348 | 
349 | 
350 | @pytest.mark.parametrize("with_size", (True, False))
351 | def test_truncate_with_size(with_size: bool) -> None:
352 |     class Impl(IOAbstract):
353 |         def __init__(self) -> None:
354 |             super().__init__(10)
355 |             self.mock = Mock()
356 | 
357 |         def _write_before(self) -> None:
358 |             self.mock.write_start()
359 | 
360 |         def _write_after(self) -> None:
361 |             self.mock.write_finish()
362 | 
363 |         def _write(self, data: bytes) -> int:
364 |             raise RuntimeError("should not be called")
365 | 
366 |         def _truncate(self, size: int) -> None:
367 |             self.mock.truncate(size)
368 | 
369 |     with Impl() as obj:
370 |         obj.seek(7)
371 |         assert not obj.mock.method_calls
372 | 
373 |         def truncate(size: int) -> int:
374 |             if with_size:
375 |                 return obj.truncate(size)
376 |             obj.seek(size)
377 |             return obj.truncate()
378 | 
379 |         # truncate before start
380 |         with pytest.raises(ValueError) as exc_info:
381 |             obj.truncate(-1)
382 |         assert str(exc_info.value) == "invalid truncate size"
383 |         assert not obj.mock.method_calls
384 | 
385 |         # truncate before end
386 |         assert truncate(5) == 5
387 |         assert not with_size or obj.tell() == 7
388 |         assert len(obj) == 5
389 |         assert obj.mock.method_calls == [call.write_start(), call.truncate(5)]
390 |         obj.mock.reset_mock()
391 | 
392 |         # truncate at end
393 |         assert truncate(5) == 5
394 |         assert not with_size or obj.tell() == 7
395 |         assert len(obj) == 5
396 |         assert not obj.mock.method_calls
397 |         obj.mock.reset_mock()
398 | 
399 |         # truncate after end
400 |         assert truncate(20) == 20
401 |         assert not with_size or obj.tell() == 7
402 |         assert len(obj) == 20
403 |         assert obj.mock.method_calls == [call.truncate(20)]
404 |         obj.mock.reset_mock()
405 | 
406 |         # close calls write_finish once
407 |         obj.close()
408 |         assert obj.mock.method_calls == [call.write_finish()]
409 |         obj.mock.reset_mock()
410 |         obj.close()
411 |         assert not obj.mock.method_calls
412 | 
413 |         # truncate after close
414 |         with pytest.raises(ValueError) as exc_info:
415 |             obj.truncate(5)
416 |         assert str(exc_info.value) == "I/O operation on closed file"
417 | 


--------------------------------------------------------------------------------
/tests/unit/test_iocombiner.py:
--------------------------------------------------------------------------------
  1 | from io import SEEK_SET, BytesIO
  2 | from typing import List, cast
  3 | from unittest.mock import Mock, call
  4 | 
  5 | import pytest
  6 | 
  7 | from xz.io import IOAbstract, IOCombiner, IOProxy
  8 | 
  9 | 
 10 | def generate_mock(length: int) -> Mock:
 11 |     mock = Mock()
 12 |     mock.__class__ = cast(Mock, IOAbstract)  # needs to be subclass of IOAbstract
 13 |     mock._length = length  # pylint: disable=protected-access
 14 |     mock.__len__ = lambda s: s._length  # pylint: disable=protected-access
 15 | 
 16 |     def write(data: bytes) -> int:
 17 |         mock._length += len(data)
 18 |         return len(data)
 19 | 
 20 |     mock.write.side_effect = write
 21 |     mock.writable.return_value = True
 22 |     return mock
 23 | 
 24 | 
 25 | #
 26 | # tell / seek
 27 | #
 28 | 
 29 | 
 30 | def test_seek() -> None:
 31 |     originals = [
 32 |         generate_mock(2),
 33 |         generate_mock(0),
 34 |         generate_mock(8),
 35 |     ]
 36 |     combiner = IOCombiner(*originals)
 37 | 
 38 |     assert combiner.tell() == 0
 39 |     assert combiner.seek(7) == 7
 40 |     assert combiner.tell() == 7
 41 | 
 42 |     for original in originals:
 43 |         assert not original.method_calls  # did not touch original
 44 | 
 45 | 
 46 | #
 47 | # read
 48 | #
 49 | 
 50 | 
 51 | def test_read() -> None:
 52 |     originals: List[IOAbstract] = [
 53 |         IOProxy(BytesIO(b"abc"), 0, 3),
 54 |         generate_mock(0),  # size 0, will be never used
 55 |         IOProxy(BytesIO(b"defghij"), 0, 7),
 56 |     ]
 57 |     combiner = IOCombiner(*originals)
 58 | 
 59 |     # read all
 60 |     originals[0].seek(2)
 61 |     originals[2].seek(4)
 62 |     combiner.seek(0)
 63 |     assert combiner.read() == b"abcdefghij"
 64 |     assert originals[0].tell() == 3
 65 |     assert originals[2].tell() == 7
 66 |     combiner.seek(4)
 67 |     assert combiner.read() == b"efghij"
 68 |     assert originals[0].tell() == 3
 69 |     assert originals[2].tell() == 7
 70 | 
 71 |     # read partial
 72 |     originals[0].seek(2)
 73 |     originals[2].seek(4)
 74 |     combiner.seek(1)
 75 |     assert combiner.read(6) == b"bcdefg"
 76 |     assert originals[0].tell() == 3
 77 |     assert originals[2].tell() == 4
 78 |     assert combiner.read(6) == b"hij"
 79 |     assert originals[0].tell() == 3
 80 |     assert originals[2].tell() == 7
 81 |     assert combiner.read(6) == b""
 82 |     assert originals[0].tell() == 3
 83 |     assert originals[2].tell() == 7
 84 |     assert combiner.read(6) == b""
 85 |     assert originals[0].tell() == 3
 86 |     assert originals[2].tell() == 7
 87 | 
 88 |     # with original seek
 89 |     combiner.seek(1)
 90 |     originals[0].seek(2)
 91 |     originals[2].seek(4)
 92 |     assert combiner.read(5) == b"bcdef"
 93 |     assert originals[0].tell() == 3
 94 |     assert originals[2].tell() == 3
 95 | 
 96 |     # never used at all
 97 |     assert not cast(Mock, originals[1]).method_calls
 98 | 
 99 | 
100 | #
101 | # write
102 | #
103 | 
104 | 
105 | def test_write() -> None:
106 |     parts = []
107 | 
108 |     class Combiner(IOCombiner[IOAbstract]):
109 |         def _create_fileobj(self) -> IOAbstract:
110 |             fileobj = generate_mock(0)
111 |             parts.append(fileobj)
112 |             return fileobj
113 | 
114 |     with Combiner() as combiner:
115 |         assert combiner.writable()
116 |         assert len(parts) == 0
117 | 
118 |         # create new from scratch
119 |         combiner.write(b"abc")
120 |         assert len(parts) == 1
121 |         assert parts[0].method_calls == [
122 |             call.seek(0, SEEK_SET),
123 |             call.write(memoryview(b"abc")),
124 |         ]
125 |         parts[0].method_calls.clear()
126 | 
127 |         combiner.write(b"def")
128 |         assert len(parts) == 1
129 |         assert parts[0].method_calls == [
130 |             call.seek(3, SEEK_SET),
131 |             call.writable(),
132 |             call.write(memoryview(b"def")),
133 |         ]
134 |         parts[0].method_calls.clear()
135 | 
136 |         combiner.seek(8)
137 |         combiner.write(b"ghi")
138 |         assert len(parts) == 1
139 |         assert parts[0].method_calls == [
140 |             call.seek(6, SEEK_SET),
141 |             call.writable(),
142 |             call.write(memoryview(b"\x00\x00")),
143 |             call.seek(8, SEEK_SET),
144 |             call.writable(),
145 |             call.write(memoryview(b"ghi")),
146 |         ]
147 |         parts[0].method_calls.clear()
148 | 
149 |         # not writable anymore -> create new fileobj
150 |         parts[0].writable.return_value = False
151 |         combiner.write(b"jkl")
152 |         assert len(parts) == 2
153 |         assert parts[0].method_calls == [
154 |             call.seek(11, SEEK_SET),
155 |             call.writable(),
156 |             call.writable(),
157 |         ]
158 |         assert parts[1].method_calls == [
159 |             call.seek(0, SEEK_SET),
160 |             call.write(memoryview(b"jkl")),
161 |         ]
162 |         parts[0].method_calls.clear()
163 |         parts[1].method_calls.clear()
164 | 
165 |         combiner.write(b"mno")
166 |         assert len(parts) == 2
167 |         assert not parts[0].method_calls
168 |         assert parts[1].method_calls == [
169 |             call.seek(3, SEEK_SET),
170 |             call.writable(),
171 |             call.write(memoryview(b"mno")),
172 |         ]
173 |         parts[1].method_calls.clear()
174 | 
175 |         # force change fileobj
176 |         combiner._change_fileobj()  # pylint: disable=protected-access
177 |         assert len(parts) == 3
178 |         assert not parts[0].method_calls
179 |         assert parts[1].method_calls == [
180 |             call.writable(),
181 |             call._write_end(),  # pylint: disable=protected-access
182 |         ]
183 |         assert not parts[2].method_calls
184 |         parts[1].method_calls.clear()
185 | 
186 |         # force change fileobj again
187 |         combiner._change_fileobj()  # pylint: disable=protected-access
188 |         assert len(parts) == 4
189 |         assert not parts[0].method_calls
190 |         assert not parts[1].method_calls
191 |         assert not parts[2].method_calls  # no call to _write_end
192 |         assert not parts[3].method_calls
193 |         parts[1].method_calls.clear()
194 | 
195 |         combiner.write(b"pqr")
196 |         assert len(parts) == 4
197 |         assert not parts[0].method_calls
198 |         assert not parts[1].method_calls
199 |         assert not parts[2].method_calls
200 |         assert parts[3].method_calls == [
201 |             call.seek(0, SEEK_SET),
202 |             call.writable(),
203 |             call.write(memoryview(b"pqr")),
204 |         ]
205 |         parts[3].method_calls.clear()
206 | 
207 |         # don't create fileobj if write nothing
208 |         parts[1].writable.return_value = False
209 |         combiner.write(b"")
210 |         assert len(parts) == 4
211 |         assert not parts[0].method_calls
212 |         assert not parts[1].method_calls
213 |         assert not parts[2].method_calls
214 |         assert not parts[3].method_calls
215 | 
216 |     # check write_finish
217 |     assert not parts[0].method_calls
218 |     assert not parts[1].method_calls
219 |     assert not parts[2].method_calls
220 |     assert parts[3].method_calls == [
221 |         call._write_end(),  # pylint: disable=protected-access
222 |     ]
223 | 
224 |     # check if last fileobj is empty no calls to _write_end
225 |     with Combiner() as combiner:
226 |         combiner.write(b"abc")
227 |         combiner._change_fileobj()  # pylint: disable=protected-access
228 |         parts[0].method_calls.clear()
229 |         assert not parts[1].method_calls
230 |     assert not parts[0].method_calls
231 |     assert not parts[1].method_calls  # no calls to _write_end
232 | 
233 | 
234 | #
235 | # truncate
236 | #
237 | 
238 | 
239 | def test_truncate() -> None:
240 |     # pylint: disable=protected-access
241 |     originals = [
242 |         generate_mock(2),
243 |         generate_mock(0),
244 |         generate_mock(8),
245 |         generate_mock(10),
246 |         generate_mock(20),
247 |     ]
248 | 
249 |     with IOCombiner(*originals) as combiner:
250 |         # truncate between two boundaries
251 |         combiner.truncate(17)
252 |         assert originals[3].method_calls == [call.truncate(7)]
253 |         assert not originals[4].method_calls
254 |         assert dict(combiner._fileobjs) == {
255 |             0: originals[0],
256 |             2: originals[2],
257 |             10: originals[3],
258 |         }
259 |         originals[3].reset_mock()
260 | 
261 |         # truncate after length
262 |         combiner.truncate(42)
263 |         assert originals[3].method_calls == [call.truncate(32)]
264 |         assert dict(combiner._fileobjs) == {
265 |             0: originals[0],
266 |             2: originals[2],
267 |             10: originals[3],
268 |         }
269 |         originals[3].reset_mock()
270 | 
271 |         # truncate at boundary
272 |         combiner.truncate(10)
273 |         assert dict(combiner._fileobjs) == {
274 |             0: originals[0],
275 |             2: originals[2],
276 |         }
277 |         assert not originals[2].method_calls
278 |         assert not originals[3].method_calls
279 | 
280 |         # truncate at boundary
281 |         combiner.truncate(2)
282 |         assert dict(combiner._fileobjs) == {
283 |             0: originals[0],
284 |         }
285 |         assert not originals[0].method_calls
286 |         assert not originals[1].method_calls
287 |         assert not originals[2].method_calls
288 | 
289 |         # truncate at start
290 |         combiner.truncate(0)
291 |         assert not dict(combiner._fileobjs)
292 |         assert not originals[0].method_calls
293 | 
294 | 
295 | #
296 | # append
297 | #
298 | 
299 | 
300 | def test_append() -> None:
301 |     combiner = IOCombiner[IOAbstract](generate_mock(13), generate_mock(37))
302 |     assert len(combiner) == 50
303 |     combiner._append(  # pylint: disable=protected-access
304 |         IOProxy(BytesIO(b"abcdefghij"), 0, 10)
305 |     )
306 |     assert len(combiner) == 60
307 |     combiner.seek(54)
308 |     assert combiner.read(4) == b"efgh"
309 | 
310 | 
311 | def test_append_invalid() -> None:
312 |     combiner = IOCombiner[IOAbstract](generate_mock(13), generate_mock(37))
313 |     assert len(combiner) == 50
314 |     with pytest.raises(TypeError):
315 |         # pylint: disable=protected-access
316 |         combiner._append(BytesIO(b"abcdefghij"))  # type: ignore[arg-type]
317 | 


--------------------------------------------------------------------------------
/tests/unit/test_ioproxy.py:
--------------------------------------------------------------------------------
 1 | from io import BytesIO
 2 | from pathlib import Path
 3 | from unittest.mock import Mock, call
 4 | 
 5 | from xz.io import IOProxy
 6 | 
 7 | 
 8 | def test_fileno(tmp_path: Path) -> None:
 9 |     file_path = tmp_path / "file"
10 |     file_path.write_bytes(b"abcd")
11 | 
12 |     with file_path.open("rb") as fin:
13 |         obj = IOProxy(fin, 1, 3)
14 |         assert obj.fileno() == fin.fileno()
15 | 
16 | 
17 | def test_seek() -> None:
18 |     original = Mock()
19 |     proxy = IOProxy(original, 4, 14)
20 | 
21 |     assert proxy.tell() == 0
22 |     assert proxy.seek(7) == 7
23 |     assert proxy.tell() == 7
24 | 
25 |     assert not original.method_calls  # did not touch original
26 | 
27 | 
28 | def test_read() -> None:
29 |     original = BytesIO(b"xxxxabcdefghijyyyyy")
30 |     proxy = IOProxy(original, 4, 14)
31 | 
32 |     # read all
33 |     original.seek(2)
34 |     proxy.seek(0)
35 |     assert proxy.read() == b"abcdefghij"
36 |     assert original.tell() == 14
37 |     proxy.seek(4)
38 |     assert proxy.read() == b"efghij"
39 |     assert original.tell() == 14
40 | 
41 |     # read partial
42 |     original.seek(2)
43 |     proxy.seek(6)
44 |     assert proxy.read(3) == b"ghi"
45 |     assert original.tell() == 13
46 |     assert proxy.read(3) == b"j"
47 |     assert original.tell() == 14
48 |     assert proxy.read(3) == b""
49 |     assert original.tell() == 14
50 |     assert proxy.read(3) == b""
51 |     assert original.tell() == 14
52 | 
53 |     # with original seek
54 |     original.seek(2)
55 |     proxy.seek(4)
56 |     original.seek(1)
57 |     assert proxy.read() == b"efghij"
58 |     assert original.tell() == 14
59 | 
60 | 
61 | def test_write() -> None:
62 |     original = BytesIO(b"xxxxabcdefghijyyyyy")
63 |     with IOProxy(original, 4, 14) as proxy:
64 |         proxy.seek(10)
65 | 
66 |         assert proxy.write(b"uvw") == 3
67 |         assert original.getvalue() == b"xxxxabcdefghijuvwyy"
68 | 
69 |         assert proxy.write(b"UVWXYZ") == 6
70 |         assert original.getvalue() == b"xxxxabcdefghijuvwUVWXYZ"
71 | 
72 | 
73 | def test_truncate() -> None:
74 |     original = Mock()
75 |     with IOProxy(original, 4, 14) as proxy:
76 |         assert proxy.truncate(5) == 5
77 |         assert original.method_calls == [call.truncate(9)]
78 |         original.reset_mock()
79 | 
80 |         assert proxy.truncate(20) == 20
81 |         assert original.method_calls == [call.truncate(24)]
82 | 


--------------------------------------------------------------------------------
/tests/unit/test_iostatic.py:
--------------------------------------------------------------------------------
 1 | from io import UnsupportedOperation
 2 | 
 3 | import pytest
 4 | 
 5 | from xz.io import IOStatic
 6 | 
 7 | 
 8 | def test_read() -> None:
 9 |     static = IOStatic(b"abcdefghij")
10 | 
11 |     # read all
12 |     static.seek(0)
13 |     assert static.read() == b"abcdefghij"
14 |     static.seek(4)
15 |     assert static.read() == b"efghij"
16 | 
17 |     # read partial
18 |     static.seek(6)
19 |     assert static.read(3) == b"ghi"
20 |     assert static.read(3) == b"j"
21 |     assert static.read(3) == b""
22 |     assert static.read(3) == b""
23 | 
24 | 
25 | def test_write() -> None:
26 |     with IOStatic(b"abc") as static:
27 |         assert static.writable() is False
28 |         static.seek(3)
29 |         with pytest.raises(UnsupportedOperation):
30 |             static.write(b"def")
31 | 
32 | 
33 | def test_truncate() -> None:
34 |     with IOStatic(b"abc") as static:
35 |         assert static.writable() is False
36 |         with pytest.raises(UnsupportedOperation):
37 |             static.truncate()
38 | 


--------------------------------------------------------------------------------
/tests/unit/test_open.py:
--------------------------------------------------------------------------------
  1 | from io import BytesIO
  2 | import lzma
  3 | from pathlib import Path
  4 | from typing import List, Optional
  5 | from unittest.mock import Mock
  6 | 
  7 | import pytest
  8 | 
  9 | from xz.open import xz_open
 10 | from xz.strategy import RollingBlockReadStrategy
 11 | 
 12 | # a stream with two blocks (lengths: 10, 3)
 13 | # one UTF8 character is split across the two blocks
 14 | STREAM_BYTES = bytes.fromhex(
 15 |     "fd377a585a000004e6d6b446"
 16 |     "0200210116000000742fe5a3010009e299a5207574663820e2000000404506004bafe33d"
 17 |     "0200210116000000742fe5a301000299a50a0000c6687a2b8dbda0cf"
 18 |     "0002220a1b0300001b1c3777"
 19 |     "b1c467fb020000000004595a"
 20 | )
 21 | 
 22 | 
 23 | #
 24 | # read
 25 | #
 26 | 
 27 | 
 28 | def test_mode_rb() -> None:
 29 |     fileobj = BytesIO(STREAM_BYTES)
 30 | 
 31 |     with xz_open(fileobj, "rb") as xzfile:
 32 |         assert xzfile.mode == "r"
 33 |         assert len(xzfile) == 13
 34 |         assert xzfile.stream_boundaries == [0]
 35 |         assert xzfile.block_boundaries == [0, 10]
 36 | 
 37 |         assert xzfile.read() == b"\xe2\x99\xa5 utf8 \xe2\x99\xa5\n"
 38 | 
 39 |         assert xzfile.seek(9) == 9
 40 |         assert xzfile.read() == b"\xe2\x99\xa5\n"
 41 | 
 42 | 
 43 | def test_mode_rt() -> None:
 44 |     fileobj = BytesIO(STREAM_BYTES)
 45 | 
 46 |     with xz_open(fileobj, "rt") as xzfile:
 47 |         assert xzfile.mode == "rt"
 48 |         assert xzfile.stream_boundaries == [0]
 49 |         assert xzfile.block_boundaries == [0, 10]
 50 | 
 51 |         assert xzfile.read() == "♥ utf8 ♥\n"
 52 | 
 53 |         assert xzfile.seek(9) == 9
 54 |         assert xzfile.read() == "♥\n"
 55 | 
 56 | 
 57 | def test_mode_rt_file(tmp_path: Path) -> None:
 58 |     file_path = tmp_path / "file.xz"
 59 |     file_path.write_bytes(STREAM_BYTES)
 60 | 
 61 |     with file_path.open("rb") as fin:
 62 |         with xz_open(fin, "rt") as xzfile:
 63 |             assert xzfile.stream_boundaries == [0]
 64 |             assert xzfile.block_boundaries == [0, 10]
 65 |             assert xzfile.fileno() == fin.fileno()
 66 | 
 67 |             assert xzfile.read() == "♥ utf8 ♥\n"
 68 | 
 69 |             assert xzfile.seek(9) == 9
 70 |             assert xzfile.read() == "♥\n"
 71 | 
 72 | 
 73 | @pytest.mark.parametrize(
 74 |     "encoding, expected",
 75 |     (
 76 |         pytest.param("utf8", "еñϲоԺε", id="utf8"),
 77 |         pytest.param("latin1", "ÐµÃ±Ï²Ð¾ÔºÎµ", id="latin1"),
 78 |     ),
 79 | )
 80 | def test_mode_rt_encoding(encoding: str, expected: str) -> None:
 81 |     fileobj = BytesIO(
 82 |         bytes.fromhex(
 83 |             "fd377a585a000000ff12d9410200210116000000742fe5a301000bd0b5c3b1cf"
 84 |             "b2d0bed4baceb50000011c0c5da447cf06729e7a010000000000595a"
 85 |         )
 86 |     )
 87 |     with xz_open(fileobj, "rt", encoding=encoding) as xzfile:
 88 |         assert xzfile.read() == expected
 89 | 
 90 | 
 91 | @pytest.mark.parametrize(
 92 |     "errors, expected",
 93 |     (
 94 |         pytest.param(None, None, id="None"),
 95 |         pytest.param("strict", None, id="strict"),
 96 |         pytest.param("ignore", "encoding", id="ignore"),
 97 |         pytest.param("replace", "en�co�di�ng", id="replace"),
 98 |         pytest.param(
 99 |             "backslashreplace", r"en\x99co\x98di\x97ng", id="backslashreplace"
100 |         ),
101 |     ),
102 | )
103 | def test_mode_rt_encoding_errors(
104 |     errors: Optional[str], expected: Optional[str]
105 | ) -> None:
106 |     fileobj = BytesIO(
107 |         bytes.fromhex(
108 |             "fd377a585a000000ff12d9410200210116000000742fe5a301000a656e99636f"
109 |             "986469976e67000000011b0b39a7621e06729e7a010000000000595a"
110 |         )
111 |     )
112 | 
113 |     with xz_open(fileobj, "rt", errors=errors) as xzfile:
114 |         if expected is None:
115 |             with pytest.raises(ValueError):
116 |                 xzfile.read()
117 |         else:
118 |             assert xzfile.read() == expected
119 | 
120 | 
121 | @pytest.mark.parametrize(
122 |     "newline, expected",
123 |     (
124 |         pytest.param(None, ["a\n", "b\n", "c\n", "d"], id="None"),
125 |         pytest.param("", ["a\n", "b\r", "c\r\n", "d"], id="''"),
126 |         pytest.param("\n", ["a\n", "b\rc\r\n", "d"], id="'\n'"),
127 |         pytest.param("\r", ["a\nb\r", "c\r", "\nd"], id="'\r'"),
128 |         pytest.param("\r\n", ["a\nb\rc\r\n", "d"], id="'\r\n'"),
129 |     ),
130 | )
131 | def test_mode_rt_newline(newline: Optional[str], expected: List[str]) -> None:
132 |     fileobj = BytesIO(
133 |         bytes.fromhex(
134 |             "fd377a585a000000ff12d9410200210116000000742fe5a3010007610a620d63"
135 |             "0d0a64000001180840a546ac06729e7a010000000000595a"
136 |         )
137 |     )
138 | 
139 |     with xz_open(fileobj, "rt", newline=newline) as xzfile:
140 |         assert xzfile.readlines() == expected
141 | 
142 | 
143 | def test_mode_rb_encoding() -> None:
144 |     fileobj = BytesIO(STREAM_BYTES)
145 |     with pytest.raises(ValueError):
146 |         xz_open(fileobj, "rb", encoding="latin1")
147 | 
148 | 
149 | def test_mode_rb_encoding_errors() -> None:
150 |     fileobj = BytesIO(STREAM_BYTES)
151 |     with pytest.raises(ValueError):
152 |         xz_open(fileobj, "rb", errors="ignore")
153 | 
154 | 
155 | def test_mode_rb_newline() -> None:
156 |     fileobj = BytesIO(STREAM_BYTES)
157 |     with pytest.raises(ValueError):
158 |         xz_open(fileobj, "rb", newline="\n")
159 | 
160 | 
161 | #
162 | # write
163 | #
164 | 
165 | TEST_MODE_W_CHECK_BYTES = bytes.fromhex(
166 |     # stream 1
167 |     "fd377a585a0000016922de36"
168 |     "0200210116000000742fe5a3010001ceb1000000256bc6a8"
169 |     "00011602d06110d2"
170 |     "9042990d010000000001595a"
171 |     # stream 2
172 |     "fd377a585a0000016922de36"
173 |     "0200210116000000742fe5a3010001ceb20000009f3acf31"
174 |     "00011602d06110d2"
175 |     "9042990d010000000001595a"
176 |     # stream 3 (changed check)
177 |     "fd377a585a000004e6d6b446"
178 |     "0200210116000000742fe5a3010001ceb3000000ab6cffc6b19a1d23"
179 |     "00011a02dc2ea57e"
180 |     "1fb6f37d010000000004595a"
181 |     # stream 4 (changed check)
182 |     "fd377a585a000004e6d6b446"
183 |     "0200210116000000742fe5a3010001ceb4000000accd9792dc23671f"
184 |     "00011a02dc2ea57e"
185 |     "1fb6f37d010000000004595a"
186 | )
187 | 
188 | 
189 | def test_mode_wb_check() -> None:
190 |     fileobj = BytesIO()
191 | 
192 |     with xz_open(fileobj, "wb", check=1) as xzfile:
193 |         assert xzfile.mode == "w"
194 |         xzfile.write(b"\xce\xb1")
195 |         xzfile.change_stream()
196 |         xzfile.check = 4
197 |         xzfile.write(b"\xce\xb2")
198 |         xzfile.change_stream()
199 |         xzfile.write(b"\xce\xb3")
200 |         xzfile.change_stream()
201 |         xzfile.write(b"\xce\xb4")
202 | 
203 |     assert fileobj.getvalue() == TEST_MODE_W_CHECK_BYTES
204 | 
205 | 
206 | def test_mode_wt_check() -> None:
207 |     fileobj = BytesIO()
208 | 
209 |     with xz_open(fileobj, "wt", check=1) as xzfile:
210 |         assert xzfile.mode == "wt"
211 |         xzfile.write("α")
212 |         xzfile.change_stream()
213 |         xzfile.check = 4
214 |         xzfile.write("β")
215 |         xzfile.change_stream()
216 |         xzfile.write("γ")
217 |         xzfile.change_stream()
218 |         xzfile.write("δ")
219 | 
220 |     assert fileobj.getvalue() == TEST_MODE_W_CHECK_BYTES
221 | 
222 | 
223 | TEST_MODE_W_FILTERS_BYTES = bytes.fromhex(
224 |     ## stream 1
225 |     # header
226 |     "fd377a585a0000016922de36"
227 |     # block 1
228 |     "0200210116000000742fe5a3010001ceb1000000256bc6a8"
229 |     # block 2
230 |     "0200210116000000742fe5a3010001ceb20000009f3acf31"
231 |     # block 3 (changed filters)
232 |     "02010301002101167920c4ee010001cee5000000090ac846"
233 |     # block 4 (changed filters)
234 |     "02010301002101167920c4ee010001cee6000000aa9facd8"
235 |     # index
236 |     "0004160216021602160200008a2bb83b"
237 |     # footer
238 |     "9be35140030000000001595a"
239 |     ## stream 2
240 |     # header
241 |     "fd377a585a0000016922de36"
242 |     # block 1 (changed filters)
243 |     "02010301002101167920c4ee010001cee70000003cafabaf"
244 |     # block 2 (changed filters)
245 |     "02010301002101167920c4ee010001cee800000086fea236"
246 |     # index
247 |     "00021602160200008ba0042b"
248 |     # footer
249 |     "3e300d8b020000000001595a"
250 |     ## stream 3
251 |     # header
252 |     "fd377a585a0000016922de36"
253 |     # block 1 (changed filters)
254 |     "02010301002101167920c4ee010001cee900000010cea541"
255 |     # block 2 (changed filters)
256 |     "02010301002101167920c4ee010001ceea00000081d31ad1"
257 |     # index
258 |     "00021602160200008ba0042b"
259 |     # footer
260 |     "3e300d8b020000000001595a"
261 | )
262 | 
263 | 
264 | def test_mode_wb_filters() -> None:
265 |     fileobj = BytesIO()
266 | 
267 |     with xz_open(fileobj, "wb", check=1) as xzfile:
268 |         xzfile.write(b"\xce\xb1")
269 |         xzfile.change_block()
270 |         xzfile.filters = [{"id": 3, "dist": 1}, {"id": 33}]
271 |         xzfile.write(b"\xce\xb2")
272 |         xzfile.change_block()
273 |         xzfile.write(b"\xce\xb3")
274 |         xzfile.change_block()
275 |         xzfile.write(b"\xce\xb4")
276 |         xzfile.change_stream()
277 |         xzfile.write(b"\xce\xb5")
278 |         xzfile.change_block()
279 |         xzfile.write(b"\xce\xb6")
280 |         xzfile.change_stream()
281 |         xzfile.write(b"\xce\xb7")
282 |         xzfile.change_block()
283 |         xzfile.write(b"\xce\xb8")
284 | 
285 |     assert fileobj.getvalue() == TEST_MODE_W_FILTERS_BYTES
286 | 
287 | 
288 | def test_mode_wt_filters() -> None:
289 |     fileobj = BytesIO()
290 | 
291 |     with xz_open(fileobj, "wt", check=1) as xzfile:
292 |         xzfile.write("α")
293 |         xzfile.change_block()
294 |         xzfile.filters = [{"id": 3, "dist": 1}, {"id": 33}]
295 |         xzfile.write("β")
296 |         xzfile.change_block()
297 |         xzfile.write("γ")
298 |         xzfile.change_block()
299 |         xzfile.write("δ")
300 |         xzfile.change_stream()
301 |         xzfile.write("ε")
302 |         xzfile.change_block()
303 |         xzfile.write("ζ")
304 |         xzfile.change_stream()
305 |         xzfile.write("η")
306 |         xzfile.change_block()
307 |         xzfile.write("θ")
308 | 
309 |     assert fileobj.getvalue() == TEST_MODE_W_FILTERS_BYTES
310 | 
311 | 
312 | TEST_MODE_W_PRESET_BYTES = bytes.fromhex(
313 |     ## stream 1
314 |     # header
315 |     "fd377a585a0000016922de36"
316 |     # block 1
317 |     "0200210116000000742fe5a3010001ceb1000000256bc6a8"
318 |     # block 2
319 |     "0200210116000000742fe5a3010001ceb20000009f3acf31"
320 |     # block 3 (changed preset)
321 |     "020021011c00000010cf58cc010001ceb3000000090ac846"
322 |     # block 4 (changed preset)
323 |     "020021011c00000010cf58cc010001ceb4000000aa9facd8"
324 |     # index
325 |     "0004160216021602160200008a2bb83b"
326 |     # footer
327 |     "9be35140030000000001595a"
328 |     ## stream 2
329 |     # header
330 |     "fd377a585a0000016922de36"
331 |     # block 1 (changed preset)
332 |     "020021011c00000010cf58cc010001ceb50000003cafabaf"
333 |     # block 2 (changed preset)
334 |     "020021011c00000010cf58cc010001ceb600000086fea236"
335 |     # index
336 |     "00021602160200008ba0042b"
337 |     # footer
338 |     "3e300d8b020000000001595a"
339 |     ## stream 3
340 |     # header
341 |     "fd377a585a0000016922de36"
342 |     # block 1 (changed preset)
343 |     "020021011c00000010cf58cc010001ceb700000010cea541"
344 |     # block 2 (changed preset)
345 |     "020021011c00000010cf58cc010001ceb800000081d31ad1"
346 |     # index
347 |     "00021602160200008ba0042b"
348 |     # footer
349 |     "3e300d8b020000000001595a"
350 | )
351 | 
352 | 
353 | def test_mode_wb_preset() -> None:
354 |     fileobj = BytesIO()
355 | 
356 |     with xz_open(fileobj, "wb", check=1) as xzfile:
357 |         xzfile.write(b"\xce\xb1")
358 |         xzfile.change_block()
359 |         xzfile.preset = 9
360 |         xzfile.write(b"\xce\xb2")
361 |         xzfile.change_block()
362 |         xzfile.write(b"\xce\xb3")
363 |         xzfile.change_block()
364 |         xzfile.write(b"\xce\xb4")
365 |         xzfile.change_stream()
366 |         xzfile.write(b"\xce\xb5")
367 |         xzfile.change_block()
368 |         xzfile.write(b"\xce\xb6")
369 |         xzfile.change_stream()
370 |         xzfile.write(b"\xce\xb7")
371 |         xzfile.change_block()
372 |         xzfile.write(b"\xce\xb8")
373 | 
374 |     assert fileobj.getvalue() == TEST_MODE_W_PRESET_BYTES
375 | 
376 | 
377 | def test_mode_wt_preset() -> None:
378 |     fileobj = BytesIO()
379 | 
380 |     with xz_open(fileobj, "wt", check=1) as xzfile:
381 |         xzfile.write("α")
382 |         xzfile.change_block()
383 |         xzfile.preset = 9
384 |         xzfile.write("β")
385 |         xzfile.change_block()
386 |         xzfile.write("γ")
387 |         xzfile.change_block()
388 |         xzfile.write("δ")
389 |         xzfile.change_stream()
390 |         xzfile.write("ε")
391 |         xzfile.change_block()
392 |         xzfile.write("ζ")
393 |         xzfile.change_stream()
394 |         xzfile.write("η")
395 |         xzfile.change_block()
396 |         xzfile.write("θ")
397 | 
398 |     assert fileobj.getvalue() == TEST_MODE_W_PRESET_BYTES
399 | 
400 | 
401 | @pytest.mark.parametrize(
402 |     "encoding, data",
403 |     (
404 |         pytest.param("utf8", "еñϲоԺε", id="utf8"),
405 |         pytest.param("latin1", "ÐµÃ±Ï²Ð¾ÔºÎµ", id="latin1"),
406 |     ),
407 | )
408 | def test_mode_wt_encoding(encoding: str, data: str) -> None:
409 |     fileobj = BytesIO()
410 |     with xz_open(fileobj, "wt", check=0, encoding=encoding) as xzfile:
411 |         xzfile.write(data)
412 | 
413 |     assert fileobj.getvalue() == bytes.fromhex(
414 |         "fd377a585a000000ff12d9410200210116000000742fe5a301000bd0b5c3b1cf"
415 |         "b2d0bed4baceb50000011c0c5da447cf06729e7a010000000000595a"
416 |     )
417 | 
418 | 
419 | @pytest.mark.parametrize(
420 |     "errors, data",
421 |     (
422 |         pytest.param(None, None, id="None"),
423 |         pytest.param("strict", None, id="strict"),
424 |         pytest.param(
425 |             "ignore",
426 |             b"encoding",
427 |             id="ignore",
428 |         ),
429 |         pytest.param(
430 |             "replace",
431 |             b"en?co?di?ng",
432 |             id="replace",
433 |         ),
434 |         pytest.param(
435 |             "backslashreplace",
436 |             rb"en\udc01co\udc02di\udc03ng",
437 |             id="backslashreplace",
438 |         ),
439 |     ),
440 | )
441 | def test_mode_wt_encoding_errors(errors: Optional[str], data: Optional[bytes]) -> None:
442 |     fileobj = BytesIO()
443 | 
444 |     with xz_open(fileobj, "wt", errors=errors) as xzfile:
445 |         if data is None:
446 |             xzfile.write("X")  # to avoid having an empty file
447 |             with pytest.raises(ValueError):
448 |                 xzfile.write("en\udc01co\udc0di\udc03ng")
449 |         else:
450 |             xzfile.write("en\udc01co\udc02di\udc03ng")
451 | 
452 |     if data is not None:
453 |         assert lzma.decompress(fileobj.getvalue()) == data
454 | 
455 | 
456 | @pytest.mark.parametrize(
457 |     "newline, data",
458 |     (
459 |         pytest.param(None, b"a\nb\n", id="None"),
460 |         pytest.param("", b"a\nb\n", id="''"),
461 |         pytest.param("\n", b"a\nb\n", id="'\n'"),
462 |         pytest.param("\r", b"a\rb\r", id="'\r'"),
463 |         pytest.param("\r\n", b"a\r\nb\r\n", id="'\r\n'"),
464 |     ),
465 | )
466 | def test_mode_wt_newline(newline: Optional[str], data: bytes) -> None:
467 |     fileobj = BytesIO()
468 | 
469 |     with xz_open(fileobj, "wt", newline=newline) as xzfile:
470 |         xzfile.writelines(["a\n", "b\n"])
471 | 
472 |     assert lzma.decompress(fileobj.getvalue()) == data
473 | 
474 | 
475 | #
476 | # misc
477 | #
478 | 
479 | 
480 | @pytest.mark.parametrize("mode", ("rtb", "rbt", "wtb", "wbt"))
481 | def test_mode_invalid(mode: str) -> None:
482 |     fileobj = BytesIO(STREAM_BYTES)
483 | 
484 |     with pytest.raises(ValueError) as exc_info:
485 |         xz_open(fileobj, mode)
486 |     assert str(exc_info.value) == f"Invalid mode: {mode}"
487 | 
488 | 
489 | @pytest.mark.parametrize("mode", ("r", "rt"))
490 | def test_default_strategy(mode: str) -> None:
491 |     fileobj = BytesIO(STREAM_BYTES)
492 | 
493 |     with xz_open(fileobj, mode) as xzfile:
494 |         assert isinstance(xzfile.block_read_strategy, RollingBlockReadStrategy)
495 | 
496 | 
497 | @pytest.mark.parametrize("mode", ("r", "rt"))
498 | def test_custom_strategy(mode: str) -> None:
499 |     fileobj = BytesIO(STREAM_BYTES)
500 |     strategy = Mock()
501 | 
502 |     with xz_open(fileobj, mode, block_read_strategy=strategy) as xzfile:
503 |         assert xzfile.block_read_strategy == strategy
504 | 


--------------------------------------------------------------------------------
/tests/unit/test_parse_mode.py:
--------------------------------------------------------------------------------
 1 | from itertools import permutations, product
 2 | import sys
 3 | from typing import Tuple
 4 | 
 5 | try:
 6 |     from typing import get_args
 7 | except ImportError:
 8 |     pass
 9 | 
10 | import pytest
11 | 
12 | from xz.typing import _XZModesBinaryType, _XZModesTextType
13 | from xz.utils import parse_mode
14 | 
15 | VALID_MODES = {
16 |     "".join(sorted(case[0] + extra)): case
17 |     for case in [
18 |         ("r", True, False),
19 |         ("r+", True, True),
20 |         ("w", False, True),
21 |         ("w+", True, True),
22 |         ("x", False, True),
23 |         ("x+", True, True),
24 |     ]
25 |     for extra in ("", "b")
26 | }
27 | 
28 | 
29 | @pytest.mark.skipif(
30 |     sys.version_info < (3, 9),
31 |     reason="Literal or get_args not supported",
32 | )
33 | def test_known_valid_modes_binary() -> None:
34 |     assert sorted(
35 |         "".join(sorted(mode)) for mode in get_args(_XZModesBinaryType)
36 |     ) == sorted(VALID_MODES)
37 | 
38 | 
39 | @pytest.mark.skipif(
40 |     sys.version_info < (3, 9),
41 |     reason="Literal or get_args not supported",
42 | )
43 | def test_known_valid_modes_text() -> None:
44 |     assert sorted(
45 |         "".join(sorted(mode.replace("t", ""))) for mode in get_args(_XZModesTextType)
46 |     ) == sorted(mode for mode in VALID_MODES if "b" not in mode)
47 | 
48 | 
49 | @pytest.mark.parametrize(
50 |     "mode, expected",
51 |     [pytest.param(mode, expected, id=mode) for mode, expected in VALID_MODES.items()],
52 | )
53 | def test_parse_mode_valid(mode: str, expected: Tuple[str, bool, bool]) -> None:
54 |     for parts in permutations(mode):
55 |         mode_permuted = "".join(parts)
56 |         assert parse_mode(mode_permuted) == expected, mode_permuted
57 | 
58 | 
59 | @pytest.mark.parametrize(
60 |     "mode",
61 |     [
62 |         "".join(mode_parts)
63 |         for mode_parts in product(*((c, "") for c in "arwx+tb"))
64 |         if "".join(sorted(mode_parts)) not in VALID_MODES
65 |     ]
66 |     + [mode * 2 for mode in VALID_MODES],
67 | )
68 | def test_parse_mode_invalid(mode: str) -> None:
69 |     for parts in permutations(mode):
70 |         mode_permuted = "".join(parts)
71 |         with pytest.raises(ValueError):
72 |             parse_mode(mode_permuted)
73 | 


--------------------------------------------------------------------------------
/tests/unit/test_stream.py:
--------------------------------------------------------------------------------
  1 | from io import SEEK_CUR, SEEK_END, BytesIO
  2 | import sys
  3 | from typing import Tuple, cast
  4 | from unittest.mock import Mock, call
  5 | 
  6 | import pytest
  7 | 
  8 | from xz.common import XZError
  9 | from xz.io import IOProxy
 10 | from xz.stream import XZStream
 11 | 
 12 | if sys.version_info >= (3, 9):  # pragma: no cover
 13 |     from collections.abc import Callable
 14 | else:  # pragma: no cover
 15 |     from typing import Callable
 16 | 
 17 | 
 18 | # a stream with two blocks (lengths: 100, 90)
 19 | STREAM_BYTES = bytes.fromhex(
 20 |     "fd377a585a0000016922de360200210116000000742fe5a3e0006300415d0020"
 21 |     "9842100431d01ab2853283057ddb5924a128599cc9911a7fcff8d59c1f6f887b"
 22 |     "cee97b1f83f1808f005de273e1a6e99a7eac4f8f632b7e43bbf1da311dce5c00"
 23 |     "00000000e7c35efa0200210116000000742fe5a3e00059003d5d00320cc42641"
 24 |     "c8b91ac7908be7e635b8e7d681d74b683cde914399f8de5460dc672363f1e067"
 25 |     "5a3ebac9f427ecbebcb94552c0dba85b26950f0ac98b22390000000031f4ee87"
 26 |     "00025964555a0000041276283e300d8b020000000001595a"
 27 | )
 28 | 
 29 | # a stream with no blocks
 30 | STREAM_BYTES_EMPTY = bytes.fromhex(
 31 |     "fd377a585a0000016922de36000000001cdf44219042990d010000000001595a"
 32 | )
 33 | 
 34 | 
 35 | def test_parse(data_pattern_locate: Callable[[bytes], Tuple[int, int]]) -> None:
 36 |     fileobj = Mock(wraps=BytesIO(b"\xff" * 1000 + STREAM_BYTES + b"\xee" * 1000))
 37 |     fileobj.seek(-1000, SEEK_END)
 38 |     fileobj.method_calls.clear()
 39 | 
 40 |     # parse stream
 41 |     stream = XZStream.parse(fileobj)
 42 |     assert stream.check == 1
 43 |     assert len(stream) == 190
 44 |     assert stream.block_boundaries == [0, 100]
 45 | 
 46 |     # make sure we don't read the blocks
 47 |     assert fileobj.method_calls == [
 48 |         call.seek(-12, SEEK_CUR),
 49 |         call.read(12),
 50 |         call.seek(-24, SEEK_CUR),
 51 |         call.read(12),
 52 |         call.seek(-204, SEEK_CUR),  # blocks are skipped over here
 53 |         call.read(12),
 54 |         call.seek(-12, SEEK_CUR),
 55 |     ]
 56 | 
 57 |     # fileobj should be at the beginning of the stream
 58 |     assert fileobj.tell() == 1000
 59 | 
 60 |     # read from start
 61 |     assert data_pattern_locate(stream.read(20)) == (0, 20)
 62 | 
 63 |     # read from middle of a block
 64 |     stream.seek(40)
 65 |     assert data_pattern_locate(stream.read(20)) == (40, 20)
 66 | 
 67 |     # read across two blocks
 68 |     stream.seek(90)
 69 |     assert data_pattern_locate(stream.read(20)) == (90, 20)
 70 | 
 71 |     # read middle of another block
 72 |     stream.seek(160)
 73 |     assert data_pattern_locate(stream.read(20)) == (160, 20)
 74 | 
 75 |     # go backward and read
 76 |     stream.seek(130)
 77 |     assert data_pattern_locate(stream.read(20)) == (130, 20)
 78 | 
 79 |     # read in previous block (going backward from last read in that block)
 80 |     stream.seek(60)
 81 |     assert data_pattern_locate(stream.read(20)) == (60, 20)
 82 | 
 83 |     # read until end
 84 |     stream.seek(170)
 85 |     assert data_pattern_locate(stream.read()) == (170, 20)
 86 | 
 87 | 
 88 | def test_parse_invalid_stream_flags_missmatch() -> None:
 89 |     fileobj = BytesIO(
 90 |         bytes.fromhex(
 91 |             "fd377a585a000004e6d6b446000000001cdf44219042990d010000000001595a"
 92 |         )
 93 |     )
 94 |     fileobj.seek(0, SEEK_END)
 95 |     with pytest.raises(XZError) as exc_info:
 96 |         XZStream.parse(fileobj)
 97 |     assert str(exc_info.value) == "stream: inconsistent check value"
 98 | 
 99 | 
100 | def test_parse_empty_block() -> None:
101 |     fileobj = BytesIO(
102 |         bytes.fromhex(
103 |             "fd377a585a0000016922de360200210116000000742fe5a30000000000000000"
104 |             "000111003b965f739042990d010000000001595a"
105 |         )
106 |     )
107 |     fileobj.seek(0, SEEK_END)
108 |     with pytest.raises(XZError) as exc_info:
109 |         XZStream.parse(fileobj)
110 |     assert str(exc_info.value) == "index record uncompressed size"
111 | 
112 | 
113 | def test_parse_empty_stream() -> None:
114 |     fileobj = BytesIO(STREAM_BYTES_EMPTY)
115 |     fileobj.seek(0, SEEK_END)
116 |     stream = XZStream.parse(fileobj)
117 |     assert len(stream) == 0
118 |     assert stream.block_boundaries == []
119 | 
120 | 
121 | def test_write(data_pattern: bytes) -> None:
122 |     # init with more size than what will be written at the end
123 |     init_size = 1024
124 |     assert len(STREAM_BYTES) < init_size
125 | 
126 |     fileobj = BytesIO(b"A" * init_size)
127 | 
128 |     with XZStream(cast(IOProxy, fileobj), 1) as stream:
129 |         assert fileobj.getvalue() == b"A" * init_size
130 | 
131 |         assert stream.block_boundaries == []
132 | 
133 |         stream.change_block()
134 |         assert stream.block_boundaries == []
135 | 
136 |         stream.write(data_pattern[:100])
137 |         assert stream.block_boundaries == [0]
138 | 
139 |         stream.change_block()
140 |         assert stream.block_boundaries == [0, 100]
141 | 
142 |         stream.write(data_pattern[100:190])
143 |         assert stream.block_boundaries == [0, 100]
144 | 
145 |     assert fileobj.getvalue() == STREAM_BYTES
146 | 
147 | 
148 | def test_write_from_existing_stream(data_pattern: bytes) -> None:
149 |     fileobj = BytesIO(
150 |         bytes.fromhex(
151 |             "fd377a585a0000016922de360200210116000000742fe5a3e0006300415d0020"
152 |             "9842100431d01ab2853283057ddb5924a128599cc9911a7fcff8d59c1f6f887b"
153 |             "cee97b1f83f1808f005de273e1a6e99a7eac4f8f632b7e43bbf1da311dce5c00"
154 |             "00000000e7c35efa0001596477f620019042990d010000000001595a"
155 |         )
156 |     )
157 |     fileobj.seek(0, SEEK_END)
158 |     with XZStream.parse(fileobj) as stream:
159 |         assert stream.read() == data_pattern[:100]
160 |         assert stream.block_boundaries == [0]
161 | 
162 |         stream.write(data_pattern[100:190])
163 |         assert stream.block_boundaries == [0, 100]
164 | 
165 |     assert fileobj.getvalue() == STREAM_BYTES
166 | 
167 | 
168 | def test_truncate_and_write(data_pattern: bytes) -> None:
169 |     fileobj = BytesIO(
170 |         bytes.fromhex(
171 |             "fd377a585a0000016922de360200210116000000742fe5a3e0006300415d0020"
172 |             "9842100431d01ab2853283057ddb5924a128599cc9911a7fcff8d59c1f6f887b"
173 |             "cee97b1f83f1808f005de273e1a6e99a7eac4f8f632b7e43bbf1da311dce5c00"
174 |             "00000000e7c35efa0200210116000000742fe5a30100025a5a5a0000407f8055"
175 |             "00025964170300008d97067a3e300d8b020000000001595a"
176 |         )
177 |     )
178 |     fileobj.seek(0, SEEK_END)
179 |     with XZStream.parse(fileobj) as stream:
180 |         assert stream.read() == data_pattern[:100] + b"ZZZ"
181 |         assert stream.block_boundaries == [0, 100]
182 | 
183 |         stream.seek(100)
184 |         stream.truncate()
185 |         assert stream.block_boundaries == [0]
186 | 
187 |         stream.write(data_pattern[100:190])
188 |         assert stream.block_boundaries == [0, 100]
189 | 
190 |     assert fileobj.getvalue() == STREAM_BYTES
191 | 
192 | 
193 | def test_truncate_middle_block() -> None:
194 |     fileobj = BytesIO(STREAM_BYTES)
195 |     fileobj.seek(0, SEEK_END)
196 |     with pytest.raises(ValueError) as exc_info:
197 |         with XZStream.parse(fileobj) as stream:
198 |             stream.truncate(80)
199 |     assert str(exc_info.value) == "truncate"
200 | 
201 | 
202 | def test_read_only_check() -> None:
203 |     fileobj = BytesIO()
204 | 
205 |     with XZStream(cast(IOProxy, fileobj), 1) as stream:
206 |         with pytest.raises(AttributeError):
207 |             stream.check = 4  # type: ignore[misc]
208 | 
209 | 
210 | def test_change_filters() -> None:
211 |     fileobj = BytesIO()
212 | 
213 |     with XZStream(cast(IOProxy, fileobj), 1) as stream:
214 |         stream.write(b"aa")
215 |         stream.change_block()
216 |         stream.filters = [{"id": 3, "dist": 1}, {"id": 33}]
217 |         stream.write(b"bb")
218 |         stream.change_block()
219 |         stream.write(b"cc")
220 |         stream.change_block()
221 |         stream.write(b"dd")
222 | 
223 |     assert fileobj.getvalue() == bytes.fromhex(
224 |         # header
225 |         "fd377a585a0000016922de36"
226 |         # block 1
227 |         "0200210116000000742fe5a30100016161000000d7198a07"
228 |         # block 2
229 |         "0200210116000000742fe5a30100016262000000ae1baeb5"
230 |         # block 3 (changed filters)
231 |         "02010301002101167920c4ee0100016300000000791ab2db"
232 |         # block 4 (changed filters)
233 |         "02010301002101167920c4ee01000164000000001d19970a"
234 |         # index
235 |         "0004160216021602160200008a2bb83b"
236 |         # footer
237 |         "9be35140030000000001595a"
238 |     )
239 | 
240 | 
241 | def test_change_preset() -> None:
242 |     fileobj = BytesIO()
243 | 
244 |     with XZStream(cast(IOProxy, fileobj), 1) as stream:
245 |         stream.write(b"aa")
246 |         stream.change_block()
247 |         stream.preset = 9
248 |         stream.write(b"bb")
249 |         stream.change_block()
250 |         stream.write(b"cc")
251 |         stream.change_block()
252 |         stream.write(b"dd")
253 | 
254 |     assert fileobj.getvalue() == bytes.fromhex(
255 |         # header
256 |         "fd377a585a0000016922de36"
257 |         # block 1
258 |         "0200210116000000742fe5a30100016161000000d7198a07"
259 |         # block 2
260 |         "0200210116000000742fe5a30100016262000000ae1baeb5"
261 |         # block 3 (changed preset)
262 |         "020021011c00000010cf58cc0100016363000000791ab2db"
263 |         # block 4 (changed preset)
264 |         "020021011c00000010cf58cc01000164640000001d19970a"
265 |         # index
266 |         "0004160216021602160200008a2bb83b"
267 |         # footer
268 |         "9be35140030000000001595a"
269 |     )
270 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | [tox]
 2 | envlist =
 3 |     py, py37, py38, py39, py310, py311, pypy3
 4 |     generate-integration-files, type, lint, format
 5 | 
 6 | [testenv]
 7 | deps =
 8 |     pytest
 9 |     pytest-cov
10 | passenv = PY_COLORS
11 | setenv =
12 |     COVERAGE_FILE = {toxworkdir}/{envname}/.coverage
13 |     PYTHONDEVMODE = 1
14 | commands =
15 |     pytest {posargs:-vv --cov-fail-under=100}
16 |     py: -coverage html
17 | 
18 | [testenv:generate-integration-files]
19 | deps =
20 |     pytest
21 |     pytest-cov
22 | passenv = PY_COLORS
23 | setenv =
24 |     PYTEST_ADDOPTS = --no-cov
25 |     PYTHONDEVMODE = 1
26 | commands = pytest -vv -m generate_integration_files --generate-integration-files
27 | 
28 | [testenv:type]
29 | deps =
30 |     mypy
31 |     pytest # for typing
32 | commands =
33 |     mypy
34 |     mypy --namespace-packages --explicit-package-bases tests
35 | 
36 | [testenv:lint]
37 | deps =
38 |     pylint
39 |     pytest # to avoid import errors
40 | commands =
41 |     pylint src
42 |     pylint -d duplicate-code,too-many-statements,use-implicit-booleaness-not-comparison tests
43 | 
44 | [testenv:format]
45 | skip_install = true
46 | deps =
47 |     black
48 |     isort
49 | commands =
50 |     black {posargs:--check --diff} src tests
51 |     isort {posargs:--check --diff} src tests
52 | 


--------------------------------------------------------------------------------