├── .coveragerc
├── .github
└── workflows
│ └── build.yml
├── .gitignore
├── .isort.cfg
├── .prettierrc.yaml
├── .pylintrc
├── .vscode
├── env
├── launch.json
└── settings.json
├── CHANGELOG.md
├── LICENSE.txt
├── README.md
├── dev-requirements.txt
├── mypy.ini
├── pytest.ini
├── setup.cfg
├── setup.py
├── src
└── xz
│ ├── __init__.py
│ ├── block.py
│ ├── common.py
│ ├── file.py
│ ├── io.py
│ ├── open.py
│ ├── py.typed
│ ├── strategy.py
│ ├── stream.py
│ ├── typing.py
│ └── utils.py
├── tests
├── conftest.py
├── integration
│ ├── conftest.py
│ ├── files
│ │ ├── check-crc32.json
│ │ ├── check-crc32.xz
│ │ ├── check-crc64.json
│ │ ├── check-crc64.xz
│ │ ├── check-none.json
│ │ ├── check-none.xz
│ │ ├── check-sha256.json
│ │ ├── check-sha256.xz
│ │ ├── example.xz
│ │ ├── few-blocks.json
│ │ ├── few-blocks.xz
│ │ ├── many-blocks.json
│ │ ├── many-blocks.xz
│ │ ├── one-stream-with-padding.json
│ │ ├── one-stream-with-padding.xz
│ │ ├── several-filters.json
│ │ ├── several-filters.xz
│ │ ├── several-streams-with-padding.json
│ │ ├── several-streams-with-padding.xz
│ │ ├── several-streams.json
│ │ ├── several-streams.xz
│ │ ├── various-block-sizes.json
│ │ ├── various-block-sizes.xz
│ │ ├── various-stream-checks-stream-padding-and-block-sizes.json
│ │ ├── various-stream-checks-stream-padding-and-block-sizes.xz
│ │ ├── various-stream-checks.json
│ │ └── various-stream-checks.xz
│ ├── test_file_read.py
│ ├── test_file_write.py
│ ├── test_generate_files.py
│ ├── test_ram_usage.py
│ └── test_readme.py
└── unit
│ ├── test_attr_proxy.py
│ ├── test_block.py
│ ├── test_common.py
│ ├── test_file.py
│ ├── test_floordict.py
│ ├── test_ioabstract.py
│ ├── test_iocombiner.py
│ ├── test_ioproxy.py
│ ├── test_iostatic.py
│ ├── test_open.py
│ ├── test_parse_mode.py
│ └── test_stream.py
└── tox.ini
/.coveragerc:
--------------------------------------------------------------------------------
1 | [html]
2 | directory = coverage
3 |
4 | [paths]
5 | source =
6 | src/xz/
7 | .tox/py*/lib/python*/site-packages/xz/
8 | .tox/py*/site-packages/xz/
9 |
10 | [report]
11 | exclude_lines =
12 | pragma: no cover
13 | def __repr__
14 | def __str__
15 | if __name__ == "__main__":
16 | show_missing = True
17 |
18 | [run]
19 | branch = True
20 | source = xz
21 |
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: build
2 |
3 | on: [push, pull_request]
4 |
5 | env:
6 | PY_COLORS: 1
7 |
8 | jobs:
9 | tests-py:
10 | runs-on: ubuntu-latest
11 | strategy:
12 | matrix:
13 | python:
14 | - "3.7"
15 | - "3.8"
16 | - "3.9"
17 | - "3.10"
18 | - "3.11"
19 | - "pypy-3.7"
20 | - "pypy-3.8"
21 | - "pypy-3.9"
22 | steps:
23 | - uses: actions/checkout@v3
24 | - name: Setup Python ${{ matrix.python }}
25 | uses: actions/setup-python@v4
26 | with:
27 | python-version: ${{ matrix.python }}
28 | - name: Install dependencies
29 | run: pip install tox
30 | - name: Run tests
31 | run: tox -e py
32 |
33 | tests-misc:
34 | runs-on: ubuntu-latest
35 | strategy:
36 | matrix:
37 | env: [generate-integration-files, type, lint, format]
38 | steps:
39 | - uses: actions/checkout@v3
40 | - name: Setup Python
41 | uses: actions/setup-python@v4
42 | with:
43 | python-version: "3.11"
44 | - name: Install dependencies
45 | run: pip install tox
46 | - name: Run ${{ matrix.env }}
47 | run: tox -e ${{ matrix.env }}
48 |
49 | build:
50 | needs: [tests-py, tests-misc]
51 | runs-on: ubuntu-latest
52 | steps:
53 | - uses: actions/checkout@v3
54 | with:
55 | # fetch all commits for setuptools_scm
56 | fetch-depth: 0
57 | - name: Setup Python
58 | uses: actions/setup-python@v4
59 | with:
60 | python-version: "3.11"
61 | - name: Build
62 | run: python setup.py sdist bdist_wheel
63 | - name: Save build artifacts
64 | uses: actions/upload-artifact@v3
65 | with:
66 | name: dist
67 | path: dist
68 |
69 | publish:
70 | if: startsWith(github.ref, 'refs/tags')
71 | needs: build
72 | runs-on: ubuntu-latest
73 | steps:
74 | - name: Restore build artifacts
75 | uses: actions/download-artifact@v3
76 | with:
77 | name: dist
78 | path: dist
79 | - name: Publish to PyPI
80 | uses: pypa/gh-action-pypi-publish@release/v1
81 | with:
82 | password: ${{ secrets.pypi_password }}
83 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /env
2 | __pycache__
3 |
4 | /build
5 | /dist
6 | *.egg-info
7 | /.eggs
8 |
9 | /.coverage
10 | /coverage
11 | /.pytest_cache
12 | /.tox
13 |
14 | /src/xz/_version.py
15 |
--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | force_sort_within_sections = True
3 | profile = black
4 | src_paths = src
5 |
--------------------------------------------------------------------------------
/.prettierrc.yaml:
--------------------------------------------------------------------------------
1 | printWidth: 88
2 | proseWrap: always
3 | tabWidth: 2
4 |
--------------------------------------------------------------------------------
/.pylintrc:
--------------------------------------------------------------------------------
1 | [BASIC]
2 | good-names =
3 | i,
4 | j,
5 | k,
6 | ex,
7 | _,
8 | T,
9 |
10 | [MESSAGES CONTROL]
11 | disable =
12 | missing-class-docstring,
13 | missing-function-docstring,
14 | missing-module-docstring,
15 | too-few-public-methods,
16 | too-many-arguments,
17 | too-many-branches,
18 | too-many-instance-attributes,
19 | too-many-locals,
20 |
21 | [SIMILARITIES]
22 | ignore-imports=yes
23 |
--------------------------------------------------------------------------------
/.vscode/env:
--------------------------------------------------------------------------------
1 | # for vscode + extensions import when outside of src (e.g. in tests)
2 | PYTHONPATH=./src
3 |
--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "0.2.0",
3 | "configurations": [
4 | {
5 | "name": "Python: Current File (Integrated Terminal)",
6 | "type": "python",
7 | "request": "launch",
8 | "program": "${file}",
9 | "console": "integratedTerminal"
10 | }
11 | ]
12 | }
13 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "files.exclude": {
3 | "env": true,
4 | "**/__pycache__": true,
5 | ".eggs": true,
6 | "**/*.egg-info": true,
7 | ".coverage": true,
8 | ".pytest_cache": true,
9 | ".tox": true
10 | },
11 | "editor.detectIndentation": false,
12 | "editor.formatOnSave": true,
13 | "editor.insertSpaces": true,
14 | "editor.tabSize": 4,
15 | "files.insertFinalNewline": true,
16 | "[json]": {
17 | "editor.defaultFormatter": "esbenp.prettier-vscode"
18 | },
19 | "[markdown]": {
20 | "editor.defaultFormatter": "esbenp.prettier-vscode",
21 | "editor.rulers": [88],
22 | "editor.wordWrap": "on",
23 | "editor.wordWrapColumn": 88
24 | },
25 | "[python]": {
26 | "editor.codeActionsOnSave": {
27 | "source.organizeImports": true
28 | }
29 | },
30 | "[yaml]": {
31 | "editor.defaultFormatter": "esbenp.prettier-vscode",
32 | "editor.tabSize": 2
33 | },
34 | "python.envFile": "${workspaceFolder}/.vscode/env",
35 | "python.formatting.provider": "black",
36 | "python.linting.pylintEnabled": true,
37 | "python.testing.pytestEnabled": true,
38 | "python.sortImports.args": ["-sp .isort.cfg"]
39 | }
40 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | All notable changes to this project will be documented in this file.
4 |
5 | The format is based on [Keep a Changelog](https://keepachangelog.com/), and this project
6 | adheres to [Semantic Versioning](https://semver.org/).
7 |
8 | ## [0.5.0] - 2023-02-27
9 |
10 | [0.5.0]: https://github.com/rogdham/python-xz/releases/tag/v0.5.0
11 |
12 | ### :boom: Breaking changes
13 |
14 | - End of Python 3.6 support
15 |
16 | ### :house: Internal
17 |
18 | - Necessary code changes following dev dependency update: black, pylint, pytest
19 | - Refactor a descriptor following PEP 487
20 | - Add tests for CPython 3.11 and PyPy 3.9
21 | - Use CPython 3.11 for misc. tests
22 | - Update Github actions dependencies
23 | - Import typing modules impacted by PEP 585 based on Python version
24 |
25 | ## [0.4.0] - 2022-01-09
26 |
27 | [0.4.0]: https://github.com/rogdham/python-xz/releases/tag/v0.4.0
28 |
29 | ### :rocket: Added
30 |
31 | - Advanced users may use the new `block_read_strategy` argument of `XZFile`/`xz.open` to
32 | customize the strategy for freeing block readers, and implement a different tradeoff
33 | between memory consumption and read speed when alternating reads between several
34 | blocks; the following strategies are provided: `RollingBlockReadStrategy` and
35 | `KeepBlockReadStrategy`
36 |
37 | ### :bug: Fixes
38 |
39 | - Free memory after a block is fully read
40 | - Free memory of LZMA decompressors when many blocks are partially read; this is a
41 | tradeoff defaulting to keeping the last 8 LZMA decompressors used
42 | - Typing: use `BinaryIO` instead of `IO[bytes]`
43 |
44 | ### :house: Internal
45 |
46 | - Specify the Python versions required in package metadata
47 | - Test the `mode` attribute of objects returned by `xz.open`/`XZFile`
48 | - Minor improvements in some docstrings
49 |
50 | ## [0.3.1] - 2021-12-26
51 |
52 | [0.3.1]: https://github.com/rogdham/python-xz/releases/tag/v0.3.1
53 |
54 | ### :house: Internal
55 |
56 | - Add tests for CPython 3.10 and PyPy 3.8
57 | - Use CPython 3.10 for misc. tests
58 | - Clarify which Python versions are supported in readme
59 | - Fix some linting issues found by latest versions of mypy/pylint
60 |
61 | ## [0.3.0] - 2021-11-07
62 |
63 | [0.3.0]: https://github.com/rogdham/python-xz/releases/tag/v0.3.0
64 |
65 | ### :boom: Breaking changes
66 |
67 | - The `filename` argument of `XZFile` is now mandatory; this change should have very
68 | limited impact as not providing it makes no sense and would have raised a `TypeError`,
69 | plus it was already mandatory on `xz.open`
70 |
71 | ### :rocket: Added
72 |
73 | - Type hints
74 |
75 | ### :house: Internal
76 |
77 | - Type validation with mypy
78 | - Distribute `py.typed` file in conformance with [PEP 561]
79 |
80 | [pep 561]: https://www.python.org/dev/peps/pep-0561/
81 |
82 | ## [0.2.0] - 2021-10-23
83 |
84 | [0.2.0]: https://github.com/rogdham/python-xz/releases/tag/v0.2.0
85 |
86 | ### :rocket: Added
87 |
88 | - Write modes (`w`, `x`, `r+`, `w+`, `x+`) :tada:
89 | - Allow to `seek` past the end of the fileobj
90 | - Calling `len` on a fileobj gives its length, and `bool` tells if it is empty
91 | - Export useful constants and functions from `lzma` for easy access: checks, filters,
92 | etc.
93 |
94 | ### :house: Internal
95 |
96 | - Test that no warnings are generated
97 | - Change development status to Alpha
98 |
99 | ## [0.1.2] - 2021-09-19
100 |
101 | [0.1.2]: https://github.com/rogdham/python-xz/releases/tag/v0.1.2
102 |
103 | ### :rocket: Added
104 |
105 | - Add `__version__` attribute to module, despite [PEP 396] being rejected
106 |
107 | [pep 396]: https://www.python.org/dev/peps/pep-0396/
108 |
109 | ## [0.1.1] - 2021-05-14
110 |
111 | [0.1.1]: https://github.com/rogdham/python-xz/releases/tag/v0.1.1
112 |
113 | ### :rocket: Added
114 |
115 | - Implementation of the `fileno` method
116 |
117 | ## [0.1.0] - 2021-05-13
118 |
119 | [0.1.0]: https://github.com/rogdham/python-xz/releases/tag/v0.1.0
120 |
121 | ### :rocket: Added
122 |
123 | - Initial public release :tada:
124 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2020 Rogdham
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # python-xz
4 |
5 | Pure Python implementation of the XZ file format with random access support
6 |
7 | _Leveraging the lzma module for fast (de)compression_
8 |
9 | [](https://github.com/rogdham/python-xz/actions?query=branch:master)
10 | [](https://pypi.org/project/python-xz/)
11 | [](https://github.com/rogdham/python-xz/search?q=fail+under&type=Code)
12 | [](https://mypy.readthedocs.io/)
13 | [](https://github.com/Rogdham/python-xz/blob/master/LICENSE.txt)
14 |
15 | ---
16 |
17 | [📖 Documentation](https://github.com/rogdham/python-xz/#usage) | [📃 Changelog](./CHANGELOG.md)
18 |
19 |
20 |
21 | ---
22 |
23 | An XZ file can be composed of several streams and blocks. This allows for fast random
24 | access when reading, but this is not supported by Python's builtin `lzma` module (which
25 | would read all previous blocks for nothing).
26 |
27 |
28 |
29 | | | [lzma] | [lzmaffi] | python-xz |
30 | | :---------------: | :---------------: | :------------------: | :------------------: |
31 | | module type | builtin | cffi (C extension) | pure Python |
32 | | 📄 **read** | | | |
33 | | random access | ❌ no<sup>1</sup> | ✔️ yes<sup>2</sup> | ✔️ yes<sup>2</sup> |
34 | | several blocks | ✔️ yes | ✔️✔️ yes<sup>3</sup> | ✔️✔️ yes<sup>3</sup> |
35 | | several streams | ✔️ yes | ✔️ yes | ✔️✔️ yes<sup>4</sup> |
36 | | stream padding | ❌ no<sup>5</sup> | ✔️ yes | ✔️ yes |
37 | | 📝 **write** | | | |
38 | | `w` mode | ✔️ yes | ✔️ yes | ✔️ yes |
39 | | `x` mode | ✔️ yes | ❌ no | ✔️ yes |
40 | | `a` mode | ✔️ new stream | ✔️ new stream | ⏳ planned |
41 | | `r+`/`w+`/… modes | ❌ no | ❌ no | ✔️ yes |
42 | | several blocks | ❌ no | ❌ no | ✔️ yes |
43 | | several streams | ❌ no<sup>6</sup> | ❌ no<sup>6</sup> | ✔️ yes |
44 | | stream padding | ❌ no | ❌ no | ⏳ planned |
45 |
46 |
47 |
48 |
49 | Notes
50 |
51 | 1. Reading from a position will read the file from the very beginning
52 | 2. Reading from a position will read the file from the beginning of the block
53 | 3. Block positions available with the `block_boundaries` attribute
54 | 4. Stream positions available with the `stream_boundaries` attribute
55 | 5. Related [issue](https://github.com/python/cpython/issues/88300)
56 | 6. Possible by manually closing and re-opening in append mode
57 |
58 |
59 |
60 | [lzma]: https://docs.python.org/3/library/lzma.html
61 | [lzmaffi]: https://github.com/r3m0t/backports.lzma
62 |
63 | ---
64 |
65 | ## Install
66 |
67 | Install `python-xz` with pip:
68 |
69 | ```sh
70 | $ python -m pip install python-xz
71 | ```
72 |
73 | _An unofficial package for conda is [also available][conda package], see [issue #5][#5]
74 | for more information._
75 |
76 | [conda package]: https://anaconda.org/conda-forge/python-xz
77 | [#5]: https://github.com/Rogdham/python-xz/issues/5
78 |
79 | ## Usage
80 |
81 | The API is similar to [lzma]: you can use either `xz.open` or `xz.XZFile`.
82 |
83 | ### Read mode
84 |
85 | ```python
86 | >>> with xz.open('example.xz') as fin:
87 | ... fin.read(18)
88 | ... fin.stream_boundaries # 2 streams
89 | ... fin.block_boundaries # 4 blocks in first stream, 2 blocks in second stream
90 | ... fin.seek(1000)
91 | ... fin.read(31)
92 | ...
93 | b'Hello, world! \xf0\x9f\x91\x8b'
94 | [0, 2000]
95 | [0, 500, 1000, 1500, 2000, 3000]
96 | 1000
97 | b'\xe2\x9c\xa8 Random access is fast! \xf0\x9f\x9a\x80'
98 | ```
99 |
100 | Opening in text mode works as well, but notice that seek arguments as well as boundaries
101 | are still in bytes (just like with `lzma.open`).
102 |
103 | ```python
104 | >>> with xz.open('example.xz', 'rt') as fin:
105 | ... fin.read(15)
106 | ... fin.stream_boundaries
107 | ... fin.block_boundaries
108 | ... fin.seek(1000)
109 | ... fin.read(26)
110 | ...
111 | 'Hello, world! 👋'
112 | [0, 2000]
113 | [0, 500, 1000, 1500, 2000, 3000]
114 | 1000
115 | '✨ Random access is fast! 🚀'
116 | ```
117 |
118 | ### Write mode
119 |
120 | Writing is only supported from the end of file. It is however possible to truncate the
121 | file first. Note that truncating is only supported on block boundaries.
122 |
123 | ```python
124 | >>> with xz.open('test.xz', 'w') as fout:
125 | ... fout.write(b'Hello, world!\n')
126 | ... fout.write(b'This sentence is still in the previous block\n')
127 | ... fout.change_block()
128 | ... fout.write(b'But this one is in its own!\n')
129 | ...
130 | 14
131 | 45
132 | 28
133 | ```
134 |
135 | Advanced usage:
136 |
137 | - Modes like `r+`/`w+`/`x+` allow to open for both read and write at the same time;
138 | however in the current implementation, a block with writing in progress is
139 | automatically closed when reading data from it.
140 | - The `check`, `preset` and `filters` arguments to `xz.open` and `xz.XZFile` allow to
141 | configure the default values for new streams and blocks.
142 | - Change block with the `change_block` method (the `preset` and `filters` attributes can
143 | be changed beforehand to apply to the new block).
144 | - Change stream with the `change_stream` method (the `check` attribute can be changed
145 | beforehand to apply to the new stream).
146 |
147 | ---
148 |
149 | ## FAQ
150 |
151 | ### How does random access work?
152 |
153 | XZ files are made of a number of streams, and each stream is composed of a number of
154 | blocks. This can be seen with `xz --list`:
155 |
156 | ```sh
157 | $ xz --list file.xz
158 | Strms Blocks Compressed Uncompressed Ratio Check Filename
159 | 1 13 16.8 MiB 297.9 MiB 0.056 CRC64 file.xz
160 | ```
161 |
162 | To read data from the middle of the 10th block, we will decompress the 10th block from
163 | its start until we reach the middle (and drop that decompressed data), then return
164 | the decompressed data from that point.
165 |
166 | Choosing a good block size is a tradeoff between seeking time during random access and
167 | compression ratio.
168 |
169 | ### How can I create XZ files optimized for random-access?
170 |
171 | You can open the file for writing and use the `change_block` method to create several
172 | blocks.
173 |
174 | Other tools allow to create XZ files with several blocks as well:
175 |
176 | - [XZ Utils](https://tukaani.org/xz/) needs to be called with flags:
177 |
178 | ```sh
179 | $ xz -T0 file # threading mode
180 | $ xz --block-size 16M file # same size for all blocks
181 | $ xz --block-list 16M,32M,8M,42M file # specific size for each block
182 | ```
183 |
184 | - [PIXZ](https://github.com/vasi/pixz) creates files with several blocks by default:
185 |
186 | ```sh
187 | $ pixz file
188 | ```
189 |
190 | ### Python version support
191 |
192 | As a general rule, all Python versions that are both [released and still officially
193 | supported][python-versions] are supported by `python-xz` and tested against (both
194 | CPython and PyPy implementations).
195 |
196 | If you have other use cases or find issues with some Python versions, feel free to
197 | [open a ticket](https://github.com/Rogdham/python-xz/issues/new)!
198 |
199 | [python-versions]: https://devguide.python.org/versions/#versions
200 |
--------------------------------------------------------------------------------
/dev-requirements.txt:
--------------------------------------------------------------------------------
1 | # install + dependencies
2 | -e .
3 |
4 | # typing
5 | mypy
6 |
7 | # tests
8 | coverage
9 | pytest
10 | pytest-cov
11 |
12 | # lint
13 | pylint
14 |
15 | # format
16 | black
17 | isort
18 |
19 | # publish
20 | setuptools_scm
21 | wheel
22 |
--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
1 | [mypy]
2 | # section names refer to the documentation
3 | # https://mypy.readthedocs.io/en/stable/config_file.html
4 |
5 | # Import discovery
6 | files = src
7 | ignore_missing_imports = False
8 | follow_imports = normal
9 |
10 | # Platform configuration
11 | python_version = 3.11
12 |
13 | # Disallow dynamic typing
14 | disallow_any_unimported = True
15 | disallow_any_decorated = True
16 | disallow_any_generics = True
17 | disallow_subclassing_any = True
18 |
19 | # Untyped definitions and calls
20 | disallow_untyped_calls = True
21 | disallow_untyped_defs = True
22 | disallow_incomplete_defs = True
23 | check_untyped_defs = True
24 | disallow_untyped_decorators = True
25 |
26 | # None and Optional handling
27 | no_implicit_optional = True
28 | strict_optional = True
29 |
30 | # Configuring warning
31 | warn_redundant_casts = True
32 | warn_unused_ignores = True
33 | warn_no_return = True
34 | warn_return_any = True
35 | warn_unreachable = True
36 |
37 | # Suppressing errors
38 | show_none_errors = True
39 | ignore_errors = False
40 |
41 | # Miscellaneous strictness flags
42 | strict_equality = True
43 |
44 | # Configuring error messages
45 | show_error_context = True
46 | show_error_codes = True
47 |
48 | # Miscellaneous
49 | warn_unused_configs = True
50 |
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | addopts =
3 | --cov
4 | --strict-markers
5 | filterwarnings =
6 | error
7 | # issue in standard lib for PyPy < 3.9
8 | ignore:Using or importing the ABCs from 'collections':DeprecationWarning:_lzma
9 | markers =
10 | generate_integration_files
11 | integration
12 | unit
13 | testpaths =
14 | tests
15 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | name = python-xz
3 | author = Rogdham
4 | author_email = contact@rogdham.net
5 | description = Pure Python implementation of the XZ file format with random access support
6 | long_description = file: README.md
7 | long_description_content_type = text/markdown
8 | url = https://github.com/rogdham/python-xz
9 | project_urls =
10 | Source = https://github.com/rogdham/python-xz
11 | keywords = xz lzma compression decompression
12 | license = MIT
13 | license_files = LICENSE.txt
14 | platform = any
15 | classifiers =
16 | Development Status :: 3 - Alpha
17 | License :: OSI Approved :: MIT License
18 | Operating System :: OS Independent
19 | Programming Language :: Python
20 | Programming Language :: Python :: 3
21 | Programming Language :: Python :: 3 :: Only
22 | Programming Language :: Python :: 3.7
23 | Programming Language :: Python :: 3.8
24 | Programming Language :: Python :: 3.9
25 | Programming Language :: Python :: 3.10
26 | Programming Language :: Python :: 3.11
27 | Topic :: Utilities
28 | Topic :: System :: Archiving
29 | Topic :: System :: Archiving :: Compression
30 |
31 | [options]
32 | include_package_data = True
33 | package_dir = =src
34 | packages = xz
35 | python_requires = >=3.7
36 | setup_requires =
37 | setuptools_scm
38 | wheel
39 | install_requires =
40 | typing-extensions>=4.5.0;python_version<"3.8"
41 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from setuptools import setup
4 |
5 | setup(
6 | use_scm_version={
7 | "write_to": "src/xz/_version.py",
8 | "write_to_template": '__version__ = "{version}"\n',
9 | }
10 | )
11 |
--------------------------------------------------------------------------------
/src/xz/__init__.py:
--------------------------------------------------------------------------------
1 | from lzma import (
2 | CHECK_CRC32,
3 | CHECK_CRC64,
4 | CHECK_ID_MAX,
5 | CHECK_NONE,
6 | CHECK_SHA256,
7 | CHECK_UNKNOWN,
8 | FILTER_ARM,
9 | FILTER_ARMTHUMB,
10 | FILTER_DELTA,
11 | FILTER_IA64,
12 | FILTER_LZMA1,
13 | FILTER_LZMA2,
14 | FILTER_POWERPC,
15 | FILTER_SPARC,
16 | FILTER_X86,
17 | MF_BT2,
18 | MF_BT3,
19 | MF_BT4,
20 | MF_HC3,
21 | MF_HC4,
22 | MODE_FAST,
23 | MODE_NORMAL,
24 | PRESET_DEFAULT,
25 | PRESET_EXTREME,
26 | compress,
27 | decompress,
28 | is_check_supported,
29 | )
30 |
31 | try:
32 | from xz._version import __version__
33 | except ImportError: # pragma: no cover
34 | __version__ = "0.0.0.dev0-unknown"
35 |
36 |
37 | from xz.common import XZError
38 | from xz.file import XZFile
39 | from xz.open import xz_open
40 | from xz.strategy import KeepBlockReadStrategy, RollingBlockReadStrategy
41 |
42 | # pylint: disable=redefined-builtin
43 | open = xz_open
44 | # pylint: enable=redefined-builtin
45 |
46 |
47 | __all__ = (
48 | "__version__",
49 | "KeepBlockReadStrategy",
50 | "RollingBlockReadStrategy",
51 | "XZError",
52 | "XZFile",
53 | "open",
54 | # re-export from lzma for easy access
55 | "CHECK_CRC32",
56 | "CHECK_CRC64",
57 | "CHECK_ID_MAX",
58 | "CHECK_NONE",
59 | "CHECK_SHA256",
60 | "CHECK_UNKNOWN",
61 | "FILTER_ARM",
62 | "FILTER_ARMTHUMB",
63 | "FILTER_DELTA",
64 | "FILTER_IA64",
65 | "FILTER_LZMA1",
66 | "FILTER_LZMA2",
67 | "FILTER_POWERPC",
68 | "FILTER_SPARC",
69 | "FILTER_X86",
70 | "MF_BT2",
71 | "MF_BT3",
72 | "MF_BT4",
73 | "MF_HC3",
74 | "MF_HC4",
75 | "MODE_FAST",
76 | "MODE_NORMAL",
77 | "PRESET_DEFAULT",
78 | "PRESET_EXTREME",
79 | "compress",
80 | "decompress",
81 | "is_check_supported",
82 | )
83 |
--------------------------------------------------------------------------------
/src/xz/block.py:
--------------------------------------------------------------------------------
1 | from io import DEFAULT_BUFFER_SIZE, SEEK_SET
2 | from lzma import FORMAT_XZ, LZMACompressor, LZMADecompressor, LZMAError
3 | from typing import Optional, Tuple, Union
4 |
5 | from xz.common import (
6 | XZError,
7 | create_xz_header,
8 | create_xz_index_footer,
9 | parse_xz_footer,
10 | parse_xz_index,
11 | )
12 | from xz.io import IOAbstract, IOCombiner, IOStatic
13 | from xz.strategy import KeepBlockReadStrategy
14 | from xz.typing import _BlockReadStrategyType, _LZMAFiltersType, _LZMAPresetType
15 |
16 |
class BlockRead:
    """Forward-only decompressor for the data of a single XZ block.

    The raw block is wrapped between a generated stream header and a
    generated index + footer (see ``__init__``), and the result is fed to an
    ``LZMADecompressor`` created with ``format=FORMAT_XZ``.
    """

    # maximum number of compressed bytes read from ``fileobj`` per call
    read_size = DEFAULT_BUFFER_SIZE

    def __init__(
        self,
        fileobj: IOAbstract,
        check: int,
        unpadded_size: int,
        uncompressed_size: int,
    ) -> None:
        """Prepare the reader.

        :param fileobj: file object holding the block data
        :param check: check identifier passed to the header/footer builders
        :param unpadded_size: value stored in the generated index record
        :param uncompressed_size: value stored in the generated index record;
            also used as the expected total output length of the block
        """
        self.length = uncompressed_size
        # header + block + (index, footer) concatenated as one readable object
        self.fileobj = IOCombiner(
            IOStatic(create_xz_header(check)),
            fileobj,
            IOStatic(
                create_xz_index_footer(check, [(unpadded_size, uncompressed_size)])
            ),
        )
        self.reset()

    def reset(self) -> None:
        """Rewind to the start of the block with a brand new decompressor."""
        self.fileobj.seek(0, SEEK_SET)
        self.pos = 0
        self.decompressor = LZMADecompressor(format=FORMAT_XZ)

    def decompress(self, pos: int, size: int) -> bytes:
        """Return decompressed data located at uncompressed offset ``pos``.

        A single call feeds at most ``read_size`` compressed bytes to the
        decompressor (end-of-block validation aside), so the returned data
        may be shorter than ``size`` and may even be empty.

        :raises XZError: if the decompressor already reached its end, or if
            the underlying file object has no more data to provide
        :raises LZMAError: if extra output is produced while validating the
            end of the block (other errors raised by the decompressor itself
            are not caught here)
        """
        # going backwards is not possible: restart from the beginning
        if pos < self.pos:
            self.reset()

        # number of decompressed bytes to discard before reaching ``pos``
        skip_before = pos - self.pos

        # pylint: disable=using-constant-test
        if self.decompressor.eof:
            raise XZError("block: decompressor eof")

        if self.decompressor.needs_input:
            data_input = self.fileobj.read(self.read_size)
            if not data_input:
                raise XZError("block: data eof")
        else:
            # the decompressor still holds buffered data to output
            data_input = b""

        # cap the output so that no more than the requested range is produced
        data_output = self.decompressor.decompress(data_input, skip_before + size)
        self.pos += len(data_output)

        if self.pos == self.length:
            # we reached the end of the block
            # according to the XZ specification, we must check the
            # remaining bytes of the block; this is mainly performed by the
            # decompressor itself when we consume it
            while not self.decompressor.eof:
                # the recursive call must not output anything more
                if self.decompress(self.pos, 1):
                    raise LZMAError("Corrupt input data")

        return data_output[skip_before:]
73 |
class BlockWrite:
    """Compress data into a single XZ block stored in ``fileobj``.

    An ``LZMACompressor`` producing a full XZ stream is used; the stream
    header, index and footer it generates are checked and dropped so that only
    the block itself is written.
    """

    def __init__(
        self,
        fileobj: IOAbstract,
        check: int,
        preset: _LZMAPresetType,
        filters: _LZMAFiltersType,
    ) -> None:
        """Create the compressor and consume the stream header it emits.

        :raises XZError: if the compressor's initial output is not the
            expected XZ header for ``check``
        """
        self.fileobj = fileobj
        self.check = check
        self.compressor = LZMACompressor(FORMAT_XZ, check, preset, filters)
        self.pos = 0
        # compressing nothing makes the compressor output its stream header,
        # which must not end up in the block
        emitted_header = self.compressor.compress(b"")
        if emitted_header != create_xz_header(check):
            raise XZError("block: compressor header")

    def _write(self, data: bytes) -> None:
        """Store ``data`` at the current block position and advance it."""
        if not data:
            return
        self.fileobj.seek(self.pos)
        self.fileobj.write(data)
        self.pos += len(data)

    def compress(self, data: bytes) -> None:
        """Compress ``data`` and write whatever the compressor outputs."""
        compressed = self.compressor.compress(data)
        self._write(compressed)

    def finish(self) -> Tuple[int, int]:
        """Flush the compressor and write the end of the block.

        :return: the single index record ``(unpadded_size, uncompressed_size)``
        :raises XZError: if the footer check differs from ``self.check`` or if
            the index does not hold exactly one record
        """
        tail = self.compressor.flush()
        footer_size = 12

        # footer
        footer_check, backward_size = parse_xz_footer(tail[-footer_size:])
        if footer_check != self.check:
            raise XZError("block: compressor footer check")

        # index: located right before the footer
        index_start = -footer_size - backward_size
        records = parse_xz_index(tail[index_start:-footer_size])
        if len(records) != 1:
            raise XZError("block: compressor index records length")

        # remaining block data: everything before the index
        self._write(tail[:index_start])

        return records[0]  # (unpadded_size, uncompressed_size)
115 |
116 |
class XZBlock(IOAbstract):
    """Uncompressed content of one XZ block, backed by ``fileobj``.

    Reading and writing are delegated to a ``BlockRead`` / ``BlockWrite``
    helper kept in ``self.operation``; the helper is created on demand and
    dropped (see ``clear``) when it is no longer needed.
    """

    def __init__(
        self,
        fileobj: IOAbstract,
        check: int,
        unpadded_size: int,
        uncompressed_size: int,
        preset: _LZMAPresetType = None,
        filters: _LZMAFiltersType = None,
        block_read_strategy: Optional[_BlockReadStrategyType] = None,
    ):
        super().__init__(uncompressed_size)
        self.fileobj = fileobj
        self.check = check
        self.preset = preset
        self.filters = filters
        # a falsy strategy is replaced by the default one
        if not block_read_strategy:
            block_read_strategy = KeepBlockReadStrategy()
        self.block_read_strategy = block_read_strategy
        self.unpadded_size = unpadded_size
        # current helper, None when neither reading nor writing is in progress
        self.operation: Union[BlockRead, BlockWrite, None] = None

    @property
    def uncompressed_size(self) -> int:
        """Length of the uncompressed data (the ``_length`` attribute)."""
        return self._length

    def _read(self, size: int) -> bytes:
        """Decompress data from the current position.

        :raises XZError: if the decompressor reports an ``LZMAError``
        """
        # enforce read mode
        if not isinstance(self.operation, BlockRead):
            self._write_end()
            self.clear()
            self.block_read_strategy.on_create(self)
            self.operation = BlockRead(
                fileobj=self.fileobj,
                check=self.check,
                unpadded_size=self.unpadded_size,
                uncompressed_size=self.uncompressed_size,
            )

        # read data
        self.block_read_strategy.on_read(self)
        try:
            chunk = self.operation.decompress(self._pos, size)
        except LZMAError as ex:
            raise XZError(f"block: error while decompressing: {ex}") from ex

        # the reader is useless once the end of the block has been reached
        block_exhausted = self._pos + len(chunk) == self._length
        if block_exhausted:
            self.clear()

        return chunk

    def writable(self) -> bool:
        """Tell if writing is possible: a write is in progress or the block is empty."""
        if isinstance(self.operation, BlockWrite):
            return True
        return not self._length

    def _write(self, data: bytes) -> int:
        """Compress ``data`` into the block and return the number of bytes consumed."""
        # enforce write mode
        if not isinstance(self.operation, BlockWrite):
            self.clear()
            self.operation = BlockWrite(
                fileobj=self.fileobj,
                check=self.check,
                preset=self.preset,
                filters=self.filters,
            )

        # write data
        self.operation.compress(data)
        return len(data)

    def _write_after(self) -> None:
        """Finish a write in progress, recording the resulting unpadded size.

        :raises XZError: if the compressor reports an uncompressed size
            different from the one of this block
        """
        if not isinstance(self.operation, BlockWrite):
            return
        self.unpadded_size, reported_size = self.operation.finish()
        if reported_size != self.uncompressed_size:
            raise XZError("block: compressor uncompressed size")
        self.clear()

    def _truncate(self, size: int) -> None:
        """Set the length of an empty block by seeking to ``size`` and writing nothing."""
        # thanks to the writable method, we are sure that length is zero
        # so we don't need to handle the case of truncating in middle of the block
        self.seek(size)
        self.write(b"")

    def clear(self) -> None:
        """Drop the current helper, notifying the read strategy for a reader."""
        if isinstance(self.operation, BlockRead):
            self.block_read_strategy.on_delete(self)
        self.operation = None  # free memory
201 |
--------------------------------------------------------------------------------
/src/xz/common.py:
--------------------------------------------------------------------------------
1 | from binascii import crc32 as crc32int
2 | import lzma
3 | from struct import pack, unpack
4 | from typing import List, Tuple, cast
5 |
6 | HEADER_MAGIC = b"\xfd7zXZ\x00"
7 | FOOTER_MAGIC = b"YZ"
8 |
9 |
class XZError(Exception):
    """Error raised when XZ data is invalid or cannot be processed."""
12 |
13 |
def encode_mbi(value: int) -> bytes:
    """Encode a non-negative integer as an XZ multibyte integer.

    The value is emitted 7 bits at a time, least significant group first;
    every byte except the last one has its high bit set.

    Returns an immutable bytes object.
    Raises ValueError if value is negative.
    """
    if value < 0:
        # same exception type as the byte-range error raised previously
        raise ValueError("mbi value must be non-negative")
    data = bytearray()
    while value >= 0x80:
        data.append((value & 0x7F) | 0x80)
        value >>= 7
    data.append(value)
    return bytes(data)  # match the annotated return type
21 |
22 |
def decode_mbi(data: bytes) -> Tuple[int, int]:
    """Decode the multibyte integer found at the start of data.

    Returns a tuple (number of bytes consumed, decoded value).
    Raises XZError if data ends before a byte without the high bit is found.
    """
    value = 0
    shift = 0
    for consumed, byte in enumerate(data, start=1):
        value |= (byte & 0x7F) << shift
        if byte & 0x80 == 0:  # high bit clear: this was the last byte
            return (consumed, value)
        shift += 7
    raise XZError("invalid mbi")
30 |
31 |
def crc32(data: bytes) -> bytes:
    """Return the CRC32 of data packed as 4 bytes."""
    # NOTE(review): the "<I" (little-endian, unsigned 32-bit) format string was
    # restored after being truncated in this copy of the file; confirm against
    # upstream
    return pack("<I", crc32int(data))


def round_up(value: int) -> int:
    """Round value up to the next multiple of 4."""
    remainder = value % 4
    if remainder:
        return value - remainder + 4
    return value
41 |
42 |
def pad(value: int) -> bytes:
    """Return the null bytes needed to bring value up to round_up(value)."""
    padding_len = round_up(value) - value
    return bytes(padding_len)
45 |
46 |
def create_xz_header(check: int) -> bytes:
    """Build a 12-byte stream header for the given check type.

    Raises XZError if check is outside of the 0..15 range.
    """
    if not 0 <= check <= 0xF:
        raise XZError("header check")
    # stream header
    # NOTE(review): the lines from here to the end of the function were
    # restored after truncation; the layout (magic, 2 flag bytes, CRC32 of the
    # flags) mirrors the slices checked by parse_xz_header — confirm upstream
    flags = pack("<BB", 0, check)
    return HEADER_MAGIC + flags + crc32(flags)


def create_xz_index_footer(check: int, records: List[Tuple[int, int]]) -> bytes:
    """Build the index followed by the stream footer.

    records is a list of (unpadded_size, uncompressed_size), one per block.

    Raises XZError if check is outside of the 0..15 range, or if a record has
    a null unpadded size.
    """
    if not 0 <= check <= 0xF:
        raise XZError("footer check")
    # index
    index = b"\x00"
    index += encode_mbi(len(records))
    for unpadded_size, uncompressed_size in records:
        if not unpadded_size:
            raise XZError("index record unpadded size")
        index += encode_mbi(unpadded_size)
        index += encode_mbi(uncompressed_size)
    index += pad(len(index))
    index += crc32(index)
    # stream footer
    # NOTE(review): the end of this function was restored after truncation;
    # the layout (CRC32, 6 bytes of backward size and flags, magic) mirrors the
    # slices checked by parse_xz_footer, and the backward size is stored as
    # (index length / 4) - 1 — confirm upstream
    footer = pack("<IBB", len(index) // 4 - 1, 0, check)
    footer = crc32(footer) + footer + FOOTER_MAGIC
    return index + footer


def parse_xz_header(header: bytes) -> int:
    """Validate a 12-byte stream header and return its check type.

    Raises XZError on wrong length, magic, CRC32 or flags.
    """
    if len(header) != 12:
        raise XZError("header length")
    if header[:6] != HEADER_MAGIC:
        raise XZError("header magic")
    if crc32(header[6:8]) != header[8:12]:
        raise XZError("header crc32")
    # NOTE(review): the unpack format and the flags validation below were
    # restored after truncation (error message included) — confirm upstream
    flag_first_byte, check = cast(
        Tuple[int, int],
        unpack("<BB", header[6:8]),
    )
    if flag_first_byte or not 0 <= check <= 0xF:
        raise XZError("header flags")
    return check


def parse_xz_index(index: bytes) -> List[Tuple[int, int]]:
    """Parse an index and return its (unpadded_size, uncompressed_size) records.

    Raises XZError if the length, indicator byte, CRC32, records or padding
    are invalid.
    """
    if len(index) < 8 or len(index) % 4:
        raise XZError("index length")
    index = memoryview(index)
    if index[0]:
        raise XZError("index indicator")
    if crc32(index[:-4]) != index[-4:]:
        raise XZError("index crc32")
    size, nb_records = decode_mbi(index[1:])
    index = index[1 + size : -4]
    # records
    records = []
    for _ in range(nb_records):
        if not index:
            raise XZError("index size")
        size, unpadded_size = decode_mbi(index)
        if not unpadded_size:
            raise XZError("index record unpadded size")
        index = index[size:]
        if not index:
            raise XZError("index size")
        size, uncompressed_size = decode_mbi(index)
        if not uncompressed_size:
            raise XZError("index record uncompressed size")
        index = index[size:]
        records.append((unpadded_size, uncompressed_size))
    # index padding
    if any(index):
        raise XZError("index padding")
    return records
120 |
121 |
122 | def parse_xz_footer(footer: bytes) -> Tuple[int, int]:
123 | if len(footer) != 12:
124 | raise XZError("footer length")
125 | if footer[10:12] != FOOTER_MAGIC:
126 | raise XZError("footer magic")
127 | if crc32(footer[4:10]) != footer[:4]:
128 | raise XZError("footer crc32")
129 | backward_size, flag_first_byte, check = cast(
130 | Tuple[int, int, int],
131 | unpack(" None:
41 | """Open an XZ file in binary mode.
42 |
43 | The filename argument can be either an actual file name
44 | (given as a str, bytes, or PathLike object),
45 | in which case the named file is opened,
46 | or it can be an existing file object to read from or write to.
47 |
48 | The mode argument can be one of the following:
49 | - "r" for reading (default)
50 | - "w" for writing, truncating the file
51 | - "r+" for reading and writing
52 | - "w+" for reading and writing, truncating the file
53 | - "x" and "x+" are like "w" and "w+", except that an
54 | FileExistsError is raised if the file already exists
55 |
56 | The following arguments are used during writing:
57 | - check: when creating a new stream
58 | - preset: when creating a new block
59 | - filters: when creating a new block
60 |
61 | For more information about the check/preset/filters arguments,
62 | refer to the documentation of the lzma module.
63 |
64 | The block_read_strategy argument allows to specify a strategy
65 | for freeing block readers, and implement a different tradeoff
66 | between memory consumption and read speed when alternating reads
67 | between several blocks.
68 | """
69 | self._close_fileobj = False
70 | self._close_check_empty = False
71 |
72 | super().__init__()
73 |
74 | self._mode, self._readable, self._writable = parse_mode(mode)
75 |
76 | # create strategy
77 | if block_read_strategy is None:
78 | self.block_read_strategy: _BlockReadStrategyType = (
79 | RollingBlockReadStrategy()
80 | )
81 | else:
82 | self.block_read_strategy = block_read_strategy
83 |
84 | # get fileobj
85 | if isinstance(filename, (str, bytes, os.PathLike)):
86 | # pylint: disable=consider-using-with, unspecified-encoding
87 | self.fileobj = cast(BinaryIO, open(filename, self._mode + "b"))
88 | self._close_fileobj = True
89 | elif hasattr(filename, "read"): # weak check but better than nothing
90 | self.fileobj = filename
91 | else:
92 | raise TypeError("filename must be a str, bytes, file or PathLike object")
93 |
94 | # check fileobj
95 | if not self.fileobj.seekable():
96 | raise ValueError("filename is not seekable")
97 | if self._readable and not self.fileobj.readable():
98 | raise ValueError("filename is not readable")
99 | if self._writable and not self.fileobj.writable():
100 | raise ValueError("filename is not writable")
101 |
102 | # init
103 | if self._mode[0] in "wx":
104 | self.fileobj.truncate(0)
105 | if self._readable:
106 | self._init_parse()
107 | if self._mode[0] == "r" and not self._fileobjs:
108 | raise XZError("file: no streams")
109 |
110 | self.check = check if check != -1 else DEFAULT_CHECK
111 | self.preset = preset
112 | self.filters = filters
113 |
114 | self._close_check_empty = self._mode[0] != "r"
115 |
116 | @property
117 | def _last_stream(self) -> Optional[XZStream]:
118 | try:
119 | return self._fileobjs.last_item
120 | except KeyError:
121 | return None
122 |
123 | preset = AttrProxy[_LZMAPresetType]("_last_stream")
124 | filters = AttrProxy[_LZMAFiltersType]("_last_stream")
125 |
126 | @property
127 | def mode(self) -> str:
128 | return self._mode
129 |
130 | def readable(self) -> bool:
131 | return self._readable
132 |
133 | def writable(self) -> bool:
134 | return self._writable
135 |
136 | def close(self) -> None:
137 | try:
138 | super().close()
139 | if self._close_check_empty and not self:
140 | warnings.warn(
141 | "Empty XZFile: nothing was written, "
142 | "so output is empty (and not a valid xz file).",
143 | RuntimeWarning,
144 | )
145 | finally:
146 | if self._close_fileobj:
147 | self.fileobj.close() # self.fileobj exists at this point
148 | if sys.version_info < (3, 10): # pragma: no cover
149 | # fix coverage issue on some Python versions
150 | # see https://github.com/nedbat/coveragepy/issues/1480
151 | pass
152 |
153 | @property
154 | def stream_boundaries(self) -> List[int]:
155 | return list(self._fileobjs)
156 |
157 | @property
158 | def block_boundaries(self) -> List[int]:
159 | return [
160 | stream_pos + block_boundary
161 | for stream_pos, stream in self._fileobjs.items()
162 | for block_boundary in stream.block_boundaries
163 | ]
164 |
165 | def _init_parse(self) -> None:
166 | self.fileobj.seek(0, SEEK_END)
167 |
168 | streams = []
169 |
170 | while self.fileobj.tell():
171 | if self.fileobj.tell() % 4:
172 | raise XZError("file: invalid size")
173 | self.fileobj.seek(-4, SEEK_CUR)
174 | if any(self.fileobj.read(4)):
175 | streams.append(XZStream.parse(self.fileobj, self.block_read_strategy))
176 | else:
177 | self.fileobj.seek(-4, SEEK_CUR) # stream padding
178 |
179 | while streams:
180 | self._append(streams.pop())
181 |
182 | def _create_fileobj(self) -> XZStream:
183 | stream_pos = sum(len(stream.fileobj) for stream in self._fileobjs.values())
184 | return XZStream(
185 | IOProxy(
186 | self.fileobj,
187 | stream_pos,
188 | stream_pos,
189 | ),
190 | self.check,
191 | self.preset,
192 | self.filters,
193 | self.block_read_strategy,
194 | )
195 |
196 | def change_stream(self) -> None:
197 | """
198 | Create a new stream.
199 |
200 | If the current stream is empty, replace it instead."""
201 | if self._fileobjs:
202 | self._change_fileobj()
203 |
204 | def change_block(self) -> None:
205 | """
206 | Create a new block.
207 |
208 | If the current block is empty, replace it instead."""
209 | last_stream = self._last_stream
210 | if last_stream:
211 | last_stream.change_block()
212 |
--------------------------------------------------------------------------------
/src/xz/io.py:
--------------------------------------------------------------------------------
1 | from io import (
2 | DEFAULT_BUFFER_SIZE,
3 | SEEK_CUR,
4 | SEEK_END,
5 | SEEK_SET,
6 | IOBase,
7 | UnsupportedOperation,
8 | )
9 | from typing import BinaryIO, Generic, Optional, TypeVar, Union, cast
10 |
11 | from xz.utils import FloorDict
12 |
13 | #
14 | # Typing note
15 | #
16 | # The consensus seems to favour IO instead of IOBase for typing.
17 | # However we cannot subclass BinaryIO in IOAbstract as it conflicts with IOBase.
18 | #
19 | # As a result, some casting or unions between the two types may be required internally.
20 | #
21 |
22 |
class IOAbstract(IOBase):
    """Base class for seekable binary file-like objects of known length.

    Position and length bookkeeping is done here; subclasses implement the
    actual work in _read, _write and _truncate, and may use the
    _write_before / _write_after hooks.
    """

    def __init__(self, length: int) -> None:
        super().__init__()
        self._length = length
        self._pos = 0
        self._modified = False  # True between _write_before and _write_after

    def __repr__(self) -> str:
        return f"<{type(self).__name__} object at {hash(self):#x}>"

    def __len__(self) -> int:
        return self._length

    def _check_not_closed(self) -> None:
        """Raise ValueError if the object is closed."""
        # https://github.com/PyCQA/pylint/issues/3484
        # pylint: disable=using-constant-test
        if self.closed:
            raise ValueError("I/O operation on closed file")

    def fileno(self) -> int:
        """Return the file descriptor of self.fileobj.

        Raises UnsupportedOperation if an AttributeError occurs in the process
        (e.g. there is no fileobj attribute).
        """
        try:
            return cast(BinaryIO, self.fileobj).fileno()  # type: ignore[attr-defined]
        except AttributeError:
            raise UnsupportedOperation("fileno")  # pylint: disable=raise-missing-from

    def seekable(self) -> bool:
        """Return a bool indicating whether object supports random access."""
        return True

    def readable(self) -> bool:
        """Return a bool indicating whether object was opened for reading."""
        return True

    def writable(self) -> bool:
        """Return a bool indicating whether object was opened for writing."""
        return True

    def seek(self, pos: int, whence: int = SEEK_SET) -> int:
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence. Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset should be negative

        Return an int indicating the new absolute position.
        """
        self._check_not_closed()
        if not self.seekable():  # just in case seekable is overridden
            raise UnsupportedOperation("seek")
        if whence == SEEK_SET:
            target = pos
        elif whence == SEEK_CUR:
            target = self._pos + pos
        elif whence == SEEK_END:
            target = self._length + pos
        else:
            raise ValueError("unsupported whence value")
        if target < 0:
            raise ValueError("invalid seek position")
        self._pos = target
        return self._pos

    def tell(self) -> int:
        """Return an int indicating the current stream position."""
        self._check_not_closed()
        return self._pos

    def read(self, size: int = -1) -> bytes:
        """Read at most size bytes, returned as a bytes object.

        If the size argument is negative, read until EOF is reached.
        Return an empty bytes object at or after EOF.
        """
        self._check_not_closed()
        if not self.readable():
            raise UnsupportedOperation("read")
        wanted = self._length if size < 0 else size
        remaining = min(wanted, self._length - self._pos)
        chunks = []
        while remaining > 0:
            chunk = self._read(remaining)  # do not stop if nothing was read
            chunks.append(chunk)
            remaining -= len(chunk)
            self._pos += len(chunk)
        return b"".join(chunks)

    def _write_start(self) -> None:
        """Run the _write_before hook, unless a write sequence is in progress."""
        if self._modified:
            return
        self._write_before()
        self._modified = True

    def _write_end(self) -> None:
        """Run the _write_after hook, if a write sequence is in progress."""
        if not self._modified:
            return
        self._write_after()
        self._modified = False

    def write(self, data: bytes) -> int:
        """Write data, passed as a bytes object.

        Writing is only possible at or after EOF; when the position is after
        EOF, the gap is filled with null bytes first.

        Returns the number of bytes written, which is always the length
        of the input data in bytes.
        """
        self._check_not_closed()
        if not self.writable():
            raise UnsupportedOperation("write")
        written_bytes = len(data)
        gap = self._pos - self._length
        if gap < 0:
            raise ValueError("write is only supported from EOF")
        if gap > 0:
            zeros = memoryview(bytearray(DEFAULT_BUFFER_SIZE))
            self._pos = self._length
        pending = memoryview(data)
        while gap or pending:
            self._write_start()
            if gap > 0:
                # pad with null bytes, not counted in written_bytes
                count = self._write(zeros[:gap])  # do not stop if nothing was written
                gap -= count
            else:
                count = self._write(pending)  # do not stop if nothing was written
                pending = pending[count:]
            self._pos += count
            self._length = max(self._length, self._pos)
        return written_bytes

    def truncate(self, size: Optional[int] = None) -> int:
        """Truncate file to size bytes.
        Size defaults to the current IO position as reported by tell().

        The current file position is unchanged.

        Return the new size.
        """
        self._check_not_closed()
        if not self.writable():
            raise UnsupportedOperation("truncate")
        if size is None:
            size = self._pos
        elif size < 0:
            raise ValueError("invalid truncate size")
        if size != self._length:
            self._write_start()
            saved_pos = self._pos
            self._truncate(size)
            self._length = size
            self._pos = saved_pos  # make sure position is unchanged
        return self._length

    def close(self) -> None:
        """Flush and close the stream.

        This method has no effect if it is already closed.
        """
        try:
            if not self.closed:
                self._write_end()
        finally:
            super().close()

    # the methods below are expected to be implemented by subclasses

    def _read(self, size: int) -> bytes:  # pragma: no cover
        """Read and return up to size bytes, where size is an int.

        The size will not exceed the number of bytes between self._pos and
        self._length, so that EOF does not need to be handled here.

        This method can return fewer bytes than size, in which case it will be
        called again. This includes being able to return an empty bytes object.
        """
        raise UnsupportedOperation("read")

    def _write_before(self) -> None:
        """This method is called before the first write operation."""

    def _write_after(self) -> None:
        """This method is called after the last write operation (usually on file close)."""

    def _write(self, data: bytes) -> int:  # pragma: no cover
        """Write as many bytes from data as possible, and return the number
        of bytes written.

        data may be greater than the number of bytes between self._pos
        and self._length; self._length will be updated by caller afterwards.

        This method can return an int smaller than the length of data, in which
        case it will be called again. This includes being able to return 0.
        """
        raise UnsupportedOperation("write")

    def _truncate(self, size: int) -> None:  # pragma: no cover
        """Truncate the file to the given size.
        This resizing can extend or reduce the current file size.

        The current file position may be changed by this method,
        but is restored by caller.

        Returns None.
        """
        raise UnsupportedOperation("truncate")
230 |
231 |
class IOStatic(IOAbstract):
    """Non-writable IOAbstract serving a copy of the data given at creation."""

    def __init__(self, data: bytes) -> None:
        content = bytearray(data)
        super().__init__(len(content))
        self.data = content

    def writable(self) -> bool:
        return False

    def _read(self, size: int) -> bytes:
        start = self._pos
        return self.data[start : start + size]
242 |
243 |
class IOProxy(IOAbstract):
    """View on the part of fileobj going from offset start to offset end."""

    def __init__(
        self,
        fileobj: Union[BinaryIO, IOBase],  # see typing note on top of this file
        start: int,
        end: int,
    ) -> None:
        super().__init__(end - start)
        self.start = start
        self.fileobj = fileobj

    def _read(self, size: int) -> bytes:
        source = self.fileobj
        source.seek(self.start + self._pos, SEEK_SET)
        return source.read(size)  # size already restricted by caller

    def _write(self, data: bytes) -> int:
        target = self.fileobj
        target.seek(self.start + self._pos, SEEK_SET)
        return target.write(data)

    def _truncate(self, size: int) -> None:
        # size is relative to the view, the underlying file wants an absolute one
        self.fileobj.truncate(self.start + size)
265 |
266 |
267 | T = TypeVar("T", bound=IOAbstract)
268 |
269 |
class IOCombiner(IOAbstract, Generic[T]):
    """Concatenation of several IOAbstract objects, seen as a single one.

    The parts are stored in self._fileobjs, keyed by the position at which
    they start in the combined object.
    """

    def __init__(self, *fileobjs: T) -> None:
        super().__init__(0)
        self._fileobjs: FloorDict[T] = FloorDict()
        for part in fileobjs:
            self._append(part)

    def _get_fileobj(self) -> T:
        """Return the part holding the current position, seeked accordingly."""
        part_start, part = self._fileobjs.get_with_index(self._pos)
        part.seek(self._pos - part_start, SEEK_SET)
        return part

    def _read(self, size: int) -> bytes:
        part = self._get_fileobj()
        return part.read(size)

    def _write_after(self) -> None:
        """End the write on the last part, or delete that part if it is empty."""
        if not self._fileobjs:
            return
        last = self._fileobjs.last_item
        if last:
            last._write_end()  # pylint: disable=protected-access
        else:
            del self._fileobjs[self._fileobjs.last_key]

    def _write(self, data: bytes) -> int:
        part: Optional[T] = self._get_fileobj() if self._fileobjs else None

        if part is None or not part.writable():
            self._change_fileobj()
            part = self._get_fileobj()

        # newly created fileobj should be writable
        # otherwise this will raise UnsupportedOperation
        return part.write(data)

    def _truncate(self, size: int) -> None:
        part_start, part = self._fileobjs.get_with_index(size)
        if part_start != size:
            # the new end falls inside a part: shorten (or extend) it
            part.truncate(size - part_start)
        # drop every part starting at or after the new end
        for key in reversed(self._fileobjs):
            if key >= size:
                del self._fileobjs[key]
            else:
                break

    def _append(self, fileobj: T) -> None:
        """Add fileobj at the end; raises TypeError if it is not an IOAbstract."""
        if not isinstance(fileobj, IOAbstract):
            raise TypeError
        start = self._length
        self._fileobjs[start] = fileobj  # override empty streams
        self._length = start + len(fileobj)

    def _change_fileobj(self) -> None:
        """Create and append a new fileobj.

        If the last fileobj was empty, delete it.
        """
        # end write on last fileobj
        if self._fileobjs:
            last = self._fileobjs.last_item
            if not last:
                del self._fileobjs[self._fileobjs.last_key]
            elif last.writable():
                last._write_end()  # pylint: disable=protected-access

        # create and append new fileobj
        self._append(self._create_fileobj())

    def _create_fileobj(self) -> T:  # pragma: no cover
        """
        Create a new fileobj to be concatenated.

        It must be writable.
        """
        raise NotImplementedError
346 |
--------------------------------------------------------------------------------
/src/xz/open.py:
--------------------------------------------------------------------------------
1 | from functools import wraps
2 | from io import TextIOWrapper
3 | from typing import BinaryIO, List, Optional, Union, cast, overload
4 |
5 | from xz.file import XZFile
6 | from xz.typing import (
7 | _BlockReadStrategyType,
8 | _LZMAFilenameType,
9 | _LZMAFiltersType,
10 | _LZMAPresetType,
11 | _XZModesBinaryType,
12 | _XZModesTextType,
13 | )
14 | from xz.utils import AttrProxy
15 |
16 |
class _XZFileText(TextIOWrapper):
    """Text wrapper around an XZFile, which is kept in self.xz_file."""

    def __init__(
        self,
        filename: _LZMAFilenameType,
        mode: str,
        *,
        check: int = -1,
        preset: _LZMAPresetType = None,
        filters: _LZMAFiltersType = None,
        block_read_strategy: Optional[_BlockReadStrategyType] = None,
        encoding: Optional[str] = None,
        errors: Optional[str] = None,
        newline: Optional[str] = None,
    ) -> None:
        # XZFile gets the mode with every "t" removed
        binary_mode = mode.replace("t", "")
        self.xz_file = XZFile(
            filename,
            binary_mode,
            check=check,
            preset=preset,
            filters=filters,
            block_read_strategy=block_read_strategy,
        )
        buffer = cast(BinaryIO, self.xz_file)
        super().__init__(buffer, encoding, errors, newline)

    check = AttrProxy[int]("xz_file")
    preset = AttrProxy[_LZMAPresetType]("xz_file")
    filters = AttrProxy[_LZMAFiltersType]("xz_file")
    stream_boundaries = AttrProxy[List[int]]("xz_file")
    block_boundaries = AttrProxy[List[int]]("xz_file")
    block_read_strategy = AttrProxy[_BlockReadStrategyType]("xz_file")

    @property
    def mode(self) -> str:
        """Mode of the wrapped XZFile, with a "t" appended."""
        return self.xz_file.mode + "t"

    @wraps(XZFile.change_stream)
    def change_stream(self) -> None:
        # flush first, then delegate to the wrapped XZFile
        self.flush()
        self.xz_file.change_stream()

    @wraps(XZFile.change_block)
    def change_block(self) -> None:
        # flush first, then delegate to the wrapped XZFile
        self.flush()
        self.xz_file.change_block()
66 |
67 |
68 | @overload
69 | def xz_open(
70 | filename: _LZMAFilenameType,
71 | mode: _XZModesBinaryType = "rb",
72 | *,
73 | # XZFile kwargs
74 | check: int = -1,
75 | preset: _LZMAPresetType = None,
76 | filters: _LZMAFiltersType = None,
77 | block_read_strategy: Optional[_BlockReadStrategyType] = None,
78 | # text-mode kwargs
79 | encoding: Optional[str] = None,
80 | errors: Optional[str] = None,
81 | newline: Optional[str] = None,
82 | ) -> XZFile:
83 | ... # pragma: no cover
84 |
85 |
86 | @overload
87 | def xz_open(
88 | filename: _LZMAFilenameType,
89 | mode: _XZModesTextType,
90 | *,
91 | # XZFile kwargs
92 | check: int = -1,
93 | preset: _LZMAPresetType = None,
94 | filters: _LZMAFiltersType = None,
95 | block_read_strategy: Optional[_BlockReadStrategyType] = None,
96 | # text-mode kwargs
97 | encoding: Optional[str] = None,
98 | errors: Optional[str] = None,
99 | newline: Optional[str] = None,
100 | ) -> _XZFileText:
101 | ... # pragma: no cover
102 |
103 |
104 | @overload
105 | def xz_open(
106 | filename: _LZMAFilenameType,
107 | mode: str,
108 | *,
109 | # XZFile kwargs
110 | check: int = -1,
111 | preset: _LZMAPresetType = None,
112 | filters: _LZMAFiltersType = None,
113 | block_read_strategy: Optional[_BlockReadStrategyType] = None,
114 | # text-mode kwargs
115 | encoding: Optional[str] = None,
116 | errors: Optional[str] = None,
117 | newline: Optional[str] = None,
118 | ) -> Union[XZFile, _XZFileText]:
119 | ... # pragma: no cover
120 |
121 |
def xz_open(
    filename: _LZMAFilenameType,
    mode: str = "rb",
    *,
    # XZFile kwargs
    check: int = -1,
    preset: _LZMAPresetType = None,
    filters: _LZMAFiltersType = None,
    block_read_strategy: Optional[_BlockReadStrategyType] = None,
    # text-mode kwargs
    encoding: Optional[str] = None,
    errors: Optional[str] = None,
    newline: Optional[str] = None,
) -> Union[XZFile, _XZFileText]:
    """Open an XZ file in binary or text mode.

    filename is either an actual file name (a str, bytes or PathLike
    object), in which case the named file is opened, or an existing file
    object to read from or write to.

    In binary mode, this is the same as calling the XZFile constructor:
    XZFile(filename, mode, ...). The encoding, errors and newline arguments
    must not be given in that case (ValueError is raised otherwise).

    In text mode (mode contains "t"), an XZFile object is created and
    wrapped in an io.TextIOWrapper instance with the given encoding, error
    handling behavior, and line ending(s). A mode containing both "t" and
    "b" raises ValueError.
    """
    text_mode = "t" in mode
    if text_mode and "b" in mode:
        raise ValueError(f"Invalid mode: {mode}")

    if text_mode:
        return _XZFileText(
            filename,
            mode,
            check=check,
            preset=preset,
            filters=filters,
            block_read_strategy=block_read_strategy,
            encoding=encoding,
            errors=errors,
            newline=newline,
        )

    # text-only arguments are rejected in binary mode, first offender wins
    text_only_args = (
        ("encoding", encoding),
        ("errors", errors),
        ("newline", newline),
    )
    for name, value in text_only_args:
        if value is not None:
            raise ValueError(f"Argument '{name}' not supported in binary mode")

    return XZFile(
        filename,
        mode,
        check=check,
        preset=preset,
        filters=filters,
        block_read_strategy=block_read_strategy,
    )
181 |
--------------------------------------------------------------------------------
/src/xz/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/src/xz/py.typed
--------------------------------------------------------------------------------
/src/xz/strategy.py:
--------------------------------------------------------------------------------
1 | import time
2 | from typing import TYPE_CHECKING, Dict
3 |
4 | if TYPE_CHECKING: # pragma: no cover
5 | # avoid circular dependency
6 | from xz.block import XZBlock
7 |
8 |
class KeepBlockReadStrategy:
    """Block read strategy whose callbacks all do nothing."""

    def on_create(self, block: "XZBlock") -> None:
        """Do nothing."""

    def on_delete(self, block: "XZBlock") -> None:
        """Do nothing."""

    def on_read(self, block: "XZBlock") -> None:
        """Do nothing."""
18 |
19 |
class RollingBlockReadStrategy:
    """Block read strategy clearing the least recently used block.

    The last use of each tracked block is recorded in self.block_reads; when
    a creation brings the number of tracked blocks above max_block_read_nb,
    the block with the oldest record is cleared.
    """

    def __init__(self, max_block_read_nb: int = 8) -> None:
        self.max_block_read_nb = max_block_read_nb
        self.block_reads: Dict["XZBlock", float] = {}

    def _freshly_used(self, block: "XZBlock") -> None:
        """Record that block has just been used."""
        self.block_reads[block] = time.monotonic()

    def on_create(self, block: "XZBlock") -> None:
        self._freshly_used(block)
        if len(self.block_reads) <= self.max_block_read_nb:
            return
        oldest = min(self.block_reads, key=self.block_reads.__getitem__)
        oldest.clear()  # will call on_delete

    def on_delete(self, block: "XZBlock") -> None:
        del self.block_reads[block]

    def on_read(self, block: "XZBlock") -> None:
        self._freshly_used(block)
42 |
--------------------------------------------------------------------------------
/src/xz/stream.py:
--------------------------------------------------------------------------------
1 | from io import SEEK_CUR
2 | from typing import BinaryIO, List, Optional
3 |
4 | from xz.block import XZBlock
5 | from xz.common import (
6 | XZError,
7 | create_xz_header,
8 | create_xz_index_footer,
9 | parse_xz_footer,
10 | parse_xz_header,
11 | parse_xz_index,
12 | round_up,
13 | )
14 | from xz.io import IOCombiner, IOProxy
15 | from xz.typing import _BlockReadStrategyType, _LZMAFiltersType, _LZMAPresetType
16 |
17 |
class XZStream(IOCombiner[XZBlock]):
    """One XZ stream, seen as the concatenation of the content of its blocks.

    self.fileobj covers the whole stream: a 12-byte header, the blocks, the
    index and the footer.
    """

    def __init__(
        self,
        fileobj: IOProxy,
        check: int,
        preset: _LZMAPresetType = None,
        filters: _LZMAFiltersType = None,
        block_read_strategy: Optional[_BlockReadStrategyType] = None,
    ) -> None:
        super().__init__()
        self._check = check
        self.fileobj = fileobj
        self.preset = preset
        self.filters = filters
        self.block_read_strategy = block_read_strategy

    @property
    def check(self) -> int:
        """Check type of the stream (read-only)."""
        return self._check

    @property
    def block_boundaries(self) -> List[int]:
        """Position at which each block starts in the uncompressed stream."""
        return [*self._fileobjs]

    @property
    def _fileobj_blocks_end_pos(self) -> int:
        """Position in self.fileobj right after the last block."""
        end_pos = 12  # stream header
        for block in self._fileobjs.values():
            end_pos += round_up(block.unpadded_size)
        return end_pos

    @classmethod
    def parse(
        cls,
        fileobj: BinaryIO,
        block_read_strategy: Optional[_BlockReadStrategyType] = None,
    ) -> "XZStream":
        """Parse one XZ stream from a fileobj.

        The position of fileobj must be right at the end of the stream when
        calling; it is right at the start of the stream on return.

        Raises XZError if the header and the footer disagree on the check.
        """
        # footer
        footer_end_pos = fileobj.seek(-12, SEEK_CUR) + 12
        check, backward_size = parse_xz_footer(fileobj.read(12))

        # index
        index_pos = fileobj.seek(-12 - backward_size, SEEK_CUR)
        records = parse_xz_index(fileobj.read(backward_size))
        blocks_len = sum(round_up(unpadded_size) for unpadded_size, _ in records)

        # blocks are laid out one after another, right before the index
        blocks = []
        block_start = index_pos - blocks_len
        for unpadded_size, uncompressed_size in records:
            block_end = block_start + round_up(unpadded_size)
            block = XZBlock(
                IOProxy(fileobj, block_start, block_end),
                check,
                unpadded_size,
                uncompressed_size,
                block_read_strategy=block_read_strategy,
            )
            blocks.append(block)
            block_start = block_end

        # header
        fileobj.seek(-12 - blocks_len - backward_size, SEEK_CUR)
        header_check = parse_xz_header(fileobj.read(12))
        if header_check != check:
            raise XZError("stream: inconsistent check value")

        header_start_pos = fileobj.seek(-12, SEEK_CUR)

        stream = cls(
            IOProxy(fileobj, header_start_pos, footer_end_pos),
            check,
            block_read_strategy=block_read_strategy,
        )
        for block in blocks:
            stream._append(block)
        return stream

    def _create_fileobj(self) -> XZBlock:
        """Create a new empty block located right after the existing ones."""
        blocks_end = self._fileobj_blocks_end_pos
        # drop whatever follows the blocks in the stream
        self.fileobj.truncate(blocks_end)
        return XZBlock(
            IOProxy(self.fileobj, blocks_end, blocks_end),
            self.check,
            0,
            0,
            self.preset,
            self.filters,
            self.block_read_strategy,
        )

    def _write_before(self) -> None:
        """Write the stream header if the stream is empty."""
        if self:
            return
        self.fileobj.seek(0)
        self.fileobj.truncate()
        self.fileobj.write(create_xz_header(self.check))

    def _write_after(self) -> None:
        """Write the index and the footer right after the blocks."""
        super()._write_after()
        records = [
            (block.unpadded_size, block.uncompressed_size)
            for block in self._fileobjs.values()
        ]
        self.fileobj.seek(self._fileobj_blocks_end_pos)
        self.fileobj.truncate()
        self.fileobj.write(create_xz_index_footer(self.check, records))

    def change_block(self) -> None:
        """
        End the current block, and start a new one.

        If the current block is empty, it is replaced instead.
        Nothing is done if there is no block yet."""
        if not self._fileobjs:
            return
        self._change_fileobj()
--------------------------------------------------------------------------------
/src/xz/typing.py:
--------------------------------------------------------------------------------
1 | from os import PathLike
2 | import sys
3 | from typing import TYPE_CHECKING, Any, BinaryIO, Optional, Union
4 |
5 | if sys.version_info >= (3, 9): # pragma: no cover
6 | from collections.abc import Mapping, Sequence
7 |
8 | _LZMAFilenameType = Union[str, bytes, PathLike[str], PathLike[bytes], BinaryIO]
9 | else: # pragma: no cover
10 | from typing import Mapping, Sequence
11 |
12 | _LZMAFilenameType = Union[str, bytes, PathLike, BinaryIO]
13 |
14 |
15 | if sys.version_info >= (3, 8): # pragma: no cover
16 | from typing import Literal, Protocol
17 | else: # pragma: no cover
18 | from typing_extensions import Literal, Protocol
19 |
20 |
21 | if TYPE_CHECKING: # pragma: no cover
22 | # avoid circular dependency
23 | from xz.block import XZBlock
24 |
25 |
# An int or None (the name suggests an LZMA compression preset).
_LZMAPresetType = Optional[int]
# A sequence of string-keyed mappings, or None (the name suggests an LZMA
# filter chain specification).
_LZMAFiltersType = Optional[Sequence[Mapping[str, Any]]]


# All valid modes, ignoring the order of the characters and repetitions
# (see utils.parse_mode for more details).
# The values are unit tested in test_parse_mode to make sure that none is missing.
_XZModesBinaryType = Literal[
    "r", "r+", "w", "w+", "x", "x+", "rb", "rb+", "wb", "wb+", "xb", "xb+"
]
_XZModesTextType = Literal["rt", "rt+", "wt", "wt+", "xt", "xt+"]
37 |
38 |
class _BlockReadStrategyType(Protocol):
    """Structural interface expected from a block read strategy.

    Any object providing these three hooks satisfies the protocol; each hook
    receives the XZBlock concerned and returns nothing.
    """

    def on_create(self, block: "XZBlock") -> None:
        """Hook named after block creation (NOTE(review): call sites not shown here)."""
        ...  # pragma: no cover

    def on_delete(self, block: "XZBlock") -> None:
        """Hook named after block deletion (NOTE(review): call sites not shown here)."""
        ...  # pragma: no cover

    def on_read(self, block: "XZBlock") -> None:
        """Hook named after a read on the block (NOTE(review): call sites not shown here)."""
        ...  # pragma: no cover
48 |
--------------------------------------------------------------------------------
/src/xz/utils.py:
--------------------------------------------------------------------------------
1 | from bisect import bisect_right, insort_right
2 | import sys
3 | from typing import Any, Dict, Generic, List, Tuple, TypeVar, cast
4 |
5 | if sys.version_info >= (3, 9): # pragma: no cover
6 | from collections.abc import Iterator, MutableMapping
7 | else: # pragma: no cover
8 | from typing import Iterator, MutableMapping
9 |
10 |
11 | T = TypeVar("T")
12 |
13 |
class FloorDict(MutableMapping[int, T]):
    """Integer-keyed mapping whose lookups fall back to the nearest lower key.

    Differences from dict:
    - keys must be int
    - obj[key] returns the value stored under the greatest key that is lower
      than or equal to key
    """

    def __init__(self) -> None:
        self._dict: Dict[int, T] = {}
        self._keys: List[int] = []  # always kept sorted

    def __repr__(self) -> str:
        return "FloorDict<" + repr(self._dict) + ">"

    def __iter__(self) -> Iterator[int]:
        # keys come out in increasing order
        return iter(self._keys)

    def __reversed__(self) -> Iterator[int]:
        return reversed(self._keys)

    def __len__(self) -> int:
        return len(self._keys)

    def _key_index(self, key: int) -> int:
        """Return the position in _keys of the greatest stored key <= key."""
        insertion_point = bisect_right(self._keys, key)
        if insertion_point == 0:
            # every stored key is greater than the requested one
            raise KeyError(key)
        return insertion_point - 1

    def get_with_index(self, key: int) -> Tuple[int, T]:
        """Return (stored key, value) for the greatest stored key <= key.

        Raise TypeError if key is not an int, KeyError if no such key exists.
        """
        if not isinstance(key, int):
            raise TypeError("Invalid key")
        floor_key = self._keys[self._key_index(key)]
        return (floor_key, self._dict[floor_key])

    def __getitem__(self, key: int) -> T:
        _, value = self.get_with_index(key)
        return value

    def __setitem__(self, key: int, value: T) -> None:
        if not isinstance(key, int):
            raise TypeError("Invalid key")
        already_known = key in self._dict
        if not already_known:  # prevent duplicates in _keys
            insort_right(self._keys, key)
        self._dict[key] = value

    def __delitem__(self, key: int) -> None:
        # raises KeyError unless key is exactly one of the stored keys
        self._dict.pop(key)
        position = self._key_index(key)
        del self._keys[position]

    @property
    def last_key(self) -> int:
        """Greatest stored key; raise KeyError when nothing is stored."""
        if self._keys:
            return self._keys[-1]
        raise KeyError("dictionary is empty")

    @property
    def last_item(self) -> T:
        """Value stored under the greatest key."""
        greatest_key = self.last_key
        return self._dict[greatest_key]
75 |
76 |
def parse_mode(mode: str) -> Tuple[str, bool, bool]:
    """Parse a mode string as used in open.

    The order of the characters is irrelevant.
    The binary flag (b) is ignored.
    Valid modes are: r, r+, w, w+, x, x+.

    Return a tuple (normalized, is_read, is_write).
    Raise ValueError if the mode is invalid.
    """
    flags = set(mode)
    if len(flags) != len(mode):
        # some character appears more than once
        raise ValueError(f"invalid mode: {mode}")
    updating = "+" in flags
    base_flags = flags - {"b", "+"}
    if len(base_flags) != 1:
        # either no base mode at all, or several of them
        raise ValueError(f"invalid mode: {mode}")
    (base,) = base_flags
    if base not in "rwx":
        raise ValueError(f"invalid mode: {mode}")
    if updating:
        return (base + "+", True, True)
    reading = base == "r"
    return (base, reading, not reading)
97 |
98 |
class AttrProxy(Generic[T]):
    """Create a descriptor that is a proxy to the same attribute of an attribute.

    Example:

        class Foo:
            proxy = Something()
            bar = AttrProxy("proxy")

        foo = Foo()

    then foo.bar would be proxied to foo.proxy.bar

    If the proxy value is None, then use a local value instead,
    which acts as a temporary storage in the meanwhile.
    The local value is kept on the owning instance (under a private attribute
    name), so two instances of the same class never share it.
    """

    # Typing note
    #
    # There is no typing enforced to make sure that the proxy attribute
    # on the attribute exists and is of type T.
    # We just trust that the user-provided T is right.
    #
    # This explains the use of Any everywhere
    #

    attribute: str
    _local_name: str  # instance attribute holding the not-proxied value

    def __init__(self, proxy: str) -> None:
        """Remember the name of the instance attribute to proxy to."""
        self.proxy = proxy

    def __set_name__(self, klass: Any, name: str) -> None:
        """Record the attribute name this descriptor was bound to."""
        self.attribute = name
        self._local_name = f"_attrproxy_{name}"

    def __get__(self, instance: Any, klass: Any) -> T:
        """Return the proxied value, or the local one while the proxy is None.

        Raise AttributeError if the proxy is None and no local value has been
        set on this instance yet.
        """
        dest = getattr(instance, self.proxy)
        if dest is None:
            try:
                # per-instance storage: a descriptor is shared by all the
                # instances of the class, so nothing may be stored on self
                return cast(T, getattr(instance, self._local_name))
            except AttributeError as ex:
                raise AttributeError(
                    f"'{klass.__name__}' object has not attribute '{self.attribute}'"
                    f" until its attribute '{self.proxy}' is defined"
                ) from ex
        return cast(T, getattr(dest, self.attribute))

    def __set__(self, instance: Any, value: T) -> None:
        """Set the proxied value, or the local one while the proxy is None."""
        dest = getattr(instance, self.proxy)
        if dest is None:
            setattr(instance, self._local_name, value)
        else:
            setattr(dest, self.attribute, value)
152 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | from itertools import chain, product
2 | from pathlib import Path
3 | import sys
4 | from typing import List, Tuple
5 |
6 | import pytest
7 |
8 | if sys.version_info >= (3, 9): # pragma: no cover
9 | from collections.abc import Callable, Iterator
10 | else: # pragma: no cover
11 | from typing import Callable, Iterator
12 |
13 |
def pytest_addoption(parser: pytest.Parser) -> None:
    """Register the ``--generate-integration-files`` command line flag.

    The flag is a boolean switch which is off by default.
    """
    parser.addoption(
        "--generate-integration-files",
        action="store_true",
        default=False,
        help="Test the generation of the integration files",
    )
21 |
22 |
def pytest_collection_modifyitems(
    config: pytest.Config, items: List[pytest.Item]
) -> None:
    """Mark every collected test, and skip the generation tests by default.

    Each item with a file path gets a marker named after the directory
    containing its file. Unless ``--generate-integration-files`` was given,
    items having the ``generate_integration_files`` keyword are skipped.
    """
    repo_root = Path(__file__).parent.parent
    for test_item in items:
        if not test_item.fspath:
            continue
        test_dir = Path(test_item.fspath).parent.relative_to(repo_root)
        test_item.add_marker(getattr(pytest.mark, test_dir.name))

    if config.getoption("--generate-integration-files"):
        return

    skip_mark = pytest.mark.skip(
        reason="need --generate-integration-files option to run"
    )
    for test_item in items:
        if "generate_integration_files" in test_item.keywords:
            test_item.add_marker(skip_mark)
39 |
40 |
# every run of 3 consecutive bytes appears only once in _DATA_PATTERN:
# it is the concatenation of all (uppercase, lowercase, digit) triplets
_UPPERCASE = range(65, 91)
_LOWERCASE = range(97, 123)
_DIGITS = range(48, 58)
_DATA_PATTERN = bytes(
    chain.from_iterable(product(_UPPERCASE, _LOWERCASE, _DIGITS))
)
51 |
52 |
@pytest.fixture(scope="session")
def data_pattern() -> bytes:
    """Session-scoped fixture returning the module-level ``_DATA_PATTERN`` bytes."""
    return _DATA_PATTERN
56 |
57 |
@pytest.fixture(scope="session")
def data_pattern_locate() -> Iterator[Callable[[bytes], Tuple[int, int]]]:
    """Session-scoped fixture yielding a helper that locates data in ``_DATA_PATTERN``.

    The helper returns an (offset, length) tuple. It raises ValueError when
    the data is shorter than 3 bytes, or when it is not part of the pattern.
    """

    def locate(data: bytes) -> Tuple[int, int]:
        size = len(data)
        if size < 3:
            raise ValueError("data to short")
        offset = _DATA_PATTERN.index(data)
        return (offset, size)

    yield locate
66 |
--------------------------------------------------------------------------------
/tests/integration/conftest.py:
--------------------------------------------------------------------------------
1 | import json
2 | from pathlib import Path
3 | from typing import TYPE_CHECKING, Any, Dict, Tuple, cast
4 |
5 | import pytest
6 |
if TYPE_CHECKING:

    # Typing-only helper: a fixture request which also declares the ``param``
    # attribute as a Path, the way integration_case reads it.
    class _Request(pytest.FixtureRequest):
        param: Path


# An integration case: a Path paired with a metadata dict.
_IntegrationCase = Tuple[Path, Dict[str, Any]]
14 |
15 |
@pytest.fixture(
    # sorted: the order in which a directory is listed depends on the platform,
    # and the parametrization order must be the same on every run
    params=sorted((Path(__file__).parent / "files").rglob("*.json")),
    ids=lambda path: cast(Path, path).name,
)
def integration_case(request: "_Request") -> _IntegrationCase:
    """Provide one integration case per JSON file found under ``files``.

    Return a tuple made of the path with its suffix replaced by ``.xz`` and
    the metadata loaded from the JSON file.
    """
    json_path = request.param
    # explicit encoding: do not depend on the locale of the machine
    with json_path.open(encoding="utf-8") as json_file:
        metadata = cast(Dict[str, Any], json.load(json_file))
    return (json_path.with_suffix(".xz"), metadata)
25 |
--------------------------------------------------------------------------------
/tests/integration/files/check-crc32.json:
--------------------------------------------------------------------------------
1 | {
2 | "generate": [
3 | {
4 | "cmd": "xz -C crc32",
5 | "length": 20280
6 | }
7 | ],
8 | "streams": [
9 | {
10 | "check": 1,
11 | "blocks": [
12 | {
13 | "length": 20280
14 | }
15 | ]
16 | }
17 | ]
18 | }
19 |
--------------------------------------------------------------------------------
/tests/integration/files/check-crc32.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/check-crc32.xz
--------------------------------------------------------------------------------
/tests/integration/files/check-crc64.json:
--------------------------------------------------------------------------------
1 | {
2 | "generate": [
3 | {
4 | "cmd": "xz -C crc64",
5 | "length": 20280
6 | }
7 | ],
8 | "streams": [
9 | {
10 | "check": 4,
11 | "blocks": [
12 | {
13 | "length": 20280
14 | }
15 | ]
16 | }
17 | ]
18 | }
19 |
--------------------------------------------------------------------------------
/tests/integration/files/check-crc64.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/check-crc64.xz
--------------------------------------------------------------------------------
/tests/integration/files/check-none.json:
--------------------------------------------------------------------------------
1 | {
2 | "generate": [
3 | {
4 | "cmd": "xz -C none",
5 | "length": 20280
6 | }
7 | ],
8 | "streams": [
9 | {
10 | "check": 0,
11 | "blocks": [
12 | {
13 | "length": 20280
14 | }
15 | ]
16 | }
17 | ]
18 | }
19 |
--------------------------------------------------------------------------------
/tests/integration/files/check-none.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/check-none.xz
--------------------------------------------------------------------------------
/tests/integration/files/check-sha256.json:
--------------------------------------------------------------------------------
1 | {
2 | "generate": [
3 | {
4 | "cmd": "xz -C sha256",
5 | "length": 20280
6 | }
7 | ],
8 | "streams": [
9 | {
10 | "check": 10,
11 | "blocks": [
12 | {
13 | "length": 20280
14 | }
15 | ]
16 | }
17 | ]
18 | }
19 |
--------------------------------------------------------------------------------
/tests/integration/files/check-sha256.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/check-sha256.xz
--------------------------------------------------------------------------------
/tests/integration/files/example.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/example.xz
--------------------------------------------------------------------------------
/tests/integration/files/few-blocks.json:
--------------------------------------------------------------------------------
1 | {
2 | "generate": [
3 | {
4 | "cmd": "xz --block-size 10000",
5 | "length": 20280
6 | }
7 | ],
8 | "streams": [
9 | {
10 | "check": 4,
11 | "blocks": [
12 | {
13 | "length": 10000
14 | },
15 | {
16 | "length": 10000
17 | },
18 | {
19 | "length": 280
20 | }
21 | ]
22 | }
23 | ]
24 | }
25 |
--------------------------------------------------------------------------------
/tests/integration/files/few-blocks.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/few-blocks.xz
--------------------------------------------------------------------------------
/tests/integration/files/many-blocks.json:
--------------------------------------------------------------------------------
1 | {
2 | "generate": [
3 | {
4 | "cmd": "xz --block-size 100",
5 | "length": 20280
6 | }
7 | ],
8 | "streams": [
9 | {
10 | "check": 4,
11 | "blocks": [
12 | {
13 | "length": 100
14 | },
15 | {
16 | "length": 100
17 | },
18 | {
19 | "length": 100
20 | },
21 | {
22 | "length": 100
23 | },
24 | {
25 | "length": 100
26 | },
27 | {
28 | "length": 100
29 | },
30 | {
31 | "length": 100
32 | },
33 | {
34 | "length": 100
35 | },
36 | {
37 | "length": 100
38 | },
39 | {
40 | "length": 100
41 | },
42 | {
43 | "length": 100
44 | },
45 | {
46 | "length": 100
47 | },
48 | {
49 | "length": 100
50 | },
51 | {
52 | "length": 100
53 | },
54 | {
55 | "length": 100
56 | },
57 | {
58 | "length": 100
59 | },
60 | {
61 | "length": 100
62 | },
63 | {
64 | "length": 100
65 | },
66 | {
67 | "length": 100
68 | },
69 | {
70 | "length": 100
71 | },
72 | {
73 | "length": 100
74 | },
75 | {
76 | "length": 100
77 | },
78 | {
79 | "length": 100
80 | },
81 | {
82 | "length": 100
83 | },
84 | {
85 | "length": 100
86 | },
87 | {
88 | "length": 100
89 | },
90 | {
91 | "length": 100
92 | },
93 | {
94 | "length": 100
95 | },
96 | {
97 | "length": 100
98 | },
99 | {
100 | "length": 100
101 | },
102 | {
103 | "length": 100
104 | },
105 | {
106 | "length": 100
107 | },
108 | {
109 | "length": 100
110 | },
111 | {
112 | "length": 100
113 | },
114 | {
115 | "length": 100
116 | },
117 | {
118 | "length": 100
119 | },
120 | {
121 | "length": 100
122 | },
123 | {
124 | "length": 100
125 | },
126 | {
127 | "length": 100
128 | },
129 | {
130 | "length": 100
131 | },
132 | {
133 | "length": 100
134 | },
135 | {
136 | "length": 100
137 | },
138 | {
139 | "length": 100
140 | },
141 | {
142 | "length": 100
143 | },
144 | {
145 | "length": 100
146 | },
147 | {
148 | "length": 100
149 | },
150 | {
151 | "length": 100
152 | },
153 | {
154 | "length": 100
155 | },
156 | {
157 | "length": 100
158 | },
159 | {
160 | "length": 100
161 | },
162 | {
163 | "length": 100
164 | },
165 | {
166 | "length": 100
167 | },
168 | {
169 | "length": 100
170 | },
171 | {
172 | "length": 100
173 | },
174 | {
175 | "length": 100
176 | },
177 | {
178 | "length": 100
179 | },
180 | {
181 | "length": 100
182 | },
183 | {
184 | "length": 100
185 | },
186 | {
187 | "length": 100
188 | },
189 | {
190 | "length": 100
191 | },
192 | {
193 | "length": 100
194 | },
195 | {
196 | "length": 100
197 | },
198 | {
199 | "length": 100
200 | },
201 | {
202 | "length": 100
203 | },
204 | {
205 | "length": 100
206 | },
207 | {
208 | "length": 100
209 | },
210 | {
211 | "length": 100
212 | },
213 | {
214 | "length": 100
215 | },
216 | {
217 | "length": 100
218 | },
219 | {
220 | "length": 100
221 | },
222 | {
223 | "length": 100
224 | },
225 | {
226 | "length": 100
227 | },
228 | {
229 | "length": 100
230 | },
231 | {
232 | "length": 100
233 | },
234 | {
235 | "length": 100
236 | },
237 | {
238 | "length": 100
239 | },
240 | {
241 | "length": 100
242 | },
243 | {
244 | "length": 100
245 | },
246 | {
247 | "length": 100
248 | },
249 | {
250 | "length": 100
251 | },
252 | {
253 | "length": 100
254 | },
255 | {
256 | "length": 100
257 | },
258 | {
259 | "length": 100
260 | },
261 | {
262 | "length": 100
263 | },
264 | {
265 | "length": 100
266 | },
267 | {
268 | "length": 100
269 | },
270 | {
271 | "length": 100
272 | },
273 | {
274 | "length": 100
275 | },
276 | {
277 | "length": 100
278 | },
279 | {
280 | "length": 100
281 | },
282 | {
283 | "length": 100
284 | },
285 | {
286 | "length": 100
287 | },
288 | {
289 | "length": 100
290 | },
291 | {
292 | "length": 100
293 | },
294 | {
295 | "length": 100
296 | },
297 | {
298 | "length": 100
299 | },
300 | {
301 | "length": 100
302 | },
303 | {
304 | "length": 100
305 | },
306 | {
307 | "length": 100
308 | },
309 | {
310 | "length": 100
311 | },
312 | {
313 | "length": 100
314 | },
315 | {
316 | "length": 100
317 | },
318 | {
319 | "length": 100
320 | },
321 | {
322 | "length": 100
323 | },
324 | {
325 | "length": 100
326 | },
327 | {
328 | "length": 100
329 | },
330 | {
331 | "length": 100
332 | },
333 | {
334 | "length": 100
335 | },
336 | {
337 | "length": 100
338 | },
339 | {
340 | "length": 100
341 | },
342 | {
343 | "length": 100
344 | },
345 | {
346 | "length": 100
347 | },
348 | {
349 | "length": 100
350 | },
351 | {
352 | "length": 100
353 | },
354 | {
355 | "length": 100
356 | },
357 | {
358 | "length": 100
359 | },
360 | {
361 | "length": 100
362 | },
363 | {
364 | "length": 100
365 | },
366 | {
367 | "length": 100
368 | },
369 | {
370 | "length": 100
371 | },
372 | {
373 | "length": 100
374 | },
375 | {
376 | "length": 100
377 | },
378 | {
379 | "length": 100
380 | },
381 | {
382 | "length": 100
383 | },
384 | {
385 | "length": 100
386 | },
387 | {
388 | "length": 100
389 | },
390 | {
391 | "length": 100
392 | },
393 | {
394 | "length": 100
395 | },
396 | {
397 | "length": 100
398 | },
399 | {
400 | "length": 100
401 | },
402 | {
403 | "length": 100
404 | },
405 | {
406 | "length": 100
407 | },
408 | {
409 | "length": 100
410 | },
411 | {
412 | "length": 100
413 | },
414 | {
415 | "length": 100
416 | },
417 | {
418 | "length": 100
419 | },
420 | {
421 | "length": 100
422 | },
423 | {
424 | "length": 100
425 | },
426 | {
427 | "length": 100
428 | },
429 | {
430 | "length": 100
431 | },
432 | {
433 | "length": 100
434 | },
435 | {
436 | "length": 100
437 | },
438 | {
439 | "length": 100
440 | },
441 | {
442 | "length": 100
443 | },
444 | {
445 | "length": 100
446 | },
447 | {
448 | "length": 100
449 | },
450 | {
451 | "length": 100
452 | },
453 | {
454 | "length": 100
455 | },
456 | {
457 | "length": 100
458 | },
459 | {
460 | "length": 100
461 | },
462 | {
463 | "length": 100
464 | },
465 | {
466 | "length": 100
467 | },
468 | {
469 | "length": 100
470 | },
471 | {
472 | "length": 100
473 | },
474 | {
475 | "length": 100
476 | },
477 | {
478 | "length": 100
479 | },
480 | {
481 | "length": 100
482 | },
483 | {
484 | "length": 100
485 | },
486 | {
487 | "length": 100
488 | },
489 | {
490 | "length": 100
491 | },
492 | {
493 | "length": 100
494 | },
495 | {
496 | "length": 100
497 | },
498 | {
499 | "length": 100
500 | },
501 | {
502 | "length": 100
503 | },
504 | {
505 | "length": 100
506 | },
507 | {
508 | "length": 100
509 | },
510 | {
511 | "length": 100
512 | },
513 | {
514 | "length": 100
515 | },
516 | {
517 | "length": 100
518 | },
519 | {
520 | "length": 100
521 | },
522 | {
523 | "length": 100
524 | },
525 | {
526 | "length": 100
527 | },
528 | {
529 | "length": 100
530 | },
531 | {
532 | "length": 100
533 | },
534 | {
535 | "length": 100
536 | },
537 | {
538 | "length": 100
539 | },
540 | {
541 | "length": 100
542 | },
543 | {
544 | "length": 100
545 | },
546 | {
547 | "length": 100
548 | },
549 | {
550 | "length": 100
551 | },
552 | {
553 | "length": 100
554 | },
555 | {
556 | "length": 100
557 | },
558 | {
559 | "length": 100
560 | },
561 | {
562 | "length": 100
563 | },
564 | {
565 | "length": 100
566 | },
567 | {
568 | "length": 100
569 | },
570 | {
571 | "length": 100
572 | },
573 | {
574 | "length": 100
575 | },
576 | {
577 | "length": 100
578 | },
579 | {
580 | "length": 100
581 | },
582 | {
583 | "length": 100
584 | },
585 | {
586 | "length": 100
587 | },
588 | {
589 | "length": 100
590 | },
591 | {
592 | "length": 100
593 | },
594 | {
595 | "length": 100
596 | },
597 | {
598 | "length": 100
599 | },
600 | {
601 | "length": 100
602 | },
603 | {
604 | "length": 100
605 | },
606 | {
607 | "length": 100
608 | },
609 | {
610 | "length": 100
611 | },
612 | {
613 | "length": 100
614 | },
615 | {
616 | "length": 100
617 | },
618 | {
619 | "length": 80
620 | }
621 | ]
622 | }
623 | ]
624 | }
625 |
--------------------------------------------------------------------------------
/tests/integration/files/many-blocks.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/many-blocks.xz
--------------------------------------------------------------------------------
/tests/integration/files/one-stream-with-padding.json:
--------------------------------------------------------------------------------
1 | {
2 | "generate": [
3 | {
4 | "cmd": "xz --block-size 15000",
5 | "length": 20280
6 | },
7 | {
8 | "cmd": "head -c 100 /dev/zero"
9 | }
10 | ],
11 | "streams": [
12 | {
13 | "check": 4,
14 | "blocks": [
15 | {
16 | "length": 15000
17 | },
18 | {
19 | "length": 5280
20 | }
21 | ]
22 | }
23 | ]
24 | }
25 |
--------------------------------------------------------------------------------
/tests/integration/files/one-stream-with-padding.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/one-stream-with-padding.xz
--------------------------------------------------------------------------------
/tests/integration/files/several-filters.json:
--------------------------------------------------------------------------------
1 | {
2 | "generate": [
3 | {
4 | "cmd": "xz --x86=start=42 --delta=dist=3 --powerpc --lzma2",
5 | "length": 20280
6 | }
7 | ],
8 | "streams": [
9 | {
10 | "check": 4,
11 | "blocks": [
12 | {
13 | "filters": [
14 | {
15 | "id": 4,
16 | "start_offset": 42
17 | },
18 | {
19 | "id": 3,
20 | "dist": 3
21 | },
22 | {
23 | "id": 5
24 | },
25 | {
26 | "id": 33
27 | }
28 | ],
29 | "length": 20280
30 | }
31 | ]
32 | }
33 | ]
34 | }
35 |
--------------------------------------------------------------------------------
/tests/integration/files/several-filters.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/several-filters.xz
--------------------------------------------------------------------------------
/tests/integration/files/several-streams-with-padding.json:
--------------------------------------------------------------------------------
1 | {
2 | "generate": [
3 | {
4 | "cmd": "xz --block-size 1000",
5 | "length": 1500
6 | },
7 | {
8 | "cmd": "head -c 100 /dev/zero"
9 | },
10 | {
11 | "cmd": "xz",
12 | "length": 2000
13 | },
14 | {
15 | "cmd": "head -c 200 /dev/zero"
16 | },
17 | {
18 | "cmd": "xz --block-size 4000",
19 | "length": 16780
20 | },
21 | {
22 | "cmd": "head -c 400 /dev/zero"
23 | }
24 | ],
25 | "streams": [
26 | {
27 | "check": 4,
28 | "blocks": [
29 | {
30 | "length": 1000
31 | },
32 | {
33 | "length": 500
34 | }
35 | ]
36 | },
37 | {
38 | "check": 4,
39 | "blocks": [
40 | {
41 | "length": 2000
42 | }
43 | ]
44 | },
45 | {
46 | "check": 4,
47 | "blocks": [
48 | {
49 | "length": 4000
50 | },
51 | {
52 | "length": 4000
53 | },
54 | {
55 | "length": 4000
56 | },
57 | {
58 | "length": 4000
59 | },
60 | {
61 | "length": 780
62 | }
63 | ]
64 | }
65 | ]
66 | }
67 |
--------------------------------------------------------------------------------
/tests/integration/files/several-streams-with-padding.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/several-streams-with-padding.xz
--------------------------------------------------------------------------------
/tests/integration/files/several-streams.json:
--------------------------------------------------------------------------------
1 | {
2 | "generate": [
3 | {
4 | "cmd": "xz --block-size 5000",
5 | "length": 17000
6 | },
7 | {
8 | "cmd": "xz --block-size 1000",
9 | "length": 2100
10 | },
11 | {
12 | "cmd": "xz",
13 | "length": 1180
14 | }
15 | ],
16 | "streams": [
17 | {
18 | "check": 4,
19 | "blocks": [
20 | {
21 | "length": 5000
22 | },
23 | {
24 | "length": 5000
25 | },
26 | {
27 | "length": 5000
28 | },
29 | {
30 | "length": 2000
31 | }
32 | ]
33 | },
34 | {
35 | "check": 4,
36 | "blocks": [
37 | {
38 | "length": 1000
39 | },
40 | {
41 | "length": 1000
42 | },
43 | {
44 | "length": 100
45 | }
46 | ]
47 | },
48 | {
49 | "check": 4,
50 | "blocks": [
51 | {
52 | "length": 1180
53 | }
54 | ]
55 | }
56 | ]
57 | }
58 |
--------------------------------------------------------------------------------
/tests/integration/files/several-streams.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/several-streams.xz
--------------------------------------------------------------------------------
/tests/integration/files/various-block-sizes.json:
--------------------------------------------------------------------------------
1 | {
2 | "generate": [
3 | {
4 | "cmd": "xz --block-list 1234,567,8901,234,5678,90,0",
5 | "length": 20280
6 | }
7 | ],
8 | "streams": [
9 | {
10 | "check": 4,
11 | "blocks": [
12 | {
13 | "length": 1234
14 | },
15 | {
16 | "length": 567
17 | },
18 | {
19 | "length": 8901
20 | },
21 | {
22 | "length": 234
23 | },
24 | {
25 | "length": 5678
26 | },
27 | {
28 | "length": 90
29 | },
30 | {
31 | "length": 3576
32 | }
33 | ]
34 | }
35 | ]
36 | }
37 |
--------------------------------------------------------------------------------
/tests/integration/files/various-block-sizes.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/various-block-sizes.xz
--------------------------------------------------------------------------------
/tests/integration/files/various-stream-checks-stream-padding-and-block-sizes.json:
--------------------------------------------------------------------------------
1 | {
2 | "generate": [
3 | {
4 | "cmd": "xz -C none --block-list 100,1000,200,2000,0",
5 | "length": 3600
6 | },
7 | {
8 | "cmd": "head -c 100 /dev/zero"
9 | },
10 | {
11 | "cmd": "xz -C crc32",
12 | "length": 10000
13 | },
14 | {
15 | "cmd": "head -c 800 /dev/zero"
16 | },
17 | {
18 | "cmd": "xz -C crc64 --block-list 3000,300,0",
19 | "length": 3333
20 | },
21 | {
22 | "cmd": "xz -C sha256 --block-size 600",
23 | "length": 3347
24 | },
25 | {
26 | "cmd": "head -c 400 /dev/zero"
27 | }
28 | ],
29 | "streams": [
30 | {
31 | "check": 0,
32 | "blocks": [
33 | {
34 | "length": 100
35 | },
36 | {
37 | "length": 1000
38 | },
39 | {
40 | "length": 200
41 | },
42 | {
43 | "length": 2000
44 | },
45 | {
46 | "length": 300
47 | }
48 | ]
49 | },
50 | {
51 | "check": 1,
52 | "blocks": [
53 | {
54 | "length": 10000
55 | }
56 | ]
57 | },
58 | {
59 | "check": 4,
60 | "blocks": [
61 | {
62 | "length": 3000
63 | },
64 | {
65 | "length": 300
66 | },
67 | {
68 | "length": 33
69 | }
70 | ]
71 | },
72 | {
73 | "check": 10,
74 | "blocks": [
75 | {
76 | "length": 600
77 | },
78 | {
79 | "length": 600
80 | },
81 | {
82 | "length": 600
83 | },
84 | {
85 | "length": 600
86 | },
87 | {
88 | "length": 600
89 | },
90 | {
91 | "length": 347
92 | }
93 | ]
94 | }
95 | ]
96 | }
97 |
--------------------------------------------------------------------------------
/tests/integration/files/various-stream-checks-stream-padding-and-block-sizes.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/various-stream-checks-stream-padding-and-block-sizes.xz
--------------------------------------------------------------------------------
/tests/integration/files/various-stream-checks.json:
--------------------------------------------------------------------------------
1 | {
2 | "generate": [
3 | {
4 | "cmd": "xz -C none",
5 | "length": 5070
6 | },
7 | {
8 | "cmd": "xz -C crc32",
9 | "length": 5070
10 | },
11 | {
12 | "cmd": "xz -C crc64",
13 | "length": 5070
14 | },
15 | {
16 | "cmd": "xz -C sha256",
17 | "length": 5070
18 | }
19 | ],
20 | "streams": [
21 | {
22 | "check": 0,
23 | "blocks": [
24 | {
25 | "length": 5070
26 | }
27 | ]
28 | },
29 | {
30 | "check": 1,
31 | "blocks": [
32 | {
33 | "length": 5070
34 | }
35 | ]
36 | },
37 | {
38 | "check": 4,
39 | "blocks": [
40 | {
41 | "length": 5070
42 | }
43 | ]
44 | },
45 | {
46 | "check": 10,
47 | "blocks": [
48 | {
49 | "length": 5070
50 | }
51 | ]
52 | }
53 | ]
54 | }
55 |
--------------------------------------------------------------------------------
/tests/integration/files/various-stream-checks.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rogdham/python-xz/89af850a59aaf83920a0eb7c314d9f2ed71979fa/tests/integration/files/various-stream-checks.xz
--------------------------------------------------------------------------------
/tests/integration/test_file_read.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Any, Dict, Tuple
3 |
4 | from xz import XZFile
5 |
6 | _IntegrationCase = Tuple[Path, Dict[str, Any]]
7 |
8 |
def test_read_all(integration_case: _IntegrationCase, data_pattern: bytes) -> None:
    """Check the streams and blocks layout against the metadata, then the content."""
    xz_path, metadata = integration_case
    with XZFile(xz_path) as xzfile:
        streams = list(
            xzfile._fileobjs.items()  # pylint: disable=protected-access
        )
        assert len(streams) == len(metadata["streams"])
        offset = 0  # running position in the uncompressed data
        expected_stream_starts = []
        expected_block_starts = []
        for (stream_start, stream), stream_meta in zip(streams, metadata["streams"]):
            expected_stream_starts.append(offset)
            assert stream_start == offset
            assert stream.check == stream_meta["check"]
            blocks = list(
                stream._fileobjs.items()  # pylint: disable=protected-access
            )
            assert len(blocks) == len(stream_meta["blocks"])
            for (block_start, block), block_meta in zip(
                blocks, stream_meta["blocks"]
            ):
                expected_block_starts.append(offset)
                # block positions are relative to the start of their stream
                assert block_start == offset - stream_start
                assert len(block) == block_meta["length"]
                offset += block_meta["length"]
            assert len(stream) == offset - stream_start
        assert xzfile.stream_boundaries == expected_stream_starts
        assert xzfile.block_boundaries == expected_block_starts
        assert xzfile.read() == data_pattern
40 |
41 |
def test_read_reversed(integration_case: _IntegrationCase, data_pattern: bytes) -> None:
    """Read the archive chunk by chunk, from the last chunk to the first."""
    xz_path, _ = integration_case
    # we are testing the worst possible case (lots of negative seeking)
    # limit the time to test by reading in chunks instead of 1 byte at a time
    chunk_size = 37
    chunk_offsets = range(0, len(data_pattern), chunk_size)
    with XZFile(xz_path) as xzfile:
        for offset in reversed(chunk_offsets):
            xzfile.seek(offset)
            chunk = xzfile.read(chunk_size)
            assert chunk == data_pattern[offset : offset + chunk_size]
51 |
--------------------------------------------------------------------------------
/tests/integration/test_file_write.py:
--------------------------------------------------------------------------------
1 | from hashlib import sha256
2 | from pathlib import Path
3 | from typing import Any, Dict, Tuple
4 |
5 | import pytest
6 |
7 | import xz
8 |
9 | _IntegrationCase = Tuple[Path, Dict[str, Any]]
10 |
11 |
def test(
    integration_case: _IntegrationCase, data_pattern: bytes, tmp_path: Path
) -> None:
    """Re-create an integration archive in write mode and compare it byte for byte.

    Streams and blocks are written following the JSON metadata; the SHA-256
    of the generated file must equal the one of the reference archive.
    """
    xz_path, metadata = integration_case
    remaining = memoryview(data_pattern)

    if "padding" in xz_path.name:
        pytest.skip("Write mode does not support stream padding yet")

    generated_path = tmp_path / "archive.xz"

    with xz.open(generated_path, "w") as xzfile:
        for stream in metadata["streams"]:
            # the check must be set before the new stream is started
            xzfile.check = stream["check"]
            xzfile.change_stream()
            for block in stream["blocks"]:
                xzfile.filters = block.get("filters")
                xzfile.change_block()
                length = block["length"]
                xzfile.write(remaining[:length])
                remaining = remaining[length:]

    # the metadata must account for the whole data pattern
    assert not remaining

    expected_digest = sha256(xz_path.read_bytes()).hexdigest()
    generated_digest = sha256(generated_path.read_bytes()).hexdigest()

    assert generated_digest == expected_digest
39 |
--------------------------------------------------------------------------------
/tests/integration/test_generate_files.py:
--------------------------------------------------------------------------------
1 | from hashlib import sha256
2 | from pathlib import Path
3 | import subprocess
4 | from typing import Any, Dict, Tuple
5 |
6 | import pytest
7 |
8 | _IntegrationCase = Tuple[Path, Dict[str, Any]]
9 |
10 |
@pytest.mark.generate_integration_files
def test(integration_case: _IntegrationCase, data_pattern: bytes) -> None:
    """Regenerate an integration archive from the commands in its metadata.

    Each "generate" step runs a command, feeding it the next ``length`` bytes
    of the data pattern on stdin, and appends the command output to the
    archive. The regenerated file must hash like the one it replaced.
    """
    xz_path, metadata = integration_case

    # hash the existing archive before it gets overwritten below
    expected_digest = sha256(xz_path.read_bytes()).hexdigest()

    # note that we override current xz file
    # this allows to create new integration files from json metadata
    remaining = memoryview(data_pattern)
    with xz_path.open("wb") as fout:
        for step in metadata["generate"]:
            length = step.get("length", 0)
            step_input, remaining = remaining[:length], remaining[length:]
            completed = subprocess.run(
                step["cmd"].split(" "),
                input=step_input,
                stdout=subprocess.PIPE,
                check=True,
            )
            fout.write(completed.stdout)
    assert not remaining

    generated_digest = sha256(xz_path.read_bytes()).hexdigest()

    assert generated_digest == expected_digest
38 |
--------------------------------------------------------------------------------
/tests/integration/test_ram_usage.py:
--------------------------------------------------------------------------------
1 | from io import DEFAULT_BUFFER_SIZE
2 | from lzma import compress
3 | from pathlib import Path
4 | from random import seed
5 | import sys
6 | from typing import BinaryIO, Optional, cast
7 |
8 | import pytest
9 |
10 | from xz import XZFile
11 | from xz.common import create_xz_index_footer, parse_xz_footer, parse_xz_index
12 | from xz.io import IOCombiner, IOStatic
13 |
if sys.version_info >= (3, 9):
    from collections.abc import Callable, Iterator
    from random import randbytes
else:
    from random import getrandbits
    from typing import Callable, Iterator

    def randbytes(length: int) -> bytes:
        """Backport of ``random.randbytes`` for Python < 3.9.

        Returns ``length`` random bytes drawn from the module-level generator,
        so the result is reproducible after ``random.seed``.
        """
        if length == 0:
            # getrandbits(0) raises ValueError before Python 3.9,
            # whereas the real randbytes(0) returns an empty bytes object
            return b""
        return getrandbits(length * 8).to_bytes(length, "little")
23 |
24 |
@pytest.fixture
def ram_usage() -> Iterator[Callable[[], int]]:
    """Yield a callable returning the peak traced memory size.

    Tracing is started for the duration of the test and always stopped
    afterwards. The test is skipped when ``tracemalloc`` cannot be imported.
    """
    try:
        import tracemalloc  # pylint: disable=import-outside-toplevel
    except ImportError:  # e.g. PyPy
        pytest.skip("tracemalloc module not available")

    def peak_traced_memory() -> int:
        # get_traced_memory() returns a (current, peak) tuple
        _current, peak = tracemalloc.get_traced_memory()
        return peak

    try:
        tracemalloc.start()
        yield peak_traced_memory
    finally:
        tracemalloc.stop()
37 |
38 |
39 | BLOCK_SIZE = 1_000_000
40 |
41 |
@pytest.fixture
def fileobj() -> BinaryIO:
    """Build an in-memory xz stream made of the same block repeated 50 times.

    One block of seeded random data is compressed once; its 12-byte stream
    header, its block and its index record are then reused to assemble a
    stream without duplicating the block bytes in memory.
    """
    # create xz raw data composed of many identical blocks
    nb_blocks = 50

    seed(0)
    data = compress(randbytes(BLOCK_SIZE))

    # layout of data: header (12 bytes) | block | index | footer (12 bytes)
    header, footer = data[:12], data[-12:]
    check, backward_size = parse_xz_footer(footer)
    index_start = -12 - backward_size
    block = data[12:index_start]
    records = parse_xz_index(data[index_start:-12])
    index_footer = create_xz_index_footer(check, records * nb_blocks)

    parts = [IOStatic(header)]
    # the very same IOStatic instance is repeated for every block
    parts += [IOStatic(block)] * nb_blocks
    parts.append(IOStatic(index_footer))
    return cast(BinaryIO, IOCombiner(*parts))
64 |
65 |
def test_read_linear(
    # pylint: disable=redefined-outer-name
    fileobj: BinaryIO,
    ram_usage: Callable[[], int],
) -> None:
    """Reading the file from start to end must not accumulate memory."""
    with XZFile(fileobj) as xz_file:
        # read almost one block
        xz_file.read(BLOCK_SIZE - 1)
        # should not use much more memory, take 2 as error margin
        memory_limit = ram_usage() * 2

        # read all the file
        while True:
            if not xz_file.read(DEFAULT_BUFFER_SIZE):
                break
            assert (
                ram_usage() < memory_limit
            ), f"Consumes too much RAM (at {100 * xz_file.tell() / len(xz_file):.0f}%)"
83 |
84 |
def test_partial_read_each_block(
    # pylint: disable=redefined-outer-name
    fileobj: BinaryIO,
    ram_usage: Callable[[], int],
) -> None:
    """Touching every block once must keep memory usage bounded."""
    one_block_memory: Optional[int] = None

    with XZFile(fileobj) as xz_file:
        for boundary in xz_file.block_boundaries[1:]:
            # read second-to last byte of each block
            xz_file.seek(boundary - 2)
            xz_file.read(1)

            memory = ram_usage()
            if one_block_memory is None:
                # first block read: use it as the reference
                one_block_memory = memory
                continue
            assert (
                # default strategy is max 8 blocks, take 10 as error margin
                memory
                < one_block_memory * 10
            ), f"Consumes too much RAM (at {100 * xz_file.tell() / len(xz_file):.0f}%)"
105 |
106 |
def test_write(
    tmp_path: Path,
    # pylint: disable=redefined-outer-name
    ram_usage: Callable[[], int],
) -> None:
    """Writing many blocks must not use much more memory than writing one.

    The memory measured after the first block is the reference; every later
    block must stay below twice that value.
    """
    nb_blocks = 10

    seed(0)

    one_block_memory: Optional[int] = None

    with XZFile(tmp_path / "archive.xz", "w") as xz_file:
        for i in range(nb_blocks):
            xz_file.change_block()
            xz_file.write(randbytes(BLOCK_SIZE))

            if one_block_memory is None:
                one_block_memory = ram_usage()
            else:
                assert (
                    # should not use much more memory, take 2 as error margin
                    ram_usage()
                    < one_block_memory * 2
                    # scale the ratio by 100 so that the message is a real
                    # percentage, like in the read tests of this module
                ), f"Consumes too much RAM (at {100 * i / nb_blocks:.0f}%)"
131 |
--------------------------------------------------------------------------------
/tests/integration/test_readme.py:
--------------------------------------------------------------------------------
1 | import doctest
2 | import os
3 | from pathlib import Path
4 | import shutil
5 | import sys
6 | from typing import List, Optional, Tuple
7 |
8 | import pytest
9 |
10 | import xz
11 |
12 | if sys.version_info >= (3, 9): # pragma: no cover
13 | from collections.abc import Iterator
14 | else: # pragma: no cover
15 | from typing import Iterator
16 |
17 |
@pytest.fixture(autouse=True)
def change_dir(tmp_path: Path) -> Iterator[None]:
    """Run each test from a temporary directory containing ``example.xz``.

    The previous working directory is restored after the test.
    """
    previous_cwd = os.getcwd()
    example_path = Path(__file__).parent / "files" / "example.xz"
    shutil.copy(example_path, tmp_path)
    os.chdir(tmp_path)
    yield
    os.chdir(previous_cwd)
25 |
26 |
def _parse_readme() -> List[Tuple[int, str]]:
    """Extract the Python fenced code blocks of the project README.

    Returns a list of ``(line, content)`` tuples where ``line`` is the
    1-based line number of the opening fence and ``content`` is the text
    between the fences. Fenced blocks whose opening line does not contain
    "python" are ignored, and a block still open at end of file is dropped.
    """
    code_blocks: List[Tuple[int, str]] = []
    current_lines: List[str] = []
    current_code_block_line: Optional[int] = None
    readme_path = Path(__file__).parent.parent.parent / "README.md"
    # explicit encoding: do not depend on the locale of the machine
    with readme_path.open(encoding="utf-8") as fin:
        for line_no, line in enumerate(fin):
            if line.startswith("```"):
                if current_code_block_line is None:
                    # opening fence: only track Python blocks
                    if "python" in line:
                        current_code_block_line = line_no + 1
                else:
                    # closing fence of a tracked block
                    code_blocks.append(
                        (current_code_block_line, "".join(current_lines))
                    )
                    current_lines = []
                    current_code_block_line = None
            elif current_code_block_line is not None:
                current_lines.append(line)
    return code_blocks
44 |
45 |
46 | _README_CODE_BLOCKS = _parse_readme()
47 |
48 |
@pytest.mark.parametrize(
    "code_block",
    [
        pytest.param(code_block, id=f"line_{line_no}")
        for line_no, code_block in _README_CODE_BLOCKS
    ],
)
def test_readme(
    code_block: str, tmp_path: Path
) -> None:  # pylint: disable=redefined-outer-name
    """Run one README code block as a doctest file.

    The block is written to a temporary file and executed with ``xz``
    available in its globals. It must contain at least one example and
    none of them may fail.
    """
    path = tmp_path / "block.txt"
    # write and read back with the same explicit encoding, so that the
    # result does not depend on the locale of the machine
    path.write_text(code_block, encoding="utf-8")
    failure_count, test_count = doctest.testfile(
        str(path),
        module_relative=False,
        extraglobs={"xz": xz},
        encoding="utf-8",
    )
    assert failure_count == 0
    assert test_count
68 |
--------------------------------------------------------------------------------
/tests/unit/test_attr_proxy.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | import pytest
4 |
5 | from xz.utils import AttrProxy
6 |
7 |
class Dest:
    """Target object of the proxy in these tests; holds the real ``abc``."""

    abc = "012"
10 |
11 |
class Src:
    """Object whose ``abc`` attribute is an ``AttrProxy`` bound to ``proxy``."""

    # no destination object until a test assigns one
    proxy: Optional[Dest] = None
    abc = AttrProxy[str]("proxy")
15 |
16 |
def test_direct() -> None:
    """Exercise ``AttrProxy`` before and after the proxy target is set."""
    target = Dest()
    source = Src()

    # not proxied
    expected_message = (
        "'Src' object has not attribute 'abc' until its attribute 'proxy' is defined"
    )
    with pytest.raises(AttributeError) as exc_info:
        source.abc  # pylint: disable=pointless-statement
    assert str(exc_info.value) == expected_message

    source.abc = "345"
    assert source.abc == "345"
    assert target.abc == "012"  # unchanged

    # proxied
    source.proxy = target

    assert source.abc == "012"  # get initial value back from proxy

    source.abc = "678"
    assert source.abc == "678"
    assert target.abc == "678"  # changed
41 |
--------------------------------------------------------------------------------
/tests/unit/test_block.py:
--------------------------------------------------------------------------------
1 | from io import SEEK_SET, BytesIO, UnsupportedOperation
2 | import sys
3 | from typing import Tuple, cast
4 | from unittest.mock import Mock, call
5 |
6 | import pytest
7 |
8 | import xz.block as block_module
9 | from xz.block import BlockRead, XZBlock
10 | from xz.common import XZError, create_xz_header, create_xz_index_footer
11 | from xz.io import IOAbstract, IOStatic
12 |
13 | if sys.version_info >= (3, 9): # pragma: no cover
14 | from collections.abc import Callable, Iterator
15 | else: # pragma: no cover
16 | from typing import Callable, Iterator
17 |
18 |
# Raw bytes (92 in total) served by the ``fileobj`` fixture; the read tests
# wrap them with ``XZBlock(fileobj, 1, 89, 100)``.
BLOCK_BYTES = bytes.fromhex(
    "0200210116000000742fe5a3e0006300415d00209842100431d01ab285328305"
    "7ddb5924a128599cc9911a7fcff8d59c1f6f887bcee97b1f83f1808f005de273"
    "e1a6e99a7eac4f8f632b7e43bbf1da311dce5c0000000000e7c35efa"
)
24 |
25 |
def create_fileobj(data: bytes) -> Mock:
    """Return a mock wrapping a ``BytesIO`` of ``data`` and recording its calls.

    The mock reports ``IOAbstract`` as its class and supports ``len()``,
    which returns the current size of the underlying buffer.
    """
    buffer = BytesIO(data)

    def buffer_length(_self: object) -> int:
        return len(buffer.getvalue())

    fileobj_mock = Mock(wraps=buffer)
    # needs to be subclass of IOAbstract
    fileobj_mock.__class__ = cast(Mock, IOAbstract)
    fileobj_mock.__len__ = buffer_length
    return fileobj_mock
32 |
33 |
@pytest.fixture
def fileobj() -> Iterator[Mock]:
    """Mocked file object whose content is ``BLOCK_BYTES``."""
    block_fileobj = create_fileobj(BLOCK_BYTES)
    yield block_fileobj
37 |
38 |
@pytest.fixture
def fileobj_empty() -> Iterator[Mock]:
    """Mocked file object with no content, as used by the write tests."""
    empty_fileobj = create_fileobj(b"")
    yield empty_fileobj
42 |
43 |
@pytest.fixture(autouse=True)
def patch_buffer_size(monkeypatch: pytest.MonkeyPatch) -> None:
    """Set ``BlockRead.read_size`` to 17 for every test of this module.

    The expected ``read(17)`` calls in the read tests rely on this value.
    """
    small_read_size = 17
    monkeypatch.setattr(BlockRead, "read_size", small_read_size)
47 |
48 |
@pytest.fixture
def compressor(monkeypatch: pytest.MonkeyPatch) -> Iterator[Mock]:
    """Replace ``LZMACompressor`` in the block module by a mock.

    Yields the mock's ``return_value``, i.e. the object obtained when the
    patched ``LZMACompressor`` is called.
    """
    compressor_class = Mock()
    monkeypatch.setattr(block_module, "LZMACompressor", compressor_class)
    yield compressor_class.return_value
54 |
55 |
56 | # pylint: disable=redefined-outer-name
57 |
58 |
59 | #
60 | # read
61 | #
62 |
63 |
def test_read_all(
    fileobj: Mock, data_pattern_locate: Callable[[bytes], Tuple[int, int]]
) -> None:
    """Read a whole block at once and check the calls made on the file object."""
    expected_calls = [
        call.seek(0, SEEK_SET),
        call.read(5),  # xz padding is 12 bytes
        call.seek(5, SEEK_SET),
        call.read(17),
        call.seek(22, SEEK_SET),
        call.read(17),
        call.seek(39, SEEK_SET),
        call.read(17),
        call.seek(56, SEEK_SET),
        call.read(17),
        call.seek(73, SEEK_SET),
        call.read(17),
        # below is not needed to get the data
        # but needed to perform various checks
        # see other tests
        call.seek(90, SEEK_SET),
        call.read(17),
    ]

    block = XZBlock(fileobj, 1, 89, 100)
    assert block.tell() == 0
    content = block.read()
    assert data_pattern_locate(content) == (0, 100)

    assert fileobj.method_calls == expected_calls
    fileobj.method_calls.clear()

    # already at the end: nothing more to read, no file access
    assert block.read() == b""
    assert not fileobj.method_calls
94 |
95 |
def test_read_seek_forward(
    fileobj: Mock, data_pattern_locate: Callable[[bytes], Tuple[int, int]]
) -> None:
    """Seek forward in a block and read 4 bytes at increasing positions.

    For every position, the expected list holds the calls made on the file
    object by the read; seeking alone must not access the file.
    """
    steps = [
        (
            0,
            [
                call.seek(0, SEEK_SET),
                call.read(5),  # xz padding is 12 bytes
                call.seek(5, SEEK_SET),
                call.read(17),
                call.seek(22, SEEK_SET),
                call.read(17),
            ],
        ),
        (10, []),  # no file access
        (30, [call.seek(39, SEEK_SET), call.read(17)]),
        (60, [call.seek(56, SEEK_SET), call.read(17)]),
    ]

    block = XZBlock(fileobj, 1, 89, 100)
    assert block.tell() == 0

    for pos, expected_calls in steps:
        block.seek(pos)
        assert block.tell() == pos
        assert not fileobj.method_calls  # no file access
        assert data_pattern_locate(block.read(4)) == (pos, 4)
        assert block.tell() == pos + 4
        assert fileobj.method_calls == expected_calls
        fileobj.method_calls.clear()
145 |
146 |
def test_read_seek_backward(
    fileobj: Mock, data_pattern_locate: Callable[[bytes], Tuple[int, int]]
) -> None:
    """Seek backward in a block and read 4 bytes at decreasing positions.

    Both reads are expected to trigger the same calls on the file object,
    starting again from offset 0.
    """
    calls_from_start = [
        call.seek(0, SEEK_SET),
        call.read(5),  # xz padding is 12 bytes
        call.seek(5, SEEK_SET),
        call.read(17),
        call.seek(22, SEEK_SET),
        call.read(17),
        call.seek(39, SEEK_SET),
        call.read(17),
    ]

    block = XZBlock(fileobj, 1, 89, 100)
    assert block.tell() == 0

    block.seek(60)
    assert block.tell() == 60
    assert not fileobj.method_calls  # no file access

    for pos in (40, 20):
        block.seek(pos)
        assert block.tell() == pos
        assert not fileobj.method_calls  # no file access
        assert data_pattern_locate(block.read(4)) == (pos, 4)
        assert block.tell() == pos + 4
        assert fileobj.method_calls == calls_from_start
        fileobj.method_calls.clear()
        assert not fileobj.method_calls  # no file access
191 |
192 |
def test_read_wrong_uncompressed_size_too_small(
    fileobj: Mock, data_pattern_locate: Callable[[bytes], Tuple[int, int]]
) -> None:
    """A declared uncompressed size of 99 (one byte short) fails on the last byte."""
    declared_size = 99
    block = XZBlock(fileobj, 1, 89, declared_size)

    # read all but last byte
    head = block.read(declared_size - 1)
    assert data_pattern_locate(head) == (0, 98)

    # read last byte
    with pytest.raises(XZError) as exc_info:
        block.read(1)
    message = str(exc_info.value)
    assert message == "block: error while decompressing: Corrupt input data"
205 |
206 |
def test_read_wrong_uncompressed_size_too_big(
    fileobj: Mock, data_pattern_locate: Callable[[bytes], Tuple[int, int]]
) -> None:
    """A declared uncompressed size of 101 (one byte too many) fails on the last byte."""
    declared_size = 101
    block = XZBlock(fileobj, 1, 89, declared_size)

    # read all but last byte
    head = block.read(declared_size - 1)
    assert data_pattern_locate(head) == (0, 100)

    # read last byte
    with pytest.raises(XZError) as exc_info:
        block.read(1)
    message = str(exc_info.value)
    assert message == "block: error while decompressing: Corrupt input data"
219 |
220 |
def test_read_wrong_block_padding(
    data_pattern_locate: Callable[[bytes], Tuple[int, int]]
) -> None:
    """Replacing the byte just before the last four makes the last read fail."""
    corrupted = BLOCK_BYTES[:-5] + b"\xff" + BLOCK_BYTES[-4:]
    block = XZBlock(IOStatic(corrupted), 1, 89, 100)

    # read all but last byte
    head = block.read(99)
    assert data_pattern_locate(head) == (0, 99)

    # read last byte
    with pytest.raises(XZError) as exc_info:
        block.read(1)
    message = str(exc_info.value)
    assert message == "block: error while decompressing: Corrupt input data"
234 |
235 |
def test_read_wrong_check(
    data_pattern_locate: Callable[[bytes], Tuple[int, int]]
) -> None:
    """Overwriting the last four bytes of the block makes the last read fail."""
    corrupted = BLOCK_BYTES[:-4] + b"\xff" * 4
    block = XZBlock(IOStatic(corrupted), 1, 89, 100)

    # read all but last byte
    head = block.read(99)
    assert data_pattern_locate(head) == (0, 99)

    # read last byte
    with pytest.raises(XZError) as exc_info:
        block.read(1)
    message = str(exc_info.value)
    assert message == "block: error while decompressing: Corrupt input data"
250 |
251 |
def test_read_truncated_data() -> None:
    """Reading a block whose data ends too early raises "block: data eof"."""
    # header
    header_hex = "fd377a585a0000016922de36"
    # one block (truncated)
    truncated_block_hex = "0200210116000000742fe5a301000941"
    fileobj = create_fileobj(bytes.fromhex(header_hex + truncated_block_hex))

    block = XZBlock(fileobj, 1, 89, 100)

    with pytest.raises(XZError) as exc_info:
        block.read()
    message = str(exc_info.value)
    assert message == "block: data eof"
267 |
268 |
def test_read_decompressor_eof(
    data_pattern_locate: Callable[[bytes], Tuple[int, int]]
) -> None:
    """Asking for more bytes than the block holds raises "block: decompressor eof"."""
    # one block
    block_hex = "0200210116000000742fe5a301000941" "6130416131416132410000004e4aa467"
    # index
    index_hex = "00011e0aea6312149042990d0100"
    # stream footer
    footer_hex = "00000001595a"
    fileobj = IOStatic(bytes.fromhex(block_hex + index_hex + footer_hex))

    # real uncompressed size is 10, not 11
    # it is changed to trigger the error case we are testing here
    block = XZBlock(fileobj, 1, 30, 11)

    # read all but last byte
    head = block.read(10)
    assert data_pattern_locate(head) == (0, 10)

    # read last byte
    with pytest.raises(XZError) as exc_info:
        block.read(1)
    message = str(exc_info.value)
    assert message == "block: decompressor eof"
295 |
296 |
297 | #
298 | # writable
299 | #
300 |
301 |
def test_writable(fileobj: Mock) -> None:
    """A block created over existing data is not writable."""
    existing_block = XZBlock(fileobj, 1, 89, 100)
    is_writable = existing_block.writable()
    assert not is_writable
305 |
306 |
def test_writable_empty(fileobj_empty: Mock) -> None:
    """A block created with zero sizes is writable."""
    empty_block = XZBlock(fileobj_empty, 1, 0, 0)
    is_writable = empty_block.writable()
    assert is_writable
310 |
311 |
312 | #
313 | # write
314 | #
315 |
316 |
def test_write_once(fileobj_empty: Mock) -> None:
    """Write a short payload once and check what reaches the file object.

    Twelve bytes are written at offset 0 during the write; the rest is
    written at offset 12 when the block is closed.
    """
    calls_on_write = [
        call.seek(0),
        call.write(b"\x02\x00!\x01\x16\x00\x00\x00t/\xe5\xa3"),
    ]
    calls_on_close = [
        call.seek(12),
        call.write(b"\x01\x00\rHello, world!\n\x00\x00\x00\x18\xa7U{"),
    ]

    with XZBlock(fileobj_empty, 1, 0, 0) as block:
        block.write(b"Hello, world!\n")
        assert block.tell() == 14
        assert fileobj_empty.method_calls == calls_on_write
        fileobj_empty.reset_mock()

    assert block.unpadded_size == 34
    assert block.uncompressed_size == 14

    assert fileobj_empty.method_calls == calls_on_close
334 |
335 |
def test_write_multiple(fileobj_empty: Mock) -> None:
    """Write to a block several times and check when the file is accessed.

    The first write outputs twelve bytes at offset 0, a small write stays
    buffered, a large write reaches the file object, and closing the block
    writes again.
    """
    with XZBlock(fileobj_empty, 1, 0, 0) as block:
        block.write(b"Hello,")
        assert block.tell() == 6
        assert fileobj_empty.method_calls == [
            call.seek(0),
            call.write(b"\x02\x00!\x01\x16\x00\x00\x00t/\xe5\xa3"),
        ]
        fileobj_empty.reset_mock()

        block.write(b" world!\n")
        assert block.tell() == 14
        assert not fileobj_empty.method_calls  # buffered

        block.write(b"A" * 3_000_000)
        assert block.tell() == 3_000_014
        assert fileobj_empty.method_calls  # not buffered
        # forget the calls above, otherwise the check made after closing
        # the block would pass even if nothing was written on close
        fileobj_empty.reset_mock()

    assert block.unpadded_size == 540
    assert block.uncompressed_size == 3_000_014

    assert fileobj_empty.method_calls  # flushing compressor
358 |
359 |
@pytest.mark.parametrize("pos", [0, 42, 100, 200])
def test_write_existing(fileobj: Mock, pos: int) -> None:
    """Writing to a non-empty block is refused, whatever the position."""
    existing_block = XZBlock(fileobj, 1, 89, 100)
    existing_block.seek(pos)
    # block is not empty, so not writable
    with pytest.raises(UnsupportedOperation):
        existing_block.write(b"a")
367 |
368 |
def test_write_compressor_error_0(fileobj_empty: Mock, compressor: Mock) -> None:
    """The mocked compressor returns a header built for check 0 on a check-1 block."""
    compressor.compress.return_value = create_xz_header(0)
    with XZBlock(fileobj_empty, 1, 0, 0) as block, pytest.raises(
        XZError
    ) as exc_info:
        block.write(b"Hello, world!\n")
    message = str(exc_info.value)
    assert message == "block: compressor header"
375 |
376 |
def test_write_compressor_error_1(fileobj_empty: Mock, compressor: Mock) -> None:
    """The mocked compressor flushes an index/footer built for check 0."""
    compressor.compress.return_value = create_xz_header(1)
    compressor.flush.return_value = create_xz_index_footer(0, [(13, 37), (4, 2)])
    with pytest.raises(XZError) as exc_info, XZBlock(
        fileobj_empty, 1, 0, 0
    ) as block:
        block.write(b"Hello, world!\n")
    message = str(exc_info.value)
    assert message == "block: compressor footer check"
384 |
385 |
def test_write_compressor_error_2(fileobj_empty: Mock, compressor: Mock) -> None:
    """The mocked compressor flushes an index holding two records."""
    compressor.compress.return_value = create_xz_header(1)
    compressor.flush.return_value = create_xz_index_footer(1, [(13, 37), (4, 2)])
    with pytest.raises(XZError) as exc_info, XZBlock(
        fileobj_empty, 1, 0, 0
    ) as block:
        block.write(b"Hello, world!\n")
    message = str(exc_info.value)
    assert message == "block: compressor index records length"
393 |
394 |
def test_write_compressor_error_3(fileobj_empty: Mock, compressor: Mock) -> None:
    """The mocked compressor flushes an index record with uncompressed size 1337."""
    compressor.compress.return_value = create_xz_header(1)
    compressor.flush.return_value = create_xz_index_footer(1, [(34, 1337)])
    with pytest.raises(XZError) as exc_info, XZBlock(
        fileobj_empty, 1, 0, 0
    ) as block:
        block.write(b"Hello, world!\n")
    message = str(exc_info.value)
    assert message == "block: compressor uncompressed size"
402 |
403 |
404 | #
405 | # truncate
406 | #
407 |
408 |
def test_truncate_empty_zero(fileobj_empty: Mock) -> None:
    """Truncating an empty block to size 0 never touches the file object."""
    with XZBlock(fileobj_empty, 1, 0, 0) as block:
        block.truncate(0)
        position = block.tell()
        assert position == 0
        assert not fileobj_empty.method_calls

    sizes = (block.unpadded_size, block.uncompressed_size)
    assert sizes[0] == 0
    assert sizes[1] == 0

    assert not fileobj_empty.method_calls
419 |
420 |
def test_truncate_empty_fill(fileobj_empty: Mock) -> None:
    """Truncating an empty block to 42 gives an uncompressed size of 42.

    The position stays at 0; twelve bytes are written at offset 0 during
    the truncate and the rest at offset 12 when the block is closed.
    """
    calls_on_truncate = [
        call.seek(0),
        call.write(b"\x02\x00!\x01\x16\x00\x00\x00t/\xe5\xa3"),
    ]
    calls_on_close = [
        call.seek(12),
        call.write(b"\xe0\x00)\x00\x06]\x00\x00n,GH\x00\x00\x00\x00\xfb(o\xe4"),
    ]

    with XZBlock(fileobj_empty, 1, 0, 0) as block:
        block.truncate(42)
        assert block.tell() == 0
        assert fileobj_empty.method_calls == calls_on_truncate
        fileobj_empty.reset_mock()

    assert block.unpadded_size == 30
    assert block.uncompressed_size == 42

    assert fileobj_empty.method_calls == calls_on_close
438 |
439 |
@pytest.mark.parametrize("size", [0, 42, 100, 200])
def test_truncate_existing(fileobj: Mock, size: int) -> None:
    """Truncating a non-empty block is refused, whatever the requested size."""
    existing_block = XZBlock(fileobj, 1, 89, 100)
    # block is not empty, so not writable
    with pytest.raises(UnsupportedOperation):
        existing_block.truncate(size)
446 |
--------------------------------------------------------------------------------
/tests/unit/test_common.py:
--------------------------------------------------------------------------------
1 | from lzma import CHECK_CRC32, CHECK_CRC64, CHECK_NONE, CHECK_SHA256, is_check_supported
2 | from typing import List, Tuple
3 |
4 | import pytest
5 |
6 | from xz.common import (
7 | DEFAULT_CHECK,
8 | XZError,
9 | create_xz_header,
10 | create_xz_index_footer,
11 | decode_mbi,
12 | encode_mbi,
13 | pad,
14 | parse_xz_footer,
15 | parse_xz_header,
16 | parse_xz_index,
17 | round_up,
18 | )
19 |
# (value, hex-encoded bytes) pairs shared by test_encode_mbi and
# test_decode_mbi; each case is identified by the hexadecimal form of value.
MBI_CASE = tuple(
    pytest.param(value, data, id=hex(value))
    for value, data in (
        (0, "00"),
        (1, "01"),
        ((1 << 7) - 1, "7f"),
        (1 << 7, "8001"),
        ((1 << 7 * 2) - 1, "ff7f"),
        (1 << 7 * 2, "808001"),
        ((1 << 7 * 3) - 1, "ffff7f"),
        (1 << 7 * 3, "80808001"),
        ((1 << 7 * 10) - 1, "ffffffffffffffffff7f"),
        (1 << 7 * 10, "8080808080808080808001"),
        (9, "09"),
        (99, "63"),
        (999, "e707"),
        (9999, "8f4e"),
        (99999, "9f8d06"),
        (999999, "bf843d"),
        (9999999, "fface204"),
        (99999999, "ffc1d72f"),
        (999999999, "ff93ebdc03"),
    )
)
44 |
45 |
@pytest.mark.parametrize("value, data", MBI_CASE)
def test_encode_mbi(value: int, data: str) -> None:
    """Encoding ``value`` gives the bytes described by the hex string ``data``."""
    expected = bytes.fromhex(data)
    encoded = encode_mbi(value)
    assert encoded == expected
49 |
50 |
@pytest.mark.parametrize("value, data", MBI_CASE)
def test_decode_mbi(value: int, data: str) -> None:
    """Decoding returns ``(consumed size, value)`` and ignores trailing bytes."""
    encoded = bytes.fromhex(data)
    trailing = b"\xff\x00" * 10
    # two hex digits per byte
    expected_size = len(data) // 2
    assert decode_mbi(encoded + trailing) == (expected_size, value)
54 |
55 |
@pytest.mark.parametrize("data", ("", "81828384"), ids=("empty", "truncated"))
def test_decode_mbi_invalid(data: str) -> None:
    """Empty or truncated input is rejected with "invalid mbi"."""
    raw = bytes.fromhex(data)
    with pytest.raises(XZError) as exc_info:
        decode_mbi(raw)
    message = str(exc_info.value)
    assert message == "invalid mbi"
61 |
62 |
@pytest.mark.parametrize(
    "value, expected",
    ((0, 0), (1, 4), (2, 4), (3, 4), (4, 4), (5, 8), (6, 8), (7, 8), (8, 8)),
)
def test_round_up(value: int, expected: int) -> None:
    """``round_up`` returns the next multiple of 4 for the values 0 to 8."""
    rounded = round_up(value)
    assert rounded == expected
69 |
70 |
@pytest.mark.parametrize(
    "value, padding",
    (
        (0, ""),
        (1, "000000"),
        (2, "0000"),
        (3, "00"),
        (4, ""),
        (5, "000000"),
        (6, "0000"),
        (7, "00"),
        (8, ""),
    ),
)
def test_pad(value: int, padding: str) -> None:
    """``pad`` returns the null bytes completing ``value`` to a multiple of 4."""
    expected_padding = bytes.fromhex(padding)
    assert pad(value) == expected_padding
    # sanity check of the test case itself
    padded_length = value + len(expected_padding)
    assert padded_length % 4 == 0
90 |
91 |
# (check, hex-encoded header) pairs shared by test_create_xz_header and
# test_parse_xz_header.
XZ_HEADER_CASES = (
    pytest.param(CHECK_NONE, "fd377a585a000000ff12d941", id="check_none"),
    pytest.param(CHECK_CRC32, "fd377a585a0000016922de36", id="check_crc32"),
    pytest.param(CHECK_CRC64, "fd377a585a000004e6d6b446", id="check_crc64"),
    pytest.param(CHECK_SHA256, "fd377a585a00000ae1fb0ca1", id="check_sha256"),
)
98 |
99 |
@pytest.mark.parametrize("check, data", XZ_HEADER_CASES)
def test_create_xz_header(check: int, data: str) -> None:
    """The header created for ``check`` equals the reference bytes."""
    expected = bytes.fromhex(data)
    header = create_xz_header(check)
    assert header == expected
103 |
104 |
def test_create_xz_header_invalid_check() -> None:
    """Creating a header with check 17 is rejected with "header check"."""
    invalid_check = 17
    with pytest.raises(XZError) as exc_info:
        create_xz_header(invalid_check)
    message = str(exc_info.value)
    assert message == "header check"
109 |
110 |
@pytest.mark.parametrize("check, data", XZ_HEADER_CASES)
def test_parse_xz_header(check: int, data: str) -> None:
    """Parsing the reference header bytes gives back ``check``."""
    raw_header = bytes.fromhex(data)
    parsed_check = parse_xz_header(raw_header)
    assert parsed_check == check
114 |
115 |
@pytest.mark.parametrize(
    "data, message",
    (
        ("fd377a585a0000016922de3600", "header length"),
        ("f1377a585a000000ff12d941", "header magic"),
        ("fd377a585a0000016942de36", "header crc32"),
        ("fd377a585a0000110d32692b", "header flags"),
        ("fd377a585a0001012813c52f", "header flags"),
        ("fd377a585a00100138301c7c", "header flags"),
    ),
)
def test_parse_xz_header_invalid(data: str, message: str) -> None:
    """Each malformed header is rejected with the expected error message."""
    raw_header = bytes.fromhex(data)
    with pytest.raises(XZError) as exc_info:
        parse_xz_header(raw_header)
    actual_message = str(exc_info.value)
    assert actual_message == message
131 |
132 |
# (records, hex-encoded index) pairs shared by test_create_xz_index and
# test_parse_xz_index.
XZ_INDEX_CASES = (
    # all have check=1
    pytest.param([], "000000001cdf4421", id="empty"),
    pytest.param([(24, 4)], "000118046be9f0a5", id="one-small-block"),
    pytest.param([(2062, 20280)], "00018e10b89e010039f45fb1", id="one-big-block"),
    pytest.param(
        [(73, 60), (73, 60), (73, 60), (56, 30)],
        "0004493c493c493c381e0000b6ec1657",
        id="several-small-blocks",
    ),
    pytest.param(
        [(1, 2), (11, 2222), (1111, 22222222), (11111111, 2222222222222222)],
        "000401020bae11d7088eabcc0ac795a6058ec7abf196a3f903000000c9647142",
        id="several-blocks-various-sizes",
    ),
)
149 |
150 |
@pytest.mark.parametrize("records, data", XZ_INDEX_CASES)
def test_create_xz_index(records: List[Tuple[int, int]], data: str) -> None:
    """The index part of the created index+footer equals the reference bytes."""
    index_and_footer = create_xz_index_footer(1, records)
    # the last 12 bytes are not part of the index under test
    index = index_and_footer[:-12]
    assert index == bytes.fromhex(data)
154 |
155 |
def test_create_xz_index_invalid() -> None:
    """A record with an unpadded size of 0 is rejected."""
    records = [(73, 60), (0, 12), (56, 30)]
    with pytest.raises(XZError) as exc_info:
        create_xz_index_footer(1, records)
    message = str(exc_info.value)
    assert message == "index record unpadded size"
160 |
161 |
@pytest.mark.parametrize("records, data", XZ_INDEX_CASES)
def test_parse_xz_index(records: List[Tuple[int, int]], data: str) -> None:
    """Parsing the reference index bytes gives back ``records``."""
    raw_index = bytes.fromhex(data)
    parsed_records = parse_xz_index(raw_index)
    assert parsed_records == records
165 |
166 |
@pytest.mark.parametrize(
    "data, message",
    (
        ("0000001cdf4421", "index length"),
        ("420000001cdf4421", "index indicator"),
        ("000000001cdf4221", "index crc32"),
        ("000218043257b6a7", "index size"),
        ("000100043271eb27", "index record unpadded size"),
        ("000188047163b1d4", "index size"),
        ("000104002f70ea44", "index record uncompressed size"),
        ("000180180400420096a658c0", "index padding"),
    ),
)
def test_parse_xz_index_invalid(data: str, message: str) -> None:
    """Each malformed index is rejected with the expected error message."""
    raw_index = bytes.fromhex(data)
    with pytest.raises(XZError) as exc_info:
        parse_xz_index(raw_index)
    actual_message = str(exc_info.value)
    assert actual_message == message
184 |
185 |
# (check, hex-encoded footer) pairs shared by test_create_xz_footer and
# test_parse_xz_footer.
XZ_FOOTER_CASES = (
    # all have backward_size=8 (i.e. no blocks)
    pytest.param(CHECK_NONE, "06729e7a010000000000595a", id="check_none"),
    pytest.param(CHECK_CRC32, "9042990d010000000001595a", id="check_crc32"),
    pytest.param(CHECK_CRC64, "1fb6f37d010000000004595a", id="check_crc64"),
    pytest.param(CHECK_SHA256, "189b4b9a01000000000a595a", id="check_sha256"),
)
193 |
194 |
@pytest.mark.parametrize("check, data", XZ_FOOTER_CASES)
def test_create_xz_footer(check: int, data: str) -> None:
    """The last 12 bytes created for an empty index equal the reference footer."""
    index_and_footer = create_xz_index_footer(check, [])
    footer = index_and_footer[-12:]
    assert footer == bytes.fromhex(data)
198 |
199 |
def test_create_xz_footer_invalid_check() -> None:
    """Creating a footer with check 17 is rejected with "footer check"."""
    invalid_check = 17
    with pytest.raises(XZError) as exc_info:
        create_xz_index_footer(invalid_check, [])
    message = str(exc_info.value)
    assert message == "footer check"
204 |
205 |
@pytest.mark.parametrize("check, data", XZ_FOOTER_CASES)
def test_parse_xz_footer(check: int, data: str) -> None:
    """Parsing the reference footer gives ``check`` and a backward size of 8."""
    raw_footer = bytes.fromhex(data)
    parsed = parse_xz_footer(raw_footer)
    assert parsed == (check, 8)
209 |
210 |
@pytest.mark.parametrize(
    "data, message",
    (
        ("009042990d010000000001595a", "footer length"),
        ("9042990d0100000000015959", "footer magic"),
        ("9042090d010000000001595a", "footer crc32"),
        ("f4522e10010000000011595a", "footer flags"),
        ("d1738214010000000101595a", "footer flags"),
        ("c1505b47010000001001595a", "footer flags"),
    ),
)
def test_parse_xz_footer_invalid(data: str, message: str) -> None:
    """Each malformed footer is rejected with the expected error message."""
    raw_footer = bytes.fromhex(data)
    with pytest.raises(XZError) as exc_info:
        parse_xz_footer(raw_footer)
    actual_message = str(exc_info.value)
    assert actual_message == message
226 |
227 |
def test_default_check_supported() -> None:
    """DEFAULT_CHECK must be reported as supported."""
    supported = is_check_supported(DEFAULT_CHECK)
    assert supported
230 |
--------------------------------------------------------------------------------
/tests/unit/test_floordict.py:
--------------------------------------------------------------------------------
1 | from typing import Dict
2 |
3 | import pytest
4 |
5 | from xz.utils import FloorDict
6 |
7 |
def expect_floor_dict(floordict: FloorDict[str], items: Dict[int, str]) -> None:
    """Assert that ``floordict`` holds exactly ``items``, iterated in key order.

    Checks the public mapping views as well as the private ``_keys`` and
    ``_dict`` attributes.
    """
    ordered_keys = sorted(items)
    ordered_values = [items[key] for key in ordered_keys]

    assert len(floordict) == len(items)
    assert list(floordict) == ordered_keys
    assert list(floordict.keys()) == ordered_keys
    assert list(floordict.values()) == ordered_values
    assert list(floordict.items()) == list(zip(ordered_keys, ordered_values))
    # pylint: disable=protected-access
    assert floordict._keys == ordered_keys
    assert floordict._dict == items
18 |
19 |
def test_empty() -> None:
    """An empty FloorDict has no items; lookups and last_* raise KeyError."""
    floordict = FloorDict[str]()

    expect_floor_dict(floordict, {})

    for key in (0, 42):
        with pytest.raises(KeyError):
            floordict[key]  # pylint: disable=pointless-statement
    with pytest.raises(KeyError):
        floordict.last_key  # pylint: disable=pointless-statement
    with pytest.raises(KeyError):
        floordict.last_item  # pylint: disable=pointless-statement
33 |
34 |
def test_normal() -> None:
    """Lookups resolve to the greatest stored key not above the requested one."""
    floordict = FloorDict[str]()
    floordict[10] = "ten"
    floordict[50] = "fifty"
    with pytest.raises(TypeError):
        floordict["wrong type"] = "wrong type"  # type: ignore[index]

    expect_floor_dict(floordict, {10: "ten", 50: "fifty"})

    # exact key and "last" accessors
    assert floordict[10] == "ten"
    assert floordict.last_key == 50
    assert floordict.last_item == "fifty"

    # a key between two entries falls back to the lower entry
    assert floordict[42] == "ten"
    assert floordict.get_with_index(42) == (10, "ten")

    # the last entry covers every key from 50 upwards
    assert floordict[50] == "fifty"
    assert floordict[1337] == "fifty"

    # keys below the first entry are missing
    for missing in (0, 7):
        assert floordict.get(missing) is None
        with pytest.raises(KeyError):
            floordict[missing]  # pylint: disable=pointless-statement
    with pytest.raises(KeyError):
        floordict[-42]  # pylint: disable=pointless-statement

    with pytest.raises(TypeError):
        # pylint: disable=pointless-statement
        floordict["wrong type"]  # type: ignore[index]
64 |
65 |
def test_override() -> None:
    """Re-assigning an existing key replaces its value and nothing else."""
    floordict = FloorDict[str]()
    for key, word in ((10, "ten"), (20, "twenty"), (30, "thirty")):
        floordict[key] = word

    expect_floor_dict(floordict, {10: "ten", 20: "twenty", 30: "thirty"})

    floordict[20] = "two-ten"
    for key, word in ((15, "ten"), (20, "two-ten"), (25, "two-ten"), (50, "thirty")):
        assert floordict[key] == word

    expect_floor_dict(floordict, {10: "ten", 20: "two-ten", 30: "thirty"})
81 |
82 |
def test_del() -> None:
    """Deleting needs an exact key; afterwards lookups use the previous entry."""
    floordict = FloorDict[str]()
    for key, word in ((10, "ten"), (20, "twenty"), (30, "thirty")):
        floordict[key] = word
    for key in (20, 22):
        assert floordict[key] == "twenty"
    expect_floor_dict(floordict, {10: "ten", 20: "twenty", 30: "thirty"})

    del floordict[20]
    for key in (20, 22):
        assert floordict[key] == "ten"
    expect_floor_dict(floordict, {10: "ten", 30: "thirty"})

    # neither an already-deleted key nor a never-stored key can be deleted
    for absent in (20, 40):
        with pytest.raises(KeyError):
            del floordict[absent]
101 |
102 |
def test_pop() -> None:
    """``pop`` only accepts exact keys, unlike item lookup."""
    floordict = FloorDict[str]()
    for key, word in ((10, "ten"), (20, "twenty"), (30, "thirty")):
        floordict[key] = word
    assert floordict[25] == "twenty"
    expect_floor_dict(floordict, {10: "ten", 20: "twenty", 30: "thirty"})

    # 25 resolves to "twenty" on lookup, but it is not a stored key
    with pytest.raises(KeyError):
        floordict.pop(25)

    popped = floordict.pop(20)
    assert popped == "twenty"
    expect_floor_dict(floordict, {10: "ten", 30: "thirty"})
    assert floordict[25] == "ten"
117 |
118 |
def test_values() -> None:
    """After each insertion of an even key, check lookups for keys 0..99."""
    floordict = FloorDict[str]()
    expected = {}
    for i in range(50):
        newest_key = i * 2
        floordict[newest_key] = str(newest_key)
        expected[newest_key] = str(newest_key)
        expect_floor_dict(floordict, expected)
        for j in range(100):
            # floor of j among the even keys inserted so far
            value = min(newest_key, j - (j % 2))
            label = str(value)
            assert floordict[j] == label
            assert floordict.get_with_index(j) == (value, label)
130 |
--------------------------------------------------------------------------------
/tests/unit/test_ioabstract.py:
--------------------------------------------------------------------------------
1 | from io import DEFAULT_BUFFER_SIZE, UnsupportedOperation
2 | from pathlib import Path
3 | from typing import BinaryIO
4 | from unittest.mock import Mock, call
5 |
6 | import pytest
7 |
8 | from xz.io import IOAbstract
9 |
10 | #
11 | # len
12 | #
13 |
14 |
def test_len() -> None:
    """``len`` returns the length passed to the constructor."""
    length = 10
    assert len(IOAbstract(length)) == length
18 |
19 |
20 | #
21 | # fileno
22 | #
23 |
24 |
def test_fileno(tmp_path: Path) -> None:
    """``fileno`` returns the descriptor of the ``fileobj`` attribute."""

    class Impl(IOAbstract):
        def __init__(self, fileobj: BinaryIO) -> None:
            super().__init__(10)
            self.fileobj = fileobj

    target = tmp_path / "file"
    target.write_bytes(b"abcd")

    with target.open("rb") as fin:
        wrapper = Impl(fin)
        assert wrapper.fileno() == fin.fileno()
37 |
38 |
def test_fileno_ko() -> None:
    """A plain IOAbstract raises UnsupportedOperation on ``fileno``."""
    plain = IOAbstract(10)
    with pytest.raises(UnsupportedOperation):
        plain.fileno()
43 |
44 |
45 | #
46 | # tell / seek
47 | #
48 |
49 |
def test_seek_not_seekable() -> None:
    """``seek`` raises UnsupportedOperation("seek") when ``seekable`` is False."""

    class Impl(IOAbstract):
        def __init__(self) -> None:
            super().__init__(10)

        def seekable(self) -> bool:
            return False

    unseekable = Impl()
    assert unseekable.seekable() is False
    with pytest.raises(UnsupportedOperation) as raised:
        unseekable.seek(1)
    assert str(raised.value) == "seek"
63 |
64 |
def test_tell_seek() -> None:
    """Exercise ``seek``/``tell`` for each whence value, then after close."""
    obj = IOAbstract(10)
    assert obj.seekable() is True
    assert obj.tell() == 0

    def expect_invalid_position(*seek_args: int) -> None:
        # the seek must be refused with the "invalid seek position" error
        with pytest.raises(ValueError) as raised:
            obj.seek(*seek_args)
        assert str(raised.value) == "invalid seek position"

    # absolute (no whence)
    for position in (1, 3, 10):
        assert obj.seek(position) == position
        assert obj.tell() == position
    expect_invalid_position(-1)
    assert obj.seek(42) == 42
    assert obj.tell() == 42

    # absolute (with whence)
    for position in (5, 10):
        assert obj.seek(position, 0) == position
        assert obj.tell() == position
    expect_invalid_position(-1, 0)
    assert obj.seek(42, 0) == 42
    assert obj.tell() == 42

    # relative
    assert obj.seek(10) == 10
    for offset, position in ((-7, 3), (2, 5)):
        assert obj.seek(offset, 1) == position
        assert obj.tell() == position
    expect_invalid_position(-6, 1)
    assert obj.tell() == 5  # position is unchanged by the refused seek
    assert obj.seek(37, 1) == 42
    assert obj.tell() == 42

    # from end
    for offset, position in ((0, 10), (-4, 6), (-10, 0), (32, 42)):
        assert obj.seek(offset, 2) == position
        assert obj.tell() == position
    expect_invalid_position(-11, 2)

    # unknown whence value
    with pytest.raises(ValueError) as exc_info:
        obj.seek(42, 3)
    assert str(exc_info.value) == "unsupported whence value"

    # seek after close
    obj.close()
    with pytest.raises(ValueError) as exc_info:
        obj.seek(0)
    assert str(exc_info.value) == "I/O operation on closed file"
130 |
131 |
132 | #
133 | # read
134 | #
135 |
136 |
def test_read_non_readable() -> None:
    """``read`` raises UnsupportedOperation("read") when ``readable`` is False."""

    class Impl(IOAbstract):
        def __init__(self) -> None:
            super().__init__(10)

        def readable(self) -> bool:
            return False

    unreadable = Impl()
    assert unreadable.readable() is False
    with pytest.raises(UnsupportedOperation) as raised:
        unreadable.read(1)
    assert str(raised.value) == "read"
150 |
151 |
def test_tell_read() -> None:
    """``read`` assembles ``_read`` chunks up to the length, from any position."""

    class Impl(IOAbstract):
        def __init__(self) -> None:
            super().__init__(10)

        def _read(self, size: int) -> bytes:
            # for tests, does not rely on position
            return b"xyz"[:size]

        def _write_after(self) -> None:
            raise RuntimeError("should not be called")

    obj = Impl()
    assert obj.tell() == 0

    # read all
    assert obj.read() == b"xyzxyzxyzx"
    obj.seek(5)
    assert obj.read() == b"xyzxy"

    # read from pos
    obj.seek(3)
    for chunk in (b"xy", b"xy", b"xy", b"x", b"", b""):
        assert obj.read(2) == chunk

    # read from after EOF
    obj.seek(11)
    assert obj.read(2) == b""

    # read after close
    obj.close()
    with pytest.raises(ValueError) as raised:
        obj.read(1)
    assert str(raised.value) == "I/O operation on closed file"
190 |
191 |
def test_tell_read_empty() -> None:
    """``read`` still returns all the data when ``_read`` first yields empty chunks."""

    class Impl(IOAbstract):
        def __init__(self) -> None:
            super().__init__(10)
            self.empty_reads = 100

        def _read(self, size: int) -> bytes:
            # answer with nothing until the counter runs out, then one byte at a time
            self.empty_reads -= 1
            return b"" if self.empty_reads > 0 else b"a"

    obj = Impl()
    assert obj.tell() == 0
    assert obj.read() == b"aaaaaaaaaa"
207 |
208 |
209 | #
210 | # write
211 | #
212 |
213 |
def test_write_non_writeable() -> None:
    """``write`` raises UnsupportedOperation("write") when ``writable`` is False."""

    class Impl(IOAbstract):
        def __init__(self) -> None:
            super().__init__(10)

        def writable(self) -> bool:
            return False

    with Impl() as readonly:
        assert readonly.writable() is False
        with pytest.raises(UnsupportedOperation) as raised:
            readonly.write(b"hello")
        assert str(raised.value) == "write"
227 |
228 |
@pytest.mark.parametrize("write_partial", (True, False))
def test_write_full(write_partial: bool) -> None:
    """Check the hook calls made by ``write`` at, after and before EOF.

    When ``write_partial`` is set, ``_write`` accepts at most two bytes per
    call, so every payload is expected to be offered again minus the bytes
    already taken.
    """

    class Impl(IOAbstract):
        def __init__(self) -> None:
            super().__init__(10)
            self.mock = Mock()

        def _write_before(self) -> None:
            self.mock.write_start()

        def _write_after(self) -> None:
            self.mock.write_finish()

        def _write(self, data: bytes) -> int:
            self.mock.write(bytes(data))
            accepted = len(data)
            if write_partial:
                accepted = min(2, accepted)
            return accepted

    with Impl() as obj:
        # write before end
        obj.seek(5)
        with pytest.raises(ValueError) as exc_info:
            obj.write(b"abcdef")
        assert str(exc_info.value) == "write is only supported from EOF"
        # NOTE(review): `called` only reflects direct calls of the Mock object
        # itself, not calls to its child methods - confirm this is the intended
        # check (as opposed to `method_calls`).
        assert not obj.mock.called

        # write at end
        obj.seek(10)
        assert obj.write(b"") == 0
        assert obj.tell() == 10
        assert not obj.mock.called
        assert obj.write(b"ghijkl") == 6
        assert obj.tell() == 16
        if write_partial:
            expected_calls = [
                call.write_start(),
                call.write(b"ghijkl"),
                call.write(b"ijkl"),
                call.write(b"kl"),
            ]
        else:
            expected_calls = [
                call.write_start(),
                call.write(b"ghijkl"),
            ]
        assert obj.mock.method_calls == expected_calls
        obj.mock.reset_mock()

        # write after end
        obj.seek(20)
        assert obj.write(b"mnopq") == 5
        assert obj.tell() == 25
        if write_partial:
            expected_calls = [
                call.write(b"\x00\x00\x00\x00"),
                call.write(b"\x00\x00"),
                call.write(b"mnopq"),
                call.write(b"opq"),
                call.write(b"q"),
            ]
        else:
            expected_calls = [
                call.write(b"\x00\x00\x00\x00"),
                call.write(b"mnopq"),
            ]
        assert obj.mock.method_calls == expected_calls
        obj.mock.reset_mock()

        # (big) write nothing after end (used e.g. by truncate)
        limit = 30 if write_partial else int(DEFAULT_BUFFER_SIZE * 3.7)
        obj.seek(limit)
        assert obj.write(b"") == 0
        assert obj.tell() == limit
        if write_partial:
            expected_calls = [
                call.write(b"\x00\x00\x00\x00\x00"),
                call.write(b"\x00\x00\x00"),
                call.write(b"\x00"),
            ]
        else:
            full_chunk = b"\x00" * DEFAULT_BUFFER_SIZE
            expected_calls = [
                call.write(full_chunk),
                call.write(full_chunk),
                call.write(full_chunk),
                call.write(b"\x00" * (limit - 3 * DEFAULT_BUFFER_SIZE - 25)),
            ]
        assert obj.mock.method_calls == expected_calls
        obj.mock.reset_mock()

        # close calls write_finish once
        obj.close()
        assert obj.mock.method_calls == [call.write_finish()]
        obj.mock.reset_mock()
        obj.close()
        assert not obj.mock.method_calls
        obj.close()

        # write after close
        with pytest.raises(ValueError) as exc_info:
            obj.write(b"xyz")
        assert str(exc_info.value) == "I/O operation on closed file"
328 |
329 |
330 | #
331 | # truncate
332 | #
333 |
334 |
def test_truncate_non_writeable() -> None:
    """``truncate`` raises UnsupportedOperation("truncate") when not writable."""

    class Impl(IOAbstract):
        def __init__(self) -> None:
            super().__init__(10)

        def writable(self) -> bool:
            return False

    with Impl() as readonly:
        assert readonly.writable() is False
        with pytest.raises(UnsupportedOperation) as raised:
            readonly.truncate(4)
        assert str(raised.value) == "truncate"
348 |
349 |
@pytest.mark.parametrize("with_size", (True, False))
def test_truncate_with_size(with_size: bool) -> None:
    """Check the hook calls made by ``truncate``.

    The size is passed explicitly when ``with_size`` is set; otherwise the
    object is first seeked to the size and ``truncate()`` is called bare.
    """

    class Impl(IOAbstract):
        def __init__(self) -> None:
            super().__init__(10)
            self.mock = Mock()

        def _write_before(self) -> None:
            self.mock.write_start()

        def _write_after(self) -> None:
            self.mock.write_finish()

        def _write(self, data: bytes) -> int:
            raise RuntimeError("should not be called")

        def _truncate(self, size: int) -> None:
            self.mock.truncate(size)

    with Impl() as obj:
        obj.seek(7)
        assert not obj.mock.method_calls

        def truncate(size: int) -> int:
            if not with_size:
                obj.seek(size)
                return obj.truncate()
            return obj.truncate(size)

        # truncate before start
        with pytest.raises(ValueError) as exc_info:
            obj.truncate(-1)
        assert str(exc_info.value) == "invalid truncate size"
        assert not obj.mock.method_calls

        scenarios = (
            # truncate before end
            (5, [call.write_start(), call.truncate(5)]),
            # truncate at end
            (5, []),
            # truncate after end
            (20, [call.truncate(20)]),
        )
        for size, expected_calls in scenarios:
            assert truncate(size) == size
            if with_size:
                # an explicit size does not move the position
                assert obj.tell() == 7
            assert len(obj) == size
            assert obj.mock.method_calls == expected_calls
            obj.mock.reset_mock()

        # close calls write_finish once
        obj.close()
        assert obj.mock.method_calls == [call.write_finish()]
        obj.mock.reset_mock()
        obj.close()
        assert not obj.mock.method_calls

        # truncate after close
        with pytest.raises(ValueError) as exc_info:
            obj.truncate(5)
        assert str(exc_info.value) == "I/O operation on closed file"
417 |
--------------------------------------------------------------------------------
/tests/unit/test_iocombiner.py:
--------------------------------------------------------------------------------
1 | from io import SEEK_SET, BytesIO
2 | from typing import List, cast
3 | from unittest.mock import Mock, call
4 |
5 | import pytest
6 |
7 | from xz.io import IOAbstract, IOCombiner, IOProxy
8 |
9 |
def generate_mock(length: int) -> Mock:
    """Build a Mock posing as an IOAbstract of ``length`` bytes.

    The mock reports itself as writable, and every ``write`` grows its
    length by the size of the data and returns that size.
    """
    fileobj = Mock()
    fileobj.__class__ = cast(Mock, IOAbstract)  # needs to be subclass of IOAbstract
    fileobj._length = length  # pylint: disable=protected-access

    def current_length(self: Mock) -> int:
        return self._length  # pylint: disable=protected-access

    def fake_write(data: bytes) -> int:
        written = len(data)
        fileobj._length += written
        return written

    fileobj.__len__ = current_length
    fileobj.write.side_effect = fake_write
    fileobj.writable.return_value = True
    return fileobj
23 |
24 |
25 | #
26 | # tell / seek
27 | #
28 |
29 |
def test_seek() -> None:
    """Seeking the combiner moves its position without calling the parts."""
    originals = [generate_mock(length) for length in (2, 0, 8)]
    combiner = IOCombiner(*originals)

    assert combiner.tell() == 0
    assert combiner.seek(7) == 7
    assert combiner.tell() == 7

    # did not touch originals
    assert all(not original.method_calls for original in originals)
44 |
45 |
46 | #
47 | # read
48 | #
49 |
50 |
def test_read() -> None:
    """Reads span the parts, skip the empty one, and ignore stale positions."""
    head = IOProxy(BytesIO(b"abc"), 0, 3)
    unused = generate_mock(0)  # size 0, will be never used
    tail = IOProxy(BytesIO(b"defghij"), 0, 7)
    originals: List[IOAbstract] = [head, unused, tail]
    combiner = IOCombiner(*originals)

    def check_positions(head_pos: int, tail_pos: int) -> None:
        assert originals[0].tell() == head_pos
        assert originals[2].tell() == tail_pos

    # read all
    originals[0].seek(2)
    originals[2].seek(4)
    combiner.seek(0)
    assert combiner.read() == b"abcdefghij"
    check_positions(3, 7)
    combiner.seek(4)
    assert combiner.read() == b"efghij"
    check_positions(3, 7)

    # read partial
    originals[0].seek(2)
    originals[2].seek(4)
    combiner.seek(1)
    assert combiner.read(6) == b"bcdefg"
    check_positions(3, 4)
    assert combiner.read(6) == b"hij"
    check_positions(3, 7)
    for _ in range(2):
        assert combiner.read(6) == b""
        check_positions(3, 7)

    # with original seek
    combiner.seek(1)
    originals[0].seek(2)
    originals[2].seek(4)
    assert combiner.read(5) == b"bcdef"
    check_positions(3, 3)

    # never used at all
    assert not cast(Mock, originals[1]).method_calls
98 |
99 |
100 | #
101 | # write
102 | #
103 |
104 |
def test_write() -> None:
    """Check which calls the combiner makes on its parts while writing.

    ``parts`` collects every fileobj created through ``_create_fileobj`` so
    that the calls received by each of them can be inspected.
    """
    parts: List[Mock] = []

    class Combiner(IOCombiner[IOAbstract]):
        def _create_fileobj(self) -> IOAbstract:
            fileobj = generate_mock(0)
            parts.append(fileobj)
            return fileobj

    with Combiner() as combiner:
        assert combiner.writable()
        assert len(parts) == 0

        # create new from scratch
        combiner.write(b"abc")
        assert len(parts) == 1
        assert parts[0].method_calls == [
            call.seek(0, SEEK_SET),
            call.write(memoryview(b"abc")),
        ]
        parts[0].method_calls.clear()

        combiner.write(b"def")
        assert len(parts) == 1
        assert parts[0].method_calls == [
            call.seek(3, SEEK_SET),
            call.writable(),
            call.write(memoryview(b"def")),
        ]
        parts[0].method_calls.clear()

        # writing past the end first pads with null bytes
        combiner.seek(8)
        combiner.write(b"ghi")
        assert len(parts) == 1
        assert parts[0].method_calls == [
            call.seek(6, SEEK_SET),
            call.writable(),
            call.write(memoryview(b"\x00\x00")),
            call.seek(8, SEEK_SET),
            call.writable(),
            call.write(memoryview(b"ghi")),
        ]
        parts[0].method_calls.clear()

        # not writable anymore -> create new fileobj
        parts[0].writable.return_value = False
        combiner.write(b"jkl")
        assert len(parts) == 2
        assert parts[0].method_calls == [
            call.seek(11, SEEK_SET),
            call.writable(),
            call.writable(),
        ]
        assert parts[1].method_calls == [
            call.seek(0, SEEK_SET),
            call.write(memoryview(b"jkl")),
        ]
        parts[0].method_calls.clear()
        parts[1].method_calls.clear()

        combiner.write(b"mno")
        assert len(parts) == 2
        assert not parts[0].method_calls
        assert parts[1].method_calls == [
            call.seek(3, SEEK_SET),
            call.writable(),
            call.write(memoryview(b"mno")),
        ]
        parts[1].method_calls.clear()

        # force change fileobj
        combiner._change_fileobj()  # pylint: disable=protected-access
        assert len(parts) == 3
        assert not parts[0].method_calls
        assert parts[1].method_calls == [
            call.writable(),
            call._write_end(),  # pylint: disable=protected-access
        ]
        assert not parts[2].method_calls
        parts[1].method_calls.clear()

        # force change fileobj again
        combiner._change_fileobj()  # pylint: disable=protected-access
        assert len(parts) == 4
        assert not parts[0].method_calls
        assert not parts[1].method_calls
        assert not parts[2].method_calls  # no call to _write_end
        assert not parts[3].method_calls

        combiner.write(b"pqr")
        assert len(parts) == 4
        assert not parts[0].method_calls
        assert not parts[1].method_calls
        assert not parts[2].method_calls
        assert parts[3].method_calls == [
            call.seek(0, SEEK_SET),
            call.writable(),
            call.write(memoryview(b"pqr")),
        ]
        parts[3].method_calls.clear()

        # don't create fileobj if write nothing
        # NOTE(review): this flags parts[1], not the current last fileobj
        # (parts[3]) - confirm which one was meant.
        parts[1].writable.return_value = False
        combiner.write(b"")
        assert len(parts) == 4
        assert not parts[0].method_calls
        assert not parts[1].method_calls
        assert not parts[2].method_calls
        assert not parts[3].method_calls

    # check write_finish
    assert not parts[0].method_calls
    assert not parts[1].method_calls
    assert not parts[2].method_calls
    assert parts[3].method_calls == [
        call._write_end(),  # pylint: disable=protected-access
    ]

    # check if last fileobj is empty no calls to _write_end
    # Start from a clean list: otherwise parts[0] and parts[1] would still be
    # the fileobjs of the combiner above and the assertions below would not
    # look at the fileobjs created here.
    parts.clear()
    with Combiner() as combiner:
        combiner.write(b"abc")
        combiner._change_fileobj()  # pylint: disable=protected-access
        parts[0].method_calls.clear()
        assert not parts[1].method_calls
    assert not parts[0].method_calls
    assert not parts[1].method_calls  # no calls to _write_end
232 |
233 |
234 | #
235 | # truncate
236 | #
237 |
238 |
def test_truncate() -> None:
    """Truncating forwards to the part holding the cut and drops later parts."""
    # pylint: disable=protected-access
    originals = [generate_mock(length) for length in (2, 0, 8, 10, 20)]

    with IOCombiner(*originals) as combiner:

        def fileobjs_by_offset() -> dict:
            return dict(combiner._fileobjs)

        # truncate between two boundaries
        combiner.truncate(17)
        assert originals[3].method_calls == [call.truncate(7)]
        assert not originals[4].method_calls
        assert fileobjs_by_offset() == {
            0: originals[0],
            2: originals[2],
            10: originals[3],
        }
        originals[3].reset_mock()

        # truncate after length
        combiner.truncate(42)
        assert originals[3].method_calls == [call.truncate(32)]
        assert fileobjs_by_offset() == {
            0: originals[0],
            2: originals[2],
            10: originals[3],
        }
        originals[3].reset_mock()

        # truncate at boundary
        combiner.truncate(10)
        assert fileobjs_by_offset() == {
            0: originals[0],
            2: originals[2],
        }
        for index in (2, 3):
            assert not originals[index].method_calls

        # truncate at boundary
        combiner.truncate(2)
        assert fileobjs_by_offset() == {
            0: originals[0],
        }
        for index in (0, 1, 2):
            assert not originals[index].method_calls

        # truncate at start
        combiner.truncate(0)
        assert not fileobjs_by_offset()
        assert not originals[0].method_calls
293 |
294 |
295 | #
296 | # append
297 | #
298 |
299 |
def test_append() -> None:
    """An appended part extends the length and is readable at its offset."""
    combiner = IOCombiner[IOAbstract](generate_mock(13), generate_mock(37))
    assert len(combiner) == 50

    extra = IOProxy(BytesIO(b"abcdefghij"), 0, 10)
    combiner._append(extra)  # pylint: disable=protected-access
    assert len(combiner) == 60

    # offset 54 is 4 bytes into the appended part
    combiner.seek(54)
    assert combiner.read(4) == b"efgh"
309 |
310 |
def test_append_invalid() -> None:
    """Appending a plain BytesIO is refused with TypeError."""
    combiner = IOCombiner[IOAbstract](generate_mock(13), generate_mock(37))
    assert len(combiner) == 50

    not_io_abstract = BytesIO(b"abcdefghij")
    with pytest.raises(TypeError):
        # pylint: disable=protected-access
        combiner._append(not_io_abstract)  # type: ignore[arg-type]
317 |
--------------------------------------------------------------------------------
/tests/unit/test_ioproxy.py:
--------------------------------------------------------------------------------
1 | from io import BytesIO
2 | from pathlib import Path
3 | from unittest.mock import Mock, call
4 |
5 | from xz.io import IOProxy
6 |
7 |
def test_fileno(tmp_path: Path) -> None:
    """The proxy reports the descriptor of the file it wraps."""
    target = tmp_path / "file"
    target.write_bytes(b"abcd")

    with target.open("rb") as fin:
        proxy = IOProxy(fin, 1, 3)
        assert proxy.fileno() == fin.fileno()
15 |
16 |
def test_seek() -> None:
    """Seeking the proxy moves its position without calling the original."""
    original = Mock()
    proxy = IOProxy(original, 4, 14)

    assert proxy.tell() == 0
    new_position = proxy.seek(7)
    assert new_position == 7
    assert proxy.tell() == 7

    # did not touch original
    assert not original.method_calls
26 |
27 |
def test_read() -> None:
    """Reads return only bytes 4..14 of the original, whatever its position."""
    original = BytesIO(b"xxxxabcdefghijyyyyy")
    proxy = IOProxy(original, 4, 14)

    # read all
    original.seek(2)
    proxy.seek(0)
    assert proxy.read() == b"abcdefghij"
    assert original.tell() == 14
    proxy.seek(4)
    assert proxy.read() == b"efghij"
    assert original.tell() == 14

    # read partial
    original.seek(2)
    proxy.seek(6)
    partial_reads = ((b"ghi", 13), (b"j", 14), (b"", 14), (b"", 14))
    for chunk, original_position in partial_reads:
        assert proxy.read(3) == chunk
        assert original.tell() == original_position

    # with original seek
    original.seek(2)
    proxy.seek(4)
    original.seek(1)
    assert proxy.read() == b"efghij"
    assert original.tell() == 14
59 |
60 |
def test_write() -> None:
    """Writes at the proxy's end land right after offset 14 of the original."""
    original = BytesIO(b"xxxxabcdefghijyyyyy")
    with IOProxy(original, 4, 14) as proxy:
        proxy.seek(10)

        # overwrites part of the trailing bytes
        written = proxy.write(b"uvw")
        assert written == 3
        assert original.getvalue() == b"xxxxabcdefghijuvwyy"

        # grows the original past its previous end
        written = proxy.write(b"UVWXYZ")
        assert written == 6
        assert original.getvalue() == b"xxxxabcdefghijuvwUVWXYZ"
71 |
72 |
def test_truncate() -> None:
    """Truncate sizes are shifted by the proxy's start offset (4)."""
    original = Mock()
    with IOProxy(original, 4, 14) as proxy:
        shrunk = proxy.truncate(5)
        assert shrunk == 5
        assert original.method_calls == [call.truncate(9)]
        original.reset_mock()

        grown = proxy.truncate(20)
        assert grown == 20
        assert original.method_calls == [call.truncate(24)]
82 |
--------------------------------------------------------------------------------
/tests/unit/test_iostatic.py:
--------------------------------------------------------------------------------
1 | from io import UnsupportedOperation
2 |
3 | import pytest
4 |
5 | from xz.io import IOStatic
6 |
7 |
def test_read() -> None:
    """Full and partial reads return the static bytes from the current position."""
    static = IOStatic(b"abcdefghij")

    # read all
    for start, remaining in ((0, b"abcdefghij"), (4, b"efghij")):
        static.seek(start)
        assert static.read() == remaining

    # read partial
    static.seek(6)
    for chunk in (b"ghi", b"j", b"", b""):
        assert static.read(3) == chunk
23 |
24 |
def test_write() -> None:
    """IOStatic is not writable; writing raises UnsupportedOperation."""
    with IOStatic(b"abc") as static:
        assert static.writable() is False
        end_of_data = 3
        static.seek(end_of_data)
        with pytest.raises(UnsupportedOperation):
            static.write(b"def")
31 |
32 |
def test_truncate() -> None:
    """IOStatic is not writable; truncating raises UnsupportedOperation."""
    with IOStatic(b"abc") as static:
        is_writable = static.writable()
        assert is_writable is False
        with pytest.raises(UnsupportedOperation):
            static.truncate()
38 |
--------------------------------------------------------------------------------
/tests/unit/test_open.py:
--------------------------------------------------------------------------------
1 | from io import BytesIO
2 | import lzma
3 | from pathlib import Path
4 | from typing import List, Optional
5 | from unittest.mock import Mock
6 |
7 | import pytest
8 |
9 | from xz.open import xz_open
10 | from xz.strategy import RollingBlockReadStrategy
11 |
# a stream with two blocks (lengths: 10, 3)
# one UTF8 character is between the two blocks
STREAM_BYTES = bytes.fromhex(
    "".join(
        (
            "fd377a585a000004e6d6b446",
            "0200210116000000742fe5a3010009e299a5207574663820e2000000404506004bafe33d",
            "0200210116000000742fe5a301000299a50a0000c6687a2b8dbda0cf",
            "0002220a1b0300001b1c3777",
            "b1c467fb020000000004595a",
        )
    )
)
21 |
22 |
23 | #
24 | # read
25 | #
26 |
27 |
def test_mode_rb() -> None:
    """Mode "rb" yields raw bytes and reports mode "r"."""
    with xz_open(BytesIO(STREAM_BYTES), "rb") as xzfile:
        assert xzfile.mode == "r"
        assert len(xzfile) == 13
        assert xzfile.stream_boundaries == [0]
        assert xzfile.block_boundaries == [0, 10]

        everything = xzfile.read()
        assert everything == b"\xe2\x99\xa5 utf8 \xe2\x99\xa5\n"

        assert xzfile.seek(9) == 9
        remainder = xzfile.read()
        assert remainder == b"\xe2\x99\xa5\n"
41 |
42 |
def test_mode_rt() -> None:
    """Mode "rt" yields decoded text and reports mode "rt"."""
    with xz_open(BytesIO(STREAM_BYTES), "rt") as xzfile:
        assert xzfile.mode == "rt"
        assert xzfile.stream_boundaries == [0]
        assert xzfile.block_boundaries == [0, 10]

        everything = xzfile.read()
        assert everything == "♥ utf8 ♥\n"

        assert xzfile.seek(9) == 9
        remainder = xzfile.read()
        assert remainder == "♥\n"
55 |
56 |
def test_mode_rt_file(tmp_path: Path) -> None:
    """Text mode over a real file also exposes that file's descriptor."""
    archive = tmp_path / "file.xz"
    archive.write_bytes(STREAM_BYTES)

    with archive.open("rb") as fin, xz_open(fin, "rt") as xzfile:
        assert xzfile.stream_boundaries == [0]
        assert xzfile.block_boundaries == [0, 10]
        assert xzfile.fileno() == fin.fileno()

        assert xzfile.read() == "♥ utf8 ♥\n"

        assert xzfile.seek(9) == 9
        assert xzfile.read() == "♥\n"
71 |
72 |
@pytest.mark.parametrize(
    "encoding, expected",
    (
        pytest.param("utf8", "еñϲоԺε", id="utf8"),
        # latin1 maps every payload byte (d0 b5 c3 b1 cf b2 d0 be d4 ba ce b5)
        # to one character, so the text differs from the utf8 decoding;
        # escapes are used to keep the expectation independent of the
        # encoding this source file is stored in
        pytest.param(
            "latin1",
            "\xd0\xb5\xc3\xb1\xcf\xb2\xd0\xbe\xd4\xba\xce\xb5",
            id="latin1",
        ),
    ),
)
def test_mode_rt_encoding(encoding: str, expected: str) -> None:
    """Text mode decodes the payload with the requested ``encoding``.

    The stream below holds the 12 bytes ``d0b5c3b1cfb2d0bed4baceb5``.
    """
    fileobj = BytesIO(
        bytes.fromhex(
            "fd377a585a000000ff12d9410200210116000000742fe5a301000bd0b5c3b1cf"
            "b2d0bed4baceb50000011c0c5da447cf06729e7a010000000000595a"
        )
    )
    with xz_open(fileobj, "rt", encoding=encoding) as xzfile:
        assert xzfile.read() == expected
89 |
90 |
@pytest.mark.parametrize(
    "errors, expected",
    [
        pytest.param(None, None, id="None"),
        pytest.param("strict", None, id="strict"),
        pytest.param("ignore", "encoding", id="ignore"),
        pytest.param("replace", "en�co�di�ng", id="replace"),
        pytest.param(
            "backslashreplace", r"en\x99co\x98di\x97ng", id="backslashreplace"
        ),
    ],
)
def test_mode_rt_encoding_errors(
    errors: Optional[str], expected: Optional[str]
) -> None:
    """Text mode applies the ``errors`` policy to undecodable bytes.

    An ``expected`` of None means that reading must raise ValueError.
    """
    compressed = bytes.fromhex(
        "fd377a585a000000ff12d9410200210116000000742fe5a301000a656e99636f"
        "986469976e67000000011b0b39a7621e06729e7a010000000000595a"
    )
    fileobj = BytesIO(compressed)

    with xz_open(fileobj, "rt", errors=errors) as xzfile:
        if expected is not None:
            assert xzfile.read() == expected
        else:
            with pytest.raises(ValueError):
                xzfile.read()
119 |
120 |
@pytest.mark.parametrize(
    "newline, expected",
    (
        pytest.param(None, ["a\n", "b\n", "c\n", "d"], id="None"),
        pytest.param("", ["a\n", "b\r", "c\r\n", "d"], id="''"),
        pytest.param("\n", ["a\n", "b\rc\r\n", "d"], id="'\n'"),
        pytest.param("\r", ["a\nb\r", "c\r", "\nd"], id="'\r'"),
        pytest.param("\r\n", ["a\nb\rc\r\n", "d"], id="'\r\n'"),
    ),
)
def test_mode_rt_newline(newline: Optional[str], expected: List[str]) -> None:
    """Check how each ``newline`` setting splits lines when reading text."""
    # payload: b"a\nb\rc\r\nd", i.e. one line ending of each flavour
    compressed = bytes.fromhex(
        "fd377a585a000000ff12d9410200210116000000742fe5a3010007610a620d63"
        "0d0a64000001180840a546ac06729e7a010000000000595a"
    )
    source = BytesIO(compressed)

    with xz_open(source, "rt", newline=newline) as reader:
        lines = reader.readlines()
        assert lines == expected
141 |
142 |
def test_mode_rb_encoding() -> None:
    """Passing ``encoding`` together with a binary mode raises ``ValueError``."""
    source = BytesIO(STREAM_BYTES)

    with pytest.raises(ValueError):
        xz_open(source, "rb", encoding="latin1")
147 |
148 |
def test_mode_rb_encoding_errors() -> None:
    """Passing ``errors`` together with a binary mode raises ``ValueError``."""
    source = BytesIO(STREAM_BYTES)

    with pytest.raises(ValueError):
        xz_open(source, "rb", errors="ignore")
153 |
154 |
def test_mode_rb_newline() -> None:
    """Passing ``newline`` together with a binary mode raises ``ValueError``."""
    source = BytesIO(STREAM_BYTES)

    with pytest.raises(ValueError):
        xz_open(source, "rb", newline="\n")
159 |
160 |
161 | #
162 | # write
163 | #
164 |
# Expected file content shared by test_mode_wb_check and test_mode_wt_check:
# four consecutive streams holding the two-byte payloads ceb1, ceb2, ceb3 and
# ceb4. Streams 1 and 2 start with the same 12 bytes; streams 3 and 4 start
# with a different 12 bytes (the changed check).
TEST_MODE_W_CHECK_BYTES = bytes.fromhex(
    # stream 1
    "fd377a585a0000016922de36"
    "0200210116000000742fe5a3010001ceb1000000256bc6a8"
    "00011602d06110d2"
    "9042990d010000000001595a"
    # stream 2
    "fd377a585a0000016922de36"
    "0200210116000000742fe5a3010001ceb20000009f3acf31"
    "00011602d06110d2"
    "9042990d010000000001595a"
    # stream 3 (changed check)
    "fd377a585a000004e6d6b446"
    "0200210116000000742fe5a3010001ceb3000000ab6cffc6b19a1d23"
    "00011a02dc2ea57e"
    "1fb6f37d010000000004595a"
    # stream 4 (changed check)
    "fd377a585a000004e6d6b446"
    "0200210116000000742fe5a3010001ceb4000000accd9792dc23671f"
    "00011a02dc2ea57e"
    "1fb6f37d010000000004595a"
)
187 |
188 |
def test_mode_wb_check() -> None:
    """Write four streams in binary mode, changing the check along the way."""
    sink = BytesIO()

    with xz_open(sink, "wb", check=1) as writer:
        assert writer.mode == "w"

        # stream 1
        writer.write(b"\xce\xb1")
        writer.change_stream()

        # stream 2: the check is reassigned here, but the expected bytes only
        # label streams 3 and 4 as using the changed check
        writer.check = 4
        writer.write(b"\xce\xb2")
        writer.change_stream()

        # stream 3
        writer.write(b"\xce\xb3")
        writer.change_stream()

        # stream 4
        writer.write(b"\xce\xb4")

    assert sink.getvalue() == TEST_MODE_W_CHECK_BYTES
204 |
205 |
def test_mode_wt_check() -> None:
    """Write four streams in text mode, changing the check along the way."""
    sink = BytesIO()

    with xz_open(sink, "wt", check=1) as writer:
        assert writer.mode == "wt"

        # stream 1
        writer.write("α")
        writer.change_stream()

        # stream 2: the check is reassigned here, but the expected bytes only
        # label streams 3 and 4 as using the changed check
        writer.check = 4
        writer.write("β")
        writer.change_stream()

        # stream 3
        writer.write("γ")
        writer.change_stream()

        # stream 4
        writer.write("δ")

    assert sink.getvalue() == TEST_MODE_W_CHECK_BYTES
221 |
222 |
# Expected file content shared by test_mode_wb_filters and
# test_mode_wt_filters: three streams of 4, 2 and 2 blocks. The first two
# blocks carry the payloads ceb1 and ceb2 unchanged; every later block is
# labelled as written with the changed filters and starts with a different
# block header.
TEST_MODE_W_FILTERS_BYTES = bytes.fromhex(
    ## stream 1
    # header
    "fd377a585a0000016922de36"
    # block 1
    "0200210116000000742fe5a3010001ceb1000000256bc6a8"
    # block 2
    "0200210116000000742fe5a3010001ceb20000009f3acf31"
    # block 3 (changed filters)
    "02010301002101167920c4ee010001cee5000000090ac846"
    # block 4 (changed filters)
    "02010301002101167920c4ee010001cee6000000aa9facd8"
    # index
    "0004160216021602160200008a2bb83b"
    # footer
    "9be35140030000000001595a"
    ## stream 2
    # header
    "fd377a585a0000016922de36"
    # block 1 (changed filters)
    "02010301002101167920c4ee010001cee70000003cafabaf"
    # block 2 (changed filters)
    "02010301002101167920c4ee010001cee800000086fea236"
    # index
    "00021602160200008ba0042b"
    # footer
    "3e300d8b020000000001595a"
    ## stream 3
    # header
    "fd377a585a0000016922de36"
    # block 1 (changed filters)
    "02010301002101167920c4ee010001cee900000010cea541"
    # block 2 (changed filters)
    "02010301002101167920c4ee010001ceea00000081d31ad1"
    # index
    "00021602160200008ba0042b"
    # footer
    "3e300d8b020000000001595a"
)
262 |
263 |
def test_mode_wb_filters() -> None:
    """Write three streams in binary mode, replacing the filters midway."""
    sink = BytesIO()

    with xz_open(sink, "wb", check=1) as writer:
        # stream 1, block 1
        writer.write(b"\xce\xb1")
        writer.change_block()

        # stream 1, block 2: the filters are replaced here, but the expected
        # bytes only label block 3 and later as using the changed filters
        writer.filters = [{"id": 3, "dist": 1}, {"id": 33}]
        writer.write(b"\xce\xb2")
        writer.change_block()

        # stream 1, blocks 3 and 4
        writer.write(b"\xce\xb3")
        writer.change_block()
        writer.write(b"\xce\xb4")
        writer.change_stream()

        # stream 2, blocks 1 and 2
        writer.write(b"\xce\xb5")
        writer.change_block()
        writer.write(b"\xce\xb6")
        writer.change_stream()

        # stream 3, blocks 1 and 2
        writer.write(b"\xce\xb7")
        writer.change_block()
        writer.write(b"\xce\xb8")

    assert sink.getvalue() == TEST_MODE_W_FILTERS_BYTES
286 |
287 |
def test_mode_wt_filters() -> None:
    """Write three streams in text mode, replacing the filters midway."""
    sink = BytesIO()

    with xz_open(sink, "wt", check=1) as writer:
        # stream 1, block 1
        writer.write("α")
        writer.change_block()

        # stream 1, block 2: the filters are replaced here, but the expected
        # bytes only label block 3 and later as using the changed filters
        writer.filters = [{"id": 3, "dist": 1}, {"id": 33}]
        writer.write("β")
        writer.change_block()

        # stream 1, blocks 3 and 4
        writer.write("γ")
        writer.change_block()
        writer.write("δ")
        writer.change_stream()

        # stream 2, blocks 1 and 2
        writer.write("ε")
        writer.change_block()
        writer.write("ζ")
        writer.change_stream()

        # stream 3, blocks 1 and 2
        writer.write("η")
        writer.change_block()
        writer.write("θ")

    assert sink.getvalue() == TEST_MODE_W_FILTERS_BYTES
310 |
311 |
# Expected file content shared by test_mode_wb_preset and
# test_mode_wt_preset: three streams of 4, 2 and 2 blocks holding the
# payloads ceb1 .. ceb8. Blocks labelled "changed preset" start with a
# different block header than the first two blocks.
TEST_MODE_W_PRESET_BYTES = bytes.fromhex(
    ## stream 1
    # header
    "fd377a585a0000016922de36"
    # block 1
    "0200210116000000742fe5a3010001ceb1000000256bc6a8"
    # block 2
    "0200210116000000742fe5a3010001ceb20000009f3acf31"
    # block 3 (changed preset)
    "020021011c00000010cf58cc010001ceb3000000090ac846"
    # block 4 (changed preset)
    "020021011c00000010cf58cc010001ceb4000000aa9facd8"
    # index
    "0004160216021602160200008a2bb83b"
    # footer
    "9be35140030000000001595a"
    ## stream 2
    # header
    "fd377a585a0000016922de36"
    # block 1 (changed preset)
    "020021011c00000010cf58cc010001ceb50000003cafabaf"
    # block 2 (changed preset)
    "020021011c00000010cf58cc010001ceb600000086fea236"
    # index
    "00021602160200008ba0042b"
    # footer
    "3e300d8b020000000001595a"
    ## stream 3
    # header
    "fd377a585a0000016922de36"
    # block 1 (changed preset)
    "020021011c00000010cf58cc010001ceb700000010cea541"
    # block 2 (changed preset)
    "020021011c00000010cf58cc010001ceb800000081d31ad1"
    # index
    "00021602160200008ba0042b"
    # footer
    "3e300d8b020000000001595a"
)
351 |
352 |
def test_mode_wb_preset() -> None:
    """Write three streams in binary mode, changing the preset midway."""
    sink = BytesIO()

    with xz_open(sink, "wb", check=1) as writer:
        # stream 1, block 1
        writer.write(b"\xce\xb1")
        writer.change_block()

        # stream 1, block 2: the preset is reassigned here, but the expected
        # bytes only label block 3 and later as using the changed preset
        writer.preset = 9
        writer.write(b"\xce\xb2")
        writer.change_block()

        # stream 1, blocks 3 and 4
        writer.write(b"\xce\xb3")
        writer.change_block()
        writer.write(b"\xce\xb4")
        writer.change_stream()

        # stream 2, blocks 1 and 2
        writer.write(b"\xce\xb5")
        writer.change_block()
        writer.write(b"\xce\xb6")
        writer.change_stream()

        # stream 3, blocks 1 and 2
        writer.write(b"\xce\xb7")
        writer.change_block()
        writer.write(b"\xce\xb8")

    assert sink.getvalue() == TEST_MODE_W_PRESET_BYTES
375 |
376 |
def test_mode_wt_preset() -> None:
    """Write three streams in text mode, changing the preset midway."""
    sink = BytesIO()

    with xz_open(sink, "wt", check=1) as writer:
        # stream 1, block 1
        writer.write("α")
        writer.change_block()

        # stream 1, block 2: the preset is reassigned here, but the expected
        # bytes only label block 3 and later as using the changed preset
        writer.preset = 9
        writer.write("β")
        writer.change_block()

        # stream 1, blocks 3 and 4
        writer.write("γ")
        writer.change_block()
        writer.write("δ")
        writer.change_stream()

        # stream 2, blocks 1 and 2
        writer.write("ε")
        writer.change_block()
        writer.write("ζ")
        writer.change_stream()

        # stream 3, blocks 1 and 2
        writer.write("η")
        writer.change_block()
        writer.write("θ")

    assert sink.getvalue() == TEST_MODE_W_PRESET_BYTES
399 |
400 |
@pytest.mark.parametrize(
    "encoding, data",
    (
        pytest.param("utf8", "еñϲоԺε", id="utf8"),
        # latin1 can only encode code points up to U+00FF, so the text that
        # produces the same 12 payload bytes is the one made of those very
        # byte values (the mojibake of the utf8 case).
        pytest.param(
            "latin1",
            "\xd0\xb5\xc3\xb1\xcf\xb2\xd0\xbe\xd4\xba\xce\xb5",
            id="latin1",
        ),
    ),
)
def test_mode_wt_encoding(encoding: str, data: str) -> None:
    """Check that ``encoding`` drives how text mode encodes written text.

    Both cases must yield the same file, whose payload is the 12 bytes
    ``d0b5 c3b1 cfb2 d0be d4ba ceb5`` (visible in the hex below).
    """
    fileobj = BytesIO()
    with xz_open(fileobj, "wt", check=0, encoding=encoding) as xzfile:
        xzfile.write(data)

    assert fileobj.getvalue() == bytes.fromhex(
        "fd377a585a000000ff12d9410200210116000000742fe5a301000bd0b5c3b1cf"
        "b2d0bed4baceb50000011c0c5da447cf06729e7a010000000000595a"
    )
417 |
418 |
@pytest.mark.parametrize(
    "errors, data",
    (
        pytest.param(None, None, id="None"),
        pytest.param("strict", None, id="strict"),
        pytest.param(
            "ignore",
            b"encoding",
            id="ignore",
        ),
        pytest.param(
            "replace",
            b"en?co?di?ng",
            id="replace",
        ),
        pytest.param(
            "backslashreplace",
            rb"en\udc01co\udc02di\udc03ng",
            id="backslashreplace",
        ),
    ),
)
def test_mode_wt_encoding_errors(errors: Optional[str], data: Optional[bytes]) -> None:
    """Check how each ``errors`` policy handles unencodable text on write.

    ``data`` is the expected decompressed payload, or ``None`` when writing
    must raise ``ValueError``.
    """
    # A single literal is shared by both branches so that the strict cases are
    # exercised with exactly the same text as the lenient ones: "encoding"
    # interleaved with three lone surrogates.
    text = "en\udc01co\udc02di\udc03ng"
    fileobj = BytesIO()

    with xz_open(fileobj, "wt", errors=errors) as xzfile:
        if data is None:
            xzfile.write("X")  # to avoid having an empty file
            with pytest.raises(ValueError):
                xzfile.write(text)
        else:
            xzfile.write(text)

    # only checked once the file is closed, i.e. fully written
    if data is not None:
        assert lzma.decompress(fileobj.getvalue()) == data
454 |
455 |
@pytest.mark.parametrize(
    "newline, data",
    (
        pytest.param(None, b"a\nb\n", id="None"),
        pytest.param("", b"a\nb\n", id="''"),
        pytest.param("\n", b"a\nb\n", id="'\n'"),
        pytest.param("\r", b"a\rb\r", id="'\r'"),
        pytest.param("\r\n", b"a\r\nb\r\n", id="'\r\n'"),
    ),
)
def test_mode_wt_newline(newline: Optional[str], data: bytes) -> None:
    """Check how each ``newline`` setting translates ``\\n`` when writing text."""
    # NOTE(review): the ``None`` case expects "\n" to be kept as is, which
    # presumably only holds on platforms whose line separator is "\n" --
    # confirm if these tests are meant to run elsewhere.
    sink = BytesIO()

    with xz_open(sink, "wt", newline=newline) as writer:
        writer.writelines(["a\n", "b\n"])

    written = lzma.decompress(sink.getvalue())
    assert written == data
473 |
474 |
475 | #
476 | # misc
477 | #
478 |
479 |
@pytest.mark.parametrize("mode", ("rtb", "rbt", "wtb", "wbt"))
def test_mode_invalid(mode: str) -> None:
    """Modes combining the ``t`` and ``b`` flags are rejected with a message."""
    source = BytesIO(STREAM_BYTES)

    with pytest.raises(ValueError) as caught:
        xz_open(source, mode)

    message = str(caught.value)
    assert message == f"Invalid mode: {mode}"
487 |
488 |
@pytest.mark.parametrize("mode", ("r", "rt"))
def test_default_strategy(mode: str) -> None:
    """Without an explicit strategy, a ``RollingBlockReadStrategy`` is used."""
    source = BytesIO(STREAM_BYTES)

    with xz_open(source, mode) as reader:
        strategy = reader.block_read_strategy
        assert isinstance(strategy, RollingBlockReadStrategy)
495 |
496 |
@pytest.mark.parametrize("mode", ("r", "rt"))
def test_custom_strategy(mode: str) -> None:
    """A strategy passed to ``xz_open`` is the one exposed by the file."""
    custom = Mock()
    source = BytesIO(STREAM_BYTES)

    with xz_open(source, mode, block_read_strategy=custom) as reader:
        assert reader.block_read_strategy == custom
504 |
--------------------------------------------------------------------------------
/tests/unit/test_parse_mode.py:
--------------------------------------------------------------------------------
1 | from itertools import permutations, product
2 | import sys
3 | from typing import Tuple
4 |
5 | try:
6 | from typing import get_args
7 | except ImportError:
8 | pass
9 |
10 | import pytest
11 |
12 | from xz.typing import _XZModesBinaryType, _XZModesTextType
13 | from xz.utils import parse_mode
14 |
# Maps each valid mode, with its characters sorted, to the expected
# ``(mode, readable, writable)`` result. Every base mode is listed twice:
# as is, and with the binary flag "b" appended.
VALID_MODES = dict(
    ("".join(sorted(base + suffix)), (base, readable, writable))
    for base, readable, writable in (
        ("r", True, False),
        ("r+", True, True),
        ("w", False, True),
        ("w+", True, True),
        ("x", False, True),
        ("x+", True, True),
    )
    for suffix in ("", "b")
)
27 |
28 |
@pytest.mark.skipif(
    sys.version_info < (3, 9),
    reason="Literal or get_args not supported",
)
def test_known_valid_modes_binary() -> None:
    """The binary modes declared for typing match the modes tested here."""
    declared = sorted("".join(sorted(mode)) for mode in get_args(_XZModesBinaryType))
    known = sorted(VALID_MODES)
    assert declared == known
37 |
38 |
@pytest.mark.skipif(
    sys.version_info < (3, 9),
    reason="Literal or get_args not supported",
)
def test_known_valid_modes_text() -> None:
    """The text modes declared for typing, minus "t", match the non-binary modes."""
    declared = sorted(
        "".join(sorted(mode.replace("t", ""))) for mode in get_args(_XZModesTextType)
    )
    known = sorted(mode for mode in VALID_MODES if "b" not in mode)
    assert declared == known
47 |
48 |
@pytest.mark.parametrize(
    "mode, expected",
    [pytest.param(key, value, id=key) for key, value in VALID_MODES.items()],
)
def test_parse_mode_valid(mode: str, expected: Tuple[str, bool, bool]) -> None:
    """Every ordering of a valid mode's characters parses to the same result."""
    for candidate in map("".join, permutations(mode)):
        assert parse_mode(candidate) == expected, candidate
57 |
58 |
@pytest.mark.parametrize(
    "mode",
    # every subset of the mode characters that is not a known valid mode...
    [
        "".join(chars)
        for chars in product(*((char, "") for char in "arwx+tb"))
        if "".join(sorted(chars)) not in VALID_MODES
    ]
    # ...plus every valid mode repeated twice
    + [valid * 2 for valid in VALID_MODES],
)
def test_parse_mode_invalid(mode: str) -> None:
    """Every ordering of an invalid mode's characters raises ``ValueError``."""
    for candidate in map("".join, permutations(mode)):
        with pytest.raises(ValueError):
            parse_mode(candidate)
73 |
--------------------------------------------------------------------------------
/tests/unit/test_stream.py:
--------------------------------------------------------------------------------
1 | from io import SEEK_CUR, SEEK_END, BytesIO
2 | import sys
3 | from typing import Tuple, cast
4 | from unittest.mock import Mock, call
5 |
6 | import pytest
7 |
8 | from xz.common import XZError
9 | from xz.io import IOProxy
10 | from xz.stream import XZStream
11 |
12 | if sys.version_info >= (3, 9): # pragma: no cover
13 | from collections.abc import Callable
14 | else: # pragma: no cover
15 | from typing import Callable
16 |
17 |
# a stream with two blocks (lengths: 100, 90)
# It is the reference output of the write tests below, and test_parse expects
# it to report check 1 and block boundaries [0, 100].
STREAM_BYTES = bytes.fromhex(
    "fd377a585a0000016922de360200210116000000742fe5a3e0006300415d0020"
    "9842100431d01ab2853283057ddb5924a128599cc9911a7fcff8d59c1f6f887b"
    "cee97b1f83f1808f005de273e1a6e99a7eac4f8f632b7e43bbf1da311dce5c00"
    "00000000e7c35efa0200210116000000742fe5a3e00059003d5d00320cc42641"
    "c8b91ac7908be7e635b8e7d681d74b683cde914399f8de5460dc672363f1e067"
    "5a3ebac9f427ecbebcb94552c0dba85b26950f0ac98b22390000000031f4ee87"
    "00025964555a0000041276283e300d8b020000000001595a"
)

# a stream with no blocks
# (test_parse_empty_stream expects length 0 and no block boundaries)
STREAM_BYTES_EMPTY = bytes.fromhex(
    "fd377a585a0000016922de36000000001cdf44219042990d010000000001595a"
)
33 |
34 |
def test_parse(data_pattern_locate: Callable[[bytes], Tuple[int, int]]) -> None:
    """Parse a stream surrounded by unrelated bytes, then read from it."""
    padded = b"\xff" * 1000 + STREAM_BYTES + b"\xee" * 1000
    fileobj = Mock(wraps=BytesIO(padded))
    # parsing starts from the end of the stream, so place the cursor there
    fileobj.seek(-1000, SEEK_END)
    fileobj.method_calls.clear()

    # parse stream
    stream = XZStream.parse(fileobj)
    assert stream.check == 1
    assert len(stream) == 190
    assert stream.block_boundaries == [0, 100]

    # make sure we don't read the blocks
    expected_calls = [
        call.seek(-12, SEEK_CUR),
        call.read(12),
        call.seek(-24, SEEK_CUR),
        call.read(12),
        call.seek(-204, SEEK_CUR),  # blocks are skipped over here
        call.read(12),
        call.seek(-12, SEEK_CUR),
    ]
    assert fileobj.method_calls == expected_calls

    # fileobj should be at the beginning of the stream
    assert fileobj.tell() == 1000

    # read from start
    assert data_pattern_locate(stream.read(20)) == (0, 20)

    for offset in (
        40,  # middle of a block
        90,  # across two blocks
        160,  # middle of another block
        130,  # go backward and read
        60,  # previous block (going backward from last read in that block)
    ):
        stream.seek(offset)
        assert data_pattern_locate(stream.read(20)) == (offset, 20)

    # read until end
    stream.seek(170)
    assert data_pattern_locate(stream.read()) == (170, 20)
86 |
87 |
def test_parse_invalid_stream_flags_missmatch() -> None:
    """Parsing fails when the stream reports inconsistent check values."""
    corrupted = bytes.fromhex(
        "fd377a585a000004e6d6b446000000001cdf44219042990d010000000001595a"
    )
    fileobj = BytesIO(corrupted)
    fileobj.seek(0, SEEK_END)

    with pytest.raises(XZError) as caught:
        XZStream.parse(fileobj)

    assert str(caught.value) == "stream: inconsistent check value"
98 |
99 |
def test_parse_empty_block() -> None:
    """Parsing fails on an index record with an invalid uncompressed size."""
    corrupted = bytes.fromhex(
        "fd377a585a0000016922de360200210116000000742fe5a30000000000000000"
        "000111003b965f739042990d010000000001595a"
    )
    fileobj = BytesIO(corrupted)
    fileobj.seek(0, SEEK_END)

    with pytest.raises(XZError) as caught:
        XZStream.parse(fileobj)

    assert str(caught.value) == "index record uncompressed size"
111 |
112 |
def test_parse_empty_stream() -> None:
    """A stream without blocks parses to an empty stream."""
    fileobj = BytesIO(STREAM_BYTES_EMPTY)
    fileobj.seek(0, SEEK_END)

    parsed = XZStream.parse(fileobj)

    assert len(parsed) == 0
    assert parsed.block_boundaries == []
119 |
120 |
def test_write(data_pattern: bytes) -> None:
    """Write two blocks into a new stream over a pre-filled file object."""
    # init with more size than what will be written at the end
    initial_size = 1024
    assert len(STREAM_BYTES) < initial_size
    placeholder = b"A" * initial_size

    sink = BytesIO(placeholder)

    with XZStream(cast(IOProxy, sink), 1) as stream:
        # nothing is touched before the first write
        assert sink.getvalue() == placeholder
        assert stream.block_boundaries == []

        # changing block on an empty stream creates no boundary
        stream.change_block()
        assert stream.block_boundaries == []

        # first block
        stream.write(data_pattern[:100])
        assert stream.block_boundaries == [0]

        # the second boundary shows up as soon as the block is changed
        stream.change_block()
        assert stream.block_boundaries == [0, 100]

        # second block
        stream.write(data_pattern[100:190])
        assert stream.block_boundaries == [0, 100]

    assert sink.getvalue() == STREAM_BYTES
146 |
147 |
def test_write_from_existing_stream(data_pattern: bytes) -> None:
    """Append a second block to a parsed stream that holds a single block."""
    # same first block as STREAM_BYTES, followed by a one-record index
    one_block_stream = bytes.fromhex(
        "fd377a585a0000016922de360200210116000000742fe5a3e0006300415d0020"
        "9842100431d01ab2853283057ddb5924a128599cc9911a7fcff8d59c1f6f887b"
        "cee97b1f83f1808f005de273e1a6e99a7eac4f8f632b7e43bbf1da311dce5c00"
        "00000000e7c35efa0001596477f620019042990d010000000001595a"
    )
    fileobj = BytesIO(one_block_stream)
    fileobj.seek(0, SEEK_END)

    with XZStream.parse(fileobj) as stream:
        assert stream.read() == data_pattern[:100]
        assert stream.block_boundaries == [0]

        stream.write(data_pattern[100:190])
        assert stream.block_boundaries == [0, 100]

    assert fileobj.getvalue() == STREAM_BYTES
166 |
167 |
def test_truncate_and_write(data_pattern: bytes) -> None:
    """Drop the last block of a parsed stream, then write a new one."""
    # same first block as STREAM_BYTES, followed by a block holding b"ZZZ"
    initial_stream = bytes.fromhex(
        "fd377a585a0000016922de360200210116000000742fe5a3e0006300415d0020"
        "9842100431d01ab2853283057ddb5924a128599cc9911a7fcff8d59c1f6f887b"
        "cee97b1f83f1808f005de273e1a6e99a7eac4f8f632b7e43bbf1da311dce5c00"
        "00000000e7c35efa0200210116000000742fe5a30100025a5a5a0000407f8055"
        "00025964170300008d97067a3e300d8b020000000001595a"
    )
    fileobj = BytesIO(initial_stream)
    fileobj.seek(0, SEEK_END)

    with XZStream.parse(fileobj) as stream:
        assert stream.read() == data_pattern[:100] + b"ZZZ"
        assert stream.block_boundaries == [0, 100]

        # truncate at the boundary between the two blocks
        stream.seek(100)
        stream.truncate()
        assert stream.block_boundaries == [0]

        stream.write(data_pattern[100:190])
        assert stream.block_boundaries == [0, 100]

    assert fileobj.getvalue() == STREAM_BYTES
191 |
192 |
def test_truncate_middle_block() -> None:
    """Truncating at offset 80, inside the first block, raises ``ValueError``."""
    fileobj = BytesIO(STREAM_BYTES)
    fileobj.seek(0, SEEK_END)

    with pytest.raises(ValueError) as caught, XZStream.parse(fileobj) as stream:
        stream.truncate(80)

    assert str(caught.value) == "truncate"
200 |
201 |
def test_read_only_check() -> None:
    """Assigning to the ``check`` attribute of a stream raises ``AttributeError``."""
    sink = BytesIO()

    with XZStream(cast(IOProxy, sink), 1) as stream, pytest.raises(AttributeError):
        stream.check = 4  # type: ignore[misc]
208 |
209 |
def test_change_filters() -> None:
    """Write four blocks into one stream, replacing the filters midway."""
    sink = BytesIO()

    with XZStream(cast(IOProxy, sink), 1) as stream:
        # block 1
        stream.write(b"aa")
        stream.change_block()

        # block 2: the filters are replaced here, but the expected bytes only
        # label blocks 3 and 4 as using the changed filters
        stream.filters = [{"id": 3, "dist": 1}, {"id": 33}]
        stream.write(b"bb")
        stream.change_block()

        # block 3
        stream.write(b"cc")
        stream.change_block()

        # block 4
        stream.write(b"dd")

    expected = bytes.fromhex(
        # header
        "fd377a585a0000016922de36"
        # block 1
        "0200210116000000742fe5a30100016161000000d7198a07"
        # block 2
        "0200210116000000742fe5a30100016262000000ae1baeb5"
        # block 3 (changed filters)
        "02010301002101167920c4ee0100016300000000791ab2db"
        # block 4 (changed filters)
        "02010301002101167920c4ee01000164000000001d19970a"
        # index
        "0004160216021602160200008a2bb83b"
        # footer
        "9be35140030000000001595a"
    )
    assert sink.getvalue() == expected
239 |
240 |
def test_change_preset() -> None:
    """Write four blocks into one stream, changing the preset midway."""
    sink = BytesIO()

    with XZStream(cast(IOProxy, sink), 1) as stream:
        # block 1
        stream.write(b"aa")
        stream.change_block()

        # block 2: the preset is reassigned here, but the expected bytes only
        # label blocks 3 and 4 as using the changed preset
        stream.preset = 9
        stream.write(b"bb")
        stream.change_block()

        # block 3
        stream.write(b"cc")
        stream.change_block()

        # block 4
        stream.write(b"dd")

    expected = bytes.fromhex(
        # header
        "fd377a585a0000016922de36"
        # block 1
        "0200210116000000742fe5a30100016161000000d7198a07"
        # block 2
        "0200210116000000742fe5a30100016262000000ae1baeb5"
        # block 3 (changed preset)
        "020021011c00000010cf58cc0100016363000000791ab2db"
        # block 4 (changed preset)
        "020021011c00000010cf58cc01000164640000001d19970a"
        # index
        "0004160216021602160200008a2bb83b"
        # footer
        "9be35140030000000001595a"
    )
    assert sink.getvalue() == expected
270 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
# Environments run by default: the interpreter test environments first, then
# the integration-file generation and the static checks defined below.
[tox]
envlist =
    py, py37, py38, py39, py310, py311, pypy3
    generate-integration-files, type, lint, format

# Base test environment: runs pytest with coverage and, unless other
# arguments are passed on the command line, fails below 100% coverage.
# The HTML coverage report is only produced for the plain "py" environment.
[testenv]
deps =
    pytest
    pytest-cov
passenv = PY_COLORS
setenv =
    COVERAGE_FILE = {toxworkdir}/{envname}/.coverage
    PYTHONDEVMODE = 1
commands =
    pytest {posargs:-vv --cov-fail-under=100}
    py: -coverage html

# Runs only the tests marked generate_integration_files, with coverage
# disabled.
[testenv:generate-integration-files]
deps =
    pytest
    pytest-cov
passenv = PY_COLORS
setenv =
    PYTEST_ADDOPTS = --no-cov
    PYTHONDEVMODE = 1
commands = pytest -vv -m generate_integration_files --generate-integration-files

# Static typing: one mypy run with the project configuration, one on the tests.
[testenv:type]
deps =
    mypy
    pytest # for typing
commands =
    mypy
    mypy --namespace-packages --explicit-package-bases tests

# Linting: the tests are checked with a few messages disabled.
[testenv:lint]
deps =
    pylint
    pytest # to avoid import errors
commands =
    pylint src
    pylint -d duplicate-code,too-many-statements,use-implicit-booleaness-not-comparison tests

# Formatting: check-only by default; pass other arguments to override that.
[testenv:format]
skip_install = true
deps =
    black
    isort
commands =
    black {posargs:--check --diff} src tests
    isort {posargs:--check --diff} src tests
52 |
--------------------------------------------------------------------------------