├── .coveragerc ├── .gitattributes ├── .gitignore ├── .pylintrc ├── .travis.yml ├── CHANGES.md ├── CITATION ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── docs ├── Makefile ├── api │ ├── conf.py │ └── modules.rst ├── conf.py ├── developers.rst ├── index.rst ├── logo.pdf ├── logo.png └── requirements.txt ├── paper ├── codemeta.json ├── generate.rb ├── paper.bib └── paper.md ├── readthedocs.yml ├── requirements.txt ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── foo.gz ├── pytest.ini ├── test_formats.py ├── test_paths.py ├── test_performance.py ├── test_progress.py ├── test_types.py ├── test_urls.py ├── test_utils.py └── test_xphyle.py └── xphyle ├── __init__.py ├── formats.py ├── paths.py ├── progress.py ├── types.py ├── urls.py └── utils.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | tests/* 4 | setup.py 5 | *site-packages/* 6 | 7 | [report] 8 | exclude_lines = 9 | pragma: no cover 10 | pragma: no-cover 11 | def __repr__ 12 | raise NotImplementedError 13 | if __name__ == .__main__.: 14 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | xphyle/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .cache 2 | .coverage 3 | build/ 4 | dist/ 5 | *.pyc 6 | xphyle.egg-info/ 7 | docs/_build/ 8 | .vscode* 9 | .mypy_cache/ 10 | .idea/ 11 | .pytest_cache/ 12 | .eggs/ 13 | .DS_Store 14 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MESSAGES CONTROL] 2 | # C0303: Trailing whitespace; will add a custom checker that only flags 3 
| # trailing whitespace at the end of non-empty lines 4 | # C0326: Disabled because it incorrectly flags whitespace around default values 5 | # when function annotations are used; will add a custom checker that 6 | # flags all other cases 7 | disable=fixme,C0303,C0326,too-few-public-methods,too-many-instance-attributes,too-many-arguments,too-many-locals,too-many-branches,too-many-statements,too-many-function-args,too-many-lines,too-many-boolean-expressions,too-many-return-statements 8 | ignore=__pycache__,_version.py 9 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: python 3 | cache: 4 | directories: 5 | - $HOME/.cache/pip 6 | os: 7 | - linux 8 | python: 9 | - 3.6 10 | - 3.7 11 | - 3.8 12 | - 3.9 13 | - 3.10 14 | install: 15 | - pip install --upgrade pip wheel 16 | - pip install pytest-cov 17 | - pip install coveralls 18 | - pip install pylint 19 | - make install 20 | script: 21 | - make test 22 | after_success: 23 | - coveralls 24 | - pylint xphyle 25 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | # Changes 2 | 3 | ## v4.4.1 (2020.12.06) 4 | 5 | * Fix #41 - Windows does not support SIGPIPE 6 | 7 | ## v4.4.0 (2020.08.27) 8 | 9 | * Add support for memory mapping, using the `memory_map` argument to `open_`/`xopen` 10 | 11 | ## v4.3.0 (2020.07.30) 12 | 13 | * Add support for igzip 14 | * Remove useless -p argument when decompressing with pigz 15 | 16 | ## v4.2.2 (2020.01.02) 17 | 18 | * Handle differences between gzip and pigz -l output 19 | 20 | ## v4.2.1 (2019.12.13) 21 | 22 | * Switch from versioneer to setup_tools_scm for version managment. 
23 | 24 | ## v4.2.0 (2019.11.20) 25 | 26 | * Add `xphyle.get_compressor` 27 | * Fix Python 3.8 issue with importing from collections 28 | 29 | ## v4.1.3 (2019.10.09) 30 | 31 | * Fixed issue with opening bgzip files 32 | 33 | ## v4.1.2 (2019.06.14) 34 | 35 | * Correctly handle file modes when detecting placeholders 36 | 37 | ## v4.1.1 (2019.06.14) 38 | 39 | * Correctly handle placeholder strings ('-', '_') as arguments to xopen 40 | 41 | ## v4.1.0 (2019.06.14) 42 | 43 | * Add support for zstd 44 | * Adjusted default compression levels based on benchmarking of compression tools. 45 | * Handle placeholder strings ('-', '_') as arguments to xopen 46 | 47 | ## v4.0.8 (2019.04.08) 48 | 49 | * Add pathlib.PurePath as a member of the PathLike type, to work around the lack of os.PathLike as a static superclass of PurePath in python 3.6 50 | 51 | ## v4.0.7 (2019.04.06) 52 | 53 | * Don't complain when writing a bgzip file and the extension is gz 54 | * Reformat codebase using black, and other code cleanup 55 | 56 | ## v4.0.5 (2019.01.10) 57 | 58 | * Fix setup.py and Makefile to perform pypi upload correctly 59 | * Add readthedocs.yml and update docs config to get docs building correctly 60 | 61 | ## v4.0.0 (2019.01.10) 62 | 63 | * Official 4.0.0 release 64 | 65 | ## v4.0.0-rc1 (2018.08.02) 66 | 67 | * Support non-.gz extensions when decompressing bgzip files. 68 | 69 | ## v4.0.0-rc0 (2018.03.18) 70 | 71 | * Starting with v4, xphyle requires python 3.6+ 72 | * All path-oriented functions now use pathlib paths by default. Support for string paths is deprecated. 73 | * Moved to pokrok for progress bar management. 74 | 75 | ## v3.1.6 (2018.01.16) 76 | 77 | * Fix bug when specifying file_type=FileType.FILELIKE. 78 | 79 | ## v3.1.5 (2017.12.11) 80 | 81 | * Added `close_fileobj` parameters to `xopen()` to allow user to specify whether the file/buffer should be closed when the wrapper is closed. 82 | 83 | ## v3.1.2 (2017.11.18) 84 | 85 | * Added `xphyle.utils.uncompressed_size()`. 
86 | 87 | ## v3.1.1 (2017.10.13) 88 | 89 | * Added 'overwrite' parameter to xopen (defaults to True). 90 | 91 | ## v3.1.0 (2017.08.31) 92 | 93 | * *Possible breaking change*: We discovered that python 3.3 support never fully worked due to some incompatibilities in the backported libraries for features we rely on that were introduced in 3.4. Thus, we are officially dropping support for python 3.3. This also reverts the change made in 3.0.7. 94 | * Please ignore releases 3.0.8 and 3.0.9. 95 | 96 | ## v3.0.7 (2017.07.22) 97 | 98 | * Add missing pathlib backport dependency for py3.3. 99 | 100 | ## v3.0.6 (2017.07.22) 101 | 102 | * Added 'list_extensions' method to xphyle.formats.Formats. 103 | * Fixed subtle bug that would cause failure when calling xopen on stdout that has been monkeypatched (as is done by pytest). 104 | 105 | ## v3.0.5 (2017.07.19) 106 | 107 | * Fixed #13: opening corrupt gzip file fails silently. 108 | 109 | ## v3.0.3 (2017.06.14) 110 | 111 | * Added basic performance testing. 112 | * Fixed #12: xphyle not recognizing when system-level lzma not installed. 113 | 114 | ## v3.0.2 (2017.05.23) 115 | 116 | * Forcing use of backports.typing for python < 3.6. 117 | 118 | ## v3.0.1 (2017.04.29) 119 | 120 | * Added a paper for submission to JOSS. 121 | * Enabled DOI generation using Zenodo. 122 | 123 | ## v3.0.0 (2017.04.18) 124 | 125 | * Lots of fixes for bugs and type errors using mypy. 126 | * Two breaking changes that necessitate the major version bump: 127 | * Several methods were erroneously named "uncompress_..." and have been corrected to "decompress_..." 128 | * Default values were erroneously used for the char_mode and linesep parameters of fileinput(), fileoutput(), FileInput, FileOutput, and all their subclasses. textinput(), textoutput(), byteinput(), and byteoutput() convenience methods were added, and default values were set to None. 129 | 130 | ## v2.2.3 (2017.04.09) 131 | 132 | 133 | * Add get_compression_format_name() method to Formats. 
134 | * Validate the compression type in xopen. 135 | 136 | ## v2.2.1 (2017.03.01) 137 | 138 | 139 | * Switch to pytest for testing. 140 | * Bugfixes in fileoutput. 141 | * Add ability to specify a file header for each file opened by fileoutput. 142 | * Add ability to pass initializing text/bytes to xopen with file_type==BUFFER to create a readable buffer. 143 | 144 | ## v2.2.0 (2017.02.17) 145 | 146 | 147 | * Add caching for FileMode and PermissionSet 148 | * Add PatternFileOutput subclass of FileOutput for generating output files from a pattern and tokens derived from lines in the file. 149 | 150 | ## v2.1.1 (2017.02.13) 151 | 152 | 153 | * Minor bug fixes 154 | * Code cleanup (thanks to Codacy) 155 | 156 | ## v2.1.0 (2017.02.11) 157 | 158 | 159 | * Added support for opening buffer types. 160 | 161 | ## v2.0.0 (2017.02.11) 162 | 163 | * The major version change reflects the introduction of potentially breaking changes: 164 | 1. When a file object is passed to `open_`, it is now wrapped in a `FileLikeWrapper` by default. To avoid this behavior, set `wrap_fileobj=False`, but note that if the file-like object is not a context manager, an error will be raised. 165 | 2. `xopen` no longer wraps files in `FileLikeWrapper` by default. To revert to the old behavior, set `xphyle.configure(default_xopen_context_wrapper=True)`. 166 | 3. For several methods in the `xphyle.paths` module, the `mode` argument has been renamed to `access` to avoid ambiguity. 167 | 4. `xphyle.paths.check_writeable_file` and `xphyle.paths.safe_check_writeable_file` have been changed to 'writable' to be consistent with the spelling used in core python. 168 | 5. In the `xphyle.paths` module: 169 | * `check_file_mode` is removed. 170 | * `get_access` is renamed to `get_permissions`. 171 | * Many attribute and method names changed, mostly due to renaming of 'access' to 'permissions'. 172 | 6. In the context of `FileInput`, `mode` parameters have been changed to `char_mode`. 173 | 7. 
The `is_iterable` method has moved from `xphyle.utils` to `xphyle.types`. 174 | 8. The `types` parameter of `xphyle.utils.find` is renamed to path_types. 175 | 9. The string name of the FIFO path type has changed from 'fifo' to '|'. 176 | * Added `xphyle.popen`, which opens subprocesses (i.e. `subprocess.Popen` instances) and uses `xopen` to open stdin/stdout/sterr files or wrap PIPEs. This enables sending compressed data to/reading compressed data from subprocesses without knowing in advance what the compression format will be or whether native compression/decompression programs are available. 177 | * `xopen` now accepts two additional argument types: file objects and system commands. The later are specified as a string beginning with '|' (similar to the toolshed `nopen` method). PIPEs are automatically opened for stdin, stdout, and stderr. Additionally, if a compression type is specified, it is used to wrap one of the pipes as follows: 178 | * If mode is read or readwrite, `xopen` opens a PIPE to stdout. 179 | * Otherwise, `xopen` opens a PIPE to stdin. 180 | * Enumerated types are now provided (in `xphyle.typing`) for all argument types in which fixed sets of strings were used previously (e.g. file open mode, path type). All methods with these argument types now accept either the string or Enum value. 181 | -------------------------------------------------------------------------------- /CITATION: -------------------------------------------------------------------------------- 1 | Didion, JP (2017) xphyle: Extraordinarily simple file handling. 
Journal of Open Source Software; [doi:10.21105/joss.00255](https://doi.org/10.21105/joss.00255) 2 | 3 | @article{Didion2017, 4 | doi = {10.21105/joss.00255}, 5 | url = {https://doi.org/10.21105/joss.00255}, 6 | year = {2017}, 7 | publisher = {The Open Journal}, 8 | volume = {2}, 9 | number = {14}, 10 | pages = {255}, 11 | author = {John Didion}, 12 | title = {xphyle: Extraordinarily simple file handling}, 13 | journal = {Journal of Open Source Software} 14 | } 15 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, gender identity and expression, level of experience, 9 | nationality, personal appearance, race, religion, or sexual identity and 10 | orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. 
Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at [INSERT EMAIL ADDRESS]. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at [http://contributor-covenant.org/version/1/4][version] 72 | 73 | [homepage]: http://contributor-covenant.org 74 | [version]: http://contributor-covenant.org/version/1/4/ 75 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | We welcome any contributions via pull requests. We are especially interested in a collaborator that would either backport xphyle to Python 2.x or implement a compatibility layer to make xphyle Python version-independent. 2 | 3 | All code must be written in idiomatic python 3. Note that we use [PEP484](https://www.python.org/dev/peps/pep-0484/) type hints. Variable annotations are 4 | defined where needed using the comment syntax. Static code analysis is performed using [mypy](http://mypy-lang.org/) and pylint. 5 | 6 | Style-wise, we try to adhere to the Google python style guidelines. 
We use Google-style docstrings, which are formatted by the [Napoleon Sphinx Plugin](https://pypi.python.org/pypi/sphinxcontrib-napoleon). 7 | 8 | We enforce the [Contributor Covenant](http://contributor-covenant.org/) code of conduct. 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | **MIT License** 2 | 3 | Copyright 2017 John P Didion 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 9 | of the Software, and to permit persons to whom the Software is furnished to do 10 | so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO E 18 | VENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | 23 | **Copyrighted Works** 24 | This software may use copyrighted and/or public domain works and distributes 25 | these works under the terms of their respective licenses. All copyright 26 | restrictions still apply to these 'third-party' packages. 
Furthermore, xphyle is 27 | a community project with contributors within and outside of the US Government; 28 | these authors retain copyright on their work, which they may relinquish via a 29 | public domain dedication. Below is a list of contributors, and either the 30 | license under which their work is governed, or the release of copyright under 31 | public domain dedication. 32 | 33 | **List of Contributors** 34 | John P Didion 2016-2017 Public Domain 35 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE 3 | include tests/test*.py 4 | include docs/*.rst 5 | include docs/conf.py 6 | include docs/Makefile 7 | include docs/logo.png 8 | include docs/api/*.rst 9 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | module = xphyle 2 | #pytestops = "--full-trace" 3 | #pytestops = "-v -s" 4 | repo = jdidion/$(module) 5 | desc = Release $(version) 6 | tests = tests 7 | desc = '' 8 | # Use this option to show full stack trace for errors 9 | #pytestopts = "--full-trace" 10 | 11 | all: install test 12 | 13 | install: 14 | python setup.py install 15 | 16 | test: 17 | pytest -m "not perf" -vv --cov --cov-report term-missing $(pytestopts) $(tests) 18 | 19 | perftest: 20 | pytest -m "perf" $(tests) 21 | 22 | clean: 23 | rm -Rf __pycache__ 24 | rm -Rf **/__pycache__/* 25 | rm -Rf dist 26 | rm -Rf build 27 | rm -Rf *.egg-info 28 | rm -Rf .pytest_cache 29 | rm -Rf .coverage 30 | 31 | tag: 32 | git tag $(version) 33 | 34 | release: clean tag install test 35 | echo "Releasing version $(version)" 36 | python setup.py sdist bdist_wheel 37 | # pypi doesn't accept eggs 38 | rm dist/*.egg 39 | # release 40 | #python setup.py upload -r pypi 41 | twine upload -u "__token__" -p "$(pypi_token)" 
dist/* 42 | # push new tag after successful build 43 | git push origin --tags 44 | # create release in GitHub 45 | curl -v -i -X POST \ 46 | -H "Content-Type:application/json" \ 47 | -H "Authorization: token $(github_token)" \ 48 | https://api.github.com/repos/$(repo)/releases \ 49 | -d '{ \ 50 | "tag_name":"$(version)", \ 51 | "target_commitish": "master", \ 52 | "name": "$(version)", \ 53 | "body": "$(desc)", \ 54 | "draft": false, \ 55 | "prerelease": false \ 56 | }' 57 | 58 | docs: 59 | make -C docs api 60 | make -C docs html 61 | 62 | readme: 63 | pandoc --from=markdown --to=rst --output=README.rst README.md 64 | 65 | lint: 66 | pylint $(module) 67 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # xphyle: extraordinarily simple file handling 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/xphyle.svg?branch=master)](https://pypi.python.org/pypi/xphyle) 4 | [![Travis CI](https://img.shields.io/travis/jdidion/xphyle/master.svg)](https://travis-ci.org/jdidion/xphyle) 5 | [![Coverage Status](https://img.shields.io/coveralls/jdidion/xphyle/master.svg)](https://coveralls.io/github/jdidion/xphyle?branch=master) 6 | [![Codacy Badge](https://api.codacy.com/project/badge/Grade/b2c0baa52b604e39a09ed108ac2f53ee)](https://www.codacy.com/app/jdidion/xphyle?utm_source=github.com&utm_medium=referral&utm_content=jdidion/xphyle&utm_campaign=Badge_Grade) 7 | [![Documentation Status](https://readthedocs.org/projects/xphyle/badge/?version=latest)](http://xphyle.readthedocs.io/en/latest/?badge=latest) 8 | [![DOI](https://zenodo.org/badge/71260678.svg)](https://zenodo.org/badge/latestdoi/71260678) 9 | [![JOSS](http://joss.theoj.org/papers/10.21105/joss.00255/status.svg)](http://joss.theoj.org/papers/10.21105/joss.00255) 10 | 11 | logo 13 | 14 | xphyle is a small python library that makes it easy to open compressed 15 | files. 
Most importantly, xphyle will use the appropriate program (e.g. 'gzip') to compress/decompress a file if it is available on your system; this is almost always faster than using the corresponding python library. xphyle also provides methods that simplify common file I/O operations. 16 | 17 | Recent version of xphyle (4.0.0+) require python 3.6. Older versions of xphyle support python 3.4+. 18 | 19 | Please note that xphyle may work on Windows, but it is not tested. 20 | 21 | # Installation 22 | 23 | ``` 24 | pip install xphyle 25 | ``` 26 | 27 | # Building from source 28 | 29 | Clone this repository and run 30 | 31 | ``` 32 | make 33 | ``` 34 | 35 | # Example usages: 36 | 37 | ```python 38 | from xphyle import * 39 | from xphyle.paths import STDIN, STDOUT 40 | 41 | # Open a compressed file... 42 | myfile = xopen('infile.gz') 43 | 44 | # ...or a compressed stream 45 | # e.g. gzip -c afile | python my_program.py 46 | stdin = xopen(STDIN) 47 | 48 | # Easily write to the stdin of a subprocess 49 | with open_('|cat', 'wt') as process: 50 | process.write('foo') 51 | 52 | # We have to tell xopen what kind of compression 53 | # to use when writing to stdout 54 | stdout = xopen(STDOUT, compression='gz') 55 | 56 | # The `open_` method ensures that the file is usable with the `with` keyword. 57 | # Print all lines in a compressed file... 58 | with open_('infile.gz') as myfile: 59 | for line in myfile: 60 | print(line) 61 | 62 | # ... 
or a compressed URL 63 | with open_('http://foo.com/myfile.gz') as myfile: 64 | for line in myfile: 65 | print(line) 66 | 67 | # Transparently handle paths and file objects 68 | def dostuff(path_or_file): 69 | with open_(path_or_file) as myfile: 70 | for line in myfile: 71 | print(line) 72 | 73 | # Read all lines in a compressed file into a list 74 | from xphyle.utils import read_lines 75 | lines = list(read_lines('infile.gz')) 76 | 77 | # Sum the rows in a compressed file where each line is an integer value 78 | total = sum(read_lines('infile.gz', convert=int)) 79 | ``` 80 | 81 | See the [Documentation](https://xphyle.readthedocs.io/en/latest/) for full usage information. 82 | 83 | # Supported compression formats 84 | 85 | * `gzip` (uses `igzip` or `pigz` if available) 86 | * `bgzip` 87 | * `bzip2` (uses `pbzip2` if available) 88 | * `lzma` 89 | * `zstd` 90 | 91 | # Issues 92 | 93 | Please report bugs and request enhancements using the [issue tracker](https://github.com/jdidion/xphyle). 94 | 95 | # Roadmap 96 | 97 | Future releases are mapped out using [GitHub Projects](https://github.com/jdidion/xphyle/projects). 98 | 99 | # Citing xphyle 100 | 101 | [Didion, JP (2017) xphyle: Extraordinarily simple file handling. Journal of Open Source Software; doi:10.21105/joss.00255](https://joss.theoj.org/papers/10.21105/joss.00255#) 102 | 103 | # Acknowledgements 104 | 105 | * [Dependencies scanned by PyUp.io](http://pyup.io/) 106 | * Thanks to [@ctb](https://github.com/ctb) for reviewing the xphyle paper -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 
5 | SPHINXOPTS = 6 | SPHINXAPI = sphinx-apidoc 7 | SPHINXBUILD = sphinx-build 8 | PAPER = 9 | BUILDDIR = _build 10 | 11 | # User-friendly check for sphinx-build 12 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 13 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 14 | endif 15 | 16 | # Internal variables. 17 | PAPEROPT_a4 = -D latex_paper_size=a4 18 | PAPEROPT_letter = -D latex_paper_size=letter 19 | MOSTSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) 20 | ALLSPHINXOPTS = $(MOSTSPHINXOPTS) . 21 | # the i18n builder cannot share the environment and doctrees with the others 22 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 23 | 24 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 25 | 26 | all: html 27 | 28 | help: 29 | @echo "Please use \`make ' where is one of" 30 | @echo " html to make standalone HTML files" 31 | @echo " dirhtml to make HTML files named index.html in directories" 32 | @echo " singlehtml to make a single large HTML file" 33 | @echo " pickle to make pickle files" 34 | @echo " json to make JSON files" 35 | @echo " htmlhelp to make HTML files and a HTML help project" 36 | @echo " qthelp to make HTML files and a qthelp project" 37 | @echo " devhelp to make HTML files and a Devhelp project" 38 | @echo " epub to make an epub" 39 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 40 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 41 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 42 | @echo " text to make text files" 43 | @echo " man to make 
manual pages" 44 | @echo " texinfo to make Texinfo files" 45 | @echo " info to make Texinfo files and run them through makeinfo" 46 | @echo " gettext to make PO message catalogs" 47 | @echo " changes to make an overview of all changed/added/deprecated items" 48 | @echo " xml to make Docutils-native XML files" 49 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 50 | @echo " linkcheck to check all external links for integrity" 51 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 52 | 53 | clean: 54 | rm -rf $(BUILDDIR)/* 55 | 56 | api: 57 | $(SPHINXAPI) -f -o api .. ../setup.py ../tests/* 58 | 59 | html: 60 | $(SPHINXBUILD) -b html $(MOSTSPHINXOPTS) api $(BUILDDIR)/html 61 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 62 | @echo 63 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 64 | 65 | dirhtml: 66 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 67 | @echo 68 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 69 | 70 | singlehtml: 71 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 72 | @echo 73 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 74 | 75 | pickle: 76 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 77 | @echo 78 | @echo "Build finished; now you can process the pickle files." 79 | 80 | json: 81 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 82 | @echo 83 | @echo "Build finished; now you can process the JSON files." 84 | 85 | htmlhelp: 86 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 87 | @echo 88 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 89 | ".hhp project file in $(BUILDDIR)/htmlhelp." 
90 | 91 | qthelp: 92 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 93 | @echo 94 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 95 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 96 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/atropos.qhcp" 97 | @echo "To view the help file:" 98 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/atropos.qhc" 99 | 100 | devhelp: 101 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 102 | @echo 103 | @echo "Build finished." 104 | @echo "To view the help file:" 105 | @echo "# mkdir -p $$HOME/.local/share/devhelp/atropos" 106 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/atropos" 107 | @echo "# devhelp" 108 | 109 | epub: 110 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 111 | @echo 112 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 113 | 114 | latex: 115 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 116 | @echo 117 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 118 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 119 | "(use \`make latexpdf' here to do that automatically)." 120 | 121 | latexpdf: 122 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 123 | @echo "Running LaTeX files through pdflatex..." 124 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 125 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 126 | 127 | latexpdfja: 128 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 129 | @echo "Running LaTeX files through platex and dvipdfmx..." 130 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 131 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 132 | 133 | text: 134 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 135 | @echo 136 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 
137 | 138 | man: 139 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 140 | @echo 141 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 142 | 143 | texinfo: 144 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 145 | @echo 146 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 147 | @echo "Run \`make' in that directory to run these through makeinfo" \ 148 | "(use \`make info' here to do that automatically)." 149 | 150 | info: 151 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 152 | @echo "Running Texinfo files through makeinfo..." 153 | make -C $(BUILDDIR)/texinfo info 154 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 155 | 156 | gettext: 157 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 158 | @echo 159 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 160 | 161 | changes: 162 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 163 | @echo 164 | @echo "The overview file is in $(BUILDDIR)/changes." 165 | 166 | linkcheck: 167 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 168 | @echo 169 | @echo "Link check complete; look for any errors in the above output " \ 170 | "or in $(BUILDDIR)/linkcheck/output.txt." 171 | 172 | doctest: 173 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 174 | @echo "Testing of doctests in the sources finished, look at the " \ 175 | "results in $(BUILDDIR)/doctest/output.txt." 176 | 177 | xml: 178 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 179 | @echo 180 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 181 | 182 | pseudoxml: 183 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 184 | @echo 185 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 
186 | -------------------------------------------------------------------------------- /docs/api/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # xphyle API documentation build configuration file, created by 4 | # sphinx-quickstart on Fri Sep 12 09:11:16 2014. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | sys.path.insert(0, os.path.abspath(os.pardir)) 22 | 23 | # -- General configuration ------------------------------------------------ 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | # needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | # Add autodoc and napoleon to the extensions list 32 | extensions = ["sphinx.ext.autodoc", "sphinxcontrib.napoleon"] 33 | 34 | # Add any paths that contain templates here, relative to this directory. 35 | templates_path = ["_templates"] 36 | 37 | # The suffix of source filenames. 38 | source_suffix = ".rst" 39 | 40 | # The encoding of source files. 41 | # source_encoding = 'utf-8-sig' 42 | 43 | # The master toctree document. 44 | master_doc = "modules" 45 | 46 | # General information about the project. 
47 | project = u"xphyle" 48 | copyright = u"Public domain (government work), by John P Didion" 49 | 50 | # The version info for the project you're documenting, acts as replacement for 51 | # |version| and |release|, also used in various other places throughout the 52 | # built documents. 53 | 54 | from xphyle import __version__ 55 | 56 | # 57 | # The short X.Y version. 58 | version = __version__ 59 | # The full version, including alpha/beta/rc tags. 60 | release = __version__ 61 | 62 | # The language for content autogenerated by Sphinx. Refer to documentation 63 | # for a list of supported languages. 64 | # language = None 65 | 66 | # There are two options for replacing |today|: either, you set today to some 67 | # non-false value, then it is used: 68 | # today = '' 69 | # Else, today_fmt is used as the format for a strftime call. 70 | # today_fmt = '%B %d, %Y' 71 | 72 | # List of patterns, relative to source directory, that match files and 73 | # directories to ignore when looking for source files. 74 | exclude_patterns = ["tests", "setup.py", "build", "dist", "_build"] 75 | 76 | # The reST default role (used for this markup: `text`) to use for all 77 | # documents. 78 | # default_role = None 79 | 80 | # If true, '()' will be appended to :func: etc. cross-reference text. 81 | # add_function_parentheses = True 82 | 83 | # If true, the current module name will be prepended to all description 84 | # unit titles (such as .. function::). 85 | # add_module_names = True 86 | 87 | # If true, sectionauthor and moduleauthor directives will be shown in the 88 | # output. They are ignored by default. 89 | # show_authors = False 90 | 91 | # The name of the Pygments (syntax highlighting) style to use. 92 | pygments_style = "sphinx" 93 | 94 | # A list of ignored prefixes for module index sorting. 95 | # modindex_common_prefix = [] 96 | 97 | # If true, keep warnings as "system message" paragraphs in the built documents. 
98 | # keep_warnings = False 99 | 100 | 101 | # -- Options for HTML output ---------------------------------------------- 102 | 103 | # The theme to use for HTML and HTML Help pages. See the documentation for 104 | # a list of builtin themes. 105 | html_theme = "default" 106 | try: 107 | from better import better_theme_path 108 | 109 | html_theme_path = [better_theme_path] 110 | html_theme = "better" 111 | except ImportError: 112 | pass 113 | 114 | 115 | # Theme options are theme-specific and customize the look and feel of a theme 116 | # further. For a list of options available for each theme, see the 117 | # documentation. 118 | # html_theme_options = {} 119 | 120 | # Add any paths that contain custom themes here, relative to this directory. 121 | # html_theme_path = [] 122 | 123 | # The name for this set of Sphinx documents. If None, it defaults to 124 | # " v documentation". 125 | # html_title = None 126 | 127 | # A shorter title for the navigation bar. Default is the same as html_title. 128 | # html_short_title = None 129 | 130 | # The name of an image file (relative to this directory) to place at the top 131 | # of the sidebar. 132 | # html_logo = 'logo.png' 133 | 134 | # The name of an image file (within the static path) to use as favicon of the 135 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 136 | # pixels large. 137 | # html_favicon = None 138 | 139 | # Add any paths that contain custom static files (such as style sheets) here, 140 | # relative to this directory. They are copied after the builtin static files, 141 | # so a file named "default.css" will overwrite the builtin "default.css". 142 | html_static_path = ["_static"] 143 | 144 | # Add any extra paths that contain custom files (such as robots.txt or 145 | # .htaccess) here, relative to this directory. These files are copied 146 | # directly to the root of the documentation. 
147 | # html_extra_path = [] 148 | 149 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 150 | # using the given strftime format. 151 | # html_last_updated_fmt = '%b %d, %Y' 152 | 153 | # If true, SmartyPants will be used to convert quotes and dashes to 154 | # typographically correct entities. 155 | html_use_smartypants = True 156 | 157 | # Custom sidebar templates, maps document names to template names. 158 | # html_sidebars = {} 159 | 160 | # Additional templates that should be rendered to pages, maps page names to 161 | # template names. 162 | # html_additional_pages = {} 163 | 164 | # If false, no module index is generated. 165 | # html_domain_indices = True 166 | 167 | # If false, no index is generated. 168 | # html_use_index = True 169 | 170 | # If true, the index is split into individual pages for each letter. 171 | # html_split_index = False 172 | 173 | # If true, links to the reST sources are added to the pages. 174 | # html_show_sourcelink = True 175 | 176 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 177 | # html_show_sphinx = True 178 | 179 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 180 | # html_show_copyright = True 181 | 182 | # If true, an OpenSearch description file will be output, and all pages will 183 | # contain a tag referring to it. The value of this option must be the 184 | # base URL from which the finished HTML is served. 185 | # html_use_opensearch = '' 186 | 187 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 188 | # html_file_suffix = None 189 | 190 | # Output file base name for HTML help builder. 191 | htmlhelp_basename = "xphyledoc" 192 | 193 | 194 | # -- Options for LaTeX output --------------------------------------------- 195 | 196 | latex_elements = { 197 | # The paper size ('letterpaper' or 'a4paper'). 198 | "papersize": "a4paper", 199 | # The font size ('10pt', '11pt' or '12pt'). 
200 | #'pointsize': '10pt', 201 | # Additional stuff for the LaTeX preamble. 202 | #'preamble': '', 203 | } 204 | 205 | # Grouping the document tree into LaTeX files. List of tuples 206 | # (source start file, target name, title, 207 | # author, documentclass [howto, manual, or own class]). 208 | latex_documents = [ 209 | ("index", "xphyle.tex", u"xphyle Documentation", u"John P Didion", "manual"), 210 | ] 211 | 212 | # The name of an image file (relative to this directory) to place at the top of 213 | # the title page. 214 | # latex_logo = None 215 | 216 | # For "manual" documents, if this is true, then toplevel headings are parts, 217 | # not chapters. 218 | # latex_use_parts = False 219 | 220 | # If true, show page references after internal links. 221 | # latex_show_pagerefs = False 222 | 223 | # If true, show URL addresses after external links. 224 | # latex_show_urls = False 225 | 226 | # Documents to append as an appendix to all manuals. 227 | # latex_appendices = [] 228 | 229 | # If false, no module index is generated. 230 | # latex_domain_indices = True 231 | 232 | 233 | # -- Options for manual page output --------------------------------------- 234 | 235 | # One entry per manual page. List of tuples 236 | # (source start file, name, description, authors, manual section). 237 | man_pages = [("index", "xphyle", u"xphyle Documentation", [u"John P Didion"], 1)] 238 | 239 | # If true, show URL addresses after external links. 240 | # man_show_urls = False 241 | 242 | 243 | # -- Options for Texinfo output ------------------------------------------- 244 | 245 | # Grouping the document tree into Texinfo files. 
List of tuples 246 | # (source start file, target name, title, author, 247 | # dir menu entry, description, category) 248 | texinfo_documents = [ 249 | ( 250 | "index", 251 | "xphyle", 252 | u"xphyle Documentation", 253 | u"John P Didion", 254 | "xphyle", 255 | "Transparently open compressed files", 256 | "io", 257 | ), 258 | ] 259 | 260 | # Documents to append as an appendix to all manuals. 261 | # texinfo_appendices = [] 262 | 263 | # If false, no module index is generated. 264 | # texinfo_domain_indices = True 265 | 266 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 267 | # texinfo_show_urls = 'footnote' 268 | 269 | # If true, do not generate a @detailmenu in the "Top" node's menu. 270 | # texinfo_no_detailmenu = False 271 | -------------------------------------------------------------------------------- /docs/api/modules.rst: -------------------------------------------------------------------------------- 1 | xphyle package 2 | ============== 3 | 4 | Public API 5 | ---------- 6 | 7 | xphyle module 8 | ~~~~~~~~~~~~~ 9 | 10 | .. automodule:: xphyle 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | xphyle.utils module 16 | ~~~~~~~~~~~~~~~~~~~ 17 | 18 | .. automodule:: xphyle.utils 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | xphyle.paths module 24 | ~~~~~~~~~~~~~~~~~~~ 25 | 26 | .. automodule:: xphyle.paths 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Plugin API 32 | ---------- 33 | 34 | You shouldn't need these modules unless you want to extend xphyle functionality. 35 | 36 | xphyle.formats module 37 | ~~~~~~~~~~~~~~~~~~~~~ 38 | 39 | .. automodule:: xphyle.formats 40 | :members: 41 | :undoc-members: 42 | :show-inheritance: 43 | 44 | xphyle.progress module 45 | ~~~~~~~~~~~~~~~~~~~~~~ 46 | 47 | .. automodule:: xphyle.progress 48 | :members: 49 | :undoc-members: 50 | :show-inheritance: 51 | 52 | xphyle.urls module 53 | ~~~~~~~~~~~~~~~~~~ 54 | 55 | .. 
automodule:: xphyle.urls 56 | :members: 57 | :undoc-members: 58 | :show-inheritance: 59 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # xphyle documentation build configuration file, created by 5 | # sphinx-quickstart on Fri Sep 12 09:11:16 2014. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | import sys 17 | import os 18 | 19 | # If extensions (or modules to document with autodoc) are in another directory, 20 | # add these directories to sys.path here. If the directory is relative to the 21 | # documentation root, use os.path.abspath to make it absolute, like shown here. 22 | sys.path.insert(0, os.path.abspath(os.pardir)) 23 | 24 | # The version info for the project you're documenting, acts as replacement for 25 | # |version| and |release|, also used in various other places throughout the 26 | # built documents. 27 | 28 | from xphyle import __version__ 29 | 30 | # The short X.Y version. 31 | version = __version__ 32 | # The full version, including alpha/beta/rc tags. 33 | release = __version__ 34 | 35 | # -- General configuration ------------------------------------------------ 36 | 37 | # If your documentation needs a minimal Sphinx version, state it here. 38 | # needs_sphinx = '1.0' 39 | 40 | # Add any Sphinx extension module names here, as strings. They can be 41 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 42 | # ones. 
43 | # Add autodoc and napoleon to the extensions list 44 | extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon"] 45 | 46 | # Add any paths that contain templates here, relative to this directory. 47 | templates_path = ["_templates"] 48 | 49 | # The suffix of source filenames. 50 | source_suffix = ".rst" 51 | 52 | # The encoding of source files. 53 | # source_encoding = 'utf-8-sig' 54 | 55 | # The master toctree document. 56 | master_doc = "index" 57 | 58 | # General information about the project. 59 | project = u"xphyle" 60 | copyright_ = u"Public domain (government work), by John P Didion" 61 | 62 | # The language for content autogenerated by Sphinx. Refer to documentation 63 | # for a list of supported languages. 64 | # language = None 65 | 66 | # There are two options for replacing |today|: either, you set today to some 67 | # non-false value, then it is used: 68 | # today = '' 69 | # Else, today_fmt is used as the format for a strftime call. 70 | # today_fmt = '%B %d, %Y' 71 | 72 | # List of patterns, relative to source directory, that match files and 73 | # directories to ignore when looking for source files. 74 | exclude_patterns = ["tests", "setup.py", "build", "dist", "_build"] 75 | 76 | # The reST default role (used for this markup: `text`) to use for all 77 | # documents. 78 | # default_role = None 79 | 80 | # If true, '()' will be appended to :func: etc. cross-reference text. 81 | # add_function_parentheses = True 82 | 83 | # If true, the current module name will be prepended to all description 84 | # unit titles (such as .. function::). 85 | # add_module_names = True 86 | 87 | # If true, sectionauthor and moduleauthor directives will be shown in the 88 | # output. They are ignored by default. 89 | # show_authors = False 90 | 91 | # The name of the Pygments (syntax highlighting) style to use. 92 | pygments_style = "sphinx" 93 | 94 | # A list of ignored prefixes for module index sorting. 
95 | # modindex_common_prefix = [] 96 | 97 | # If true, keep warnings as "system message" paragraphs in the built documents. 98 | # keep_warnings = False 99 | 100 | 101 | # -- Options for HTML output ---------------------------------------------- 102 | 103 | # The theme to use for HTML and HTML Help pages. See the documentation for 104 | # a list of builtin themes. 105 | html_theme = "default" 106 | try: 107 | from better import better_theme_path 108 | 109 | html_theme_path = [better_theme_path] 110 | html_theme = "better" 111 | except ImportError: 112 | pass 113 | 114 | # Theme options are theme-specific and customize the look and feel of a theme 115 | # further. For a list of options available for each theme, see the 116 | # documentation. 117 | # html_theme_options = {} 118 | 119 | # Add any paths that contain custom themes here, relative to this directory. 120 | # html_theme_path = [] 121 | 122 | # The name for this set of Sphinx documents. If None, it defaults to 123 | # " v documentation". 124 | # html_title = None 125 | 126 | # A shorter title for the navigation bar. Default is the same as html_title. 127 | # html_short_title = None 128 | 129 | # The name of an image file (relative to this directory) to place at the top 130 | # of the sidebar. 131 | # html_logo = 'logo.png' 132 | 133 | # The name of an image file (within the static path) to use as favicon of the 134 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 135 | # pixels large. 136 | # html_favicon = None 137 | 138 | # Add any paths that contain custom static files (such as style sheets) here, 139 | # relative to this directory. They are copied after the builtin static files, 140 | # so a file named "default.css" will overwrite the builtin "default.css". 141 | html_static_path = ["_static"] 142 | 143 | # Add any extra paths that contain custom files (such as robots.txt or 144 | # .htaccess) here, relative to this directory. 
These files are copied 145 | # directly to the root of the documentation. 146 | # html_extra_path = [] 147 | 148 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 149 | # using the given strftime format. 150 | # html_last_updated_fmt = '%b %d, %Y' 151 | 152 | # If true, SmartyPants will be used to convert quotes and dashes to 153 | # typographically correct entities. 154 | html_use_smartypants = True 155 | 156 | # Custom sidebar templates, maps document names to template names. 157 | # html_sidebars = {} 158 | 159 | # Additional templates that should be rendered to pages, maps page names to 160 | # template names. 161 | # html_additional_pages = {} 162 | 163 | # If false, no module index is generated. 164 | # html_domain_indices = True 165 | 166 | # If false, no index is generated. 167 | # html_use_index = True 168 | 169 | # If true, the index is split into individual pages for each letter. 170 | # html_split_index = False 171 | 172 | # If true, links to the reST sources are added to the pages. 173 | # html_show_sourcelink = True 174 | 175 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 176 | # html_show_sphinx = True 177 | 178 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 179 | # html_show_copyright = True 180 | 181 | # If true, an OpenSearch description file will be output, and all pages will 182 | # contain a tag referring to it. The value of this option must be the 183 | # base URL from which the finished HTML is served. 184 | # html_use_opensearch = '' 185 | 186 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 187 | # html_file_suffix = None 188 | 189 | # Output file base name for HTML help builder. 190 | htmlhelp_basename = "xphyledoc" 191 | 192 | # -- Options for LaTeX output --------------------------------------------- 193 | 194 | latex_elements = { 195 | # The paper size ('letterpaper' or 'a4paper'). 
196 | "papersize": "a4paper", 197 | # The font size ('10pt', '11pt' or '12pt'). 198 | # 'pointsize': '10pt', 199 | # Additional stuff for the LaTeX preamble. 200 | # 'preamble': '', 201 | } 202 | 203 | # Grouping the document tree into LaTeX files. List of tuples 204 | # (source start file, target name, title, 205 | # author, documentclass [howto, manual, or own class]). 206 | latex_documents = [ 207 | ("index", "xphyle.tex", u"xphyle Documentation", u"John P Didion", "manual"), 208 | ] 209 | 210 | # The name of an image file (relative to this directory) to place at the top of 211 | # the title page. 212 | # latex_logo = None 213 | 214 | # For "manual" documents, if this is true, then toplevel headings are parts, 215 | # not chapters. 216 | # latex_use_parts = False 217 | 218 | # If true, show page references after internal links. 219 | # latex_show_pagerefs = False 220 | 221 | # If true, show URL addresses after external links. 222 | # latex_show_urls = False 223 | 224 | # Documents to append as an appendix to all manuals. 225 | # latex_appendices = [] 226 | 227 | # If false, no module index is generated. 228 | # latex_domain_indices = True 229 | 230 | 231 | # -- Options for manual page output --------------------------------------- 232 | 233 | # One entry per manual page. List of tuples 234 | # (source start file, name, description, authors, manual section). 235 | man_pages = [("index", "xphyle", u"xphyle Documentation", [u"John P Didion"], 1)] 236 | 237 | # If true, show URL addresses after external links. 238 | # man_show_urls = False 239 | 240 | 241 | # -- Options for Texinfo output ------------------------------------------- 242 | 243 | # Grouping the document tree into Texinfo files. 
List of tuples 244 | # (source start file, target name, title, author, 245 | # dir menu entry, description, category) 246 | texinfo_documents = [ 247 | ( 248 | "index", 249 | "xphyle", 250 | u"xphyle Documentation", 251 | u"John P Didion", 252 | "xphyle", 253 | "Transparently open compressed files", 254 | "io", 255 | ), 256 | ] 257 | 258 | # Documents to append as an appendix to all manuals. 259 | # texinfo_appendices = [] 260 | 261 | # If false, no module index is generated. 262 | # texinfo_domain_indices = True 263 | 264 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 265 | # texinfo_show_urls = 'footnote' 266 | 267 | # If true, do not generate a @detailmenu in the "Top" node's menu. 268 | # texinfo_no_detailmenu = False 269 | -------------------------------------------------------------------------------- /docs/developers.rst: -------------------------------------------------------------------------------- 1 | Style-wise, we try to adhere to the [Google python style guidelines](https://google.github.io/styleguide/pyguide.html). 2 | 3 | We use Google-style docstrings, which are formatted by the [Napoleon Sphinx Plugin](https://pypi.python.org/pypi/sphinxcontrib-napoleon). 4 | 5 | We run pylint as part of each build and strive to maintain a 10/10 score. However, we disable some pylint checks: 6 | 7 | * Function annotations: pylint does not properly handle whitespace around function annotations (https://github.com/PyCQA/pylint/issues/238). 8 | * White space on empty lines: we use white space as a visual guide to the structure of the code. Each blank line should have whitespace matching the indent level of the next non-blank line. 9 | * Checks that are arbitrary/overly restrictive (e.g. 
'too-many-xxx'; see .pylintrc for full list) 10 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | xphyle: extraordinarily simple file handling 2 | ============================================ 3 | 4 | .. image:: logo.png 5 | :height: 200px 6 | :width: 200px 7 | 8 | xphyle is a small python (3.4+) library that makes it easy to open compressed 9 | files and URLs for the highest possible performance available on your system. 10 | 11 | * `API `_ 12 | * `Source code `_ 13 | * `Report an issue `_ 14 | 15 | Installation 16 | ------------ 17 | 18 | xphyle is available from pypi:: 19 | 20 | pip install xphyle 21 | 22 | xphyle tries to use the compression programs installed on your local machine (e.g. gzip, bzip2); if it can't, it will use the built-in python libraries (which are slower). Thus, xphyle has no required dependencies, but we recommend that you install gzip, etc. if you don't already have them. 23 | 24 | xphyle will use alternative programs for multi-threaded compression if they are available: 25 | 26 | * gzip: `igzip `_ or `pigz `_. 27 | * bzip2: `pbzip2 `_ 28 | 29 | Multithreading support is disabled by default; to set the number of threads that xphyle should use:: 30 | 31 | xphyle.configure(threads=4) 32 | 33 | or, to automatically set it to the number of cores available on your system:: 34 | 35 | xphyle.configure(threads=True) 36 | 37 | If you have programs installed at a location that is not on your path, you can add those locations to xphyle's executable search:: 38 | 39 | xphyle.configure(executable_path=['/path', '/another/path', ...]) 40 | 41 | If you would like progress bars displayed for file operations, you need to configure one or both of the python-level and system-level progress bars. 42 | 43 | For python-level operations, the `pokrok `_ API is used by default. 
Pokrok provides access to many popular progress bar libraries with a single, standard interface. Please see the documentation for more information about which libraries are currently supported and how to configure them. To enable this:: 44 | 45 | > pip install pokrok 46 | 47 | xphyle.configure(progress=True) 48 | 49 | You can also use your own preferred progress bar by passing a callable, which must take a single iterable argument and two optional keyword arguments and return an iterable:: 50 | 51 | def my_progress_wrapper(itr, desc='My progress bar', size=None): 52 | ... 53 | 54 | xphyle.configure(progress=my_progress_wrapper) 55 | 56 | For system-level operations, an executable is required that reads from stdin and writes to stdout; `pv `_ is used by default. To enable this:: 57 | 58 | xphyle.configure(system_progress=True) 59 | 60 | You can also use your own preferred program by passing a tuple with the command and arguments (:py:func:`xphyle.progress.system_progress_command` simplifies this):: 61 | 62 | xphyle.configure(system_progress=xphyle.progress.system_progress_command( 63 | 'pv', '-pre', require=True)) 64 | 65 | Working with files 66 | ------------------ 67 | 68 | The heart of xphyle is the simplicity of working with files. There is a single interface -- ``xopen`` -- for opening "file-like objects", regardless of whether they represent local files, remote files (referenced by URLs), or system streams (stdin, stdout, stderr); and regardless of whether they are compressed. 69 | 70 | The following are functionally equivalent ways to open a gzip file:: 71 | 72 | import gzip 73 | f = gzip.open('input.gz', 'rt') 74 | 75 | from xphyle import xopen 76 | f = xopen('input.gz', 'rt') 77 | 78 | So then why use xphyle? Two reasons: 79 | 80 | 1. The ``gzip.open`` method of opening a gzip file above requires you to know that you are expecting a gzip file and only a gzip file. 
If your program optionally accepts either a compressed or a decompressed file, then you'll need several extra lines of code to either detect the file format or to make the user specify the format of the file they are providing. This becomes increasingly cumbersome with each additional format you want to support. On the other hand, ``xopen`` has the same interface regardless of the compression format. Furthermore, if xphyle doesn't currently support a file format that you would like to use, it enables you to add it via a simple API. 81 | 2. The ``gzip.open`` method of opening a gzip file uses python code to decompress the file. It's well written, highly optimized python code, but unfortunately it's still slower than your natively compiled system-level applications (e.g. pigz or gzip). The ``xopen`` method of opening a gzip file first tries to use pigz or gzip to decompress the file and provides access to the resulting stream of decompressed data (as a file-like object), and only falls back to ``gzip.open`` if neither program is available. 82 | 83 | If you want to be explicit about whether to expect a compressed file, what type of compression to expect, or whether to try and use system programs, you can:: 84 | 85 | from xphyle import xopen 86 | from xphyle.paths import STDIN 87 | 88 | # Expect the file to not be compressed 89 | f = xopen('input', 'rb', compression=False) 90 | 91 | # Open a remote file. Expect the file to be compressed, and throw an error 92 | # if it's not, or if the compression format cannot be determined. 93 | f = xopen('http://foo.com/input.gz', 'rt', compression=True) 94 | 95 | # Open stdin. Expect the input to be gzip compressed, and throw an error if 96 | # it's not 97 | f = xopen(STDIN, 'rt', compression='gzip') 98 | 99 | # Do not try to use the system-level gzip program for decompression 100 | f = xopen('input.gz', 'rt', compression='gzip', use_system=False) 101 | 102 | By default, ``xopen`` returns the file. 
If desired, ``xopen`` can also wrap the file such that it behaves just like a file with a few additional features: 103 | 104 | * A file iterator is wrapped in a progress bar (if they have been enabled via the ``configure`` method described above). 105 | * A simple event system that enables callbacks to be registered for various events. Currently, the only supported event is closing the file. The ``xphyle.utils`` package provides a few useful event listeners, e.g. to compress, move, or delete the file when it is closed. 106 | * ContextManager functionality, such that the file is always compatible with ``with``, e.g.:: 107 | 108 | def print_lines(path): 109 | # this works whether path refers to a local file, URL or STDIN 110 | with xopen(path, context_wrapper=True) as infile: 111 | for line in infile: 112 | print(line) 113 | 114 | The wrapping behavior can be enabled by passing ``context_wrapper=True`` to ``xopen``. You can configure ``xopen`` to wrap files by default:: 115 | 116 | xphyle.configure(default_xopen_context_wrapper=True) 117 | 118 | **Note that this represents a change from xphyle 1.x, in which wrapping occurred by default.** 119 | 120 | Another common pattern is to write functions that accept either a path or an open file object. Rather than having to test whether the user passed a path or a file and handle each differently, you can use the ``open_`` convenience method:: 121 | 122 | from xphyle import open_ 123 | 124 | def print_lines(path_or_file): 125 | with open_(path_or_file) as infile: 126 | for line in infile: 127 | print(line) 128 | 129 | Note that ``open_`` wraps files by default, including already open file-like objects. To disable this, set ``wrap_fileobj=False``. 130 | 131 | Supported file formats 132 | ~~~~~~~~~~~~~~~~~~~~~~ 133 | 134 | xphyle supports the most commonly used file formats: gzip, bzip2/7zip, and lzma/xz. 
135 | 136 | Also supported are: 137 | 138 | * zstandard 139 | * Brotli 140 | * block-based gzip (bgzip), a format commonly used in bioinformatics. Somewhat confusingly, '.gz' is an acceptable extension for bgzip files, and gzip will decompress bgzip files. Thus, to specifically use bgzip, either use a '.bgz' file extension or specify 'bgzip' as the compression format:: 141 | 142 | f = xopen('input.gz', 'rt', compression='bgzip', validate=False) 143 | 144 | Additional compression formats may be added in the future. To get the most up-to-date list:: 145 | 146 | from xphyle.formats import FORMATS 147 | print(', '.join(FORMATS.list_compression_formats())) 148 | 149 | When a file is opened for decompression, its extension is used to determine which decompressor to use. If the extension is not recognized, or if the filename is not available (e.g. when decompressing a stream or buffer), then xphyle attempts to determine the file format from the "magic bytes" at the beginning of the file. 150 | 151 | Processes 152 | ~~~~~~~~~ 153 | 154 | As of xphyle 2.0.0, you can easily open subprocesses using the ``xphyle.popen`` method. This method is similar to python ``subprocess.Popen``, except that it uses ``xopen`` to open files passed to stdin, stdout, and stderr, and/or to wrap subprocess PIPEs. ``xphyle.popen`` returns an ``xphyle.Process`` object, which is a subclass of ``subprocess.Popen`` but adds additional functionality, essentially making a Process behave like a regular file. 
Writing to a process writes to its stdin PIPE, and reading from a process reads from its stdout or stderr PIPE:: 155 | 156 | from xphyle import popen, PIPE 157 | proc = popen('cat', stdin=PIPE, stdout='myfile.gz') 158 | try: 159 | proc.write('foo') 160 | finally: 161 | proc.close() 162 | 163 | # equivalent to: 164 | with popen('cat', stdin=PIPE, stdout='myfile.gz') as proc: 165 | proc.write('foo') 166 | 167 | # and also to: 168 | popen('cat', stdin=PIPE, stdout='myfile.gz').communicate('foo') 169 | 170 | # for the common case above, there's also a shortcut method 171 | from xphyle.utils import exec_process 172 | exec_process('cat', 'foo', stdout='myfile.gz') 173 | 174 | In addition, ``open_`` and ``xopen`` can open subprocesses. The primary difference is that ``popen`` enables customization of stdin, stdout, and stderr, whereas opening a process through ``open_`` or ``xopen`` uses default behavior of opening PIPEs for all of the streams, and wrapping the PIPE indicated by the file mode. For example:: 175 | 176 | # write to the process stdin 177 | with open_('|cat', 'wt') as proc: 178 | proc.write('foo') 179 | 180 | # this command wraps stdin with gzip compression 181 | with open_('|zcat', 'wt', compression='gzip') as proc: 182 | proc.write('foo') 183 | 184 | # this command wraps stdout with gzip decompression; 185 | # furthermore, the compression format is determined 186 | # automatically 187 | with open_('|gzip -c foobar.txt', 'rt') as proc: 188 | text = proc.read() 189 | 190 | Note that with ``open_`` and ``xopen``, the system command must be specified as a string starting with '|'. 191 | 192 | Buffers 193 | ~~~~~~~ 194 | 195 | As of xphyle 2.1.0, ``open_`` and ``xopen`` can also open buffer types. A buffer is an instance of ``io.StringIO`` or ``io.BytesIO`` (or similar) -- basically an in memory read/write buffer. 
Passing open buffer objects worked before (they were treated as file-like), but now there is a special file type -- ``FileType.BUFFER`` -- that will cause them to be handled a bit differently. In addition, you can now pass ``str`` or ``bytes`` (the type objects) to automatically create the corresponding buffer type:: 196 | 197 | with open_(str) as buf: 198 | buf.write('foo') 199 | string_foo = buf.getvalue() 200 | 201 | # with compression, type must be 'bytes' 202 | with open_(bytes, compression='gzip') as buf: 203 | buf.write('foo') 204 | compressed_foo = buf.getvalue() 205 | 206 | You can also create readable buffers by passing the string/bytes to read instead of a path, and explicitly specifying the file type:: 207 | 208 | with open_("This is a string I want to read", file_type=FileType.BUFFER) as buf: 209 | buf_str = buf.read() 210 | 211 | Reading/writing data 212 | ~~~~~~~~~~~~~~~~~~~~ 213 | 214 | The ``xphyle.utils`` module provides methods for many of the common operations that you'll want to perform on files. A few examples are shown below; you can read the `API docs `_ for a full list of methods and more detailed descriptions of each. 
215 | 216 | There are pairs of methods for reading/writing text and binary data using iterators:: 217 | 218 | # Copy from one file to another, changing the line separator from 219 | # unix to windows 220 | from xphyle.utils import read_lines, write_lines 221 | write_lines( 222 | read_lines('linux_file.txt'), 223 | 'windows_file.txt', 224 | linesep='\r') 225 | 226 | # Copy from one binary file to another, changing the encoding from 227 | # ascii to utf-8 228 | from xphyle.utils import read_bytes, write_bytes 229 | def ascii2utf8(x): 230 | if isinstance(x, bytes): 231 | x = x.decode('ascii') 232 | return x.encode('utf-8') 233 | write_bytes( 234 | read_bytes('ascii_file.txt', convert=ascii2utf8), 235 | 'utf8-file.txt') 236 | 237 | There's another pair of methods for reading/writing key=value files:: 238 | 239 | from collections import OrderedDict 240 | from xphyle.utils import read_dict, write_dict 241 | cats = OrderedDict([('fluffy','calico'), ('droopy','tabby'), ('sneezy','siamese')]) 242 | write_dict(cats, 'cats.txt.gz') 243 | # change from '=' to '\t' delimited; preserve the order of the items 244 | write_dict( 245 | read_dict('cats.txt.gz', ordered=True), 246 | 'cats.tsv', sep='\t') 247 | 248 | You can also read from delimited files such as csv and tsv:: 249 | 250 | from xphyle.utils import read_delimited, read_delimited_as_dict 251 | 252 | class Dog(object): 253 | def __init__(self, name, age, breed): 254 | self.name = name 255 | self.age = age 256 | self.breed = breed 257 | def pet(self): ... 258 | def say(self, message): ...
259 | 260 | for dog in read_delimited( 261 | 'dogs.txt.gz', header=True, 262 | converters=(str,int,str), 263 | row_type=Dog): 264 | dog.pet() 265 | 266 | dogs = read_delimited_as_dict( 267 | 'dogs.txt.gz', header=True, 268 | key='name', converters=(str,int,str), 269 | row_type=Dog) 270 | dogs['Barney'].say('Good Boy!') 271 | 272 | There are convenience methods for compressing and decompressing files:: 273 | 274 | from xphyle.utils import compress_file, decompress_file, transcode_file 275 | 276 | # Gzip compress recipes.txt, and delete the original 277 | compress_file('recipes.txt', compression='gzip', keep=False) 278 | 279 | # decompress a remote compressed file to a local file 280 | decompress_file('http://recipes.com/allrecipes.txt.gz', 281 | 'local_recipes.txt') 282 | 283 | # Change from gzip to bz2 compression: 284 | transcode_file('http://recipes.com/allrecipes.txt.gz', 285 | 'local_recipes.txt.bz2') 286 | 287 | There is a replacement for ``fileinput``:: 288 | 289 | from xphyle.utils import fileinput 290 | 291 | # By default, read from the files specified as command line arguments, 292 | # or stdin if there are no command line arguments, and autodetect 293 | # the compression format 294 | for line in fileinput(): 295 | print(line) 296 | 297 | # Read from multiple files as if they were one 298 | for line in fileinput(('myfile.txt', 'myotherfile.txt.gz')): 299 | print(line) 300 | 301 | There's also a set of classes for writing to multiple files:: 302 | 303 | from xphyle.utils import fileoutput 304 | from xphyle.utils import TeeFileOutput, CycleFileOutput, NCycleFileOutput 305 | 306 | # write all lines in sourcefile.txt to both file1 and file2.gz 307 | with fileoutput( 308 | ('file1', 'file2.gz'), 309 | file_output_type=TeeFileOutput) as out: 310 | out.writelines(read_lines('sourcefile.txt')) 311 | 312 | # Alternate writing each line in sourcefile.txt to file1 and file2.gz 313 | with fileoutput( 314 | ('file1', 'file2.gz'), 315 |
file_output_type=CycleFileOutput) as out: 316 | out.writelines(read_lines('sourcefile.txt')) 317 | 318 | # Alternate writing four lines in sourcefile.txt to file1 and file2.gz 319 | with fileoutput( 320 | ('file1', 'file2.gz'), 321 | file_output_type=NCycleFileOutput, n=4) as out: 322 | out.writelines(read_lines('sourcefile.txt')) 323 | 324 | # Write up to 10,000 lines in each file before opening the next file 325 | with RollingFileOutput('file{}.gz', n=10000) as out: 326 | out.writelines(read_lines('sourcefile.txt')) 327 | 328 | And finally, there's some miscellaneous methods such as linecount:: 329 | 330 | from xphyle.utils import linecount 331 | print("There are {} lines in file {}".format( 332 | linecount(path), path)) 333 | 334 | File paths 335 | ~~~~~~~~~~ 336 | 337 | The ``xphyle.paths`` module provides methods for working with file paths. The `API docs `_ have a full list of methods and more detailed descriptions of each. Here are a few examples:: 338 | 339 | from xphyle.paths import * 340 | 341 | # Get the absolute path, being smart about STDIN/STDOUT/STDERR and 342 | # home directory shortcuts 343 | abspath('/foo/bar/baz') # -> /foo/bar/baz 344 | abspath('foo') # -> /path/to/current/dir/foo 345 | abspath('~/foo') # -> /home/myname/foo 346 | abspath(STDIN) # -> STDIN 347 | 348 | # Splat a path into its component parts 349 | dir, name, *extensions = split_path('/home/joe/foo.txt.gz') # -> 350 | # dir = '/home/joe' 351 | # name = 'foo' 352 | # extensions = ['txt', 'gz'] 353 | 354 | # Check that a path exists, is a file, and allows reading 355 | # Raises IOError if any of the expectations are violated, 356 | # otherwise returns the fully resolved path 357 | path = check_path('file.txt.gz', 'f', 'r') 358 | 359 | # Shortcuts to check whether a file is readable/writeable 360 | path = check_readable_file('file.txt') 361 | path = check_writeable_file('file.txt') 362 | 363 | # There are also 'safe' versions of the methods that return 364 | # None rather than raise 
IOError 365 | path = safe_check_readable_file('nonexistant_file.txt') # path = None 366 | 367 | # Find all files in a directory (recursively) that match a 368 | # regular expression pattern 369 | find('mydir', 'file.*\.txt\.gz') 370 | 371 | # Lookup the path to an executable 372 | gzip_path = get_executable_path('gzip') 373 | 374 | `TempDir `_ is a particularly useful class, especially for unit testing. In fact, it is used extensively for unit testing xphyle itself. TempDir can be thought of as a virtual file system. It creates a temporary directory, and it provides methods to create subdirectories and files within that directory. When the ``close()`` method is called, the entire temporary directory is deleted. ``TempDir`` can also be used as a ContextManager:: 375 | 376 | with TempDir() as temp: 377 | # create three randomly named files under 'tempdir' 378 | paths = temp.make_empty_files(3) 379 | # create directory 'tempdir/foo' 380 | foo = temp.make_directory('foo') 381 | # create a randomly named file with the '.gz' suffix 382 | # within directory 'tempdir/foo' 383 | gzfile = temp[foo].make_file(suffix='.gz') 384 | 385 | Another useful set of classes is `FileSpec `_, `DirSpec `_, and `PathSpec `_. These classes help with the common problem of working with files that match a specific pattern, especially when you need to then extract some pieces of information from the file names. For example, you may need to find all the files starting with 'foo' within any subdirectory of '/bar', and then perform different operations depending on the extension.
You could use a PathSpec for this:: 386 | 387 | spec = PathSpec( 388 | DirSpec(PathVar('subdir'), template=os.path.join('/bar', '{subdir}')), 389 | FileSpec( 390 | PathVar('name', pattern='foo.*'), 391 | PathVar('ext'), 392 | template='{name}.{ext}')) 393 | files = spec.find(recursive=True) 394 | for f in files: 395 | if f['ext'] == 'txt': 396 | process_text_file(f) 397 | else: 398 | process_binary_file(f) 399 | 400 | A FileSpec or DirSpec has two related fields: a template, which is a python `fstring `_ and is used for constructing filenames from component pieces; and a pattern, which is a regular expression and is used for matching to path strings. The named components of the template correspond to path variables (instances of the `PathVar `_ class). Each PathVar can provide its own pattern, as well as lists of valid or invalid values. If a pattern is not specified during FileSpec/DirSpec creation, the pattern is automatically created by simply substituting the PathVar patterns for the corresponding components in the template string ('.*' by default). 401 | 402 | Note that a DirSpec is only able to construct/match directory paths, and a FileSpec is only able to construct/match file names. A PathSpec is simply a composite type of a DirSpec and a FileSpec that can be used to construct/match full paths. 403 | 404 | Each of the *Spec classes has three methods: 405 | 406 | * construct: Given values for all of the path vars, construct a new path. Note that __call__ is an alias for construct. 407 | * parse: Match a path against the *Spec's pattern. If the path matches, the component's are extracted (through the use of named capture groups), otherwise an exception is raised. 408 | * find: Find all directories/files/paths that match the *Spec's pattern. 
409 | 410 | All of these methods return a PathInst, which is a subclass of pathlib.Path (specifically, a subclass of pathlib.WindowsPath when code is run on Windows, otherwise a PosixPath) that has an additional slot, 'values', that is a dictionary of the component name, value pairs, and overrides a few methods. 411 | 412 | Extending xphyle 413 | ---------------- 414 | 415 | You can add support for another compression format by extending one of the base classes in :py:mod:``:: 416 | 417 | import xphyle.formats 418 | 419 | class FooFormat(xphyle.formats.SingleExeCompressionFormat): 420 | """Implementation of CompressionFormat for foo files. 421 | """ 422 | @property 423 | def name(self) -> str: 424 | return 'foo' 425 | 426 | @property 427 | def exts(self) -> Tuple[str, ...]: 428 | return ('foo',) 429 | 430 | @property 431 | def system_commands(self) -> Tuple[str, ...]: 432 | return ('foo',) 433 | 434 | @property 435 | def compresslevel_range(self) -> Tuple[int, int]: 436 | # because of course it goes to 11 437 | return (1, 11) 438 | 439 | @property 440 | def default_compresslevel(self) -> int: 441 | return 6 442 | 443 | @property 444 | def magic_bytes(self) -> Tuple[Tuple[int, ...], ...]: 445 | return ((0x0F, 0x00),) 446 | 447 | @property 448 | def mime_types(self) -> Tuple[str, ...]: 449 | return ('application/foo',) 450 | 451 | # build the system command 452 | # op = 'c' for compress, 'd' for decompress 453 | # src = the source file, or STDIN if input should be read from stdin 454 | # stdout = True if output should be written to stdout 455 | # compresslevel = the compression level 456 | def get_command(self, op, src=STDIN, stdout=True, compresslevel=6): 457 | cmd = [self.executable_path] 458 | if op == 'c': 459 | # adjust the compresslevel to be within the range allowed 460 | # by the program 461 | compresslevel = self._get_compresslevel(compresslevel) 462 | cmd.append('-{}'.format(compresslevel)) 463 | cmd.append('-z') 464 | elif op == 'd': 465 | cmd.append('-d') 
466 | if stdout: 467 | cmd.append('-c') 468 | if src != STDIN: 469 | cmd.append(src) 470 | return cmd 471 | 472 | def open_file_python(self, filename, mode, **kwargs): 473 | # self.lib is a property that lazily imports and returns the 474 | # python library named in the ``name`` member above 475 | return self.lib.open_foo(filename, mode, **kwargs) 476 | 477 | Then, register your format:: 478 | 479 | xphyle.formats.register_compression_format(FooFormat) 480 | 481 | Also, note that you can support custom URL schemes by the standard method of adding `urllib `_ handlers:: 482 | 483 | import urllib.request 484 | urllib.request.OpenerDirector.add_handler(my_handler) 485 | -------------------------------------------------------------------------------- /docs/logo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdidion/xphyle/6bbb79c3cdf680205f7f4fafcf0e6631999a62f2/docs/logo.pdf -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdidion/xphyle/6bbb79c3cdf680205f7f4fafcf0e6631999a62f2/docs/logo.png -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx==7.2.6 2 | pokrok==0.2.0 -------------------------------------------------------------------------------- /paper/codemeta.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld", 3 | "@type": "Code", 4 | "author": ["John P Didion"], 5 | "identifier": "http://dx.doi.org/10.5281/zenodo.569933", 6 | "codeRepository": "https://github.com/jdidion/xphyle", 7 | "datePublished": "2017-04-29", 8 | "dateModified": "2017-04-29", 9 | 
"dateCreated": "2017-04-29", 10 | "description": "xphyle: extraordinarily simple file handling", 11 | "keywords": "python, io, file", 12 | "license": "CC0", 13 | "title": "xphyle", 14 | "version": "3.0.1" 15 | } 16 | -------------------------------------------------------------------------------- /paper/generate.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby 2 | 3 | # For an OO language, this is distinctly procedural. Should probably fix that. 4 | require 'json' 5 | 6 | details = Hash.new({}) 7 | 8 | capture_params = [ 9 | { :name => "title", :message => "Enter project name." }, 10 | { :name => "url", :message => "Enter the URL of the project repository." }, 11 | { :name => "description", :message => "Enter the (short) project description." }, 12 | { :name => "license", :message => "Enter the license this software shared under. (hit enter to skip)\nFor example MIT, BSD, GPL v3.0, Apache 2.0" }, 13 | { :name => "doi", :message => "Enter the DOI of the archived version of this code. (hit enter to skip)\nFor example http://dx.doi.org/10.6084/m9.figshare.828487" }, 14 | { :name => "keywords", :message => "Enter keywords that should be associated with this project (hit enter to skip)\nComma-separated, for example: turkey, chicken, pot pie" }, 15 | { :name => "version", :message => "Enter the version of your software (hit enter to skip)\nSEMVER preferred: http://semver.org e.g. v1.0.0" } 16 | ] 17 | 18 | puts "I'm going to try and help you prepare some things for your JOSS submission" 19 | puts "If all goes well then we'll have a nice codemeta.json file soon..." 
20 | puts "" 21 | puts "************************************" 22 | puts "* First, some basic details *" 23 | puts "************************************" 24 | puts "" 25 | 26 | # Loop through the desired captures and print out for clarity 27 | capture_params.each do |param| 28 | puts param[:message] 29 | print "> " 30 | input = gets 31 | 32 | details[param[:name]] = input.chomp 33 | 34 | puts "" 35 | puts "OK, your project has #{param[:name]}: #{input}" 36 | puts "" 37 | end 38 | 39 | puts "" 40 | puts "************************************" 41 | puts "* Experimental stuff *" 42 | puts "************************************" 43 | puts "" 44 | 45 | puts "Would you like me to try and build a list of authors for you?" 46 | puts "(You need to be running this script in a git repository for this to work)" 47 | print "> (Y/N)" 48 | answer = gets.chomp 49 | 50 | case answer.downcase 51 | when "y", "yes" 52 | 53 | # Use git shortlog to extract a list of author names and commit counts. 54 | # Note we don't extract emails here as there's often different emails for 55 | # each user. Instead we capture emails at the end. 56 | 57 | git_log = `git shortlog --summary --numbered --no-merges` 58 | 59 | # ["252\tMichael Jackson", "151\tMC Hammer"] 60 | authors_and_counts = git_log.split("\n").map(&:strip) 61 | 62 | authors_and_counts.each do |author_count| 63 | count, author = author_count.split("\t").map(&:strip) 64 | 65 | puts "Looks like #{author} made #{count} commits" 66 | puts "Add them to the output?" 67 | print "> (Y/N)" 68 | answer = gets.chomp 69 | 70 | # If a user chooses to add this author to the output then we ask for some 71 | # additional information including their email, ORCID and affiliation. 72 | case answer.downcase 73 | when "y", "yes" 74 | puts "What is #{author}'s email address? (hit enter to skip)" 75 | print "> " 76 | email = gets.chomp 77 | 78 | puts "What is #{author}'s ORCID? 
(hit enter to skip)" 79 | puts "For example: http://orcid.org/0000-0000-0000-0000" 80 | print "> " 81 | orcid = gets.chomp 82 | 83 | puts "What is #{author}'s affiliation? (hit enter to skip)" 84 | print "> " 85 | affiliation = gets.chomp 86 | 87 | 88 | details['authors'].merge!(author => { 'commits' => count, 89 | 'email' => email, 90 | 'orcid' => orcid, 91 | 'affiliation' => affiliation }) 92 | 93 | when "n", "no" 94 | puts "OK boss..." 95 | puts "" 96 | end 97 | end 98 | when "n", "no" 99 | puts "OK boss..." 100 | puts "" 101 | end 102 | 103 | puts "Reticulating splines" 104 | 105 | 5.times do 106 | print "." 107 | sleep 0.5 108 | end 109 | 110 | puts "" 111 | puts "Generating some JSON goodness..." 112 | 113 | # TODO: work out how to use some kind of JSON template here. 114 | # Build the output list of authors from the inputs we've collected. 115 | output_authors = [] 116 | 117 | details['authors'].each do |author_name, values| 118 | entry = { 119 | "@id" => values['orcid'], 120 | "@type" => "Person", 121 | "email" => values['email'], 122 | "name" => author_name, 123 | "affiliation" => values['affiliation'] 124 | } 125 | output_authors << entry 126 | end 127 | 128 | # TODO: this is currently a static template (written out here). It would be good 129 | # to do something smarter here. 
130 | output = { 131 | "@context" => "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld", 132 | "@type" => "Code", 133 | "author" => output_authors, 134 | "identifier" => details['doi'], 135 | "codeRepository" => details['url'], 136 | "datePublished" => Time.now.strftime("%Y-%m-%d"), 137 | "dateModified" => Time.now.strftime("%Y-%m-%d"), 138 | "dateCreated" => Time.now.strftime("%Y-%m-%d"), 139 | "description" => details['description'], 140 | "keywords" => details['keywords'], 141 | "license" => details['license'], 142 | "title" => details['title'], 143 | "version" => details['version'] 144 | } 145 | 146 | File.open('codemeta.json', 'w') {|f| f.write(JSON.pretty_generate(output)) } 147 | -------------------------------------------------------------------------------- /paper/paper.bib: -------------------------------------------------------------------------------- 1 | @online{xphyle, 2 | author = {John P Didion}, 3 | title = {xphyle: extraordinarily simple file handling}, 4 | year = 2017, 5 | url = {https://github.com/jdidion/xphyle}, 6 | urldate = {2017-04-29} 7 | } 8 | -------------------------------------------------------------------------------- /paper/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'xphyle: Extraordinarily simple file handling' 3 | tags: 4 | - io 5 | - files 6 | - python 7 | authors: 8 | - name: John P Didion 9 | orcid: 0000-0002-8111-6261 10 | affiliation: 1 11 | affiliations: 12 | - name: National Human Genome Research Institute, NIH, Bethesda, MD, USA 13 | index: 1 14 | date: 29 April 2017 15 | bibliography: paper.bib 16 | --- 17 | 18 | # Summary 19 | 20 | Data compression is commonly used to reduce the storage requirements for large datasets. It is often necessary for software that operates on big data to support several commonly used compression algorithms, including gzip, bzip2, and lzma. 
Handling these and other types of data sources, such as URLs and in-memory buffers, requires special consideration by software developers. We created xphyle [@xphyle], a small python (3.3+) library, to provide transparent access to files regardless of their source or compression type. Most importantly, xphyle uses the appropriate program (e.g. 'gzip') to compress/decompress a file if the program is available on the host system, which is generally faster than using the corresponding python library. xphyle also provides methods that simplify common file I/O operations. 21 | 22 | # References 23 | -------------------------------------------------------------------------------- /readthedocs.yml: -------------------------------------------------------------------------------- 1 | build: 2 | image: latest 3 | 4 | python: 5 | version: 3.6 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pokrok==0.2.0 -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | 4 | [options] 5 | setup_requires = 6 | setuptools_scm==8.0.4 7 | 8 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import codecs 2 | import os 3 | from setuptools import setup 4 | import sys 5 | 6 | 7 | version_info = sys.version_info 8 | if version_info < (3, 6): 9 | sys.stdout.write( 10 | "xphyle 4+ requires python3.6. 
from contextlib import contextmanager
from io import BytesIO, TextIOWrapper, BufferedIOBase
import random
from typing import cast
from unittest.mock import patch
import urllib.request


# Note: the casts of StringIO/BytesIO to BufferedIOBase are only necessary because of
# pycharm bug PY-28155


def random_text(n: int = 1024) -> str:
    """Return a random string of `n` printable ASCII characters (codes 32-126)."""
    return ''.join(chr(random.randint(32, 126)) for _ in range(n))


class MockStdout(object):
    """Stand-in for sys.stdout/sys.stderr in tests: a TextIOWrapper over an
    in-memory BytesIO so that whatever is "printed" can be inspected.
    """

    def __init__(self, name, as_bytes):
        # bytes_io backs the wrapper; 'name' is attached because real stdout
        # objects expose a .name attribute
        self.bytes_io = BytesIO()
        object.__setattr__(self.bytes_io, 'name', name)
        self.wrapper = TextIOWrapper(cast(BufferedIOBase, self.bytes_io))
        self.wrapper.mode = 'w'
        # whether getvalue() should return bytes (True) or str (False)
        self.as_bytes = as_bytes

    def getvalue(self):
        """Return everything written so far, as bytes or str depending on
        `as_bytes`.
        """
        self.wrapper.flush()
        val = self.bytes_io.getvalue()
        if not self.as_bytes:
            val = val.decode()
        return val


@contextmanager
def intercept_stdout(as_bytes=False):
    """Patch sys.stdout with a MockStdout for the duration of the context,
    yielding the MockStdout so the captured output can be inspected.
    """
    i = MockStdout('', as_bytes)
    with patch('sys.stdout', i.wrapper):
        yield i


@contextmanager
def intercept_stderr(as_bytes=False):
    """Patch sys.stderr with a MockStdout for the duration of the context,
    yielding the MockStdout so the captured output can be inspected.
    """
    i = MockStdout('', as_bytes)
    with patch('sys.stderr', i.wrapper):
        yield i


@contextmanager
def intercept_stdin(content, is_bytes=False):
    """Patch sys.stdin to read `content` for the duration of the context.

    A trailing newline is appended to text content that lacks one (byte
    content is used verbatim).
    """
    if not is_bytes:
        content = content.encode()
    i = BytesIO()
    object.__setattr__(i, 'name', '')
    i.write(content)
    if not (is_bytes or content.endswith(b'\n')):
        i.write(b'\n')
    i.seek(0)
    i = TextIOWrapper(cast(BufferedIOBase, i))
    i.mode = 'r'
    with patch('sys.stdin', i):
        yield


def no_internet():
    """Test whether there's no internet connection available.

    Returns True if github.com could not be reached, else False.
    """
    try:
        # close the response explicitly (the original leaked it)
        with urllib.request.urlopen("https://github.com") as response:
            response.info()
        return False
    # was a bare `except:`, which would also swallow KeyboardInterrupt and
    # SystemExit; any ordinary failure still means "no internet"
    except Exception:
        return True
def get_format(ext):
    """Look up the compression format registered for `ext`."""
    return FORMATS.get_compression_format(FORMATS.guess_compression_format(ext))


def write_file(fmt, path, use_system, content, mode="wt"):
    """Write `content` to `path` using the given compression format."""
    with fmt.open_file(path, mode=mode, use_system=use_system) as handle:
        handle.write(content)


def read_file(fmt, path, use_system, mode="rt"):
    """Read back the full contents of a compressed file."""
    with fmt.open_file(path, mode=mode, use_system=use_system) as handle:
        return handle.read()


def create_truncated_file(path, fmt):
    """Write 200 random chars compressed, then chop 10 bytes off the file."""
    text = "".join(random.choice(string.ascii_lowercase) for _ in range(200))
    with fmt.open_file(path, "w") as out:
        out.write(text)
    with open(path, "a") as raw:
        raw.truncate(os.stat(path).st_size - 10)


gz_path = get_format("gz").executable_path
# TODO: enable executable to be injected so we can test all variants
no_pigz = gz_path is None or get_format("gz").executable_name != "pigz"
no_igzip = gz_path is None or get_format("gz").executable_name != "igzip"
bgz_compress_path = get_format("bgz").compress_path
bgz_decompress_path = get_format("bgz").decompress_path
bz_path = get_format("bz2").executable_path
no_pbzip2 = bz_path is None or get_format("bz2").executable_name != "pbzip2"
xz_path = get_format("xz").executable_path
zstd_path = get_format("zstd").executable_path


class ThreadsTests(TestCase):
    def test_threads(self):
        import multiprocessing

        threads = ThreadsVar(default_value=2)
        # None falls back to the default
        threads.update(None)
        assert threads.threads == 2
        # False and 0 both mean "single-threaded"
        threads.update(False)
        assert threads.threads == 1
        threads.update(0)
        assert threads.threads == 1
        # True means "use all cores"
        threads.update(True)
        assert threads.threads == multiprocessing.cpu_count()
        threads.update(4)
        assert threads.threads == 4


class CompressionTests(TestCase):
    def tearDown(self):
        EXECUTABLE_CACHE.cache = {}
        THREADS.update(1)

    def test_list_formats(self):
        self.assertSetEqual(
            {"gzip", "bgzip", "bz2", "lzma", "zstd", "brotli"},
            set(FORMATS.list_compression_formats()),
        )
        self.assertSetEqual(
            {"gzip", "gz", "pigz", "igzip"}, set(get_format("gzip").aliases)
        )

    def test_list_extensions(self):
        expected = {
            ".gz", ".bgz", ".bz2", ".bzip", ".bzip2", ".xz", ".lzma",
            ".7z", ".7zip", ".zst", ".br",
        }
        self.assertSetEqual(expected, set(FORMATS.list_extensions(True)))

    def test_guess_format(self):
        # bare extension, dotted extension, and full filename all resolve
        for hint in ("gz", ".gz", "foo.gz"):
            assert FORMATS.guess_compression_format(hint) == "gzip"

    def test_invalid_format(self):
        self.assertIsNone(FORMATS.guess_compression_format("foo"))
        with self.assertRaises(ValueError):
            FORMATS.get_compression_format("foo")

    def test_get_format_from_mime_type(self):
        self.assertEqual("gzip", FORMATS.get_format_for_mime_type("application/gz"))
        self.assertEqual("bz2", FORMATS.get_format_for_mime_type("application/bz2"))
        self.assertEqual("lzma", FORMATS.get_format_for_mime_type("application/lzma"))

    # TODO: need a way to force selection of a specific executable to properly
    # test all possible scenarios

    def _test_format(self, fmt):
        # compresslevel is clamped to the format's valid range
        assert fmt.default_compresslevel == fmt._get_compresslevel(None)
        assert fmt.compresslevel_range[0] == fmt._get_compresslevel(-1)
        assert fmt.compresslevel_range[1] == fmt._get_compresslevel(100)

    @skipIf(gz_path is None, "'gzip' not available")
    def test_gzip(self):
        gz = get_format("gz")
        self._test_format(gz)
        assert gz.default_ext == "gz"
        self.assertEqual(
            gz.get_command("c", compresslevel=5), [str(gz_path), "-5", "-c"]
        )
        self.assertEqual(
            gz.get_command("c", "foo.bar", compresslevel=5),
            [str(gz_path), "-5", "-c", "foo.bar"],
        )
        self.assertEqual(gz.get_command("d"), [str(gz_path), "-d", "-c"])
        self.assertEqual(
            gz.get_command("d", "foo.gz"), [str(gz_path), "-d", "-c", "foo.gz"]
        )
    @skipIf(no_pigz, "'pigz' not available")
    def test_pigz(self):
        # pigz adds "-p <threads>" when compressing; decompression is serial.
        THREADS.update(2)
        gz = get_format("gz")
        assert gz.default_ext == "gz"
        self.assertEqual(
            gz.get_command("c", compresslevel=5), [str(gz_path), "-5", "-c", "-p", "2"]
        )
        self.assertEqual(
            gz.get_command("c", "foo.bar", compresslevel=5),
            [str(gz_path), "-5", "-c", "-p", "2", "foo.bar"],
        )
        self.assertEqual(gz.get_command("d"), [str(gz_path), "-d", "-c"])
        self.assertEqual(
            gz.get_command("d", "foo.gz"), [str(gz_path), "-d", "-c", "foo.gz"]
        )

    @skipIf(no_igzip, "'igzip' not available")
    def test_igzip(self):
        # igzip uses "-T <threads>" and supports lower compression levels.
        THREADS.update(2)
        gz = get_format("gz")
        assert gz.default_ext == "gz"
        self.assertEqual(
            gz.get_command("c", compresslevel=2), [str(gz_path), "-2", "-c", "-T", "2"]
        )
        self.assertEqual(
            gz.get_command("c", "foo.bar", compresslevel=2),
            [str(gz_path), "-2", "-c", "-T", "2", "foo.bar"],
        )
        self.assertEqual(gz.get_command("d"), [str(gz_path), "-d", "-c"])
        self.assertEqual(
            gz.get_command("d", "foo.gz"), [str(gz_path), "-d", "-c", "foo.gz"]
        )

    @skipIf(bgz_compress_path is None, "'bgzip' not available")
    def test_bgzip_compress(self):
        # bgzip uses "-l <level>" (default 4) and "-@ <threads>".
        THREADS.update(2)
        bgz = get_format("bgz")
        assert bgz.default_ext == "bgz"
        self.assertEqual(
            bgz.get_command("c"), [str(bgz_compress_path), "-l", "4", "-c", "-@", "2"]
        )
        self.assertEqual(
            bgz.get_command("c", "foo.bar", compresslevel=5),
            [str(bgz_compress_path), "-l", "5", "-c", "-@", "2", "foo.bar"],
        )

    @skipIf(bgz_decompress_path is None, "'gzip/pigz' not available")
    def test_bgzip_decompress(self):
        THREADS.update(2)
        bgz = get_format("bgz")
        # NOTE(review): both branches below are currently identical; the
        # split presumably anticipates pigz-specific decompress flags —
        # confirm before collapsing (see module TODO about injecting
        # executables).
        if bgz.decompress_name == "pigz":
            self.assertEqual(
                bgz.get_command("d"), [str(bgz_decompress_path), "-d", "-c"]
            )
            self.assertEqual(
                bgz.get_command("d", PurePath("foo.gz")),
                [str(bgz_decompress_path), "-d", "-c", "foo.gz"],
            )
            self.assertEqual(
                bgz.get_command("d", PurePath("foo.bar")),
                [str(bgz_decompress_path), "-d", "-c", "-S", ".bar", "foo.bar"],
            )
        else:
            self.assertEqual(
                bgz.get_command("d"), [str(bgz_decompress_path), "-d", "-c"]
            )
            self.assertEqual(
                bgz.get_command("d", PurePath("foo.gz")),
                [str(bgz_decompress_path), "-d", "-c", "foo.gz"],
            )
            self.assertEqual(
                bgz.get_command("d", PurePath("foo.bar")),
                [str(bgz_decompress_path), "-d", "-c", "-S", ".bar", "foo.bar"],
            )

    @skipIf(bz_path is None, "'bzip2' not available")
    def test_bzip2(self):
        bz = get_format("bz2")
        self._test_format(bz)
        assert bz.default_ext == "bz2"
        self.assertEqual(
            bz.get_command("c", compresslevel=5), [str(bz_path), "-5", "-z", "-c"]
        )
        self.assertEqual(
            bz.get_command("c", "foo.bar", compresslevel=5),
            [str(bz_path), "-5", "-z", "-c", "foo.bar"],
        )
        self.assertEqual(bz.get_command("d"), [str(bz_path), "-d", "-c"])
        self.assertEqual(
            bz.get_command("d", "foo.bz2"), [str(bz_path), "-d", "-c", "foo.bz2"]
        )

    @skipIf(no_pbzip2, "'pbzip2' not available")
    def test_pbzip2(self):
        # pbzip2 passes threads as a fused "-p2" flag for both directions.
        THREADS.update(2)
        bz = get_format("bz2")
        assert bz.default_ext == "bz2"
        self.assertEqual(
            bz.get_command("c", compresslevel=5),
            [str(bz_path), "-5", "-z", "-c", "-p2"],
        )
        self.assertEqual(
            bz.get_command("c", "foo.bar", compresslevel=5),
            [str(bz_path), "-5", "-z", "-c", "-p2", "foo.bar"],
        )
        self.assertEqual(bz.get_command("d"), [str(bz_path), "-d", "-c", "-p2"])
        self.assertEqual(
            bz.get_command("d", "foo.bz2"), [str(bz_path), "-d", "-c", "-p2", "foo.bz2"]
        )

    @skipIf(xz_path is None, "'xz' not available")
    def test_lzma(self):
        xz = get_format("xz")
        self._test_format(xz)
        assert xz.default_ext == "xz"
        self.assertEqual(
            xz.get_command("c", compresslevel=5), [str(xz_path), "-5", "-z", "-c"]
        )
        self.assertEqual(
            xz.get_command("c", "foo.bar", compresslevel=5),
            [str(xz_path), "-5", "-z", "-c", "foo.bar"],
        )
        self.assertEqual(xz.get_command("d"), [str(xz_path), "-d", "-c"])
        self.assertEqual(
            xz.get_command("d", "foo.xz"), [str(xz_path), "-d", "-c", "foo.xz"]
        )
        # Test with threads
        THREADS.update(2)
        self.assertEqual(
            xz.get_command("c", compresslevel=5),
            [str(xz_path), "-5", "-z", "-c", "-T", "2"],
        )
        self.assertEqual(
            xz.get_command("c", "foo.bar", compresslevel=5),
            [str(xz_path), "-5", "-z", "-c", "-T", "2", "foo.bar"],
        )
        self.assertEqual(xz.get_command("d"), [str(xz_path), "-d", "-c", "-T", "2"])

    @skipIf(zstd_path is None, "'zstd' not available")
    def test_zstd(self):
        zstd = get_format("zstd")
        self._test_format(zstd)
        assert zstd.default_ext == "zst"
        # Single-threaded mode is explicit for zstd.
        self.assertEqual(
            zstd.get_command("c", compresslevel=5),
            [str(zstd_path), "-5", "-c", "--single-thread"],
        )
        self.assertEqual(
            zstd.get_command("c", "foo.bar", compresslevel=5),
            [str(zstd_path), "-5", "-c", "--single-thread", "foo.bar"],
        )
        self.assertEqual(
            zstd.get_command("d"), [str(zstd_path), "-d", "-c", "--single-thread"]
        )
        self.assertEqual(
            zstd.get_command("d", "foo.xz"),
            [str(zstd_path), "-d", "-c", "--single-thread", "foo.xz"],
        )
        # Test with threads: zstd counts worker threads, so N threads -> -T(N-1)
        THREADS.update(3)
        self.assertEqual(
            zstd.get_command("c", compresslevel=5), [str(zstd_path), "-5", "-c", "-T2"]
        )
        self.assertEqual(
            zstd.get_command("c", "foo.bar", compresslevel=5),
            [str(zstd_path), "-5", "-c", "-T2", "foo.bar"],
        )
        self.assertEqual(zstd.get_command("d"), [str(zstd_path), "-d", "-c", "-T2"])
class FileTests(TestCase):
    """Round-trip compression/decompression through real files."""

    def setUp(self):
        self.root = TempDir()

    def tearDown(self):
        self.root.close()

    def test_invalid(self):
        # "n" is not a valid open mode
        with self.assertRaises(ValueError):
            get_format("gz").open_file(Path("foo"), "n")

    def write_read_file(self, ext, use_system, mode="t", content=None):
        """Write then read a compressed file and assert the content survives."""
        if content is None:
            content = random_text()  # generate 1 kb of random text
        if mode == "b":
            content = b"".join(c.encode() for c in content)
        path = self.root.make_file(suffix=ext)
        fmt = get_format(ext)
        write_file(fmt, path, use_system, content, "w" + mode)
        in_text = read_file(fmt, path, use_system, "r" + mode)
        assert content == in_text

    def test_write_read_bytes_python(self):
        for fmt in (".gz", ".bz2", ".xz"):
            with self.subTest(fmt=fmt):
                self.write_read_file(fmt, False, "b")

    def test_write_read_text_python(self):
        for fmt in (".gz", ".bz2", ".xz"):
            with self.subTest(fmt=fmt):
                self.write_read_file(fmt, False, "t")

    # These tests will be skipped if the required system-level executables
    # are not available

    @skipIf(gz_path is None, "'gzip' not available")
    def test_system_gzip(self):
        self.write_read_file(".gz", True)

    @skipIf(gz_path is None, "'gzip' not available")
    def test_iter_system(self):
        path = self.root.make_file(suffix=".gz")
        text = "line1\nline2\nline3"
        fmt = get_format(".gz")
        # Have to open in bytes mode, or it will get wrapped in a
        # TextBuffer, which does not use the underlying __iter__
        with fmt.open_file(path, mode="wb", ext=".gz", use_system=True) as f:
            f.write(text.encode())
        with fmt.open_file(path, mode="rb", ext=".gz", use_system=True) as f:
            lines = list(line.rstrip().decode() for line in iter(f))
        self.assertListEqual(lines, ["line1", "line2", "line3"])

    @skipIf(bz_path is None, "'bzip2' not available")
    def test_system_bzip(self):
        self.write_read_file(".bz2", True)

    @skipIf(xz_path is None, "'xz' not available")
    def test_system_lzma(self):
        self.write_read_file(".xz", True)

    @skipIf(zstd_path is None, "'zstd' not available")
    def test_system_zstd(self):
        self.write_read_file(".zst", True)

    def test_compress_path(self):
        # Exercise the system executable only when one is available.
        b = (True, False) if gz_path else (False,)
        for use_system in b:
            with self.subTest(use_system=use_system):
                # Default destination: source path + format extension;
                # source is kept by default.
                path = self.root.make_file()
                with open(path, "wt") as o:
                    o.write("foo")
                fmt = get_format(".gz")
                dest = fmt.compress_file(path, use_system=use_system)
                gzfile = Path(str(path) + ".gz")
                assert dest == gzfile
                self.assertTrue(os.path.exists(path))
                self.assertTrue(os.path.exists(gzfile))
                with gzip.open(gzfile, "rt") as i:
                    assert i.read() == "foo"

                # Explicit destination + keep=False deletes the source.
                path = self.root.make_file()
                with open(path, "wt") as o:
                    o.write("foo")
                gzfile = Path(str(path) + ".bar")
                fmt = get_format(".gz")
                dest = fmt.compress_file(
                    path, gzfile, keep=False, use_system=use_system
                )
                assert dest == gzfile
                self.assertFalse(os.path.exists(path))
                self.assertTrue(os.path.exists(gzfile))
                with gzip.open(gzfile, "rt") as i:
                    assert i.read() == "foo"

    def test_compress_file(self):
        # Same as test_compress_path but passing an open file object.
        b = (True, False) if gz_path else (False,)
        for use_system in b:
            with self.subTest(use_system=use_system):
                path = self.root.make_file()
                with open(path, "wt") as o:
                    o.write("foo")
                with open(path, "rb") as i:
                    fmt = get_format(".gz")
                    dest = fmt.compress_file(i, use_system=use_system)
                gzfile = Path(str(path) + ".gz")
                assert dest == gzfile
                self.assertTrue(os.path.exists(gzfile))
                with gzip.open(gzfile, "rt") as i:
                    assert i.read() == "foo"

                path = self.root.make_file()
                with open(path, "wt") as o:
                    o.write("foo")
                gzfile = Path(str(path) + ".bar")
                with open(path, "rb") as i:
                    fmt = get_format(".gz")
                    dest = fmt.compress_file(
                        i, gzfile, keep=False, use_system=use_system
                    )
                assert dest == gzfile
                self.assertFalse(os.path.exists(path))
                self.assertTrue(os.path.exists(gzfile))
                with gzip.open(gzfile, "rt") as i:
                    assert i.read() == "foo"

    def test_decompress_path_error(self):
        # Decompressing a file without a recognized extension must fail.
        path = self.root.make_file()
        with gzip.open(path, "wt") as o:
            o.write("foo")
        with self.assertRaises(Exception):
            fmt = get_format(".gz")
            fmt.decompress_file(path)

    def test_decompress_path(self):
        b = (True, False) if gz_path else (False,)
        for use_system in b:
            with self.subTest(use_system=use_system):
                # Default destination strips the compression extension;
                # source kept by default.
                path = self.root.make_file()
                gzfile = Path(str(path) + ".gz")
                with gzip.open(gzfile, "wt") as o:
                    o.write("foo")
                fmt = get_format(".gz")
                dest = fmt.decompress_file(gzfile, use_system=use_system)
                assert dest == path
                self.assertTrue(os.path.exists(path))
                self.assertTrue(os.path.exists(gzfile))
                with open(path, "rt") as i:
                    assert i.read() == "foo"

                # Explicit destination + keep=False removes the archive.
                path = self.root.make_file()
                gzfile = Path(str(path) + ".gz")
                with gzip.open(gzfile, "wt") as o:
                    o.write("foo")
                fmt = get_format(".gz")
                dest = fmt.decompress_file(
                    gzfile, path, keep=False, use_system=use_system
                )
                assert dest == path
                self.assertTrue(os.path.exists(path))
                self.assertFalse(os.path.exists(gzfile))
                with open(path, "rt") as i:
                    assert i.read() == "foo"

    def test_decompress_file(self):
        # Same as test_decompress_path but passing open file objects.
        b = (True, False) if gz_path else (False,)
        for use_system in b:
            with self.subTest(use_system=use_system):
                path = self.root.make_file()
                gzfile = Path(str(path) + ".gz")
                with gzip.open(gzfile, "wt") as o:
                    o.write("foo")
                with open(gzfile, "rb") as i:
                    fmt = get_format(".gz")
                    dest = fmt.decompress_file(i, use_system=use_system)
                assert Path(dest) == path
                self.assertTrue(os.path.exists(path))
                self.assertTrue(os.path.exists(gzfile))
                with open(path, "rt") as i:
                    assert i.read() == "foo"

                # Source and destination both open file objects.
                with gzip.open(gzfile, "wt") as o:
                    o.write("foo")
                dest = self.root.make_file()
                with open(gzfile, "rb") as i, open(dest, "wb") as o:
                    fmt = get_format(".gz")
                    fmt.decompress_file(source=i, dest=o, use_system=use_system)
                self.assertTrue(os.path.exists(dest))
                self.assertTrue(os.path.exists(gzfile))
                with open(dest, "rt") as i:
                    assert i.read() == "foo"

                path = self.root.make_file()
                gzfile = Path(str(path) + ".bar")
                with gzip.open(gzfile, "wt") as o:
                    o.write("foo")
                with open(gzfile, "rb") as i:
                    fmt = get_format(".gz")
                    dest = fmt.decompress_file(
                        i, path, keep=False, use_system=use_system
                    )
                assert dest == path
                self.assertFalse(os.path.exists(gzfile))
                self.assertTrue(os.path.exists(path))
                with open(path, "rt") as i:
                    assert i.read() == "foo"

    # Disable this test in python 3.3
    @skipIf(sys.version_info[:2] <= (3, 3), "Incompatible test")
    def test_truncated_gz(self):
        # A truncated archive must raise IOError on decompression.
        fmt = get_format(".gz")
        for use_system in (True, False):
            with self.subTest(use_system=use_system):
                path = self.root.make_path()
                gzfile = Path(str(path) + ".gz")
                create_truncated_file(gzfile, fmt)
                with self.assertRaises(IOError):
                    fmt.decompress_file(gzfile, use_system=use_system)
class StringTests(TestCase):
    """In-memory (bytes/str) compression round-trips."""

    def test_compress(self):
        for ext in (".gz", ".bz2", ".xz"):
            with self.subTest(ext=ext):
                fmt = get_format(ext)
                _bytes = random_text().encode()
                compressed = fmt.compress(_bytes)
                decompressed = fmt.decompress(compressed)
                assert _bytes == decompressed

    def test_compress_string(self):
        for ext in (".gz", ".bz2", ".xz"):
            with self.subTest(ext=ext):
                fmt = get_format(ext)
                text = random_text()
                compressed = fmt.compress_string(text)
                decompressed = fmt.decompress_string(compressed)
                assert text == decompressed

    def test_compress_iterable(self):
        for ext in (".gz", ".bz2", ".xz"):
            with self.subTest(ext=ext):
                fmt = get_format(ext)
                strings = ["line1", "line2", "line3"]
                compressed = fmt.compress_iterable(strings, delimiter=b"|")
                decompressed = fmt.decompress_string(compressed)
                self.assertListEqual(strings, decompressed.split("|"))


class UncompressedSizeTests(TestCase):
    @skipIf(gz_path is None, "'gzip' not available")
    def test_get_uncompressed_size(self):
        for ext in (".gz", ".xz"):
            with self.subTest(ext=ext):
                with TempDir() as temp:
                    raw = temp.make_file(contents=random_text(1000))
                    compressed = temp.make_file(suffix=ext)
                    fmt = get_format(ext)
                    fmt.compress_file(raw, compressed)
                    assert 1000 == fmt.uncompressed_size(compressed)
-------------------------------------------------------------------------------- /tests/test_paths.py: --------------------------------------------------------------------------------
from unittest import TestCase
import subprocess
from xphyle.paths import *


class TempDirTests(TestCase):
    def test_descriptor(self):
        # A directory descriptor cannot carry file contents.
        with self.assertRaises(ValueError):
            TempPathDescriptor(path_type='d', contents='foo')
        # absolute_path is undefined until the descriptor has a parent.
        with self.assertRaises(IOError):
            _ = TempPathDescriptor().absolute_path
        with TempDir(permissions='rwx') as temp:
            f = temp.make_file(name='foo', permissions=None)
            f.unlink()
            # set_permissions on a deleted file is a no-op returning None.
            assert temp[f].set_permissions('r') is None
        with TempDir(permissions='rwx') as temp:
            f = temp.make_file(name='foo', permissions=None)
            assert Path('foo') in temp
            assert temp[f].exists
            assert Path('foo') == temp[f].relative_path
            assert temp.absolute_path / 'foo' == temp[f].absolute_path
            assert PermissionSet('rwx') == temp[f].permissions
            assert PermissionSet('r') == temp[f].set_permissions('r')
            # Now read-only, so opening for write must fail.
            with self.assertRaises(PermissionError):
                open(f, 'w')
        with TempDir(permissions='rwx') as temp:
            desc = TempPathDescriptor(
                name='foo', path_type='f', parent=temp)
            assert Path('foo') == desc.relative_path
            assert temp.absolute_path / 'foo' == desc.absolute_path

    def test_context_manager(self):
        # Exiting the context removes the whole tree.
        with TempDir() as temp:
            with open(temp.make_file(name='foo'), 'wt') as o:
                o.write('foo')
        assert not temp.absolute_path.exists()

    def test_dir(self):
        temp = TempDir()
        foo = temp.make_directory(name='foo')
        assert foo == temp.absolute_path / 'foo'
        bar = temp.make_directory(name='bar', parent=foo)
        assert bar == temp.absolute_path / 'foo' / 'bar'
        assert (temp.absolute_path / 'foo' / 'bar').exists()
        temp.close()
        assert not temp.absolute_path.exists()
        # make sure trying to close again doesn't raise error
        temp.close()

    def test_tree(self):
        temp = TempDir()
        foo = temp.make_directory(name='foo')
        bar = temp.make_directory(name='bar', parent=foo)
        f = temp.make_file(name='baz', parent=bar)
        assert f == temp.absolute_path / 'foo' / 'bar' / 'baz'
        temp.close()
        assert not f.exists()

    def test_mode(self):
        # with self.assertRaises(IOError):
        #     with TempDir(permissions=None) as temp:
        #         _ = temp.mode
        with TempDir('r') as temp:
            # Raises error because the tempdir is read-only
            with self.assertRaises(PermissionError):
                temp.make_file(name='bar')
        # Should be able to create the tempdir with existing read-only files
        with TempDir(
                'r', [TempPathDescriptor(name='foo', contents='foo')]) as d:
            assert d.absolute_path.exists()
            assert (d.absolute_path / 'foo').exists()
            with open(d.absolute_path / 'foo', 'rt') as i:
                assert 'foo' == i.read()

    def test_fifo(self):
        with TempDir() as temp:
            # FIFOs cannot be created with initial contents.
            with self.assertRaises(Exception):
                _ = temp.make_fifo(contents='foo')
            path = temp.make_fifo()
            # Feed the FIFO from a background shell so the read won't block.
            p = subprocess.Popen('echo foo > {}'.format(path), shell=True)
            with open(path, 'rt') as i:
                assert i.read() == 'foo\n'
            p.communicate()


class PathTests(TestCase):
    def setUp(self):
        self.root = TempDir()

    def tearDown(self):
        self.root.close()
        EXECUTABLE_CACHE.cache.clear()

    def test_get_set_permissions(self):
        path = self.root.make_file(permissions='rw')
        assert PermissionSet('rw') == get_permissions(path)
        set_permissions(path, 'wx')
        assert PermissionSet('wx') == get_permissions(path)

    def test_check_access_std(self):
        # The std placeholders accept the accesses that make sense for them.
        check_access(STDIN_OR_STDOUT, 'r')
        check_access(STDIN_OR_STDOUT, 'w')
        check_access(STDIN, 'r')
        check_access(STDOUT, 'w')
        check_access(STDERR, 'w')
        with self.assertRaises(IOError):
            check_access(STDOUT, 'x')
        with self.assertRaises(IOError):
            check_access(STDERR, 'r')

    def test_check_access_file(self):
        path = self.root.make_file(permissions='rwx')
        check_access(path, 'r')
        check_access(path, 'w')
        check_access(path, 'x')

    def test_set_permissions(self):
        path = self.root.make_file()
        # 'z' is not a valid permission flag.
        with self.assertRaises(ValueError):
            set_permissions(path, 'z')
        set_permissions(path, 'r')
        with self.assertRaises(IOError):
            check_access(path, 'w')
    def test_no_permissions(self):
        with self.assertRaises(IOError):
            path = self.root.make_file(permissions='r')
            check_access(path, 'w')

    def test_abspath_std(self):
        # The std placeholders are returned unchanged.
        assert abspath(STDOUT) == STDOUT
        assert abspath(STDERR) == STDERR

    def test_abspath_home(self):
        home = os.path.expanduser("~")
        assert abspath(Path('~/foo')) == Path(home) / 'foo'

    def test_abspath_rel(self):
        cwd = os.getcwd()
        assert abspath(Path('foo')) == Path(cwd) / 'foo'

    def test_get_root(self):
        # Need to do a different test for posix vs windows
        if os.sep == '/':
            assert '/' == get_root()
            assert '/' == get_root(PosixPath('/foo/bar/baz'))
        else:
            script_drive = os.path.splitdrive(sys.executable)[0]
            assert script_drive == get_root()
            assert 'C:\\' == get_root(WindowsPath('C:\\foo\\bar\\baz'))

    def test_split_path(self):
        # Splits into (dir, name, *extensions).
        parent = self.root.make_directory()
        assert split_path(parent / 'foo', keep_seps=False) == (parent, 'foo')
        assert split_path(parent / 'foo.tar.gz', keep_seps=False) == \
            (parent, 'foo', 'tar', 'gz')
        assert split_path(parent / 'foo.tar.gz', keep_seps=True) == \
            (parent, 'foo', '.tar', '.gz')

    def test_filename(self):
        assert filename(Path('/path/to/foo.tar.gz')) == 'foo'

    def test_convert_std_placeholder(self):
        # "-" maps to stdin/stdout depending on mode; "_" maps to stderr.
        assert STDIN == convert_std_placeholder("-", "r")
        assert STDOUT == convert_std_placeholder("-", "w")
        assert STDERR == convert_std_placeholder("_", "w")
        assert "foo" == convert_std_placeholder("foo")

    def test_resolve_std(self):
        assert STDOUT == resolve_path(STDOUT)
        assert STDERR == resolve_path(STDERR)

    def test_resolve_file(self):
        path = self.root.make_file()
        assert abspath(path) == resolve_path(path)

    def test_resolve_with_parent(self):
        self.root.make_directory(name='foo')
        path = self.root.make_file(parent=self.root[Path('foo')])
        name = path.name
        parent = path.parent
        assert path == resolve_path(Path(name), parent)

    def test_resolve_missing(self):
        with self.assertRaises(IOError):
            resolve_path(Path('foo'))

    def test_check_readable_file(self):
        readable = self.root.make_file(permissions='r')
        non_readable = self.root.make_file(permissions='w')
        directory = self.root.make_directory()
        check_readable_file(readable)
        with self.assertRaises(IOError):
            check_readable_file(non_readable)
        with self.assertRaises(IOError):
            check_readable_file(Path('foo'))
        with self.assertRaises(IOError):
            check_readable_file(directory)
        # safe_* variant returns None instead of raising.
        assert safe_check_readable_file(readable)
        assert safe_check_readable_file(non_readable) is None

    def test_check_writable_file(self):
        writable = self.root.make_file(permissions='w')
        non_writable = self.root.make_file(permissions='r')
        check_writable_file(writable)
        with self.assertRaises(IOError):
            check_writable_file(non_writable)
        parent = self.root.make_directory()
        check_writable_file(parent / 'foo')
        # Checking a path in a missing subdir creates the subdir.
        subdir_path = parent / 'bar' / 'foo'
        check_writable_file(subdir_path)
        assert subdir_path.parent.exists()
        with self.assertRaises(IOError):
            parent = self.root.make_directory(permissions='r')
            check_writable_file(parent / 'foo')
        assert safe_check_writable_file(writable)
        assert safe_check_writable_file(non_writable) is None

    def test_check_path_std(self):
        check_path(STDIN_OR_STDOUT, 'f', 'r')
        check_path(STDIN_OR_STDOUT, 'f', 'w')
        check_path(STDIN, 'f', 'r')
        check_path(STDOUT, 'f', 'w')
        check_path(STDERR, 'f', 'w')
        with self.assertRaises(IOError):
            check_path(STDIN, 'f', 'w')
        with self.assertRaises(IOError):
            check_path(STDOUT, 'f', 'r')
        with self.assertRaises(IOError):
            check_path(STDERR, 'f', 'r')
        with self.assertRaises(IOError):
            check_path(STDOUT, 'd', 'r')

    def test_safe_checks(self):
        path = self.root.make_file(permissions='r')
        assert safe_check_path(path, 'f', 'r')
        assert not safe_check_path(path, 'd', 'r')
        assert not safe_check_path(path, 'f', 'w')

    def test_find(self):
        level1 = self.root.make_directory()
        level2 = self.root.make_directory(prefix='foo', parent=level1)
        paths = self.root.make_empty_files(3, prefix='bar', parent=level2)

        # recursive
        x = find(level1, 'foo.*', 'd', recursive=True)
        assert 1 == len(x)
        assert level2 == x[0]
        y = find(level1, 'bar.*', 'f', recursive=True)
        assert 3 == len(y)
        assert sorted(paths) == sorted(y)

        # non-recursive
        x = find(level1, 'foo.*', 'd', recursive=False)
        assert 1 == len(x)
        assert level2 == x[0]
        y = find(level1, 'bar.*', 'f', recursive=False)
        assert 0 == len(y)

        # absolute match
        x = find(
            level1, os.path.join(str(level1), 'foo.*', 'bar.*'), 'f',
            recursive=True)
        assert 3 == len(x)
        assert sorted(paths) == sorted(x)

        # fifo
        path = self.root.make_fifo(prefix='baz', parent=level1)
        x = find(level1, 'baz.*', '|')
        assert 1 == len(x)
        assert path == x[0]

    def test_find_with_matches(self):
        level1 = self.root.make_directory()
        level2 = self.root.make_directory(prefix='foo', parent=level1)
        path = self.root.make_path(name='bar123', parent=level2)
        result = cast(Sequence[Tuple[PurePath, Match]], find(
            level1, 'bar(.*)', 'f', recursive=True, return_matches=True))
        assert 1 == len(result)
        assert path == result[0][0]
        assert '123' == result[0][1].group(1)

    def test_get_executable_path(self):
        exe = self.root.make_file(suffix=".exe")
        exe_path = EXECUTABLE_CACHE.get_path(exe)
        assert exe_path is not None
        assert exe_path == EXECUTABLE_CACHE.get_path(exe.name)
        # After clearing the cache, the bare name resolves again once the
        # parent dir is added to the search path.
        EXECUTABLE_CACHE.cache.clear()
        EXECUTABLE_CACHE.add_search_path(exe.parent)
        assert exe_path == EXECUTABLE_CACHE.get_path(exe.name)
        # TODO: how to test this fully, since we can't be sure of what
        # executables will be available on the installed system?
    def test_resolve_exe(self):
        exe = self.root.make_file(suffix=".exe")
        exe_name = exe.name
        # Not resolvable until its directory is on the search path.
        path = EXECUTABLE_CACHE.resolve_exe([exe_name])
        assert path is None
        EXECUTABLE_CACHE.cache.clear()
        EXECUTABLE_CACHE.add_search_path(exe.parent)
        path = EXECUTABLE_CACHE.resolve_exe([exe_name])
        assert path is not None
        assert exe == path[0]

    def test_pathvar(self):
        # A default value is used when no value is supplied.
        pv = StrPathVar('id', pattern='[A-Z0-9_]+', default='ABC123')
        assert 'ABC123' == pv(None)

        # An optional var resolves to the empty string.
        pv = StrPathVar('id', pattern='[A-Z0-9_]+', optional=True)
        assert '' == pv(None)

        # A required var with no default raises.
        pv = StrPathVar('id', pattern='[A-Z0-9_]+')
        with self.assertRaises(ValueError):
            pv(None)

    def test_filespec(self):
        null = FileSpec()
        assert '{file}' == null.template
        assert 'file' in null.path_vars

        path = self.root.make_file(name='ABC123.txt')
        base = path.name

        spec = FileSpec(
            StrPathVar('id', pattern=r'[A-Z0-9_]+', invalid=('XYZ999',)),
            StrPathVar('ext', pattern=r'[^\.]+', valid=('txt', 'exe')),
            template='{id}.{ext}')

        # get a single file
        pathinst = spec(id='ABC123', ext='txt')
        assert path_inst(base, dict(id='ABC123', ext='txt')) == pathinst
        assert 'ABC123' == pathinst['id']
        assert 'txt' == pathinst['ext']

        # pattern mismatch (lowercase)
        with self.assertRaises(ValueError):
            spec(id='abc123', ext='txt')

        # extension not in the valid list
        with self.assertRaises(ValueError):
            spec(id='ABC123', ext='foo')

        # id in the invalid list
        with self.assertRaises(ValueError):
            spec(id='XYZ999', ext='txt')

        pathinst = spec.parse(path, fullpath=True)
        assert path_inst(path.name, dict(id='ABC123', ext='txt')) == pathinst

        path2 = self.root.make_file(name='abc123.txt')
        with self.assertRaises(ValueError):
            spec.parse(path2)

        all_paths = spec.find(self.root.absolute_path)
        assert 1 == len(all_paths)
        assert path_inst(path, dict(id='ABC123', ext='txt')) == all_paths[0]

    def test_dirspec(self):
        null = DirSpec()
        assert '{dir}' == null.template
        assert 'dir' in null.path_vars

        level1 = self.root.make_directory(name='ABC123')
        level2 = self.root.make_directory(parent=level1, name='AAA')
        base = level1.parent

        spec = DirSpec(
            PathPathVar('root'),
            StrPathVar('subdir', pattern='[A-Z0-9_]+', invalid=('XYZ999',)),
            StrPathVar('leaf', pattern='[^_]+', valid=('AAA', 'BBB')),
            template=os.path.join('{root}', '{subdir}', '{leaf}'))

        # get a single dir
        pathinst = spec(root=base, subdir='ABC123', leaf='AAA')
        assert \
            path_inst(level2, dict(root=base, subdir='ABC123', leaf='AAA')) == \
            pathinst
        assert base == pathinst['root']
        assert 'ABC123' == pathinst['subdir']
        assert 'AAA' == pathinst['leaf']

        with self.assertRaises(ValueError):
            spec(root=base, subdir='abc123', leaf='AAA')

        with self.assertRaises(ValueError):
            spec(root=base, subdir='ABC123', leaf='CCC')

        with self.assertRaises(ValueError):
            spec(root=base, subdir='XYZ999', leaf='AAA')

        pathinst = spec.parse(level2)
        assert \
            path_inst(level2, dict(root=base, subdir='ABC123', leaf='AAA')) == \
            pathinst

        # fullpath=True parses the directory part of a file path.
        path = self.root.make_file(parent=level2)
        pathinst = spec.parse(path, fullpath=True)
        assert \
            path_inst(level2, dict(root=base, subdir='ABC123', leaf='AAA')) == \
            pathinst

        path2 = self.root.make_directory(name='abc123')
        with self.assertRaises(ValueError):
            spec.parse(path2)

        all_paths = spec.find(base, recursive=True)
        assert 1 == len(all_paths)
        assert \
            path_inst(level2, dict(root=base, subdir='ABC123', leaf='AAA')) == \
            all_paths[0]

    def test_pathspec(self):
        level1 = self.root.make_directory(name='ABC123')
        level2 = self.root.make_directory(parent=level1, name='AAA')
        path = self.root.make_file(parent=level2, name='FFF555.txt')
        base = level1.parent

        spec = PathSpec(
            DirSpec(
                PathPathVar('root'),
                StrPathVar('subdir', pattern=r'[A-Z0-9_]+', invalid=('XYZ999',)),
                StrPathVar('leaf', pattern=r'[^_]+', valid=('AAA', 'BBB')),
                template=os.path.join('{root}', '{subdir}', '{leaf}')),
            FileSpec(
                StrPathVar('id', pattern=r'[A-Z0-9_]+', invalid=('ABC123',)),
                StrPathVar('ext', pattern=r'[^\.]+', valid=('txt', 'exe')),
                template='{id}.{ext}'))

        path_var_values = dict(root=base, subdir='ABC123', leaf='AAA',
                               id='FFF555', ext='txt')
        pathinst = spec(**path_var_values)
        assert path_inst(path, path_var_values) == pathinst
        assert base == pathinst['root']
        assert 'ABC123' == pathinst['subdir']
        assert 'AAA' == pathinst['leaf']
        assert 'FFF555' == pathinst['id']
        assert 'txt' == pathinst['ext']

        fail1 = dict(path_var_values)
        # should fail because expecting all caps
        fail1['id'] = 'abc123'
        with self.assertRaises(ValueError):
            spec(**fail1)

        fail2 = dict(path_var_values)
        # should fail because foo is not in the valid list
        fail2['ext'] = 'foo'
        with self.assertRaises(ValueError):
            spec(**fail2)

        fail3 = dict(path_var_values)
        # should fail because ABC123 is in the invalid list
        fail3['id'] = 'ABC123'
        with self.assertRaises(ValueError):
            spec(**fail3)

        pathinst = spec.parse(path)
        assert path_inst(path, path_var_values) == pathinst

        path2 = self.root.make_file(parent=level2, name='fff555.txt')
        with self.assertRaises(ValueError):
            spec.parse(path2)

        all_paths = spec.find(base, recursive=True)
        assert 1 == len(all_paths)
        assert path_inst(path, path_var_values) == all_paths[0]

        # make sure it works with plain paths
        spec = PathSpec(
            level2,
            FileSpec(
                StrPathVar('id', pattern=r'[A-Z0-9_]+', invalid=('ABC123',)),
                StrPathVar('ext', pattern=r'[^\.]+', valid=('txt', 'exe')),
                template='{id}.{ext}'))
        assert path_inst(path, dict(id='FFF555', ext='txt')) == spec.parse(path)
        with self.assertRaises(ValueError):
            bad_path = Path(get_root()) / 'foo' / 'bar' / path.name
            spec.parse(bad_path)

        spec = PathSpec(
            DirSpec(
                PathPathVar('root'),
                StrPathVar('subdir', pattern='[A-Z0-9_]+', invalid=('XYZ999',)),
                StrPathVar('leaf', pattern='[^_]+', valid=('AAA', 'BBB')),
                template=os.path.join('{root}', '{subdir}', '{leaf}')),
            path.name)
        assert \
            path_inst(path, dict(root=base, subdir='ABC123', leaf='AAA')) == \
            spec.parse(path)

        spec = PathSpec(level2, path.name)
        all_paths = spec.find()
        assert 1 == len(all_paths)
        assert path_inst(path) == all_paths[0]

    def test_default_search(self):
        spec = FileSpec(
            StrPathVar('id', pattern=r'[A-Z0-9_]+', invalid=('XYZ999',)),
            StrPathVar('ext', pattern=r'[^\.]+', valid=('txt', 'exe')),
            template='{id}.{ext}')
        with self.assertRaises(ValueError):
            spec.find()

        level1 = self.root.make_directory(name='ABC123')
        level2 = self.root.make_directory(parent=level1, name='AAA')
        base = level1.parent

        spec = DirSpec(
            StrPathVar('subdir', pattern='[A-Z0-9_]+', 
class TimeKeeper:
    """Context manager that times its body and prints a formatted report.

    Args:
        msg: A format string; it may reference ``{duration}`` (seconds,
            float) plus any of the keyword arguments supplied here.
        kwargs: Extra values interpolated into ``msg`` on exit.
    """

    def __init__(self, msg, **kwargs):
        self.msg = msg
        self.msg_args = kwargs
        self.duration = 0

    def __enter__(self):
        # time.clock() was deprecated in Python 3.3 and *removed* in 3.8;
        # perf_counter() is the recommended monotonic timer for benchmarks.
        self.start = time.perf_counter()
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        self.stop = time.perf_counter()
        self.duration = self.stop - self.start
        print(self.msg.format(
            duration=self.duration,
            **self.msg_args))
def perftest(name, text_generator, num_iter=10):
    """Benchmark xphyle's read_lines against plain gzip on generated text.

    Writes ``num_iter`` gzip files filled by ``text_generator``, then times
    three full read passes: plain ``gzip.open`` (baseline, use_system=None),
    and ``read_lines`` with use_system=True and use_system=False, printing
    each duration.
    """
    # generate a big text
    msg = """
    Timing of {iter} {name} tests with total size {size:,d} characters and
    use_system = {use_system}: {duration:0.2f} sec"""
    total_size = 0

    with TempDir() as root:
        # One gzip-compressed fixture file per iteration.
        paths = tuple(
            root.make_file(suffix='.gz')
            for _ in range(num_iter))
        for path in paths:
            txt = text_generator()
            total_size += len(txt)
            with gzip.open(path, 'wt') as out:
                out.write(txt)

        # Baseline: read every file via the stdlib gzip module.
        with TimeKeeper(
                msg, name=name, iter=num_iter, size=total_size,
                use_system=None):
            for path in paths:
                list(gzip.open(path))

        # xphyle reads, with and without delegating to a system gzip process.
        for use_system in (True, False):
            with TimeKeeper(
                    msg, name=name, iter=num_iter, size=total_size,
                    use_system=use_system):
                for path in paths:
                    list(read_lines(path, use_system=use_system))
class MockProgress:
    """Test double for a progress wrapper.

    Called like a progress bar factory: records the description and size,
    yields the wrapped iterable unchanged, and stores the number of items
    seen in ``count`` once iteration completes.
    """

    def __call__(self, itr, desc, size):
        self.desc = desc
        self.size = size
        seen = 0
        for item in itr:
            seen += 1
            yield item
        self.count = seen
class TypeTests(TestCase):
    """Tests for the mode/permission enums and flyweight caching in xphyle.types."""

    def test_mode_access(self):
        """Each ModeAccess member reports the expected read/write capability."""
        for t in ("READ", "READWRITE", "TRUNCATE_READWRITE"):
            self.assertTrue(ModeAccess[t].readable)
        for t in ("WRITE", "READWRITE", "TRUNCATE_READWRITE", "APPEND", "EXCLUSIVE"):
            self.assertTrue(ModeAccess[t].writable)

    def test_file_mode(self):
        """All equivalent spellings of read-text mode build the same FileMode."""
        for f in (
            FileMode(),
            FileMode("rt"),
            FileMode(access="r"),
            FileMode(coding="t"),
            FileMode(access=ModeAccess.READ),
            FileMode(coding=ModeCoding.TEXT),
            FileMode(access="r", coding="t"),
            FileMode(access=ModeAccess.READ, coding="t"),
            FileMode(access="r", coding=ModeCoding.TEXT),
            FileMode(access=ModeAccess.READ, coding=ModeCoding.TEXT),
        ):
            self.assertEqual(ModeAccess.READ, f.access)
            self.assertEqual(ModeCoding.TEXT, f.coding)
            self.assertTrue(f.readable)
            self.assertFalse(f.writable)
            self.assertTrue(f.text)
            self.assertFalse(f.binary)
            # Membership accepts both strings and enum members.
            self.assertTrue("rt" in f)
            self.assertFalse("b" in f)
            self.assertTrue(ModeAccess.READ in f)
            self.assertTrue(ModeCoding.TEXT in f)
            self.assertEqual("rt", f.value)
            self.assertEqual("rt", str(f))
        # Unknown mode characters are rejected.
        with self.assertRaises(ValueError):
            FileMode("rz")

    def test_permissions(self):
        """Permission members map to the matching os.* and stat.* flags."""
        self.assertEqual(os.R_OK, Permission.READ.os_flag)
        self.assertEqual(os.W_OK, Permission.WRITE.os_flag)
        self.assertEqual(os.X_OK, Permission.EXECUTE.os_flag)
        self.assertEqual(stat.S_IREAD, Permission.READ.stat_flag)
        self.assertEqual(stat.S_IWRITE, Permission.WRITE.stat_flag)
        self.assertEqual(stat.S_IEXEC, Permission.EXECUTE.stat_flag)

    def test_permission_set(self):
        """PermissionSet accepts strings, chars, int masks, and enum members."""
        for a in (
            PermissionSet("rwx"),
            PermissionSet(("r", "w", "x")),
            PermissionSet(7),
            PermissionSet((1, 2, 4)),
            PermissionSet((Permission.READ, Permission.WRITE, Permission.EXECUTE)),
        ):
            # 7 == R_OK|W_OK|X_OK; 448 == S_IREAD|S_IWRITE|S_IEXEC (0o700).
            self.assertEqual(7, a.os_flags)
            self.assertEqual(448, a.stat_flags)
            self.assertEqual("rwx", "".join(f.value for f in a))
            self.assertEqual("rwx", str(a))
            for char in "rwx":
                self.assertTrue(char in a)
                self.assertTrue(Permission(char) in a)

        # ModeAccess values can also be added to a PermissionSet.
        a = PermissionSet()
        a.add(ModeAccess.READ)
        a.add(ModeAccess.WRITE)
        self.assertEqual("rw", str(a))

    def test_cache(self):
        """Instances are interned by their literal spec string.

        'rt' and 'tr' compare equal but are cached under different keys,
        so they are distinct objects; the same holds for PermissionSet.
        """
        fm1 = FileMode("rt")
        fm2 = FileMode("rt")
        fm3 = FileMode("tr")
        self.assertEqual(fm1, fm2)
        self.assertEqual(fm1, fm3)
        self.assertEqual(id(fm1), id(fm2))
        self.assertNotEqual(id(fm1), id(fm3))

        perm1 = PermissionSet("rw")
        perm2 = PermissionSet("rw")
        perm3 = PermissionSet("wr")
        self.assertEqual(perm1, perm2)
        self.assertEqual(perm1, perm3)
        self.assertEqual(id(perm1), id(perm2))
        self.assertNotEqual(id(perm1), id(perm3))
class TestURLs(TestCase):
    """Tests for URL parsing and opening in xphyle.urls."""

    def test_parse(self):
        expected = (
            'https', 'github.com',
            '/jdidion/xphyle/blob/master/tests/foo.gz',
            '', 'raw=True', '')
        self.assertEqual(expected, tuple(parse_url(good_url)))
        # A string without a scheme/netloc is not recognized as a URL.
        self.assertIsNone(parse_url(bad_url))

    def test_open_invalid(self):
        """Opening a malformed URL returns None rather than raising."""
        self.assertIsNone(open_url(bad_url))

    def test_get_url_file_name(self):
        """The file name of a file:// URL resolves back to the local path."""
        with TempDir() as temp:
            local = abspath(temp.make_file(name='foo.txt'))
            url = open_url(local.as_uri())
            self.assertEqual(str(local), get_url_file_name(url))
        # TODO: need to find a reliable compressed file URL with a
        # Content-Disposition, or figure out how to mock one up

    def test_mime_types(self):
        """Placeholder until a stable compressed URL with a MIME type exists."""
        # TODO: need to find a reliable compressed file URL with a MIME type,
        # or figure out how to mock one up
        pass
import * 3 | import gzip 4 | import bz2 5 | from xphyle.formats import THREADS 6 | from xphyle.paths import TempDir, EXECUTABLE_CACHE 7 | from xphyle.progress import ITERABLE_PROGRESS, PROCESS_PROGRESS 8 | from xphyle.utils import * 9 | 10 | 11 | class UtilsTests(TestCase): 12 | def setUp(self): 13 | self.root = TempDir() 14 | self.system_args = sys.argv 15 | 16 | def tearDown(self): 17 | self.root.close() 18 | ITERABLE_PROGRESS.enabled = False 19 | ITERABLE_PROGRESS.wrapper = None 20 | PROCESS_PROGRESS.enabled = False 21 | PROCESS_PROGRESS.wrapper = None 22 | THREADS.update(1) 23 | EXECUTABLE_CACHE.reset_search_path() 24 | EXECUTABLE_CACHE.cache = {} 25 | 26 | def test_read_lines(self): 27 | self.assertListEqual(list(read_lines(Path('foobar'), errors=False)), []) 28 | 29 | path = self.root.make_file() 30 | with open(path, 'wt') as o: 31 | o.write("1\n2\n3") 32 | self.assertListEqual( 33 | list(read_lines(path)), 34 | ['1', '2', '3']) 35 | self.assertListEqual( 36 | list(read_lines(path, convert=int)), 37 | [1, 2, 3]) 38 | 39 | def test_read_chunked(self): 40 | self.assertListEqual([], list(read_bytes(Path('foobar'), errors=False))) 41 | path = self.root.make_file() 42 | with open(path, 'wt') as o: 43 | o.write("1234567890") 44 | chunks = list(read_bytes(path, 3)) 45 | self.assertListEqual([b'123', b'456', b'789', b'0'], chunks) 46 | 47 | def test_write_lines(self): 48 | linesep_len = len(os.linesep) 49 | path = self.root.make_file() 50 | assert 3 == write_lines(['foo'], path, linesep=None) 51 | assert list(read_lines(path)) == ['foo'] 52 | path = self.root.make_file() 53 | self.assertEqual( 54 | 9 + (2*linesep_len), 55 | write_lines(('foo', 'bar', 'baz'), path, linesep=None)) 56 | self.assertEqual( 57 | list(read_lines(path)), 58 | ['foo', 'bar', 'baz']) 59 | path = self.root.make_file() 60 | self.assertEqual( 61 | 11, write_lines(('foo', 'bar', 'baz'), path, linesep='|')) 62 | assert list(read_lines(path)) == ['foo|bar|baz'] 63 | path = 
self.root.make_file(permissions='r') 64 | assert -1 == write_lines(['foo'], path, errors=False) 65 | 66 | def test_write_bytes(self): 67 | path = self.root.make_file() 68 | linesep_len = len(os.linesep) 69 | assert 3 == write_bytes([b'foo'], path) 70 | assert list(read_bytes(path)) == [b'foo'] 71 | path = self.root.make_file() 72 | assert 9 + (2*linesep_len) == \ 73 | write_bytes(('foo', 'bar', 'baz'), path, sep=None) 74 | self.assertEqual( 75 | os.linesep.encode().join((b'foo', b'bar', b'baz')), 76 | b''.join(read_bytes(path))) 77 | path = self.root.make_file(permissions='r') 78 | assert -1 == write_bytes([b'foo'], path, errors=False) 79 | 80 | def test_read_dict(self): 81 | path = self.root.make_file() 82 | with open(path, 'wt') as o: 83 | o.write("# This is a comment\n") 84 | o.write("foo=1\n") 85 | o.write("bar=2\n") 86 | d = read_dict(path, convert=int, ordered=True) 87 | assert len(d) == 2 88 | assert d['foo'] == 1 89 | assert d['bar'] == 2 90 | assert list(d.items()) == [('foo', 1), ('bar', 2)] 91 | 92 | def test_write_dict(self): 93 | path = self.root.make_file() 94 | write_dict(OrderedDict([('foo', 1), ('bar', 2)]), path, linesep=None) 95 | assert list(read_lines(path)) == ['foo=1', 'bar=2'] 96 | 97 | def test_tsv(self): 98 | assert [] == list(read_delimited(Path('foobar'), errors=False)) 99 | 100 | path = self.root.make_file() 101 | with open(path, 'wt') as o: 102 | o.write('a\tb\tc\n') 103 | o.write('1\t2\t3\n') 104 | o.write('4\t5\t6\n') 105 | 106 | with self.assertRaises(ValueError): 107 | list(read_delimited(path, header=False, converters='int')) 108 | with self.assertRaises(ValueError): 109 | list(read_delimited( 110 | path, header=False, converters=int, row_type='dict', 111 | yield_header=False)) 112 | 113 | assert [ 114 | ['a', 'b', 'c'], 115 | [1, 2, 3], 116 | [4, 5, 6] 117 | ] == list(read_delimited( 118 | path, header=True, converters=int)) 119 | assert [ 120 | ['a', 'b', 'c'], 121 | (1, 2, 3), 122 | (4, 5, 6) 123 | ] == list(read_delimited( 124 
| path, header=True, converters=int, row_type='tuple')) 125 | assert [ 126 | ['a', 'b', 'c'], 127 | (1, 2, 3), 128 | (4, 5, 6) 129 | ] == list(read_delimited( 130 | path, header=True, converters=int, row_type=tuple)) 131 | assert [ 132 | dict(a=1, b=2, c=3), 133 | dict(a=4, b=5, c=6) 134 | ] == list(read_delimited( 135 | path, header=True, converters=int, row_type='dict', 136 | yield_header=False)) 137 | 138 | def test_tsv_dict(self): 139 | path = self.root.make_file() 140 | with open(path, 'wt') as o: 141 | o.write('id\ta\tb\tc\n') 142 | o.write('row1\t1\t2\t3\n') 143 | o.write('row2\t4\t5\t6\n') 144 | 145 | with self.assertRaises(ValueError): 146 | read_delimited_as_dict(path, key='id', header=False) 147 | with self.assertRaises(ValueError): 148 | read_delimited_as_dict(path, key=None, header=False) 149 | 150 | assert dict( 151 | row1=['row1', 1, 2, 3], 152 | row2=['row2', 4, 5, 6] 153 | ) == read_delimited_as_dict( 154 | path, key=0, header=True, converters=(str, int, int, int)) 155 | assert dict( 156 | row1=['row1', 1, 2, 3], 157 | row2=['row2', 4, 5, 6] 158 | ) == read_delimited_as_dict( 159 | path, key='id', header=True, converters=(str, int, int, int)) 160 | 161 | with open(path, 'wt') as o: 162 | o.write('a\tb\tc\n') 163 | o.write('1\t2\t3\n') 164 | o.write('4\t5\t6\n') 165 | 166 | assert dict( 167 | row1=[1, 2, 3], 168 | row4=[4, 5, 6] 169 | ) == read_delimited_as_dict( 170 | path, key=lambda row: 'row{}'.format(row[0]), 171 | header=True, converters=int) 172 | 173 | def test_tsv_dict_dups(self): 174 | path = self.root.make_file() 175 | with open(path, 'wt') as o: 176 | o.write('id\ta\tb\tc\n') 177 | o.write('row1\t1\t2\t3\n') 178 | o.write('row1\t4\t5\t6\n') 179 | 180 | with self.assertRaises(Exception): 181 | read_delimited_as_dict( 182 | path, key='id', header=True, converters=(str, int, int, int)) 183 | 184 | def test_compress_file_no_dest(self): 185 | path = self.root.make_file() 186 | 187 | with self.assertRaises(ValueError): 188 | 
    def test_compress_fileobj(self):
        """compress_file accepts an open file object as source or destination."""
        path = self.root.make_file()
        with open(path, 'wt') as o:
            o.write('foo')

        # Source given as an open binary handle: the destination name is
        # derived from the handle's path, and the source file is kept.
        f = open(path, 'rb')
        try:
            gzfile = compress_file(f, compression='gz')
            assert gzfile == Path(str(path) + '.gz')
            assert path.exists()
            assert gzfile.exists()
            with gzip.open(gzfile, 'rt') as i:
                assert i.read() == 'foo'
        finally:
            f.close()

        # Destination given as an open gzip handle; compression=True means
        # infer the format from the destination.
        gzpath = Path(str(path) + '.gz')
        gzfile = gzip.open(gzpath, 'w')
        try:
            assert gzpath == compress_file(path, gzfile, compression=True)
        finally:
            gzfile.close()
        assert path.exists()
        assert gzpath.exists()
        with gzip.open(gzpath, 'rt') as i:
            assert i.read() == 'foo'
path == path2 254 | assert path.exists() 255 | assert gzfile.exists() 256 | with open(path, 'rt') as j: 257 | assert j.read() == 'foo' 258 | 259 | def test_decompress_file_compression(self): 260 | path = self.root.make_file() 261 | gzfile = Path(str(path) + '.foo') 262 | with gzip.open(gzfile, 'wt') as o: 263 | o.write('foo') 264 | with self.assertRaises(ValueError): 265 | decompress_file(gzfile) 266 | path2 = decompress_file(gzfile, compression='gz', keep=False) 267 | assert path == path2 268 | assert path.exists() 269 | assert not gzfile.exists() 270 | with open(path, 'rt') as i: 271 | assert i.read() == 'foo' 272 | 273 | def test_transcode(self): 274 | path = self.root.make_file() 275 | gzfile = Path(str(path) + '.gz') 276 | with gzip.open(gzfile, 'wt') as o: 277 | o.write('foo') 278 | bzfile = Path(str(path) + '.bz2') 279 | transcode_file(gzfile, bzfile) 280 | with bz2.open(bzfile, 'rt') as i: 281 | assert 'foo' == i.read() 282 | 283 | def test_uncompressed_size(self): 284 | for ext in ('.gz', '.xz'): 285 | with self.subTest(ext): 286 | raw = self.root.make_file(contents=random_text(1000)) 287 | compressed = self.root.make_file(suffix=ext) 288 | compress_file(raw, compressed) 289 | assert 1000 == uncompressed_size(compressed) 290 | 291 | def test_exec_process(self): 292 | inp = self.root.make_file(suffix='.gz') 293 | with gzip.open(inp, 'wt') as o: 294 | o.write('foo') 295 | out = self.root.make_file(suffix='.gz') 296 | exec_process('cat', stdin=inp, stdout=out) 297 | with gzip.open(out, 'rt') as o: 298 | assert 'foo' == o.read() 299 | 300 | def test_linecount(self): 301 | assert -1 == linecount(Path('foobar'), errors=False) 302 | path = self.root.make_file() 303 | with open(path, 'wt') as o: 304 | for i in range(100): 305 | o.write(random_text()) 306 | if i != 99: 307 | o.write('\n') 308 | with self.assertRaises(ValueError): 309 | linecount(path, buffer_size=-1) 310 | with self.assertRaises(ValueError): 311 | linecount(path, mode='wb') 312 | assert 100 == 
    def test_file_manager(self):
        """FileManager tracks open files added by dict, add(), and item-set."""
        paths12 = dict(
            path1=self.root.make_empty_files(1)[0],
            path2=self.root.make_empty_files(1)[0])
        with FileManager(paths12, mode='wt') as f:
            # Files added while the manager is open are registered and open.
            paths34 = self.root.make_empty_files(2)
            for p in paths34:
                f.add(p, mode='wt')
                self.assertTrue(p in f)
                self.assertFalse(f[p].closed)
            # An already-open file handle can be added directly.
            path5 = self.root.make_file()
            path5_fh = open(path5, 'wt')
            f.add(path5_fh)
            # Item assignment opens the path under the given key.
            path6 = self.root.make_file()
            f['path6'] = path6
            assert path6 == f.get_path('path6')
            # Paths are reported in insertion order.
            all_paths = list(paths12.values()) + list(paths34) + [path5, path6]
            self.assertListEqual(all_paths, f.paths)
            assert len(f) == 6
            for key, fh in f.iter_files():
                self.assertFalse(fh.closed)
            # Lookup works by key and by positional index (path6 is the 6th
            # entry, index 5).
            assert f['path2'] is not None
            assert f.get('path2') is not None
            assert f['path6'] == f.get(5)
            # Unknown keys: __getitem__ raises, get() returns None.
            with self.assertRaises(KeyError):
                _ = f['foo']
            assert f.get('foo') is None
        # Exiting the context closes every managed file but keeps the entries.
        assert len(f) == 6
        for key, fh in f.iter_files():
            self.assertTrue(fh.closed)
FileWrapper(path, 'wt') as wrapper: 372 | wrapper.register_listener('close', MoveOnClose(dest=dest)) 373 | wrapper.write('foo') 374 | self.assertFalse(os.path.exists(path)) 375 | self.assertTrue(os.path.exists(dest)) 376 | with open(dest, 'rt') as i: 377 | assert i.read() == 'foo' 378 | 379 | def test_remove_on_close(self): 380 | path = self.root.make_file() 381 | with FileWrapper(path, 'wt') as wrapper: 382 | wrapper.register_listener('close', RemoveOnClose()) 383 | wrapper.write('foo') 384 | self.assertFalse(os.path.exists(path)) 385 | 386 | path = self.root.make_file() 387 | with FileWrapper(open(path, 'wt')) as wrapper: 388 | wrapper.register_listener('close', RemoveOnClose()) 389 | wrapper.write('foo') 390 | self.assertFalse(os.path.exists(path)) 391 | 392 | def test_fileinput(self): 393 | file1 = self.root.make_file(suffix='.gz') 394 | with gzip.open(file1, 'wt') as o: 395 | o.write('foo\nbar\n') 396 | with textinput(file1) as i: 397 | lines = list(i) 398 | self.assertListEqual(['foo\n', 'bar\n'], lines) 399 | file2 = self.root.make_file(suffix='.gz') 400 | with gzip.open(file2, 'wt') as o: 401 | o.write('baz\n') 402 | with textinput((file1, file2)) as i: 403 | lines = list(i) 404 | self.assertListEqual(['foo\n', 'bar\n', 'baz\n'], lines) 405 | with textinput([('key1', file1), ('key2', file2)]) as i: 406 | assert i.filekey is None 407 | assert i.filename is None 408 | assert i.lineno == 0 409 | assert i.filelineno == 0 410 | 411 | assert next(i) == 'foo\n' 412 | assert i.filekey == 'key1' 413 | assert i.filename == file1 414 | assert i.lineno == 1 415 | assert i.filelineno == 1 416 | 417 | assert next(i) == 'bar\n' 418 | assert i.filekey == 'key1' 419 | assert i.filename == file1 420 | assert i.lineno == 2 421 | assert i.filelineno == 2 422 | 423 | assert next(i) == 'baz\n' 424 | assert i.filekey == 'key2' 425 | assert i.filename == file2 426 | assert i.lineno == 3 427 | assert i.filelineno == 1 428 | 429 | def test_pending(self): 430 | file1 = 
    def test_fileinput_defaults(self):
        """With no explicit files, textinput falls back to sys.argv, then stdin."""
        path = self.root.make_file()
        with open(path, 'wt') as o:
            o.write('foo\nbar\n')
        # File names are taken from the command line when none are passed.
        sys.argv = [self.system_args[0], path]
        self.assertEqual(
            ['foo\n', 'bar\n'],
            list(textinput()))
        # With no command-line arguments either, input comes from stdin.
        sys.argv = []
        with intercept_stdin('foo\n'):
            lines = list(textinput([STDIN]))
            assert 1 == len(lines)
            assert 'foo\n' == lines[0]
        # byteinput reads stdin in binary mode.
        with intercept_stdin(b'foo\nbar\n', is_bytes=True):
            assert [b'foo\n', b'bar\n'] == list(byteinput())
self.root.make_file(suffix='.gz') 489 | file2 = self.root.make_file() 490 | with byteoutput( 491 | (file1, file2), 492 | file_output_type=TeeFileOutput) as o: 493 | o.writelines((b'foo', b'bar', b'baz')) 494 | with gzip.open(file1, 'rb') as i: 495 | assert b'foo\nbar\nbaz\n' == i.read() 496 | with open(file2, 'rb') as i: 497 | assert b'foo\nbar\nbaz\n' == i.read() 498 | 499 | with textoutput((file1, file2), file_output_type=TeeFileOutput) as o: 500 | o.writelines((b'foo', b'bar', b'baz')) 501 | with gzip.open(file1, 'rt') as i: 502 | assert 'foo\nbar\nbaz\n' == i.read() 503 | with open(file2, 'rt') as i: 504 | assert 'foo\nbar\nbaz\n' == i.read() 505 | 506 | with byteoutput((file1, file2), file_output_type=TeeFileOutput) as o: 507 | o.writelines(('foo', b'bar', b'baz')) 508 | with gzip.open(file1, 'rb') as i: 509 | assert b'foo\nbar\nbaz\n' == i.read() 510 | with open(file2, 'rb') as i: 511 | assert b'foo\nbar\nbaz\n' == i.read() 512 | 513 | def test_tee_fileoutput_no_newline(self): 514 | file1 = self.root.make_file(suffix='.gz') 515 | file2 = self.root.make_file() 516 | with textoutput((file1, file2)) as o: 517 | o.writeline('foo') 518 | o.writeline('bar') 519 | assert 2 == o.num_lines 520 | with gzip.open(file1, 'rb') as i: 521 | assert b'foo\nbar\n' == i.read() 522 | with open(file2, 'rb') as i: 523 | assert b'foo\nbar\n' == i.read() 524 | 525 | def test_fileoutput_stdout(self): 526 | path = self.root.make_file() 527 | sys.argv = [self.system_args, path] 528 | with textoutput() as o: 529 | o.writelines(('foo', 'bar', 'baz')) 530 | with open(path, 'rt') as i: 531 | assert 'foo\nbar\nbaz\n' == i.read() 532 | sys.argv = [] 533 | with intercept_stdout(True) as outbuf: 534 | with byteoutput() as o: 535 | o.writelines((b'foo', b'bar', b'baz')) 536 | assert b'foo\nbar\nbaz\n' == outbuf.getvalue() 537 | 538 | def test_cycle_fileoutput(self): 539 | file1 = self.root.make_file(suffix='.gz') 540 | file2 = self.root.make_file() 541 | with textoutput((file1, file2), 
    def test_rolling_fileoutput(self):
        """RollingFileOutput starts a new file every `lines_per_file` lines."""
        path = str(self.root.make_file())
        with RollingFileOutput(
                path + '{index}.txt', char_mode=TextMode, linesep=os.linesep,
                lines_per_file=3) as out:
            for i in range(6):
                out.write(str(i))
        # Six lines at three per file -> files with index 0 and 1.
        with open(path + '0.txt', 'rt') as infile:
            assert '0\n1\n2\n' == infile.read()
        with open(path + '1.txt', 'rt') as infile:
            assert '3\n4\n5\n' == infile.read()
    def test_pattern_file_output(self):
        """PatternFileOutput routes each line to a file named from its tokens."""
        path = self.root.make_file()

        def get_tokens(line):
            # Map the first two space-separated fields to template tokens.
            return dict(zip(('a', 'b'), line.split(' ')))

        with textoutput(
                str(path) + '{a}.{b}.txt',
                file_output_type=PatternFileOutput,
                token_func=get_tokens) as out:
            for a in range(2):
                for b in range(2):
                    out.writeline(f'{a} {b}')

        # One file per (a, b) combination, each holding its single line.
        for a in range(2):
            for b in range(2):
                with open(str(path) + f'{a}.{b}.txt', 'rt') as infile:
                    assert f'{a} {b}\n' == infile.read()
import *
import gzip
from io import BytesIO, IOBase
from xphyle import *
from xphyle.paths import TempDir, STDIN, STDOUT, STDERR, EXECUTABLE_CACHE
from xphyle.progress import ITERABLE_PROGRESS, PROCESS_PROGRESS
from xphyle.formats import THREADS
from xphyle.types import EventType


# Note: the casts of StringIO/BytesIO to IOBase are only necessary because of
# pycharm bug PY-28155


class XphyleTests(TestCase):
    """End-to-end tests for the top-level xphyle API (open_, xopen, Process,
    popen) against local files, in-memory buffers, std streams, URLs, and
    subprocesses.
    """

    def setUp(self):
        self.root = TempDir()

    def tearDown(self):
        # Reset the module-level global state mutated by tests so that
        # configuration does not leak between test cases.
        self.root.close()
        ITERABLE_PROGRESS.enabled = False
        ITERABLE_PROGRESS.wrapper = None
        PROCESS_PROGRESS.enabled = False
        PROCESS_PROGRESS.wrapper = None
        THREADS.update(1)
        EXECUTABLE_CACHE.reset_search_path()
        EXECUTABLE_CACHE.cache = {}

    def test_configure(self):
        def wrapper(a, b, c) -> Iterable:
            return []

        configure(
            progress=True,
            progress_wrapper=wrapper,
            system_progress=True,
            system_progress_wrapper="foo",
            threads=2,
            executable_path=[Path("foo")],
        )

        assert wrapper == ITERABLE_PROGRESS.wrapper
        assert ("foo",) == PROCESS_PROGRESS.wrapper
        assert 2 == THREADS.threads
        assert Path("foo") in EXECUTABLE_CACHE.search_path

        # threads=False resets to single-threaded
        configure(threads=False)
        assert 1 == THREADS.threads

        import multiprocessing

        # threads=True uses all available cores
        configure(threads=True)
        assert multiprocessing.cpu_count() == THREADS.threads

    def test_guess_format(self):
        with self.assertRaises(ValueError):
            guess_file_format(STDOUT)
        with self.assertRaises(ValueError):
            guess_file_format(STDERR)
        path = self.root.make_file(suffix=".gz")
        with gzip.open(path, "wt") as o:
            o.write("foo")
        assert guess_file_format(path) == "gzip"
        # format is guessed from content even without a .gz suffix
        path = self.root.make_file()
        with gzip.open(path, "wt") as o:
            o.write("foo")
        assert guess_file_format(path) == "gzip"

    def test_open_(self):
        path = self.root.make_file(contents="foo")
        with self.assertRaises(ValueError):
            with open_(path, wrap_fileobj=False):
                pass
        with open_(path, compression=False) as fh:
            assert fh.read() == "foo"
        with open_(path, compression=False) as fh:
            assert next(fh) == "foo"
        with open(path) as fh:
            with open_(fh, compression=False, context_wrapper=True) as fh2:
                self.assertTrue(isinstance(fh2, FileLikeWrapper))
                assert fh2.read() == "foo"
        with open(path) as fh3:
            with open_(fh, wrap_fileobj=False, context_wrapper=True):
                self.assertFalse(isinstance(fh3, FileLikeWrapper))

    def test_open_safe(self):
        with self.assertRaises(IOError):
            with open_("foobar", mode="r", errors=True) as _:
                pass
        with self.assertRaises(ValueError):
            with open_(cast(IOBase, None), mode="r", errors=True) as _:
                pass
        # errors=False yields None instead of raising
        with open_("foobar", mode="r", errors=False) as fh:
            self.assertIsNone(fh)
        with open_(cast(IOBase, None), mode="r", errors=False) as fh:
            self.assertIsNone(fh)

    def test_xopen_invalid(self):
        # invalid mode
        with self.assertRaises(ValueError):
            xopen("foo", "z")
        with self.assertRaises(ValueError):
            xopen("foo", "rz")
        with self.assertRaises(ValueError):
            xopen("foo", "rU", newline="\n")
        with self.assertRaises(ValueError):
            xopen(STDOUT, "w", compression=True)
        with self.assertRaises(ValueError):
            xopen("foo.bar", "w", compression=True)
        with self.assertRaises(ValueError):
            xopen("foo", file_type=FileType.STDIO)
        with self.assertRaises(ValueError):
            xopen(STDOUT, file_type=FileType.LOCAL)
        with self.assertRaises(ValueError):
            xopen("foo", file_type=FileType.URL)
        with self.assertRaises(IOError):
            xopen("http://foo.com", file_type=FileType.LOCAL)
        with self.assertRaises(ValueError):
            xopen("xyz", file_type=FileType.FILELIKE)
        path = self.root.make_file(contents="foo")
        with open(path, "r") as fh:
            with self.assertRaises(ValueError):
                xopen(fh, "w")
            f = xopen(fh, context_wrapper=True)
            assert "r" == f.mode
        f = xopen(path, context_wrapper=True)
        f.close()
        # re-entering a closed wrapper fails
        with self.assertRaises(IOError):
            with f:
                pass
        with self.assertRaises(ValueError):
            with open(path, "rt") as fh:
                xopen(fh, "rt", compression=True)
        # can't guess compression without a name
        with self.assertRaises(ValueError):
            b = BytesIO()
            b.mode = "wb"
            xopen(cast(IOBase, b), "wt")
        # can't read from stderr
        with self.assertRaises(ValueError):
            xopen(STDERR, "rt")

    def test_xopen_std(self):
        # Try stdin
        with intercept_stdin("foo\n"):
            with xopen("-", "r", context_wrapper=True, compression=False) as i:
                content = i.read()
                assert content == "foo\n"
        with intercept_stdin("foo\n"):
            with xopen(STDIN, "r", context_wrapper=True, compression=False) as i:
                content = i.read()
                assert content == "foo\n"
        # Try stdout
        with intercept_stdout() as i:
            with xopen("-", "w", context_wrapper=True, compression=False) as o:
                o.write("foo")
            assert i.getvalue() == "foo"
        with intercept_stdout() as i:
            with xopen(STDOUT, "w", context_wrapper=True, compression=False) as o:
                o.write("foo")
            assert i.getvalue() == "foo"
        # Try stderr
        with intercept_stderr() as i:
            with xopen("_", "w", context_wrapper=True, compression=False) as o:
                o.write("foo")
            assert i.getvalue() == "foo"
        with intercept_stderr() as i:
            with xopen(STDERR, "w", context_wrapper=True, compression=False) as o:
                o.write("foo")
            assert i.getvalue() == "foo"

        # Try binary
        with intercept_stdout(True) as i:
            with xopen(STDOUT, "wb", context_wrapper=True, compression=False) as o:
                o.write(b"foo")
            assert i.getvalue() == b"foo"

        # Try compressed
        with intercept_stdout(True) as i:
            with xopen(STDOUT, "wt", context_wrapper=True, compression="gz") as o:
                assert cast(StdWrapper, o).compression == "gzip"
                o.write("foo")
            assert gzip.decompress(i.getvalue()) == b"foo"

    def test_xopen_compressed_stream(self):
        # Try autodetect compressed
        with intercept_stdin(gzip.compress(b"foo\n"), is_bytes=True):
            with xopen(STDIN, "rt", compression=True, context_wrapper=True) as i:
                assert cast(StdWrapper, i).compression == "gzip"
                assert i.read() == "foo\n"

    def test_xopen_file(self):
        with self.assertRaises(IOError):
            xopen("foobar", "r")
        path = self.root.make_file(suffix=".gz")
        # legacy 'U' mode is normalized to 'rt'
        with xopen(path, "rU", context_wrapper=True) as i:
            assert "rt" == i.mode
        with xopen(path, "w", compression=True, context_wrapper=True) as o:
            assert cast(FileLikeWrapper, o).compression == "gzip"
            o.write("foo")
        with gzip.open(path, "rt") as i:
            assert i.read() == "foo"
        with self.assertRaises(ValueError):
            with xopen(path, "rt", compression="bz2", validate=True):
                pass
        existing_file = self.root.make_file(contents="abc")
        with xopen(existing_file, "wt", overwrite=True) as out:
            out.write("def")
        with self.assertRaises(ValueError):
            with xopen(existing_file, "wt", overwrite=False):
                pass

    def test_xopen_fileobj(self):
        path = self.root.make_file(suffix=".gz")
        with open(path, "wb") as out1:
            with open_(out1, "wt") as out2:
                out2.write("foo")
            # wrapping must not close the underlying file object
            assert not out1.closed
        with gzip.open(path, "rt") as i:
            assert "foo" == i.read()

    def test_xopen_mmap(self):
        path = self.root.make_file(suffix=".gz")
        with xopen(
            path,
            "w",
            compression=True,
            context_wrapper=True,
            use_system=False,
            memory_map=True,
        ) as o:
            # since we are opening an empty file, memory mapping will fail
            assert not cast(FileWrapper, o).memory_mapped
            o.write("foo")
        with open(path, "rb") as inp:
            with xopen(
                inp,
                "r",
                compression=True,
                context_wrapper=True,
                use_system=False,
                memory_map=True,
            ) as i:
                assert cast(FileWrapper, i).memory_mapped
                assert i.read() == "foo"

    def test_xopen_buffer(self):
        buf = BytesIO(b"foo")
        f = xopen(cast(IOBase, buf), "rb")
        assert b"foo" == f.read(3)
        with self.assertRaises(ValueError):
            xopen(cast(IOBase, buf), "wb")

        # passing str/bytes types opens an in-memory buffer
        with open_(str) as buf:
            buf.write("foo")
        assert "foo" == buf.getvalue()

        with open_(bytes) as buf:
            buf.write(b"foo")
        assert b"foo" == buf.getvalue()

        # with compression
        with self.assertRaises(ValueError):
            with open_(bytes, compression=True):
                pass
        with self.assertRaises(ValueError):
            with open_(str, compression="gzip"):
                pass

        with open_(bytes, mode="wt", compression="gzip") as buf:
            buf.write("foo")
        assert b"foo" == gzip.decompress(buf.getvalue())

        # from string/bytes
        with self.assertRaises(ValueError):
            xopen("foo", "wt", file_type=FileType.BUFFER)
        with self.assertRaises(ValueError):
            xopen("foo", "rb", file_type=FileType.BUFFER)
        with open_("foo", file_type=FileType.BUFFER, context_wrapper=True) as buf:
            assert "foo" == buf.read()

        with self.assertRaises(ValueError):
            xopen(b"foo", "rt", file_type=FileType.BUFFER)
        with open_(b"foo", file_type=FileType.BUFFER, context_wrapper=True) as buf:
            assert b"foo" == buf.read()

    @skipIf(no_internet(), "No internet connection")
    def test_xopen_url(self):
        badurl = "http://google.com/__badurl__"
        with self.assertRaises(ValueError):
            xopen(badurl)
        url = "https://github.com/jdidion/xphyle/blob/master/tests/foo.gz?raw=True"
        with self.assertRaises(ValueError):
            xopen(url, "w")
        with open_(url, "rt") as i:
            assert "gzip" == i.compression
            assert "foo\n" == i.read()

    def test_open_process(self):
        with self.assertRaises(ValueError):
            xopen("|cat", "wt", allow_subprocesses=False)
        with open_("|cat", "wt") as p:
            p.write("foo\n")
        assert b"foo\n" == p.stdout

    def test_peek(self):
        path = self.root.make_file()
        with self.assertRaises(IOError):
            with open_(path, "w") as o:
                o.peek()
        path = self.root.make_file(contents="foo")
        with open_(path, "rb") as i:
            # peek must not consume the stream
            assert b"f" == i.peek(1)
            assert b"foo" == next(i)
        with open_(path, "rt") as i:
            assert "f" == i.peek(1)
            assert "foo" == next(i)
        with intercept_stdin("foo"):
            with open_(STDIN, validate=False, compression=False) as i:
                assert "f" == i.peek(1)
                assert "foo\n" == next(i)

    def test_seek(self):
        path = self.root.make_file(contents="foo")
        with open_(path, "rb") as i:
            i.seek(1)
            assert b"o" == i.peek(1)

    def test_truncate(self):
        path = self.root.make_file(contents="foo")
        with open_(path, "r+") as i:
            i.truncate(1)
            assert i.read() == "f"

    def test_event_listeners(self):
        class MockEventListener(EventListener):
            def __init__(self):
                super().__init__()
                self.executed = False

            def execute(self, file_wrapper: FileLikeWrapper, **kwargs):
                self.executed = True

        # CLOSE listeners must fire for std-stream wrappers...
        std_listener: MockEventListener = MockEventListener()
        with intercept_stdin("foo"):
            f = xopen(STDIN, context_wrapper=True)
            try:
                cast(EventManager, f).register_listener(EventType.CLOSE, std_listener)
            finally:
                f.close()
            self.assertTrue(std_listener.executed)

        # ...and for file wrappers
        file_listener: MockEventListener = MockEventListener()
        path = self.root.make_file()
        f = xopen(path, "w", context_wrapper=True)
        try:
            cast(EventManager, f).register_listener(EventType.CLOSE, file_listener)
        finally:
            f.close()
        self.assertTrue(file_listener.executed)

    def test_process(self):
        with Process("cat", stdin=PIPE, stdout=PIPE, stderr=PIPE) as p:
            self.assertIsNotNone(p.get_writer())
            self.assertIsNotNone(p.get_reader("stdout"))
            self.assertIsNotNone(p.get_reader("stderr"))
            self.assertFalse(p.seekable())
            assert (p.stdout, p.stderr) == p.get_readers()
            p.write(b"foo\n")
            p.flush()
        assert b"foo\n" == p.stdout
        self.assertFalse(p.stderr)

        # wrap pipes
        with Process(("zcat", "-cd"), stdin=PIPE, stdout=PIPE) as p:
            self.assertTrue(p.readable())
            self.assertTrue(p.writable())
            with self.assertRaises(ValueError):
                p.is_wrapped("foo")
            with self.assertRaises(ValueError):
                p.wrap_pipes(foo=dict(mode="wt"))
            p.wrap_pipes(stdin=dict(mode="wt", compression="gzip"))
            self.assertTrue(p.is_wrapped("stdin"))
            p.write("foo")
        assert b"foo" == p.stdout

    def test_process_with_files(self):
        inp = self.root.make_file(suffix=".gz")
        with gzip.open(inp, "wt") as o:
            o.write("foo")
        out = self.root.make_file(suffix=".gz")
        with self.assertRaises(OSError):
            with gzip.open(inp, "rt") as o, open(out, "wt") as i:
                with Process("cat", stdin=o, stdout=i) as p:
                    p.wrap_pipes(stdin=dict(mode="wt"))
            with gzip.open(out, "rt") as i:
                assert "foo" == i.read()
        with popen(("echo", "abc\n123"), stdout=PIPE) as p:
            self.assertListEqual([b"abc\n", b"123\n"], list(line for line in p))
        with popen(("echo", "abc\n123"), stdout=PIPE) as p:
            assert b"abc\n" == next(p)
            assert b"123\n" == next(p)
        with popen(("echo", "abc\n123"), stdout=(PIPE, "rt")) as p:
            assert "abc\n" == next(p)
            assert "123\n" == next(p)

    def test_process_invalid(self):
        with self.assertRaises(ValueError):
            xopen("|cat", "wt", compression=True)

    def test_process_read(self):
        with Process(("echo", "foo"), stdout=PIPE) as p:
            assert b"foo\n" == p.read()
        with open_("|echo foo", "rt") as p:
            assert "foo\n" == p.read()

    def test_process_communicate(self):
        with Process("cat", stdin=PIPE, stdout=PIPE, stderr=PIPE) as p:
            self.assertTupleEqual((b"foo\n", b""), p.communicate(b"foo\n"))

    def test_process_del(self):
        class MockProcessListener(EventListener):
            def __init__(self):
                super().__init__()
                self.executed = False

            def execute(self, process: Process, **kwargs) -> None:
                self.executed = True

        # garbage collection must trigger CLOSE listeners
        listener: MockProcessListener = MockProcessListener()
        p = Process("cat", stdin=PIPE, stdout=PIPE)
        p.register_listener(EventType.CLOSE, listener)
        del p
        self.assertTrue(listener.executed)

    def test_process_close(self):
        p = Process("cat", stdin=PIPE, stdout=PIPE)
        self.assertFalse(p.closed)
        p.close()
        self.assertTrue(p.closed)
        self.assertIsNone(p.close1(raise_on_error=False))
        with self.assertRaises(IOError):
            p.close1(raise_on_error=True)

    def test_process_close_hung(self):
        p = Process(("sleep", "5"))
        with self.assertRaises(Exception):
            p.close1(timeout=1, terminate=False)
        p = Process(("sleep", "5"))
        p.close1(timeout=1, terminate=True)
        self.assertTrue(p.closed)

    def test_process_error(self):
        p = popen(("exit", "2"), shell=True)
        with self.assertRaises(IOError):
            p.close1(raise_on_error=True)
        self.assertFalse(p.returncode == 0)
--------------------------------------------------------------------------------
/xphyle/progress.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""Common interface to enable 
operations to be wrapped in a progress bar. 3 | By default, pokrok is used for python-level operations and pv for system-level 4 | operations. 5 | """ 6 | from os import PathLike 7 | import shlex 8 | from subprocess import Popen, PIPE 9 | from typing import Iterable, Union, Callable, Tuple, Sequence, Optional 10 | from pokrok import progress_iter 11 | from xphyle.paths import EXECUTABLE_CACHE, check_path 12 | from xphyle.types import PathType, Permission, FileLike 13 | 14 | 15 | # Python-level progress wrapper 16 | 17 | 18 | class IterableProgress: 19 | """Manages the python-level wrapper. 20 | 21 | Args: 22 | default_wrapper: Callable (typically a class) that returns a Callable 23 | with the signature of ``wrap``. 24 | """ 25 | 26 | def __init__(self, default_wrapper: Callable = progress_iter) -> None: 27 | self.enabled = False 28 | self.wrapper: Optional[Callable[..., Iterable]] = None 29 | self.default_wrapper = default_wrapper 30 | 31 | def update( 32 | self, 33 | enable: Optional[bool] = None, 34 | wrapper: Optional[Callable[..., Iterable]] = None, 35 | ) -> None: 36 | """Enable the python progress bar and/or set a new wrapper. 37 | 38 | Args: 39 | enable: Whether to enable use of a progress wrapper. 40 | wrapper: A callable that takes three arguments, itr, desc, size, 41 | and returns an iterable. 42 | """ 43 | if enable is not None: 44 | self.enabled = enable 45 | 46 | if wrapper: 47 | self.wrapper = wrapper 48 | elif self.enabled and not self.wrapper: 49 | try: 50 | self.wrapper = self.default_wrapper() 51 | except ImportError as err: 52 | raise ValueError( 53 | "Could not create default python wrapper; valid wrapper " 54 | "must be specified" 55 | ) from err 56 | 57 | def wrap( 58 | self, itr: Iterable, desc: Optional[str] = None, size: Optional[int] = None 59 | ) -> Iterable: 60 | """Wrap an iterable in a progress bar. 61 | 62 | Args: 63 | itr: The Iterable to wrap. 64 | desc: Optional description. 65 | size: Optional max value of the progress bar. 
66 | 67 | Returns: 68 | The wrapped Iterable. 69 | """ 70 | if self.enabled: 71 | return self.wrapper(itr, desc=desc, size=size) 72 | else: 73 | return itr 74 | 75 | 76 | ITERABLE_PROGRESS = IterableProgress() 77 | 78 | 79 | # System-level progress wrapper 80 | 81 | 82 | def system_progress_command( 83 | exe: Union[str, PathLike], *args, require: bool = False 84 | ) -> Tuple: # pragma: no-cover 85 | """Resolve a system-level progress bar command. 86 | 87 | Args: 88 | exe: The executable name or absolute path. 89 | args: A list of additional command line arguments. 90 | require: Whether to raise an exception if the command does not exist. 91 | 92 | Returns: 93 | A tuple of (executable_path, *args). 94 | """ 95 | executable_path = EXECUTABLE_CACHE.get_path(exe) 96 | if executable_path is not None: 97 | check_path(executable_path, PathType.FILE, Permission.EXECUTE) 98 | elif require: 99 | raise IOError("pv is not available on the path") 100 | return (executable_path,) + tuple(args) 101 | 102 | 103 | def pv_command(require: bool = False) -> Tuple: # pragma: no-cover 104 | """Default system wrapper command. 105 | """ 106 | return system_progress_command("pv", "-pre", require=require) 107 | 108 | 109 | class ProcessProgress: 110 | """Manage the system-level progress wrapper. 111 | 112 | Args: 113 | default_wrapper: Callable that returns the argument list for the 114 | default wrapper command. 115 | """ 116 | 117 | def __init__(self, default_wrapper: Callable = pv_command) -> None: 118 | self.enabled = False 119 | self.wrapper: Optional[Sequence[str]] = None 120 | self.default_wrapper = default_wrapper 121 | 122 | def update( 123 | self, 124 | enable: Optional[bool] = None, 125 | wrapper: Optional[Union[str, Sequence[str]]] = None, 126 | ) -> None: 127 | """Enable the python system progress bar and/or set the wrapper 128 | command. 129 | 130 | Args: 131 | enable: Whether to enable use of a progress wrapper. 132 | wrapper: A command string or sequence of command arguments. 
133 | """ 134 | if enable is not None: 135 | self.enabled = enable 136 | 137 | if wrapper: 138 | if isinstance(wrapper, str): 139 | self.wrapper = tuple(shlex.split(wrapper)) 140 | else: 141 | self.wrapper = wrapper 142 | elif self.enabled and not self.wrapper: 143 | try: 144 | self.wrapper = self.default_wrapper() 145 | except IOError as err: 146 | raise ValueError( 147 | "Could not create default system wrapper; valid wrapper " 148 | "must be specified" 149 | ) from err 150 | 151 | def wrap( 152 | self, cmd: Sequence[str], stdin: FileLike, stdout: FileLike, **kwargs 153 | ) -> Popen: # pragma: no-cover 154 | """Pipe a system command through a progress bar program. 155 | 156 | For the process to be wrapped, one of ``stdin``, ``stdout`` must not be 157 | None. 158 | 159 | Args: 160 | cmd: Command arguments. 161 | stdin: File-like object to read into the process stdin, or None to 162 | use `PIPE`. 163 | stdout: File-like object to write from the process stdout, or None 164 | to use `PIPE`. 165 | kwargs: Additional arguments to pass to Popen. 166 | 167 | Returns: 168 | Open process. 169 | """ 170 | if not self.enabled or (stdin is None and stdout is None): 171 | return Popen(cmd, stdin=stdin, stdout=stdout, **kwargs) 172 | 173 | if stdin is not None: 174 | proc1 = Popen(self.wrapper, stdin=stdin, stdout=PIPE) 175 | proc2 = Popen(cmd, stdin=proc1.stdout, stdout=stdout) 176 | else: 177 | proc1 = Popen(cmd, stdout=PIPE) 178 | proc2 = Popen(self.wrapper, stdin=proc1.stdout, stdout=stdout) 179 | proc1.stdout.close() 180 | return proc2 181 | 182 | 183 | PROCESS_PROGRESS = ProcessProgress() 184 | 185 | 186 | # Misc functions 187 | 188 | 189 | def iter_file_chunked(fileobj: FileLike, chunksize: int = 1024) -> Iterable: 190 | """Returns a progress bar-wrapped iterator over a file that reads 191 | fixed-size chunks. 192 | 193 | Args: 194 | fileobj: A file-like object. 195 | chunksize: The maximum size in bytes of each chunk. 
196 | 197 | Returns: 198 | An iterable over the chunks of the file. 199 | """ 200 | 201 | def _itr(): 202 | while True: 203 | data = fileobj.read(chunksize) 204 | if data: 205 | yield data 206 | else: 207 | break 208 | 209 | name = None 210 | if hasattr(fileobj, "name"): 211 | name = getattr(fileobj, "name") 212 | 213 | return ITERABLE_PROGRESS.wrap(_itr(), desc=name) 214 | -------------------------------------------------------------------------------- /xphyle/types.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Type checking support. Defines commonly used types. 3 | """ 4 | # pylint: disable=wildcard-import, unused-wildcard-import, import-error, invalid-name 5 | from abc import ABCMeta, abstractmethod 6 | import collections 7 | from enum import Enum 8 | from io import IOBase, UnsupportedOperation 9 | import os 10 | from pathlib import PurePath 11 | import stat 12 | from typing import ( 13 | Dict, 14 | Sequence, 15 | List, 16 | Tuple, 17 | Set, 18 | Iterator, 19 | Iterable, 20 | Text, 21 | Union, 22 | Any, 23 | IO, 24 | Pattern, 25 | TypeVar, 26 | cast, 27 | ) 28 | 29 | 30 | class ModeAccess(Enum): 31 | """Enumeration of the access modes allowed when opening files. 
class ModeAccess(Enum):
    """Enumeration of the access modes allowed when opening files.

    See Also:
        https://docs.python.org/3/library/functions.html#open
    """

    READ = "r"
    """Read from file."""
    WRITE = "w"
    """Write to file, overwriting any existing file."""
    READWRITE = "r+"
    """Open file for reading and writing."""
    TRUNCATE_READWRITE = "w+"
    """Open file for reading and writing, first truncating the file to 0."""
    APPEND = "a"
    """Create file if it doesn't exist, else append to existing file."""
    EXCLUSIVE = "x"
    """Exclusive write (fails if file already exists)."""

    @property
    def readable(self):
        """Whether this is readable mode."""
        return any(char in self.value for char in ("r", "+"))

    @property
    def writable(self):
        """Whether this is writable mode."""
        return any(char in self.value for char in ("w", "+", "a", "x"))

    @property
    def readwritable(self) -> bool:
        """Whether this mode is both readable and writable ('+')."""
        return "+" in self.value


ModeAccessArg = Union[str, ModeAccess]


class ModeCoding(Enum):
    """Enumeration of file open modes (text or binary).

    See Also:
        https://docs.python.org/3/library/functions.html#open
    """

    TEXT = "t"
    """Text mode."""
    BINARY = "b"
    """Binary mode."""


ModeCodingArg = Union[str, ModeCoding]


FILE_MODE_CACHE: Dict[Tuple[str, ModeAccessArg, ModeCodingArg], "FileMode"] = {}
"""Cache of FileMode objects."""


class FileMode(object):
    """Definition of a file mode as composed of a :class:`ModeAccess` and a
    :class:`ModeCoding`.

    Instances are interned: constructing a FileMode with the same constructor
    arguments returns the same object (see :attribute:`FILE_MODE_CACHE`).

    Args:
        mode: Specify the mode as a string; mutually exclusive with `access`
            and `coding`.
        access: The file access mode (default: :attribute:`ModeAccess.READ`).
        coding: The file open mode (default: :attribute:`ModeCoding.TEXT`).

    Raises:
        ValueError: If `mode` contains invalid characters.
    """

    def __new__(
        cls,
        mode: str = None,
        access: ModeAccessArg = None,
        coding: ModeCodingArg = None,
    ) -> "FileMode":
        # Intern instances keyed on the raw constructor arguments.
        key = (mode, access, coding)
        if key not in FILE_MODE_CACHE:
            FILE_MODE_CACHE[key] = super().__new__(cls)
        return FILE_MODE_CACHE[key]

    def __init__(
        self,
        mode: str = None,
        access: ModeAccessArg = None,
        coding: ModeCodingArg = None,
    ) -> None:
        if mode:
            access_val = None
            access_char = None
            update = False
            coding_val = None
            for c in mode:
                if c in "rwax" and access_char is None:
                    access_char = c
                elif c == "+":
                    update = True
                elif c in "bt" and coding_val is None:
                    coding_val = ModeCoding(c)
                elif c == "U" and coding_val is None:
                    # Legacy universal-newlines flag implies text mode.
                    coding_val = ModeCoding.TEXT
                else:
                    raise ValueError(f"Invalid characters in mode string: {mode}")

            if access_char is not None:
                if update:
                    access_val = ModeAccess(access_char + "+")
                else:
                    access_val = ModeAccess(access_char)
        else:
            # No mode string: derive access/coding from the explicit args.
            if isinstance(access, str):
                access_val = ModeAccess(access)
            else:
                access_val = cast(ModeAccess, access)
            if isinstance(coding, str):
                coding_val = ModeCoding(coding)
            else:
                coding_val = cast(ModeCoding, coding)

        self.access = access_val or ModeAccess.READ
        self.coding = coding_val or ModeCoding.TEXT
        self.value = "{}{}".format(self.access.value, self.coding.value)

        if mode:
            # Defensive re-check: every character of the original mode string
            # must be accounted for by the normalized value (plus 'U').
            diff = set(mode) - set(str(self) + "U")
            if diff:
                raise ValueError(
                    "Invalid characters in mode string: {}".format("".join(diff))
                )

    @property
    def readable(self):
        """Whether this is readable mode."""
        return self.access.readable

    @property
    def writable(self):
        """Whether this is writable mode."""
        return self.access.writable

    @property
    def readwritable(self):
        """Whether this is read+write mode."""
        return self.access.readwritable

    @property
    def binary(self):
        """Whether this is binary mode."""
        return self.coding == ModeCoding.BINARY

    def as_binary(self):
        """Converts this mode to binary"""
        if self.coding == ModeCoding.BINARY:
            return self
        else:
            return FileMode(access=self.access, coding=ModeCoding.BINARY)

    @property
    def text(self):
        """Whether this is text mode."""
        return self.coding == ModeCoding.TEXT

    def as_text(self):
        """Converts this mode to text"""
        if self.coding == ModeCoding.TEXT:
            return self
        else:
            return FileMode(access=self.access, coding=ModeCoding.TEXT)

    def __contains__(self, value: Union[str, ModeAccess, ModeCoding]) -> bool:
        if isinstance(value, ModeAccess):
            return self.access == value
        elif isinstance(value, ModeCoding):
            return self.coding == value
        else:
            for v in cast(str, value):
                if v not in self.access.value and v not in self.coding.value:
                    return False
            return True

    def __eq__(self, other):
        return (
            isinstance(other, FileMode)
            and self.access == other.access
            and self.coding == other.coding
        )

    def __hash__(self):
        # Fix: defining __eq__ suppresses the inherited __hash__, which made
        # FileMode instances unhashable; restore a hash consistent with
        # __eq__ so modes can be used as dict keys / set members.
        return hash((self.access, self.coding))

    def __repr__(self):
        return self.value


OS_ALIASES = dict(r=os.R_OK, w=os.W_OK, x=os.X_OK, t=0)
"""Dictionary mapping mode characters to :module:`os` flags"""


STAT_ALIASES = dict(
    r=stat.S_IREAD,
    w=stat.S_IWRITE,
    x=stat.S_IEXEC,
    t=stat.S_ISVTX,
    f=stat.S_IFREG,
    d=stat.S_IFDIR,
    fifo=stat.S_IFIFO,
)
"""Dictionary mapping mode characters to :module:`stat` flags"""


class Permission(Enum):
    """Enumeration of file permission flags ('r', 'w', 'x', 't'). Note that
    this isn't a full enumeration of all flags, just those pertaining to the
    permissions of the current user.
    """

    READ = "r"
    """Read; alias of :attribute:`stat.S_IREAD` and :attribute:`os.R_OK`."""
    WRITE = "w"
    """Write; alias of :attribute:`stat.S_IWRITE and :attribute:`os.W_OK``."""
    EXECUTE = "x"
    """Execute; alias of :attribute:`stat.S_IEXEC` and :attribute:`os.X_OK`."""
    STICKY = "t"
    """The sticky bit, alias of :attribute:`stat.S_ISVTX`."""

    @property
    def stat_flag(self):
        """Returns the :module:`stat` flag."""
        return STAT_ALIASES[self.value]

    @property
    def os_flag(self):
        """Returns the :module:`os` flag."""
        return OS_ALIASES[self.value]


PermissionArg = Union[str, int, Permission, ModeAccess]
"""Types from which an Permission can be inferred."""


PERMISSION_SET_CACHE: Dict[
    Union[PermissionArg, Iterable[PermissionArg]], "PermissionSet"
] = {}

# NOTE(review): the PermissionSet class that originally followed here is cut
# off at the end of this chunk of the file and is intentionally not modified;
# its definition continues in the next section.
298 | 299 | Args: 300 | flag: Permission to add. 301 | """ 302 | if isinstance(flag, str): 303 | self.flags.add(Permission(flag)) 304 | elif isinstance(flag, int): 305 | for f in Permission: 306 | if (f.stat_flag & flag) or (f.os_flag & flag): 307 | self.flags.add(f) 308 | elif isinstance(flag, ModeAccess): 309 | if flag.readable: 310 | self.add(Permission.READ) 311 | if flag.writable: 312 | self.add(Permission.WRITE) 313 | else: 314 | self.flags.add(flag) 315 | 316 | def update(self, flags: Union["PermissionSet", Iterable[PermissionArg]]) -> None: 317 | """Add all flags in `flags` to this `PermissionSet`. 318 | 319 | Args: 320 | flags: Flags to add. 321 | """ 322 | for flag in flags: 323 | self.add(flag) 324 | 325 | @property 326 | def stat_flags(self) -> int: 327 | """Returns the binary OR of the :module:`stat` flags corresponding to 328 | the flags in this `PermissionSet`. 329 | """ 330 | flags = 0 331 | for f in self.flags: 332 | flags |= f.stat_flag 333 | return flags 334 | 335 | @property 336 | def os_flags(self) -> int: 337 | """Returns the binary OR of the :module:`os` flags corresponding to 338 | the flags in this `PermissionSet`. 339 | """ 340 | flags = 0 341 | for f in self.flags: 342 | flags |= f.os_flag 343 | return flags 344 | 345 | def __iter__(self) -> Iterable[Permission]: 346 | """Iterate over flags in the same order they appear in 347 | :class:`Permission`. 
348 | """ 349 | for f in Permission: 350 | if f in self.flags: 351 | yield f 352 | 353 | def __eq__(self, other): 354 | return isinstance(other, PermissionSet) and self.flags == other.flags 355 | 356 | def __contains__(self, access_flag: PermissionArg) -> bool: 357 | if isinstance(access_flag, str): 358 | access_flag = Permission(access_flag) 359 | return access_flag in self.flags 360 | 361 | def __repr__(self) -> str: 362 | return "".join(f.value for f in Permission if f in self.flags) 363 | 364 | 365 | class FileType(Enum): 366 | """Enumeration of types of files that can be opened by 367 | :method:`xphyle.xopen`. 368 | """ 369 | 370 | STDIO = "std" 371 | """One of stdin/stdout/stderr.""" 372 | LOCAL = "local" 373 | """A file on the local computer.""" 374 | URL = "url" 375 | """A URL; schema must be recognized by :module:`urllib`.""" 376 | PROCESS = "ps" 377 | """A system command to be executed in a subprocess.""" 378 | FILELIKE = "filelike" 379 | """An object that implements the methods in 380 | :class:`xphyle.types.FileLikeInterface`.""" 381 | BUFFER = "buffer" 382 | """A StringIO or BytesIO.""" 383 | 384 | 385 | class EventType(Enum): 386 | """Enumeration of event types that can be registered on an 387 | :class:`EventManager`. 388 | """ 389 | 390 | CLOSE = "close" 391 | 392 | 393 | AnyChar = Union[bytes, Text] 394 | """Similar to AnyStr, but specifies that strings must be unicode.""" 395 | 396 | 397 | class FileLikeInterface(IO, Iterable[AnyChar], metaclass=ABCMeta): 398 | """This is a marker interface for classes that implement methods (listed 399 | below) to make them behave like python file objects. Provides a subset of 400 | methods from typing.io.IO, plus next() and __iter__. 
401 | 402 | See Also: 403 | https://docs.python.org/3/tutorial/inputoutput.html#methods-of-file-objects 404 | """ 405 | 406 | @abstractmethod 407 | def next(self) -> AnyChar: 408 | pass 409 | 410 | 411 | # noinspection PyTypeChecker 412 | class FileLikeBase(FileLikeInterface): 413 | def flush(self) -> None: 414 | pass 415 | 416 | def close(self) -> None: 417 | pass 418 | 419 | def readable(self) -> bool: 420 | return False 421 | 422 | def read(self, n: int = -1) -> AnyChar: 423 | raise UnsupportedOperation() 424 | 425 | def readline(self, hint: int = -1) -> AnyChar: 426 | raise UnsupportedOperation() 427 | 428 | def readlines(self, sizehint: int = -1) -> List[AnyChar]: 429 | raise UnsupportedOperation() 430 | 431 | def writable(self) -> bool: 432 | return False 433 | 434 | def write(self, string: AnyChar) -> int: 435 | raise UnsupportedOperation() 436 | 437 | def writelines(self, lines: Iterable[AnyChar]) -> None: 438 | raise UnsupportedOperation() 439 | 440 | # noinspection PyTypeChecker 441 | def seek(self, offset, whence: int = 0) -> int: 442 | if self.seekable(): 443 | raise UnsupportedOperation() 444 | else: 445 | raise ValueError("Cannot call seek on a non-seekable object") 446 | 447 | def seekable(self) -> bool: 448 | return False 449 | 450 | def tell(self) -> int: 451 | if self.seekable(): 452 | raise UnsupportedOperation() 453 | else: 454 | raise ValueError("Cannot call tell on a non-seekable object") 455 | 456 | def isatty(self) -> bool: 457 | return False 458 | 459 | def fileno(self) -> int: 460 | return -1 461 | 462 | def truncate(self, size: int = None) -> int: 463 | if self.seekable(): 464 | raise UnsupportedOperation() 465 | else: 466 | raise ValueError("Cannot call truncate on a non-seekable object") 467 | 468 | def __enter__(self) -> Any: 469 | return self 470 | 471 | def __exit__(self, exception_type, exception_value, traceback) -> bool: 472 | self.close() 473 | return False 474 | 475 | def __iter__(self) -> Iterator[AnyChar]: 476 | raise 
UnsupportedOperation() 477 | 478 | def __next__(self) -> AnyChar: 479 | raise UnsupportedOperation() 480 | 481 | def next(self) -> AnyChar: 482 | return self.__next__() 483 | 484 | 485 | class PathType(Enum): 486 | """Enumeration of supported path types (file, directory, FIFO).""" 487 | 488 | FILE = "f" 489 | """Path represents a file.""" 490 | DIR = "d" 491 | """Path represents a directory.""" 492 | FIFO = "|" 493 | """Path represents a FIFO.""" 494 | 495 | 496 | FileLike = Union[IO, IOBase, FileLikeInterface] 497 | """File-like object; either a subclass of :class:`io.IOBase` or a 498 | :class:`FileLikeInterface`. 499 | """ 500 | 501 | 502 | PathLike = Union[os.PathLike, PurePath] 503 | """PurePath is only included because PathLike is not statically assigned as a 504 | superclass of PurePath in python 3.6.""" 505 | 506 | 507 | PathOrFile = Union[PathLike, PurePath, FileLike] 508 | """Either a PathLike or FileLike.""" 509 | 510 | 511 | Range = Tuple[int, int] 512 | """Two-integer tuple representing a range.""" 513 | 514 | 515 | Regexp = Union[str, Pattern] 516 | """A regular expression string or compiled :class:`re`.""" 517 | 518 | 519 | CharMode = TypeVar("CharMode", bytes, Text) 520 | """Type representing how data should be handled when read from a file. 521 | If the value is bytes (:attribute:`BinMode`), raw bytes are returned. If the 522 | value is a string (:attribute:`TextMode`), bytes are decoded using the system 523 | default encoding. 
524 | """ 525 | 526 | 527 | BinMode = b"b" 528 | """Value representing binary mode to use for an argument of type CharMode.""" 529 | 530 | 531 | TextMode = "t" 532 | """Value representing text mode to use for an argument of type CharMode.""" 533 | 534 | 535 | # Aliases for commonly used compound argument types 536 | 537 | 538 | PermissionSetArg = Union[PermissionSet, Sequence[PermissionArg]] 539 | """Sequence of stat flags (string, int, or :class:`Permission`).""" 540 | 541 | 542 | ModeArg = Union[str, FileMode] 543 | """A file mode; string, or :class:`FileMode`.""" 544 | 545 | 546 | PathTypeArg = Union[str, PathType] 547 | """A path type string or :class:`PathType`.""" 548 | 549 | 550 | EventTypeArg = Union[str, EventType] 551 | """An event type name or :class:`EventType`.""" 552 | 553 | 554 | CompressionArg = Union[bool, str] 555 | """Compression can be True, False, or the name of a compression format.""" 556 | 557 | 558 | def is_iterable(obj: Any, include_str: bool = False) -> bool: 559 | """Test whether an object is iterable. 560 | 561 | Args: 562 | obj: The object to test. 563 | include_str: Whether a string should be considered an iterable 564 | (default: False). 565 | 566 | Returns: 567 | True if the object is iterable. 568 | """ 569 | return isinstance(obj, collections.abc.Iterable) and ( 570 | include_str or not isinstance(obj, str) 571 | ) 572 | -------------------------------------------------------------------------------- /xphyle/urls.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Methods for handling URLs. 
3 | """ 4 | import copy 5 | import io 6 | import re 7 | from typing import Optional 8 | from http.client import HTTPResponse 9 | from urllib.error import URLError 10 | from urllib.parse import ParseResult, urlparse 11 | from urllib.request import urlopen, Request 12 | from xphyle.types import Range, Any, cast 13 | 14 | 15 | # URLs 16 | 17 | 18 | def parse_url(url_string: str) -> Optional[ParseResult]: 19 | """Attempts to parse a URL. 20 | 21 | Args: 22 | url_string: String to test. 23 | 24 | Returns: 25 | A 6-tuple, as described in ``urlparse``, or None if the URL cannot be 26 | parsed, or if it lacks a minimum set of attributes. Note that a URL may 27 | be valid and still not be openable (for example, if the scheme is 28 | recognized by urlopen). 29 | """ 30 | url = urlparse(url_string) 31 | if not (url.scheme and (url.netloc or url.path)): 32 | return None 33 | return url 34 | 35 | 36 | def open_url( 37 | url_string: str, 38 | byte_range: Optional[Range] = None, 39 | headers: Optional[dict] = None, 40 | **kwargs 41 | ) -> Any: 42 | """Open a URL for reading. 43 | 44 | Args: 45 | url_string: A valid url string. 46 | byte_range: Range of bytes to read (start, stop). 47 | headers: dict of request headers. 48 | kwargs: Additional arguments to pass to `urlopen`. 49 | 50 | Returns: 51 | A response object, or None if the URL is not valid or cannot be opened. 52 | 53 | Notes: 54 | The return value of `urlopen` is only guaranteed to have 55 | certain methods, not to be of any specific type, thus the `Any` 56 | return type. Furthermore, the response may be wrapped in an 57 | `io.BufferedReader` to ensure that a `peek` method is available. 
58 | """ 59 | headers = copy.copy(headers) if headers else {} 60 | if byte_range: 61 | headers["Range"] = "bytes={}-{}".format(*byte_range) 62 | try: 63 | request = Request(url_string, headers=headers, **kwargs) 64 | response = urlopen(request) 65 | # HTTPResponse didn't have 'peek' until 3.5 66 | if response and not hasattr(response, "peek"): 67 | # ISSUE: HTTPResponse inherits BufferedIOBase (rather than 68 | # RawIOBase), but for this purpose it's completely compatible 69 | # with BufferedReader. Not sure how to make it type-compatible. 70 | return io.BufferedReader(cast(HTTPResponse, response)) 71 | else: 72 | return response 73 | except (URLError, ValueError): 74 | return None 75 | 76 | 77 | def get_url_mime_type(response: Any) -> Optional[str]: 78 | """If a response object has HTTP-like headers, extract the MIME type 79 | from the Content-Type header. 80 | 81 | Args: 82 | response: A response object returned by `open_url`. 83 | 84 | Returns: 85 | The content type, or None if the response lacks a 'Content-Type' header. 86 | """ 87 | if hasattr(response, "headers") and "Content-Type" in response.headers: 88 | return response.headers["Content-Type"] 89 | return None 90 | 91 | 92 | CONTENT_DISPOSITION_RE = re.compile("filename=([^;]+)") 93 | 94 | 95 | def get_url_file_name( 96 | response: Any, parsed_url: Optional[ParseResult] = None 97 | ) -> Optional[str]: 98 | """If a response object has HTTP-like headers, extract the filename 99 | from the Content-Disposition header. 100 | 101 | Args: 102 | response: A response object returned by `open_url`. 103 | parsed_url: The result of calling `parse_url`. 104 | 105 | Returns: 106 | The file name, or None if it could not be determined. 
107 | """ 108 | if hasattr(response, "headers") and "Content-Disposition" in response.headers: 109 | match = CONTENT_DISPOSITION_RE.search(response.headers["Content-Disposition"]) 110 | if match: 111 | return match.group(1) 112 | if not parsed_url: 113 | parsed_url = parse_url(response.geturl()) 114 | if parsed_url and hasattr(parsed_url, "path"): 115 | return parsed_url.path 116 | return None 117 | --------------------------------------------------------------------------------