├── .coveragerc
├── .gitattributes
├── .gitignore
├── .pylintrc
├── .travis.yml
├── CHANGES.md
├── CITATION
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.md
├── docs
    ├── Makefile
    ├── api
    │   ├── conf.py
    │   └── modules.rst
    ├── conf.py
    ├── developers.rst
    ├── index.rst
    ├── logo.pdf
    ├── logo.png
    └── requirements.txt
├── paper
    ├── codemeta.json
    ├── generate.rb
    ├── paper.bib
    └── paper.md
├── readthedocs.yml
├── requirements.txt
├── setup.cfg
├── setup.py
├── tests
    ├── __init__.py
    ├── foo.gz
    ├── pytest.ini
    ├── test_formats.py
    ├── test_paths.py
    ├── test_performance.py
    ├── test_progress.py
    ├── test_types.py
    ├── test_urls.py
    ├── test_utils.py
    └── test_xphyle.py
└── xphyle
    ├── __init__.py
    ├── formats.py
    ├── paths.py
    ├── progress.py
    ├── types.py
    ├── urls.py
    └── utils.py


/.coveragerc:
--------------------------------------------------------------------------------
 1 | [run]
 2 | omit =
 3 |     tests/*
 4 |     setup.py
 5 |     *site-packages/*
 6 | 
 7 | [report]
 8 | exclude_lines =
 9 |     pragma: no cover
10 |     pragma: no-cover
11 |     def __repr__
12 |     raise NotImplementedError
13 |     if __name__ == .__main__.:
14 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | xphyle/_version.py export-subst
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .cache
 2 | .coverage
 3 | build/
 4 | dist/
 5 | *.pyc
 6 | xphyle.egg-info/
 7 | docs/_build/
 8 | .vscode*
 9 | .mypy_cache/
10 | .idea/
11 | .pytest_cache/
12 | .eggs/
13 | .DS_Store
14 | 


--------------------------------------------------------------------------------
/.pylintrc:
--------------------------------------------------------------------------------
1 | [MESSAGES CONTROL]
2 | # C0303: Trailing whitespace; will add a custom checker that only flags
3 | #        trailing whitespace at the end of non-empty lines
4 | # C0326: Disabled because it incorrectly flags whitespace around default values
5 | #        when function annotations are used; will add a custom checker that
6 | #        flags all other cases
7 | disable=fixme,C0303,C0326,too-few-public-methods,too-many-instance-attributes,too-many-arguments,too-many-locals,too-many-branches,too-many-statements,too-many-function-args,too-many-lines,too-many-boolean-expressions,too-many-return-statements
8 | ignore=__pycache__,_version.py
9 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | sudo: false
 2 | language: python
 3 | cache:
 4 |   directories:
 5 |     - $HOME/.cache/pip
 6 | os:
 7 |   - linux
 8 | python:
 9 |   - 3.6
10 |   - 3.7
11 |   - 3.8
12 |   - 3.9
13 |   - 3.10
14 | install:
15 |   - pip install --upgrade pip wheel
16 |   - pip install pytest-cov
17 |   - pip install coveralls
18 |   - pip install pylint
19 |   - make install
20 | script:
21 |   - make test
22 | after_success:
23 |   - coveralls
24 |   - pylint xphyle
25 | 


--------------------------------------------------------------------------------
/CHANGES.md:
--------------------------------------------------------------------------------
  1 | # Changes
  2 | 
  3 | ## v4.4.1 (2020.12.06)
  4 | 
  5 | * Fix #41 - Windows does not support SIGPIPE
  6 | 
  7 | ## v4.4.0 (2020.08.27)
  8 | 
  9 | * Add support for memory mapping, using the `memory_map` argument to `open_`/`xopen`
 10 | 
 11 | ## v4.3.0 (2020.07.30)
 12 | 
 13 | * Add support for igzip
 14 | * Remove useless -p argument when decompressing with pigz
 15 | 
 16 | ## v4.2.2 (2020.01.02)
 17 | 
 18 | * Handle differences between gzip and pigz -l output
 19 | 
 20 | ## v4.2.1 (2019.12.13)
 21 | 
 22 | * Switch from versioneer to setup_tools_scm for version managment.
 23 | 
 24 | ## v4.2.0 (2019.11.20)
 25 | 
 26 | * Add `xphyle.get_compressor`
 27 | * Fix Python 3.8 issue with importing from collections
 28 | 
 29 | ## v4.1.3 (2019.10.09)
 30 | 
 31 | * Fixed issue with opening bgzip files
 32 | 
 33 | ## v4.1.2 (2019.06.14)
 34 | 
 35 | * Correctly handle file modes when detecting placeholders
 36 | 
 37 | ## v4.1.1 (2019.06.14)
 38 | 
 39 | * Correctly handle placeholder strings ('-', '_') as arguments to xopen
 40 | 
 41 | ## v4.1.0 (2019.06.14)
 42 | 
 43 | * Add support for zstd
 44 | * Adjusted default compression levels based on benchmarking of compression tools.
 45 | * Handle placeholder strings ('-', '_') as arguments to xopen
 46 | 
 47 | ## v4.0.8 (2019.04.08)
 48 | 
 49 | * Add pathlib.PurePath as a member of the PathLike type, to work around the lack of os.PathLike as a static superclass of PurePath in python 3.6
 50 | 
 51 | ## v4.0.7 (2019.04.06)
 52 | 
 53 | * Don't complain when writing a bgzip file and the extension is gz
 54 | * Reformat codebase using black, and other code cleanup
 55 | 
 56 | ## v4.0.5 (2019.01.10)
 57 | 
 58 | * Fix setup.py and Makefile to perform pypi upload correctly
 59 | * Add readthedocs.yml and update docs config to get docs building correctly
 60 | 
 61 | ## v4.0.0 (2019.01.10)
 62 | 
 63 | * Official 4.0.0 release
 64 | 
 65 | ## v4.0.0-rc1 (2018.08.02)
 66 | 
 67 | * Support non-.gz extensions when decompressing bgzip files.
 68 | 
 69 | ## v4.0.0-rc0 (2018.03.18)
 70 | 
 71 | * Starting with v4, xphyle requires python 3.6+
 72 | * All path-oriented functions now use pathlib paths by default. Support for string paths is deprecated.
 73 | * Moved to pokrok for progress bar management.
 74 | 
 75 | ## v3.1.6 (2018.01.16)
 76 | 
 77 | * Fix bug when specifying file_type=FileType.FILELIKE.
 78 | 
 79 | ## v3.1.5 (2017.12.11)
 80 | 
 81 | * Added `close_fileobj` parameters to `xopen()` to allow user to specify whether the file/buffer should be closed when the wrapper is closed.
 82 | 
 83 | ## v3.1.2 (2017.11.18)
 84 | 
 85 | * Added `xphyle.utils.uncompressed_size()`.
 86 | 
 87 | ## v3.1.1 (2017.10.13)
 88 | 
 89 | * Added 'overwrite' parameter to xopen (defaults to True).
 90 | 
 91 | ## v3.1.0 (2017.08.31)
 92 | 
 93 | * *Possible breaking change*: We discovered that python 3.3 support never fully worked due to some incompatibilities in the backported libraries for features we rely on that were introduced in 3.4. Thus, we are officially dropping support for python 3.3. This also reverts the change made in 3.0.7.
 94 | * Please ignore releases 3.0.8 and 3.0.9.
 95 | 
 96 | ## v3.0.7 (2017.07.22)
 97 | 
 98 | * Add missing pathlib backport dependency for py3.3.
 99 | 
100 | ## v3.0.6 (2017.07.22)
101 | 
102 | * Added 'list_extensions' method to xphyle.formats.Formats.
103 | * Fixed subtle bug that would cause failure when calling xopen on stdout that has been monkeypatched (as is done by pytest).
104 | 
105 | ## v3.0.5 (2017.07.19)
106 | 
107 | * Fixed #13: opening corrupt gzip file fails silently.
108 | 
109 | ## v3.0.3 (2017.06.14)
110 | 
111 | * Added basic performance testing.
112 | * Fixed #12: xphyle not recognizing when system-level lzma not installed.
113 | 
114 | ## v3.0.2 (2017.05.23)
115 | 
116 | * Forcing use of backports.typing for python < 3.6.
117 | 
118 | ## v3.0.1 (2017.04.29)
119 | 
120 | * Added a paper for submission to JOSS.
121 | * Enabled DOI generation using Zenodo.
122 | 
123 | ## v3.0.0 (2017.04.18)
124 | 
125 | * Lots of fixes for bugs and type errors using mypy.
126 | * Two breakting changes that necessitate the major version bump:
127 |     * Several methods were erroneously named "uncompress_..." and have been corrected to "decompress_..."
128 |     * Default values were erroneously used for the char_mode and linesep parameters of fileinput(), fileoutput(), FileInput, FileOutput, and all their subclasses. textinput(), textoutput(), byteinput(), and byteoutput() convenience methods were added, and default values were set to None.
129 | 
130 | ## v2.2.3 (2017.04.09)
131 | 
132 | 
133 | * Add get_compression_format_name() method to Formats.
134 | * Validate the compression type in xopen.
135 | 
136 | ## v2.2.1 (2017.03.01)
137 | 
138 | 
139 | * Switch to pytest for testing.
140 | * Bugfixes in fileoutput.
141 | * Add ability to specifiy a file header for each file opened by fileoutput.
142 | * Add ability to pass initializing text/bytes to xopen with file_type==BUFFER to create a readable buffer.
143 | 
144 | ## v2.2.0 (2017.02.17)
145 | 
146 | 
147 | * Add caching for FileMode and PermissionSet
148 | * Add PatternFileOutput subclass of FileOuptut for generating output files from a pattern and tokens derived from lines in the file.
149 | 
150 | ## v2.1.1 (2017.02.13)
151 | 
152 | 
153 | * Minor bug fixes
154 | * Code cleanup (thanks to Codacy)
155 | 
156 | ## v2.1.0 (2017.02.11)
157 | 
158 | 
159 | * Added support for opening buffer types.
160 | 
161 | ## v2.0.0 (2017.02.11)
162 | 
163 | * The major version change reflects the introduction of potentially breaking changes:
164 |     1. When a file object is passed to `open_`, it is now wrapped in a `FileLikeWrapper` by default. To avoid this behavior, set `wrap_fileobj=False`, but note that if the file-like object is not a context manager, an error will be raised.
165 |     2. `xopen` no longer wraps files in `FileLikeWrapper` by default. To revert to the old behavior, set `xphyle.configure(default_xopen_context_wrapper=True)`.
166 |     3. For several methods in the `xphyle.paths` module, the `mode` argument has been renamed to `access` to avoid ambiguity.
167 |     4. `xphyle.paths.check_writeable_file` and `xphyle.paths.safe_check_writeable_file` have been changed to 'writable' to be consistent with the spelling used in core python.
168 |     5. In the `xphyle.paths` module:
169 |         * `check_file_mode` is removed.
170 |         * `get_access` is renamed to `get_permissions`.
171 |         * Many attribute and method names changed, mostly due to renaming of 'access' to 'permissions'.
172 |     6. In the context of `FileInput`, `mode` parameters have been changed to `char_mode`.
173 |     7. The `is_iterable` method has moved from `xphyle.utils` to `xphyle.types`.
174 |     8. The `types` parameter of `xphyle.utils.find` is renamed to path_types.
175 |     9. The string name of the FIFO path type has changed from 'fifo' to '|'.
176 | * Added `xphyle.popen`, which opens subprocesses (i.e. `subprocess.Popen` instances) and uses `xopen` to open stdin/stdout/sterr files or wrap PIPEs. This enables sending compressed data to/reading compressed data from subprocesses without knowing in advance what the compression format will be or whether native compression/decompression programs are available.
177 | * `xopen` now accepts two additional argument types: file objects and system commands. The later are specified as a string beginning with '|' (similar to the toolshed `nopen` method). PIPEs are automatically opened for stdin, stdout, and stderr. Additionally, if a compression type is specified, it is used to wrap one of the pipes as follows:
178 |     * If mode is read or readwrite, `xopen` opens a PIPE to stdout.
179 |     * Otherwise, `xopen` opens a PIPE to stdin.
180 | * Enumerated types are now provided (in `xphyle.typing`) for all argument types in which fixed sets of strings were used previously (e.g. file open mode, path type). All methods with these argument types now accept either the string or Enum value.
181 | 


--------------------------------------------------------------------------------
/CITATION:
--------------------------------------------------------------------------------
 1 | Didion, JP (2017) xphyle: Extraordinarily simple file handling. Journal of Open Source Software; [doi:10.21105/joss.00255](https://doi.org/10.21105/joss.00255)
 2 | 
 3 | @article{Didion2017,
 4 |   doi = {10.21105/joss.00255},
 5 |   url = {https://doi.org/10.21105/joss.00255},
 6 |   year = {2017},
 7 |   publisher = {The Open Journal},
 8 |   volume = {2},
 9 |   number = {14},
10 |   pages = {255},
11 |   author = {John Didion},
12 |   title = {xphyle: Extraordinarily simple file handling},
13 |   journal = {Journal of Open Source Software}
14 | }
15 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Contributor Covenant Code of Conduct
 2 | 
 3 | ## Our Pledge
 4 | 
 5 | In the interest of fostering an open and welcoming environment, we as
 6 | contributors and maintainers pledge to making participation in our project and
 7 | our community a harassment-free experience for everyone, regardless of age, body
 8 | size, disability, ethnicity, gender identity and expression, level of experience,
 9 | nationality, personal appearance, race, religion, or sexual identity and
10 | orientation.
11 | 
12 | ## Our Standards
13 | 
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 | 
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 | 
23 | Examples of unacceptable behavior by participants include:
24 | 
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 |   address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 |   professional setting
33 | 
34 | ## Our Responsibilities
35 | 
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 | 
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 | 
46 | ## Scope
47 | 
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 | 
55 | ## Enforcement
56 | 
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at [INSERT EMAIL ADDRESS]. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 | 
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 | 
68 | ## Attribution
69 | 
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at [http://contributor-covenant.org/version/1/4][version]
72 | 
73 | [homepage]: http://contributor-covenant.org
74 | [version]: http://contributor-covenant.org/version/1/4/
75 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | We welcome any contributions via pull requests. We are especially interested in a collaborator that would either backport xphyle to Python 2.x or implement a compatibility layer to my xphyle Python version-independent.
2 | 
3 | All code must be written in idiomatic python 3. Note that we use [PEP484](https://www.python.org/dev/peps/pep-0484/) type hints. Variable annotations are
4 | defined where needed using the comment syntax. Static code analysis is performed usying [mypy](http://mypy-lang.org/) and pylint.
5 | 
6 | Style-wise, we try to adhere to the Google python style guidelines. We use Google-style docstrings, which are formatted by the [Napoleon Sphinx Plugin](https://pypi.python.org/pypi/sphinxcontrib-napoleon). 
7 | 
8 | We enforce the [Contributor Covenant](http://contributor-covenant.org/) code of conduct.
9 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | **MIT License**
 2 | 
 3 | Copyright 2017 John P Didion
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 
 6 | this software and associated documentation files (the "Software"), to deal in 
 7 | the Software without restriction, including without limitation the rights to 
 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 
 9 | of the Software, and to permit persons to whom the Software is furnished to do 
10 | so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all 
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO E
18 | VENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
19 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
20 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
21 | THE SOFTWARE.
22 | 
23 | **Copyrighted Works**
24 | This software may use copyrighted and/or public domain works and distributes 
25 | these works under the terms of their respective licenses. All copyright 
26 | restrictions still apply to these 'third-party' packages. Furthermore, xphyle is 
27 | a community project with contributors within and outside of the US Government; 
28 | these authors retain copyright on their work, which they may relinquish via a
29 | public domain dedication. Below is a list of contributors, and either the 
30 | license under which their work is governed, or the release of copyright under
31 | public domain dedication.
32 | 
33 | **List of Contributors**
34 | John P Didion   2016-2017   Public Domain
35 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include LICENSE
3 | include tests/test*.py
4 | include docs/*.rst
5 | include docs/conf.py
6 | include docs/Makefile
7 | include docs/logo.png
8 | include docs/api/*.rst
9 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | module = xphyle
 2 | #pytestops = "--full-trace"
 3 | #pytestops = "-v -s"
 4 | repo = jdidion/$(module)
 5 | desc = Release $(version)
 6 | tests = tests
 7 | desc = ''
 8 | # Use this option to show full stack trace for errors
 9 | #pytestopts = "--full-trace"
10 | 
11 | all: install test
12 | 
13 | install:
14 | 	python setup.py install
15 | 
16 | test:
17 | 	pytest -m "not perf" -vv --cov --cov-report term-missing $(pytestopts) $(tests)
18 | 
19 | perftest:
20 | 	pytest -m "perf" $(tests)
21 | 
22 | clean:
23 | 	rm -Rf __pycache__
24 | 	rm -Rf **/__pycache__/*
25 | 	rm -Rf dist
26 | 	rm -Rf build
27 | 	rm -Rf *.egg-info
28 | 	rm -Rf .pytest_cache
29 | 	rm -Rf .coverage
30 | 
31 | tag:
32 | 	git tag $(version)
33 | 
34 | release: clean tag install test
35 | 	echo "Releasing version $(version)"
36 | 	python setup.py sdist bdist_wheel
37 | 	# pypi doesn't accept eggs
38 | 	rm dist/*.egg
39 | 	# release
40 | 	#python setup.py upload -r pypi
41 | 	twine upload -u "__token__" -p "$(pypi_token)" dist/*
42 | 	# push new tag after successful build
43 | 	git push origin --tags
44 | 	# create release in GitHub
45 | 	curl -v -i -X POST \
46 | 		-H "Content-Type:application/json" \
47 | 		-H "Authorization: token $(github_token)" \
48 | 		https://api.github.com/repos/$(repo)/releases \
49 | 		-d '{ \
50 | 		  "tag_name":"$(version)", \
51 | 		  "target_commitish": "master", \
52 | 		  "name": "$(version)", \
53 | 		  "body": "$(desc)", \
54 | 		  "draft": false, \
55 | 		  "prerelease": false \
56 | 		}'
57 | 
58 | docs:
59 | 	make -C docs api
60 | 	make -C docs html
61 | 
62 | readme:
63 | 	pandoc --from=markdown --to=rst --output=README.rst README.md
64 | 
65 | lint:
66 | 	pylint $(module)
67 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # xphyle: extraordinarily simple file handling
  2 | 
  3 | [![PyPI](https://img.shields.io/pypi/v/xphyle.svg?branch=master)](https://pypi.python.org/pypi/xphyle)
  4 | [![Travis CI](https://img.shields.io/travis/jdidion/xphyle/master.svg)](https://travis-ci.org/jdidion/xphyle)
  5 | [![Coverage Status](https://img.shields.io/coveralls/jdidion/xphyle/master.svg)](https://coveralls.io/github/jdidion/xphyle?branch=master)
  6 | [![Codacy Badge](https://api.codacy.com/project/badge/Grade/b2c0baa52b604e39a09ed108ac2f53ee)](https://www.codacy.com/app/jdidion/xphyle?utm_source=github.com&amp;utm_medium=referral&amp;utm_content=jdidion/xphyle&amp;utm_campaign=Badge_Grade)
  7 | [![Documentation Status](https://readthedocs.org/projects/xphyle/badge/?version=latest)](http://xphyle.readthedocs.io/en/latest/?badge=latest)
  8 | [![DOI](https://zenodo.org/badge/71260678.svg)](https://zenodo.org/badge/latestdoi/71260678)
  9 | [![JOSS](http://joss.theoj.org/papers/10.21105/joss.00255/status.svg)](http://joss.theoj.org/papers/10.21105/joss.00255)
 10 | 
 11 | <img src="https://github.com/jdidion/xphyle/blob/master/docs/logo.png?raw=true"
 12 |      alt="logo" width="200" height="200">
 13 | 
 14 | xphyle is a small python library that makes it easy to open compressed
 15 | files. Most importantly, xphyle will use the appropriate program (e.g. 'gzip') to compress/decompress a file if it is available on your system; this is almost always faster than using the corresponding python library. xphyle also provides methods that simplify common file I/O operations.
 16 | 
 17 | Recent version of xphyle (4.0.0+) require python 3.6. Older versions of xphyle support python 3.4+.
 18 | 
 19 | Please note that xphyle may work on Windows, but it is not tested.
 20 | 
 21 | # Installation
 22 | 
 23 | ```
 24 | pip install xphyle
 25 | ```
 26 | 
 27 | # Building from source
 28 | 
 29 | Clone this repository and run
 30 | 
 31 | ```
 32 | make
 33 | ```
 34 | 
 35 | # Example usages:
 36 | 
 37 | ```python
 38 | from xphyle import *
 39 | from xphyle.paths import STDIN, STDOUT
 40 | 
 41 | # Open a compressed file...
 42 | myfile = xopen('infile.gz')
 43 | 
 44 | # ...or a compressed stream
 45 | # e.g. gzip -c afile | python my_program.py
 46 | stdin = xopen(STDIN)
 47 | 
 48 | # Easily write to the stdin of a subprocess
 49 | with open_('|cat', 'wt') as process:
 50 |     process.write('foo')
 51 | 
 52 | # We have to tell xopen what kind of compression
 53 | # to use when writing to stdout
 54 | stdout = xopen(STDOUT, compression='gz')
 55 | 
 56 | # The `open_` method ensures that the file is usable with the `with` keyword.
 57 | # Print all lines in a compressed file...
 58 | with open_('infile.gz') as myfile:
 59 |     for line in myfile:
 60 |         print(line)
 61 | 
 62 | # ... or a compressed URL
 63 | with open_('http://foo.com/myfile.gz') as myfile:
 64 |     for line in myfile:
 65 |         print(line)
 66 | 
 67 | # Transparently handle paths and file objects
 68 | def dostuff(path_or_file):
 69 |     with open_(path_or_file) as myfile:
 70 |         for line in myfile:
 71 |             print(line)
 72 | 
 73 | # Read all lines in a compressed file into a list
 74 | from xphyle.utils import read_lines
 75 | lines = list(read_lines('infile.gz'))
 76 | 
 77 | # Sum the rows in a compressed file where each line is an integer value
 78 | total = sum(read_lines('infile.gz', convert=int))
 79 | ```
 80 | 
 81 | See the [Documentation](https://xphyle.readthedocs.io/en/latest/) for full usage information.
 82 | 
 83 | # Supported compression formats
 84 | 
 85 | * `gzip` (uses `igzip` or `pigz` if available)
 86 | * `bgzip`
 87 | * `bzip2` (uses `pbzip2` if available)
 88 | * `lzma`
 89 | * `zstd`
 90 | 
 91 | # Issues
 92 | 
 93 | Please report bugs and request enhancements using the [issue tracker](https://github.com/jdidion/xphyle).
 94 | 
 95 | # Roadmap
 96 | 
 97 | Future releases are mapped out using [GitHub Projects](https://github.com/jdidion/xphyle/projects).
 98 | 
 99 | # Citing xphyle
100 | 
101 | [Didion, JP (2017) xphyle: Extraordinarily simple file handling. Journal of Open Source Software; doi:10.21105/joss.00255](https://joss.theoj.org/papers/10.21105/joss.00255#)
102 | 
103 | # Acknowledgements
104 | 
105 | * [Dependencies scanned by PyUp.io](http://pyup.io/)
106 | * Thanks to [@ctb](https://github.com/ctb) for reviewing the xphyle paper


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for Sphinx documentation
  2 | #
  3 | 
  4 | # You can set these variables from the command line.
  5 | SPHINXOPTS    =
  6 | SPHINXAPI     = sphinx-apidoc
  7 | SPHINXBUILD   = sphinx-build
  8 | PAPER         =
  9 | BUILDDIR      = _build
 10 | 
 11 | # User-friendly check for sphinx-build
 12 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
 13 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
 14 | endif
 15 | 
 16 | # Internal variables.
 17 | PAPEROPT_a4     = -D latex_paper_size=a4
 18 | PAPEROPT_letter = -D latex_paper_size=letter
 19 | MOSTSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS)
 20 | ALLSPHINXOPTS   = $(MOSTSPHINXOPTS) .
 21 | # the i18n builder cannot share the environment and doctrees with the others
 22 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 23 | 
 24 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
 25 | 
 26 | all: html
 27 | 
 28 | help:
 29 | 	@echo "Please use \`make <target>' where <target> is one of"
 30 | 	@echo "  html       to make standalone HTML files"
 31 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
 32 | 	@echo "  singlehtml to make a single large HTML file"
 33 | 	@echo "  pickle     to make pickle files"
 34 | 	@echo "  json       to make JSON files"
 35 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 36 | 	@echo "  qthelp     to make HTML files and a qthelp project"
 37 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
 38 | 	@echo "  epub       to make an epub"
 39 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 40 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 41 | 	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
 42 | 	@echo "  text       to make text files"
 43 | 	@echo "  man        to make manual pages"
 44 | 	@echo "  texinfo    to make Texinfo files"
 45 | 	@echo "  info       to make Texinfo files and run them through makeinfo"
 46 | 	@echo "  gettext    to make PO message catalogs"
 47 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 48 | 	@echo "  xml        to make Docutils-native XML files"
 49 | 	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
 50 | 	@echo "  linkcheck  to check all external links for integrity"
 51 | 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 52 | 
 53 | clean:
 54 | 	rm -rf $(BUILDDIR)/*
 55 | 
 56 | api:
 57 | 	$(SPHINXAPI) -f -o api .. ../setup.py ../tests/*
 58 | 
 59 | html:
 60 | 	$(SPHINXBUILD) -b html $(MOSTSPHINXOPTS) api $(BUILDDIR)/html
 61 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 62 | 	@echo
 63 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 64 | 
 65 | dirhtml:
 66 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 67 | 	@echo
 68 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 69 | 
 70 | singlehtml:
 71 | 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 72 | 	@echo
 73 | 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 74 | 
 75 | pickle:
 76 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 77 | 	@echo
 78 | 	@echo "Build finished; now you can process the pickle files."
 79 | 
 80 | json:
 81 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 82 | 	@echo
 83 | 	@echo "Build finished; now you can process the JSON files."
 84 | 
 85 | htmlhelp:
 86 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 87 | 	@echo
 88 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 89 | 		  ".hhp project file in $(BUILDDIR)/htmlhelp."
 90 | 
 91 | qthelp:
 92 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 93 | 	@echo
 94 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 95 | 		  ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 96 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/atropos.qhcp"
 97 | 	@echo "To view the help file:"
 98 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/atropos.qhc"
 99 | 
100 | devhelp:
101 | 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
102 | 	@echo
103 | 	@echo "Build finished."
104 | 	@echo "To view the help file:"
105 | 	@echo "# mkdir -p $$HOME/.local/share/devhelp/atropos"
106 | 	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/atropos"
107 | 	@echo "# devhelp"
108 | 
109 | epub:
110 | 	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
111 | 	@echo
112 | 	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
113 | 
114 | latex:
115 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
116 | 	@echo
117 | 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
118 | 	@echo "Run \`make' in that directory to run these through (pdf)latex" \
119 | 		  "(use \`make latexpdf' here to do that automatically)."
120 | 
121 | latexpdf:
122 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
123 | 	@echo "Running LaTeX files through pdflatex..."
124 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf
125 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
126 | 
127 | latexpdfja:
128 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
129 | 	@echo "Running LaTeX files through platex and dvipdfmx..."
130 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
131 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
132 | 
133 | text:
134 | 	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
135 | 	@echo
136 | 	@echo "Build finished. The text files are in $(BUILDDIR)/text."
137 | 
138 | man:
139 | 	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
140 | 	@echo
141 | 	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
142 | 
143 | texinfo:
144 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
145 | 	@echo
146 | 	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
147 | 	@echo "Run \`make' in that directory to run these through makeinfo" \
148 | 		  "(use \`make info' here to do that automatically)."
149 | 
150 | info:
151 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
152 | 	@echo "Running Texinfo files through makeinfo..."
153 | 	make -C $(BUILDDIR)/texinfo info
154 | 	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
155 | 
156 | gettext:
157 | 	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
158 | 	@echo
159 | 	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
160 | 
161 | changes:
162 | 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
163 | 	@echo
164 | 	@echo "The overview file is in $(BUILDDIR)/changes."
165 | 
166 | linkcheck:
167 | 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
168 | 	@echo
169 | 	@echo "Link check complete; look for any errors in the above output " \
170 | 		  "or in $(BUILDDIR)/linkcheck/output.txt."
171 | 
172 | doctest:
173 | 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
174 | 	@echo "Testing of doctests in the sources finished, look at the " \
175 | 		  "results in $(BUILDDIR)/doctest/output.txt."
176 | 
177 | xml:
178 | 	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
179 | 	@echo
180 | 	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
181 | 
182 | pseudoxml:
183 | 	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
184 | 	@echo
185 | 	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
186 | 


--------------------------------------------------------------------------------
/docs/api/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # xphyle API documentation build configuration file, created by
  4 | # sphinx-quickstart on Fri Sep 12 09:11:16 2014.
  5 | #
  6 | # This file is execfile()d with the current directory set to its
  7 | # containing dir.
  8 | #
  9 | # Note that not all possible configuration values are present in this
 10 | # autogenerated file.
 11 | #
 12 | # All configuration values have a default; values that are commented out
 13 | # serve to show the default.
 14 | 
 15 | import sys
 16 | import os
 17 | 
 18 | # If extensions (or modules to document with autodoc) are in another directory,
 19 | # add these directories to sys.path here. If the directory is relative to the
 20 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 21 | sys.path.insert(0, os.path.abspath(os.pardir))
 22 | 
 23 | # -- General configuration ------------------------------------------------
 24 | 
 25 | # If your documentation needs a minimal Sphinx version, state it here.
 26 | # needs_sphinx = '1.0'
 27 | 
 28 | # Add any Sphinx extension module names here, as strings. They can be
 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 30 | # ones.
 31 | # Add autodoc and napoleon to the extensions list
 32 | extensions = ["sphinx.ext.autodoc", "sphinxcontrib.napoleon"]
 33 | 
 34 | # Add any paths that contain templates here, relative to this directory.
 35 | templates_path = ["_templates"]
 36 | 
 37 | # The suffix of source filenames.
 38 | source_suffix = ".rst"
 39 | 
 40 | # The encoding of source files.
 41 | # source_encoding = 'utf-8-sig'
 42 | 
 43 | # The master toctree document.
 44 | master_doc = "modules"
 45 | 
 46 | # General information about the project.
 47 | project = u"xphyle"
 48 | copyright = u"Public domain (government work), by John P Didion"
 49 | 
 50 | # The version info for the project you're documenting, acts as replacement for
 51 | # |version| and |release|, also used in various other places throughout the
 52 | # built documents.
 53 | 
 54 | from xphyle import __version__
 55 | 
 56 | #
 57 | # The short X.Y version.
 58 | version = __version__
 59 | # The full version, including alpha/beta/rc tags.
 60 | release = __version__
 61 | 
 62 | # The language for content autogenerated by Sphinx. Refer to documentation
 63 | # for a list of supported languages.
 64 | # language = None
 65 | 
 66 | # There are two options for replacing |today|: either, you set today to some
 67 | # non-false value, then it is used:
 68 | # today = ''
 69 | # Else, today_fmt is used as the format for a strftime call.
 70 | # today_fmt = '%B %d, %Y'
 71 | 
 72 | # List of patterns, relative to source directory, that match files and
 73 | # directories to ignore when looking for source files.
 74 | exclude_patterns = ["tests", "setup.py", "build", "dist", "_build"]
 75 | 
 76 | # The reST default role (used for this markup: `text`) to use for all
 77 | # documents.
 78 | # default_role = None
 79 | 
 80 | # If true, '()' will be appended to :func: etc. cross-reference text.
 81 | # add_function_parentheses = True
 82 | 
 83 | # If true, the current module name will be prepended to all description
 84 | # unit titles (such as .. function::).
 85 | # add_module_names = True
 86 | 
 87 | # If true, sectionauthor and moduleauthor directives will be shown in the
 88 | # output. They are ignored by default.
 89 | # show_authors = False
 90 | 
 91 | # The name of the Pygments (syntax highlighting) style to use.
 92 | pygments_style = "sphinx"
 93 | 
 94 | # A list of ignored prefixes for module index sorting.
 95 | # modindex_common_prefix = []
 96 | 
 97 | # If true, keep warnings as "system message" paragraphs in the built documents.
 98 | # keep_warnings = False
 99 | 
100 | 
101 | # -- Options for HTML output ----------------------------------------------
102 | 
103 | # The theme to use for HTML and HTML Help pages.  See the documentation for
104 | # a list of builtin themes.
105 | html_theme = "default"
106 | try:
107 |     from better import better_theme_path
108 | 
109 |     html_theme_path = [better_theme_path]
110 |     html_theme = "better"
111 | except ImportError:
112 |     pass
113 | 
114 | 
115 | # Theme options are theme-specific and customize the look and feel of a theme
116 | # further.  For a list of options available for each theme, see the
117 | # documentation.
118 | # html_theme_options = {}
119 | 
120 | # Add any paths that contain custom themes here, relative to this directory.
121 | # html_theme_path = []
122 | 
123 | # The name for this set of Sphinx documents.  If None, it defaults to
124 | # "<project> v<release> documentation".
125 | # html_title = None
126 | 
127 | # A shorter title for the navigation bar.  Default is the same as html_title.
128 | # html_short_title = None
129 | 
130 | # The name of an image file (relative to this directory) to place at the top
131 | # of the sidebar.
132 | # html_logo = 'logo.png'
133 | 
134 | # The name of an image file (within the static path) to use as favicon of the
135 | # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
136 | # pixels large.
137 | # html_favicon = None
138 | 
139 | # Add any paths that contain custom static files (such as style sheets) here,
140 | # relative to this directory. They are copied after the builtin static files,
141 | # so a file named "default.css" will overwrite the builtin "default.css".
142 | html_static_path = ["_static"]
143 | 
144 | # Add any extra paths that contain custom files (such as robots.txt or
145 | # .htaccess) here, relative to this directory. These files are copied
146 | # directly to the root of the documentation.
147 | # html_extra_path = []
148 | 
149 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
150 | # using the given strftime format.
151 | # html_last_updated_fmt = '%b %d, %Y'
152 | 
153 | # If true, SmartyPants will be used to convert quotes and dashes to
154 | # typographically correct entities.
155 | html_use_smartypants = True
156 | 
157 | # Custom sidebar templates, maps document names to template names.
158 | # html_sidebars = {}
159 | 
160 | # Additional templates that should be rendered to pages, maps page names to
161 | # template names.
162 | # html_additional_pages = {}
163 | 
164 | # If false, no module index is generated.
165 | # html_domain_indices = True
166 | 
167 | # If false, no index is generated.
168 | # html_use_index = True
169 | 
170 | # If true, the index is split into individual pages for each letter.
171 | # html_split_index = False
172 | 
173 | # If true, links to the reST sources are added to the pages.
174 | # html_show_sourcelink = True
175 | 
176 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
177 | # html_show_sphinx = True
178 | 
179 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
180 | # html_show_copyright = True
181 | 
182 | # If true, an OpenSearch description file will be output, and all pages will
183 | # contain a <link> tag referring to it.  The value of this option must be the
184 | # base URL from which the finished HTML is served.
185 | # html_use_opensearch = ''
186 | 
187 | # This is the file name suffix for HTML files (e.g. ".xhtml").
188 | # html_file_suffix = None
189 | 
190 | # Output file base name for HTML help builder.
191 | htmlhelp_basename = "xphyledoc"
192 | 
193 | 
194 | # -- Options for LaTeX output ---------------------------------------------
195 | 
196 | latex_elements = {
197 |     # The paper size ('letterpaper' or 'a4paper').
198 |     "papersize": "a4paper",
199 |     # The font size ('10pt', '11pt' or '12pt').
200 |     #'pointsize': '10pt',
201 |     # Additional stuff for the LaTeX preamble.
202 |     #'preamble': '',
203 | }
204 | 
205 | # Grouping the document tree into LaTeX files. List of tuples
206 | # (source start file, target name, title,
207 | #  author, documentclass [howto, manual, or own class]).
208 | latex_documents = [
209 |     ("index", "xphyle.tex", u"xphyle Documentation", u"John P Didion", "manual"),
210 | ]
211 | 
212 | # The name of an image file (relative to this directory) to place at the top of
213 | # the title page.
214 | # latex_logo = None
215 | 
216 | # For "manual" documents, if this is true, then toplevel headings are parts,
217 | # not chapters.
218 | # latex_use_parts = False
219 | 
220 | # If true, show page references after internal links.
221 | # latex_show_pagerefs = False
222 | 
223 | # If true, show URL addresses after external links.
224 | # latex_show_urls = False
225 | 
226 | # Documents to append as an appendix to all manuals.
227 | # latex_appendices = []
228 | 
229 | # If false, no module index is generated.
230 | # latex_domain_indices = True
231 | 
232 | 
233 | # -- Options for manual page output ---------------------------------------
234 | 
235 | # One entry per manual page. List of tuples
236 | # (source start file, name, description, authors, manual section).
237 | man_pages = [("index", "xphyle", u"xphyle Documentation", [u"John P Didion"], 1)]
238 | 
239 | # If true, show URL addresses after external links.
240 | # man_show_urls = False
241 | 
242 | 
243 | # -- Options for Texinfo output -------------------------------------------
244 | 
245 | # Grouping the document tree into Texinfo files. List of tuples
246 | # (source start file, target name, title, author,
247 | #  dir menu entry, description, category)
248 | texinfo_documents = [
249 |     (
250 |         "index",
251 |         "xphyle",
252 |         u"xphyle Documentation",
253 |         u"John P Didion",
254 |         "xphyle",
255 |         "Transparently open compressed files",
256 |         "io",
257 |     ),
258 | ]
259 | 
260 | # Documents to append as an appendix to all manuals.
261 | # texinfo_appendices = []
262 | 
263 | # If false, no module index is generated.
264 | # texinfo_domain_indices = True
265 | 
266 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
267 | # texinfo_show_urls = 'footnote'
268 | 
269 | # If true, do not generate a @detailmenu in the "Top" node's menu.
270 | # texinfo_no_detailmenu = False
271 | 


--------------------------------------------------------------------------------
/docs/api/modules.rst:
--------------------------------------------------------------------------------
 1 | xphyle package
 2 | ==============
 3 | 
 4 | Public API
 5 | ----------
 6 | 
 7 | xphyle module
 8 | ~~~~~~~~~~~~~
 9 | 
10 | .. automodule:: xphyle
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | xphyle.utils module
16 | ~~~~~~~~~~~~~~~~~~~
17 | 
18 | .. automodule:: xphyle.utils
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | xphyle.paths module
24 | ~~~~~~~~~~~~~~~~~~~
25 | 
26 | .. automodule:: xphyle.paths
27 |     :members:
28 |     :undoc-members:
29 |     :show-inheritance:
30 | 
31 | Plugin API
32 | ----------
33 | 
34 | You shouldn't need these modules unless you want to extend xphyle functionality.
35 | 
36 | xphyle.formats module
37 | ~~~~~~~~~~~~~~~~~~~~~
38 | 
39 | .. automodule:: xphyle.formats
40 |     :members:
41 |     :undoc-members:
42 |     :show-inheritance:
43 | 
44 | xphyle.progress module
45 | ~~~~~~~~~~~~~~~~~~~~~~
46 | 
47 | .. automodule:: xphyle.progress
48 |     :members:
49 |     :undoc-members:
50 |     :show-inheritance:
51 | 
52 | xphyle.urls module
53 | ~~~~~~~~~~~~~~~~~~
54 | 
55 | .. automodule:: xphyle.urls
56 |     :members:
57 |     :undoc-members:
58 |     :show-inheritance:
59 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # xphyle documentation build configuration file, created by
  5 | # sphinx-quickstart on Fri Sep 12 09:11:16 2014.
  6 | #
  7 | # This file is execfile()d with the current directory set to its
  8 | # containing dir.
  9 | #
 10 | # Note that not all possible configuration values are present in this
 11 | # autogenerated file.
 12 | #
 13 | # All configuration values have a default; values that are commented out
 14 | # serve to show the default.
 15 | 
 16 | import sys
 17 | import os
 18 | 
 19 | # If extensions (or modules to document with autodoc) are in another directory,
 20 | # add these directories to sys.path here. If the directory is relative to the
 21 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 22 | sys.path.insert(0, os.path.abspath(os.pardir))
 23 | 
 24 | # The version info for the project you're documenting, acts as replacement for
 25 | # |version| and |release|, also used in various other places throughout the
 26 | # built documents.
 27 | 
 28 | from xphyle import __version__
 29 | 
 30 | # The short X.Y version.
 31 | version = __version__
 32 | # The full version, including alpha/beta/rc tags.
 33 | release = __version__
 34 | 
 35 | # -- General configuration ------------------------------------------------
 36 | 
 37 | # If your documentation needs a minimal Sphinx version, state it here.
 38 | # needs_sphinx = '1.0'
 39 | 
 40 | # Add any Sphinx extension module names here, as strings. They can be
 41 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 42 | # ones.
 43 | # Add autodoc and napoleon to the extensions list
 44 | extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon"]
 45 | 
 46 | # Add any paths that contain templates here, relative to this directory.
 47 | templates_path = ["_templates"]
 48 | 
 49 | # The suffix of source filenames.
 50 | source_suffix = ".rst"
 51 | 
 52 | # The encoding of source files.
 53 | # source_encoding = 'utf-8-sig'
 54 | 
 55 | # The master toctree document.
 56 | master_doc = "index"
 57 | 
 58 | # General information about the project.
 59 | project = u"xphyle"
 60 | copyright_ = u"Public domain (government work), by John P Didion"
 61 | 
 62 | # The language for content autogenerated by Sphinx. Refer to documentation
 63 | # for a list of supported languages.
 64 | # language = None
 65 | 
 66 | # There are two options for replacing |today|: either, you set today to some
 67 | # non-false value, then it is used:
 68 | # today = ''
 69 | # Else, today_fmt is used as the format for a strftime call.
 70 | # today_fmt = '%B %d, %Y'
 71 | 
 72 | # List of patterns, relative to source directory, that match files and
 73 | # directories to ignore when looking for source files.
 74 | exclude_patterns = ["tests", "setup.py", "build", "dist", "_build"]
 75 | 
 76 | # The reST default role (used for this markup: `text`) to use for all
 77 | # documents.
 78 | # default_role = None
 79 | 
 80 | # If true, '()' will be appended to :func: etc. cross-reference text.
 81 | # add_function_parentheses = True
 82 | 
 83 | # If true, the current module name will be prepended to all description
 84 | # unit titles (such as .. function::).
 85 | # add_module_names = True
 86 | 
 87 | # If true, sectionauthor and moduleauthor directives will be shown in the
 88 | # output. They are ignored by default.
 89 | # show_authors = False
 90 | 
 91 | # The name of the Pygments (syntax highlighting) style to use.
 92 | pygments_style = "sphinx"
 93 | 
 94 | # A list of ignored prefixes for module index sorting.
 95 | # modindex_common_prefix = []
 96 | 
 97 | # If true, keep warnings as "system message" paragraphs in the built documents.
 98 | # keep_warnings = False
 99 | 
100 | 
101 | # -- Options for HTML output ----------------------------------------------
102 | 
103 | # The theme to use for HTML and HTML Help pages.  See the documentation for
104 | # a list of builtin themes.
105 | html_theme = "default"
106 | try:
107 |     from better import better_theme_path
108 | 
109 |     html_theme_path = [better_theme_path]
110 |     html_theme = "better"
111 | except ImportError:
112 |     pass
113 | 
114 | # Theme options are theme-specific and customize the look and feel of a theme
115 | # further.  For a list of options available for each theme, see the
116 | # documentation.
117 | # html_theme_options = {}
118 | 
119 | # Add any paths that contain custom themes here, relative to this directory.
120 | # html_theme_path = []
121 | 
122 | # The name for this set of Sphinx documents.  If None, it defaults to
123 | # "<project> v<release> documentation".
124 | # html_title = None
125 | 
126 | # A shorter title for the navigation bar.  Default is the same as html_title.
127 | # html_short_title = None
128 | 
129 | # The name of an image file (relative to this directory) to place at the top
130 | # of the sidebar.
131 | # html_logo = 'logo.png'
132 | 
133 | # The name of an image file (within the static path) to use as favicon of the
134 | # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
135 | # pixels large.
136 | # html_favicon = None
137 | 
138 | # Add any paths that contain custom static files (such as style sheets) here,
139 | # relative to this directory. They are copied after the builtin static files,
140 | # so a file named "default.css" will overwrite the builtin "default.css".
141 | html_static_path = ["_static"]
142 | 
143 | # Add any extra paths that contain custom files (such as robots.txt or
144 | # .htaccess) here, relative to this directory. These files are copied
145 | # directly to the root of the documentation.
146 | # html_extra_path = []
147 | 
148 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
149 | # using the given strftime format.
150 | # html_last_updated_fmt = '%b %d, %Y'
151 | 
152 | # If true, SmartyPants will be used to convert quotes and dashes to
153 | # typographically correct entities.
154 | html_use_smartypants = True
155 | 
156 | # Custom sidebar templates, maps document names to template names.
157 | # html_sidebars = {}
158 | 
159 | # Additional templates that should be rendered to pages, maps page names to
160 | # template names.
161 | # html_additional_pages = {}
162 | 
163 | # If false, no module index is generated.
164 | # html_domain_indices = True
165 | 
166 | # If false, no index is generated.
167 | # html_use_index = True
168 | 
169 | # If true, the index is split into individual pages for each letter.
170 | # html_split_index = False
171 | 
172 | # If true, links to the reST sources are added to the pages.
173 | # html_show_sourcelink = True
174 | 
175 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
176 | # html_show_sphinx = True
177 | 
178 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
179 | # html_show_copyright = True
180 | 
181 | # If true, an OpenSearch description file will be output, and all pages will
182 | # contain a <link> tag referring to it.  The value of this option must be the
183 | # base URL from which the finished HTML is served.
184 | # html_use_opensearch = ''
185 | 
186 | # This is the file name suffix for HTML files (e.g. ".xhtml").
187 | # html_file_suffix = None
188 | 
189 | # Output file base name for HTML help builder.
190 | htmlhelp_basename = "xphyledoc"
191 | 
192 | # -- Options for LaTeX output ---------------------------------------------
193 | 
194 | latex_elements = {
195 |     # The paper size ('letterpaper' or 'a4paper').
196 |     "papersize": "a4paper",
197 |     # The font size ('10pt', '11pt' or '12pt').
198 |     # 'pointsize': '10pt',
199 |     # Additional stuff for the LaTeX preamble.
200 |     # 'preamble': '',
201 | }
202 | 
203 | # Grouping the document tree into LaTeX files. List of tuples
204 | # (source start file, target name, title,
205 | #  author, documentclass [howto, manual, or own class]).
206 | latex_documents = [
207 |     ("index", "xphyle.tex", u"xphyle Documentation", u"John P Didion", "manual"),
208 | ]
209 | 
210 | # The name of an image file (relative to this directory) to place at the top of
211 | # the title page.
212 | # latex_logo = None
213 | 
214 | # For "manual" documents, if this is true, then toplevel headings are parts,
215 | # not chapters.
216 | # latex_use_parts = False
217 | 
218 | # If true, show page references after internal links.
219 | # latex_show_pagerefs = False
220 | 
221 | # If true, show URL addresses after external links.
222 | # latex_show_urls = False
223 | 
224 | # Documents to append as an appendix to all manuals.
225 | # latex_appendices = []
226 | 
227 | # If false, no module index is generated.
228 | # latex_domain_indices = True
229 | 
230 | 
231 | # -- Options for manual page output ---------------------------------------
232 | 
233 | # One entry per manual page. List of tuples
234 | # (source start file, name, description, authors, manual section).
235 | man_pages = [("index", "xphyle", u"xphyle Documentation", [u"John P Didion"], 1)]
236 | 
237 | # If true, show URL addresses after external links.
238 | # man_show_urls = False
239 | 
240 | 
241 | # -- Options for Texinfo output -------------------------------------------
242 | 
243 | # Grouping the document tree into Texinfo files. List of tuples
244 | # (source start file, target name, title, author,
245 | #  dir menu entry, description, category)
246 | texinfo_documents = [
247 |     (
248 |         "index",
249 |         "xphyle",
250 |         u"xphyle Documentation",
251 |         u"John P Didion",
252 |         "xphyle",
253 |         "Transparently open compressed files",
254 |         "io",
255 |     ),
256 | ]
257 | 
258 | # Documents to append as an appendix to all manuals.
259 | # texinfo_appendices = []
260 | 
261 | # If false, no module index is generated.
262 | # texinfo_domain_indices = True
263 | 
264 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
265 | # texinfo_show_urls = 'footnote'
266 | 
267 | # If true, do not generate a @detailmenu in the "Top" node's menu.
268 | # texinfo_no_detailmenu = False
269 | 


--------------------------------------------------------------------------------
/docs/developers.rst:
--------------------------------------------------------------------------------
 1 | Style-wise, we try to adhere to the [Google python style guidelines](https://google.github.io/styleguide/pyguide.html).
 2 | 
 3 | We use Google-style docstrings, which are formatted by the [Napoleon Sphinx Plugin](https://pypi.python.org/pypi/sphinxcontrib-napoleon).
 4 | 
 5 | We run pylint as part of each build and strive to maintain a 10/10 score. However, we disable some pylint checks:
 6 | 
 7 | * Function annotations: pylint does not properly handle whitespace around function annotations (https://github.com/PyCQA/pylint/issues/238).
 8 | * White space on empty lines: we use white space as a visual guide to the structure of the code. Each blank line should have whitespace matching the indent level of the next non-blank line.
 9 | * Checks that are arbitrary/overly restrictive (e.g. 'too-many-xxx'; see .pylintrc for full list)
10 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
  1 | xphyle: extraordinarily simple file handling
  2 | ============================================
  3 | 
  4 | .. image:: logo.png
  5 |    :height: 200px
  6 |    :width: 200 px
  7 | 
  8 | xphyle is a small python (3.4+) library that makes it easy to open compressed
  9 | files and URLs for the highest possible performance available on your system.
 10 | 
 11 | * `API <api/modules.html>`_
 12 | * `Source code <https://github.com/jdidion/xphyle/>`_
 13 | * `Report an issue <https://github.com/jdidion/xphyle/issues>`_
 14 | 
 15 | Installation
 16 | ------------
 17 | 
 18 | xphyle is available from pypi::
 19 | 
 20 |     pip install xphyle
 21 | 
 22 | xphyle tries to use the compression programs installed on your local machine (e.g. gzip, bzip2); if it can't, it will use the built-in python libraries (which are slower). Thus, xphyle has no required dependencies, but we recommend that if you install gzip, etc. if you don't already have them.
 23 | 
 24 | xphyle will use alternative programs for multi-threaded compression if it is available:
 25 | 
 26 | * gzip: `igzip <https://github.com/intel/isa-l/tree/master/igzip>`_ or `pigz <http://zlib.net/pigz/>`_.
 27 | * bzip2: `pbzip2 <https://github.com/ruanhuabin/pbzip2>`_
 28 | 
 29 | Multithreading support is disabled by default; to set the number of threads that xphyle should use::
 30 | 
 31 |     xphyle.configure(threads=4)
 32 | 
 33 | or, to automatically set it to the number of cores available on your system::
 34 | 
 35 |     xphyle.configure(threads=True)
 36 | 
 37 | If you have programs installed at a location that is not on your path, you can add those locations to xphyle's executable search::
 38 | 
 39 |     xphyle.configure(executable_path=['/path', '/another/path', ...])
 40 | 
 41 | If you would like progress bars displayed for file operations, you need to configure one or both of the python-level and system-level progress bars.
 42 | 
 43 | For python-level operations, the `pokrok <https://pypi.python.org/pypi/pokrok>`_ API is used by default. Pokrok provides access to many popular progress bar libraries with a single, standard interface. Please see the documentation for more information about which libraries are currently supported and how to configure them. To enable this::
 44 | 
 45 |     > pip install pokrok
 46 | 
 47 |     xphyle.configure(progress=True)
 48 | 
 49 | You can also use you own preferred progress bar by passing a callable, which must take a single iterable argument and two optional keyword arguments and return an iterable::
 50 | 
 51 |     def my_progress_wrapper(itr, desc='My progress bar', size=None):
 52 |         ...
 53 | 
 54 |     xphyle.configure(progress=my_progress_wrapper)
 55 | 
 56 | For system-level operations, an executable is required that reads from stdin and writes to stdout; `pv <http://www.ivarch.com/programs/quickref/pv.shtml>`_ is used by default. To enable this::
 57 | 
 58 |     xphyle.configure(system_progress=True)
 59 | 
 60 | You can also use your own preferred program by passing a tuple with the command and arguments (:py:func:`<xphyle.progress.system_progress_command>` simplifies this)::
 61 | 
 62 |     xphyle.configure(system_progress=xphyle.progress.system_progress_command(
 63 |         'pv', '-pre', require=True))
 64 | 
 65 | Working with files
 66 | ------------------
 67 | 
 68 | The heart of xphyle is the simplicity of working with files. There is a single interface -- ``xopen`` -- for opening "file-like objects", regardless of whether they represent local files, remote files (referenced by URLs), or system streams (stdin, stdout, stderr); and regardless of whether they are compressed.
 69 | 
 70 | The following are functionally equivalent ways to open a gzip file::
 71 | 
 72 |     import gzip
 73 |     f = gzip.open('input.gz', 'rt')
 74 | 
 75 |     from xphyle import xopen
 76 |     f = xopen('input.gz', 'rt')
 77 | 
 78 | So then why use xphyle? Two reasons:
 79 | 
 80 | 1. The ``gzip.open`` method of opening a gzip file above requires you to know that you are expecting a gzip file and only a gzip file. If your program optionally accepts either a compressed or a decompressed file, then you'll need several extra lines of code to either detect the file format or to make the user specify the format of the file they are providing. This becomes increasingly cumbersome with each additional format you want to support. On the other hand, ``xopen`` has the same interface regardless of the compression format. Furthermore, if xphyle doesn't currently support a file format that you would like to use, it enables you to add it via a simple API.
 81 | 2. The ``gzip.open`` method of opening a gzip file uses python code to decompress the file. It's well written, highly optimized python code, but unfortunately it's still slower than your natively compiled system-level applications (e.g. pigz or gzip). The ``xopen`` method of opening a gzip file first tries to use pigz or gzip to decompress the file and provides access to the resulting stream of decompressed data (as a file-like object), and only falls back to ``gzip.open`` if neither program is available.
 82 | 
 83 | If you want to be explicit about whether to expect a compressed file, what type of compression to expect, or whether to try and use system programs, you can::
 84 | 
 85 |     from xphyle import xopen
 86 |     from xphyle.paths import STDIN
 87 | 
 88 |     # Expect the file to not be compressed
 89 |     f = xopen('input', 'rb', compression=False)
 90 | 
 91 |     # Open a remote file. Expect the file to be compressed, and throw an error
 92 |     # if it's not, or if the compression format cannot be determined.
 93 |     f = xopen('http://foo.com/input.gz', 'rt', compression=True)
 94 | 
 95 |     # Open stdin. Expect the input to be gzip compressed, and throw an error if
 96 |     # it's not
 97 |     f = xopen(STDIN, 'rt', compression='gzip')
 98 | 
 99 |     # Do not try to use the system-level gzip program for decompression
100 |     f = xopen('input.gz', 'rt', compression='gzip', use_system=False)
101 | 
102 | By default, ``xopen`` returns the file. If desired, ``xopen`` can also wrap the file such that it behaves just like a file with a few additional features:
103 | 
104 | * A file iterator is wrapped in a progress bar (if they have been enabled via the ``configure`` method described above).
105 | * A simple event system that enables callbacks to be registered for various events. Currently, the only supported event is closing the file. The ``xphyle.utils`` package provides a few useful event listeners, e.g. to compress, move, or delete the file when it is closed.
106 | * ContextManager functionality, such that the file is always compatible with ``with``, e.g.::
107 | 
108 |     def print_lines(path):
109 |         # this works whether path refers to a local file, URL or STDIN
110 |         with xopen(path, context_wrapper=True) as infile:
111 |             for line in infile:
112 |                 print(line)
113 | 
114 | The wrapping behavior can be enabled by passing ``context_wrapper=True`` to ``xopen``. You can configure ``xopen`` to wrap files by default::
115 | 
116 |     xphyle.configure(default_xopen_context_wrapper=True)
117 | 
118 | **Note that this represents a change from xphyle 1.x, in which wrapping occurred by default.**
119 | 
120 | Another common pattern is to write functions that accept either a path or an open file object. Rather than having to test whether the user passed a path or a file and handle each differently, you can use the ``open_`` convenience method::
121 | 
122 |     from xphyle import open_
123 | 
124 |     def print_lines(path_or_file):
125 |         with open_(path_or_file) as infile:
126 |             for line in infile:
127 |                 print(line)
128 | 
129 | Note that ``open_`` wraps files by default, including already open file-like objects. To disable this, set ``wrap_fileobj=False``.
130 | 
131 | Supported file formats
132 | ~~~~~~~~~~~~~~~~~~~~~~
133 | 
134 | xphyle supports the most commonly used file formats: gzip, bzip2/7zip, and lzma/xz.
135 | 
136 | Also supported are:
137 | 
138 | * zstandard
139 | * Brotli
140 | * block-based gzip (bgzip), a format commonly used in bioinformatics. Somewhat confusingly, '.gz' is an acceptable extension for bgzip files, and gzip will decompress bgzip files. Thus, to specifically use bgzip, either use a '.bgz' file extension or specify 'bgzip' as the compression format::
141 | 
142 |     f = xopen('input.gz', 'rt', compression='bgzip', validate=False)
143 | 
144 | Additional compression formats may be added in the future. To get the most up-to-date list::
145 | 
146 |     from xphyle.formats import FORMATS
147 |     print(', '.join(FORMATS.list_compression_formats())
148 | 
149 | When a file is opened for decompression, its extension is used to determine which decompressor to use. If the extension is not recognized, or if the filename is not available (e.g. when decompressing a stream or buffer), then xphyle attempts to determine the file format from the "magic bytes" at the beginning of the file.
150 | 
151 | Processes
152 | ~~~~~~~~~
153 | 
154 | As of xphyle 2.0.0, you can easily open subprocesses using the ``xphyle.popen`` method. This method is similar to python ``subprocess.Popen``, except that it uses ``xopen`` to open files passed to stdin, stdout, and stderr, and/or to wrap subprocess PIPEs. ``xphyle.popen`` returns an ``xphyle.Process`` object, which is a subclass of ``subprocess.Popen`` but adds additional functionality, essentially making a Process behave like a regular file. Writing to a process writes to its stdin PIPE, and reading from a process reads from its stdout or stderr PIPE::
155 | 
156 |     from xphyle import popen, PIPE
157 |     proc = popen('cat', stdin=PIPE, stdout='myfile.gz')
158 |     try:
159 |         proc.write('foo')
160 |     finally:
161 |         proc.close()
162 | 
163 |     # equivalent to:
164 |     with popen('cat', stdin=PIPE, stdout='myfile.gz') as proc:
165 |         proc.write('foo')
166 | 
167 |     # and also to:
168 |     popen('cat', stdin=PIPE, stdout='myfile.gz').communicate('foo')
169 | 
170 |     # for the common case above, there's also a shortcut method
171 |     from xphyle.utils import exec_process
172 |     exec_process('cat', 'foo', stdout='myfile.gz')
173 | 
174 | In addition, ``open_`` and ``xopen`` can open subprocesses. The primary difference is that ``popen`` enables customization of stdin, stdout, and stderr, whereas opening a process through ``open_`` or ``xopen`` uses default behavior of opening PIPEs for all of the streams, and wrapping the PIPE indicated by the file mode. For example::
175 | 
176 |     # write to the process stdin
177 |     with open_('|cat', 'wt') as proc:
178 |         proc.write('foo')
179 | 
180 |     # this command wraps stdin with gzip compression
181 |     with open_('|zcat', 'wt', compression='gzip') as proc:
182 |         proc.write('foo')
183 | 
184 |     # this command wraps stdout with gzip decompression;
185 |     # furthermore, the compression format is determined
186 |     # automatically
187 |     with open_('|gzip -c foobar.txt', 'rt') as proc:
188 |         text = proc.read()
189 | 
190 | Note that with ``open_`` and ``xopen``, the system command must be specified as a string starting with '|'.
191 | 
192 | Buffers
193 | ~~~~~~~
194 | 
195 | As of xphyle 2.1.0, ``open_`` and ``xopen`` can also open buffer types. A buffer is an instance of ``io.StringIO`` or ``io.BytesIO`` (or similar) -- basically an in memory read/write buffer. Passing open buffer objects worked before (they were treated as file-like), but now there is a special file type -- ``FileType.BUFFER`` -- that will cause them to be handled a bit differently. In addition, you can now pass ``str`` or ``bytes`` (the type objects) to automatically create the corresponding buffer type::
196 | 
197 |     with open_(str) as buf:
198 |         buf.write('foo')
199 |     string_foo = buf.getvalue()
200 | 
201 |     # with compression, type must be 'bytes'
202 |     with open_(bytes, compression='gzip') as buf:
203 |         buf.write('foo')
204 |     compressed_foo = buf.getvalue()
205 | 
206 | You can also create readable buffers by passing the string/bytes to read instead of a path, and explicitly specifying the file type::
207 | 
208 |     with open_("This is a string I want to read", file_type=FileType.BUFFER) as buf:
209 |         buf_str = buf.read()
210 | 
211 | Reading/writing data
212 | ~~~~~~~~~~~~~~~~~~~~
213 | 
214 | The ``xphyle.utils`` module provides methods for many of the common operations that you'll want to perform on files. A few examples are shown below; you can read the `API docs <api/modules.html#module-xphyle.utils>`_ for a full list of methods and more detailed descriptions of each.
215 | 
216 | There are pairs of methods for reading/writing text and binary data using iterators::
217 | 
218 |     # Copy from one file to another, changing the line separator from
219 |     # unix to windows
220 |     from xphyle.utils import read_lines, write_lines
221 |     write_lines(
222 |         read_lines('linux_file.txt')
223 |         'windows_file.txt',
224 |         linesep='\r')
225 | 
226 |     # Copy from one binary file to another, changing the encoding from
227 |     # ascii to utf-8
228 |     from xphyle.utils import read_bytes, write_bytes
229 |     def ascii2utf8(x):
230 |         if isinstance(x, bytes):
231 |             x = x.decode('ascii')
232 |         return x.encode('utf-8')
233 |     write_bytes(
234 |         read_bytes('ascii_file.txt', convert=ascii2utf8),
235 |         'utf8-file.txt')
236 | 
237 | There's another pair of methods for reading/writing key=value files::
238 | 
239 |     from collections import OrderedDict
240 |     from xphyle.utils import read_dict, write_dict
241 |     cats = OrderedDict((fluffy,'calico'), (droopy,'tabby'), (sneezy,'siamese'))
242 |     write_dict(cats, 'cats.txt.gz')
243 |     # change from '=' to '\t' delimited; preserve the order of the items
244 |     write_dict(
245 |         read_dict(cats, 'cats.txt.gz', ordered=True),
246 |         'cats.tsv', sep='\t')
247 | 
248 | You can also read from delimited files such as csv and tsv::
249 | 
250 |     from xphyle.utils import read_delimited, read_delimited_as_dict
251 | 
252 |     class Dog(object):
253 |         def __init__(self, name, age, breed):
254 |             self.name = name
255 |             self.age = age
256 |             self.breed = breed
257 |         def pet(self): ...
258 |         def say(self, message): ...
259 | 
260 |     for dog in read_delimited(
261 |             'dogs.txt.gz', header=True,
262 |             converters=(str,int,str),
263 |             row_type=Dog):
264 |         dog.pet()
265 | 
266 |     dogs = read_delimited_as_dict(
267 |             'dogs.txt.gz', header=True,
268 |             key='name', converters=(str,int,str),
269 |             row_type=Dog):
270 |     dogs['Barney'].say('Good Boy!')
271 | 
272 | There are convenience methods for compressing and decompressing files::
273 | 
274 |     from xphyle.utils import compress_file, decompress_file, transcode_file
275 | 
276 |     # Gzip compress recipes.txt, and delete the original
277 |     compress_file('recipes.txt', compression='gzip', keep=False)
278 | 
279 |     # decompress a remote compressed file to a local file
280 |     decompress_file('http://recipes.com/allrecipes.txt.gz',
281 |                     'local_recipes.txt')
282 | 
283 |     # Change from gzip to bz2 compression:
284 |     transcode_file('http://recipes.com/allrecipes.txt.gz',
285 |                    'local_recipes.txt.bz2')
286 | 
287 | There is a replacement for ``fileinput``::
288 | 
289 |     from xphyle.utils import fileinput
290 | 
291 |     # By default, read from the files specified as command line arguments,
292 |     # or stdin if there are no command line arguments, and autodetect
293 |     # the compression format
294 |     for line in fileinput():
295 |         print(line)
296 | 
297 |     # Read from multiple files as if they were one
298 |     for line in fileinput(('myfile.txt', 'myotherfile.txt.gz')):
299 |         print(line)
300 | 
301 | There's also a set of classes for writing to multiple files::
302 | 
303 |     from xphyle.utils import fileoutput
304 |     from xphyle.utils import TeeFileOutput, CycleFileOutput, NCycleFileOutput
305 | 
306 |     # write all lines in sourcefile.txt to both file1 and file2.gz
307 |     with fileoutput(
308 |             ('file1', 'file2.gz'),
309 |             file_output_type=TeeFileOutput) as out:
310 |         out.writelines(read_lines('sourcefile.txt'))
311 | 
312 |     # Alternate writing each line in sourcefile.txt to file1 and file2.gz
313 |     with fileoutput(
314 |             ('file1', 'file2.gz'),
315 |             file_output_type=CycleFileOutput) as out:
316 |         out.writelines(read_lines('sourcefile.txt'))
317 | 
318 |     # Alternate writing four lines in sourcefile.txt to file1 and file2.gz
319 |     with fileoutput(
320 |             ('file1', 'file2.gz'),
321 |             file_output_type=NCycleFileOutput, n=4) as out:
322 |         out.writelines(read_lines('sourcefile.txt'))
323 | 
324 |     # Write up to 10,000 lines in each file before opening the next file
325 |     with RollingFileOutput('file{}.gz', n=10000) as out:
326 |         out.writelines(read_lines('sourcefile.txt'))
327 | 
328 | And finally, there's some miscellaneous methods such as linecount::
329 | 
330 |     from xphyle.utils import linecount
331 |     print("There are {} lines in file {}".format(
332 |         linecount(path), path))
333 | 
334 | File paths
335 | ~~~~~~~~~~
336 | 
337 | The ``xphyle.paths`` module provides methods for working with file paths. The `API docs <api/modules.html#module-xphyle.paths>`_ have a full list of methods and more detailed descriptions of each. Here are a few examples::
338 | 
339 |     from xphyle.paths import *
340 | 
341 |     # Get the absolute path, being smart about STDIN/STDOUT/STDERR and
342 |     # home directory shortcuts
343 |     abspath('/foo/bar/baz') # -> /foo/bar/baz
344 |     abspath('foo') # -> /path/to/current/dir/foo
345 |     abspath('~/foo') # -> /home/myname/foo
346 |     abspath(STDIN) # -> STDIN
347 | 
348 |     # Splat a path into its component parts
349 |     dir, name, *extensions = split_path('/home/joe/foo.txt.gz') # ->
350 |         # dir = '/home/joe'
351 |         # name = 'foo'
352 |         # extensions = ['txt', 'gz']
353 | 
354 |     # Check that a path exists, is a file, and allows reading
355 |     # Raises IOError if any of the expectations are violated,
356 |     # otherwise returns the fully resolved path
357 |     path = check_path('file.txt.gz', 'f', 'r')
358 | 
359 |     # Shortcuts to check whether a file is readable/writeable
360 |     path = check_readable_file('file.txt')
361 |     path = check_writeable_file('file.txt')
362 | 
363 |     # There are also 'safe' versions of the methods that return
364 |     # None rather than raise IOError
365 |     path = safe_check_readable_file('nonexistant_file.txt') # path = None
366 | 
367 |     # Find all files in a directory (recursively) that match a
368 |     # regular expression pattern
369 |     find('mydir', 'file.*\.txt\.gz')
370 | 
371 |     # Lookup the path to an executable
372 |     gzip_path = get_executable_path('gzip')
373 | 
374 | `TempDir <api/modules.html#xphyle.paths.TempDir>`_ is a particularly useful class, especially for unit testing. In fact, it us used extensively for unit testing xphyle itself. TempDir can be thought of as a virtual file system. It creates a temporary directory, and it provides methods to create subdirectories and files within that directory. When the ``close()`` method is called, the entire temporary directory is deleted. ``TempDir`` can also be used as a ContextManager::
375 | 
376 |     with TempDir() as temp:
377 |         # create three randomly named files under 'tempdir'
378 |         paths = temp.make_empty_files(3)
379 |         # create directory 'tempdir/foo'
380 |         foo = temp.make_directory('foo')
381 |         # create a randomly named file with the '.gz' suffix
382 |         # within directory 'tempdir/foo'
383 |         gzfile = temp[foo].make_file(suffix='.gz')
384 | 
385 | Another useful set of classes is `FileSpec <api/modules.html#xphyle.paths.FileSpec>`_, `DirSpec <api/modules.html#xphyle.paths.DirSpec>`_, and `PathSpec <api/modules.html#xphyle.paths.PathSpec>`_. These classes help with the common problem of working files that match a specific pattern, especially when you need to then extract some pieces of information from the file names. For example, you may need to find all the files starting with 'foo' within any subdirectory of '/bar', and then performing different operations depending on the extension. You could use a PathSpec for this::
386 | 
387 |     spec = PathSpec(
388 |         DirSpec(PathVar('subdir'), template=os.path.join('/bar', '{subdir}')),
389 |         FileSpec(
390 |             PathVar('name', pattern='foo.*'),
391 |             PathVar('ext'),
392 |             template='{name}.{ext}'))
393 |     files = spec.find(recursive=True)
394 |     for f in files:
395 |         if f['ext'] == 'txt':
396 |             process_text_file(f)
397 |         else:
398 |             process_binary_file(f)
399 | 
400 | A FileSpec or DirSpec has two related fields: a template, which is a python `fstring <https://www.python.org/dev/peps/pep-0498>`_ and is used for constructing filenames from component pieces; and a pattern, which is a regular expression and is used for matching to path strings. The named components of the template correspond to path variables (instances of the `PathVar <api/modules.html#xphyle.paths.PathVar>`_ class). Each PathVar can provide its own pattern, as well as lists of valid or invalid values. If a pattern is not specified during FileSpec/DirSpec creation, the pattern is automatically created by simply substituting the PathVar patterns for the corresponding components in the template string ('.*' by default).
401 | 
402 | Note that a DirSpec is only able to construct/match directory paths, and a FileSpec is only able to construct/match file names. A PathSpec is simply a composite type of a DirSpec and a FileSpec that can be used to construct/match full paths.
403 | 
404 | Each of the *Spec classes has three methods:
405 | 
406 | * construct: Given values for all of the path vars, construct a new path. Note that __call__ is an alias for construct.
407 | * parse: Match a path against the *Spec's pattern. If the path matches, the component's are extracted (through the use of named capture groups), otherwise an exception is raised.
408 | * find: Find all directories/files/paths that match the *Spec's pattern.
409 | 
410 | All of these methods return a PathInst, which is a subclass of pathlib.Path (specifically, a subclass of pathlib.WindowsPath when code is run on Windows, otherwise a PosixPath) that has an additional slot, 'values', that is a dictionary of the component name, value pairs, and overrides a few methods.
411 | 
412 | Extending xphyle
413 | ----------------
414 | 
415 | You can add support for another compression format by extending one of the base classes in :py:mod:`<xphyle.format>`::
416 | 
417 |     import xphyle.formats
418 | 
419 |     class FooFormat(xphyle.formats.SingleExeCompressionFormat):
420 |         """Implementation of CompressionFormat for foo files.
421 |         """
422 |         @property
423 |         def name(self) -> str:
424 |             return 'foo'
425 | 
426 |         @property
427 |         def exts(self) -> Tuple[str, ...]:
428 |             return ('foo',)
429 | 
430 |         @property
431 |         def system_commands(self) -> Tuple[str, ...]:
432 |             return ('foo',)
433 | 
434 |         @property
435 |         def compresslevel_range(self) -> Tuple[int, int]:
436 |             # because of course it goes to 11
437 |             return (1, 11)
438 | 
439 |         @property
440 |         def default_compresslevel(self) -> int:
441 |             return 6
442 | 
443 |         @property
444 |         def magic_bytes(self) -> Tuple[Tuple[int, ...], ...]:
445 |             return ((0x0F, 0x00),)
446 | 
447 |         @property
448 |         def mime_types(self) -> Tuple[str, ...]:
449 |             return ('application/foo',)
450 | 
451 |         # build the system command
452 |         # op = 'c' for compress, 'd' for decompress
453 |         # src = the source file, or STDIN if input should be read from stdin
454 |         # stdout = True if output should be written to stdout
455 |         # compresslevel = the compression level
456 |         def get_command(self, op, src=STDIN, stdout=True, compresslevel=6):
457 |             cmd = [self.executable_path]
458 |             if op == 'c':
459 |                 # adjust the compresslevel to be within the range allowed
460 |                 # by the program
461 |                 compresslevel = self._get_compresslevel(compresslevel)
462 |                 cmd.append('-{}'.format(compresslevel))
463 |                 cmd.append('-z')
464 |             elif op == 'd':
465 |                 cmd.append('-d')
466 |             if stdout:
467 |                 cmd.append('-c')
468 |             if src != STDIN:
469 |                 cmd.append(src)
470 |             return cmd
471 | 
472 |         def open_file_python(self, filename, mode, **kwargs):
473 |             # self.lib is a property that lazily imports and returns the
474 |             # python library named in the ``name`` member above
475 |             return self.lib.open_foo(filename, mode, **kwargs)
476 | 
477 | Then, register your format::
478 | 
479 |     xphyle.formats.register_compression_format(FooFormat)
480 | 
481 | Also, note that you can support custom URL schemes by the standard method of adding `urllib <https://docs.python.org/3/library/urllib.request.html#openerdirector-objects>`_ handlers::
482 | 
483 |     import urllib.request
484 |     urllib.request.OpenerDirector.add_handler(my_handler)
485 | 


--------------------------------------------------------------------------------
/docs/logo.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jdidion/xphyle/6bbb79c3cdf680205f7f4fafcf0e6631999a62f2/docs/logo.pdf


--------------------------------------------------------------------------------
/docs/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jdidion/xphyle/6bbb79c3cdf680205f7f4fafcf0e6631999a62f2/docs/logo.png


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx==7.2.6
2 | pokrok==0.2.0


--------------------------------------------------------------------------------
/paper/codemeta.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "@context": "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld",
 3 |   "@type": "Code",
 4 |   "author": ["John P Didion"],
 5 |   "identifier": "http://dx.doi.org/10.5281/zenodo.569933",
 6 |   "codeRepository": "https://github.com/jdidion/xphyle",
 7 |   "datePublished": "2017-04-29",
 8 |   "dateModified": "2017-04-29",
 9 |   "dateCreated": "2017-04-29",
10 |   "description": "xphyle: extraordinarily simple file handling",
11 |   "keywords": "python, io, file",
12 |   "license": "CC0",
13 |   "title": "xphyle",
14 |   "version": "3.0.1"
15 | }
16 | 


--------------------------------------------------------------------------------
/paper/generate.rb:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/ruby
  2 | 
  3 | # For an OO language, this is distinctly procedural. Should probably fix that.
  4 | require 'json'
  5 | 
  6 | details = Hash.new({})
  7 | 
  8 | capture_params = [
  9 |   { :name => "title", :message => "Enter project name." },
 10 |   { :name => "url", :message => "Enter the URL of the project repository." },
 11 |   { :name => "description", :message => "Enter the (short) project description." },
 12 |   { :name => "license", :message => "Enter the license this software shared under. (hit enter to skip)\nFor example MIT, BSD, GPL v3.0, Apache 2.0" },
 13 |   { :name => "doi", :message => "Enter the DOI of the archived version of this code. (hit enter to skip)\nFor example http://dx.doi.org/10.6084/m9.figshare.828487" },
 14 |   { :name => "keywords", :message => "Enter keywords that should be associated with this project (hit enter to skip)\nComma-separated, for example: turkey, chicken, pot pie" },
 15 |   { :name => "version", :message => "Enter the version of your software (hit enter to skip)\nSEMVER preferred: http://semver.org e.g. v1.0.0" }
 16 | ]
 17 | 
 18 | puts "I'm going to try and help you prepare some things for your JOSS submission"
 19 | puts "If all goes well then we'll have a nice codemeta.json file soon..."
 20 | puts ""
 21 | puts "************************************"
 22 | puts "*    First, some basic details     *"
 23 | puts "************************************"
 24 | puts ""
 25 | 
 26 | # Loop through the desired captures and print out for clarity
 27 | capture_params.each do |param|
 28 |   puts param[:message]
 29 |   print "> "
 30 |   input = gets
 31 | 
 32 |   details[param[:name]] = input.chomp
 33 | 
 34 |   puts ""
 35 |   puts "OK, your project has #{param[:name]}: #{input}"
 36 |   puts ""
 37 | end
 38 | 
 39 | puts ""
 40 | puts "************************************"
 41 | puts "*        Experimental stuff        *"
 42 | puts "************************************"
 43 | puts ""
 44 | 
 45 | puts "Would you like me to try and build a list of authors for you?"
 46 | puts "(You need to be running this script in a git repository for this to work)"
 47 | print "> (Y/N)"
 48 | answer = gets.chomp
 49 | 
 50 | case answer.downcase
 51 | when "y", "yes"
 52 | 
 53 |   # Use git shortlog to extract a list of author names and commit counts.
 54 |   # Note we don't extract emails here as there's often different emails for
 55 |   # each user. Instead we capture emails at the end.
 56 | 
 57 |   git_log = `git shortlog --summary --numbered --no-merges`
 58 | 
 59 |   # ["252\tMichael Jackson", "151\tMC Hammer"]
 60 |   authors_and_counts = git_log.split("\n").map(&:strip)
 61 | 
 62 |   authors_and_counts.each do |author_count|
 63 |     count, author = author_count.split("\t").map(&:strip)
 64 | 
 65 |     puts "Looks like #{author} made #{count} commits"
 66 |     puts "Add them to the output?"
 67 |     print "> (Y/N)"
 68 |     answer = gets.chomp
 69 | 
 70 |     # If a user chooses to add this author to the output then we ask for some
 71 |     # additional information including their email, ORCID and affiliation.
 72 |     case answer.downcase
 73 |     when "y", "yes"
 74 |       puts "What is #{author}'s email address? (hit enter to skip)"
 75 |       print "> "
 76 |       email = gets.chomp
 77 | 
 78 |       puts "What is #{author}'s ORCID? (hit enter to skip)"
 79 |       puts "For example: http://orcid.org/0000-0000-0000-0000"
 80 |       print "> "
 81 |       orcid = gets.chomp
 82 | 
 83 |       puts "What is #{author}'s affiliation? (hit enter to skip)"
 84 |       print "> "
 85 |       affiliation = gets.chomp
 86 | 
 87 | 
 88 |       details['authors'].merge!(author => { 'commits' => count,
 89 |                                             'email' => email,
 90 |                                             'orcid' => orcid,
 91 |                                             'affiliation' => affiliation })
 92 | 
 93 |     when "n", "no"
 94 |       puts "OK boss..."
 95 |       puts ""
 96 |     end
 97 |   end
 98 | when "n", "no"
 99 |   puts "OK boss..."
100 |   puts ""
101 | end
102 | 
103 | puts "Reticulating splines"
104 | 
105 | 5.times do
106 |   print "."
107 |   sleep 0.5
108 | end
109 | 
110 | puts ""
111 | puts "Generating some JSON goodness..."
112 | 
113 | # TODO: work out how to use some kind of JSON template here.
114 | # Build the output list of authors from the inputs we've collected.
115 | output_authors = []
116 | 
117 | details['authors'].each do |author_name, values|
118 |   entry = {
119 |     "@id" => values['orcid'],
120 |     "@type" => "Person",
121 |     "email" => values['email'],
122 |     "name" => author_name,
123 |     "affiliation" => values['affiliation']
124 |   }
125 |   output_authors << entry
126 | end
127 | 
128 | # TODO: this is currently a static template (written out here). It would be good
129 | # to do something smarter here.
130 | output = {
131 |   "@context" => "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld",
132 |   "@type" => "Code",
133 |   "author" => output_authors,
134 |   "identifier" => details['doi'],
135 |   "codeRepository" => details['url'],
136 |   "datePublished" => Time.now.strftime("%Y-%m-%d"),
137 |   "dateModified" => Time.now.strftime("%Y-%m-%d"),
138 |   "dateCreated" => Time.now.strftime("%Y-%m-%d"),
139 |   "description" => details['description'],
140 |   "keywords" => details['keywords'],
141 |   "license" => details['license'],
142 |   "title" => details['title'],
143 |   "version" => details['version']
144 | }
145 | 
146 | File.open('codemeta.json', 'w') {|f| f.write(JSON.pretty_generate(output)) }
147 | 


--------------------------------------------------------------------------------
/paper/paper.bib:
--------------------------------------------------------------------------------
1 | @online{xphyle,
2 |   author = {John P Didion},
3 |   title = {xphyle: extraordinarily simple file handling},
4 |   year = 2017,
5 |   url = {https://github.com/jdidion/xphyle},
6 |   urldate = {2017-04-29}
7 | }
8 | 


--------------------------------------------------------------------------------
/paper/paper.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: 'xphyle: Extraordinarily simple file handling'
 3 | tags:
 4 |   - io
 5 |   - files
 6 |   - python
 7 | authors:
 8 |  - name: John P Didion
 9 |    orcid: 0000-0002-8111-6261
10 |    affiliation: 1
11 | affiliations:
12 |  - name: National Human Genome Research Institute, NIH, Bethesda, MD, USA
13 |    index: 1
14 | date: 29 April 2017
15 | bibliography: paper.bib
16 | ---
17 | 
18 | # Summary
19 | 
20 | Data compression is commonly used to reduce the storage requirements for large datasets. It is often necessary for software that operates on big data to support several commonly used compression algorithms, including gzip, bzip2, and lzma. Handling these and other types of data sources, such as URLs and in-memory buffers, requires special consideration by software developers. We created xphyle [@xphyle], a small python (3.3+) library, to provide transparent access to files regardless of their source or compression type. Most importantly, xphyle uses the appropriate program (e.g. 'gzip') to compress/decompress a file if the program is available on the host system, which is generally faster than using the corresponding python library. xphyle also provides methods that simplify common file I/O operations.
21 | 
22 | # References
23 | 


--------------------------------------------------------------------------------
/readthedocs.yml:
--------------------------------------------------------------------------------
1 | build:
2 |     image: latest
3 | 
4 | python:
5 |     version: 3.6


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pokrok==0.2.0


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 | 
4 | [options]
5 | setup_requires =
6 |   setuptools_scm==8.0.4
7 | 
8 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import codecs
 2 | import os
 3 | from setuptools import setup
 4 | import sys
 5 | 
 6 | 
 7 | version_info = sys.version_info
 8 | if version_info < (3, 6):
 9 |     sys.stdout.write(
10 |         "xphyle 4+ requires python3.6. Use xphyle 3 with python 3.4 or 3.5.\n"
11 |     )
12 |     sys.exit(1)
13 | 
14 | 
15 | setup(
16 |     name="xphyle",
17 |     use_scm_version=True,
18 |     description="Utilities for working with files.",
19 |     long_description_content_type="text/markdown",
20 |     long_description=codecs.open(
21 |         os.path.join(os.path.dirname(os.path.realpath(__file__)), "README.md"),
22 |         "rb",
23 |         "utf-8",
24 |     ).read(),
25 |     url="https://github.com/jdidion/xphyle",
26 |     author="John Didion",
27 |     author_email="github@didion.net",
28 |     license="MIT",
29 |     packages=["xphyle"],
30 |     setup_requires=["setuptools_scm"],
31 |     install_requires=["pokrok"],
32 |     extras_require={"performance": ["lorem"], "zstd": ["zstandard"]},
33 |     tests_require=["pytest", "pytest-cov"],
34 |     classifiers=[
35 |         "Development Status :: 5 - Production/Stable",
36 |         "Intended Audience :: Developers",
37 |         "Topic :: Software Development :: Libraries :: Python Modules",
38 |         "License :: OSI Approved :: MIT License",
39 |         "License :: Public Domain",
40 |         "Programming Language :: Python :: 3 :: Only",
41 |         "Programming Language :: Python :: 3.6",
42 |         "Programming Language :: Python :: 3.7",
43 |         "Programming Language :: Python :: 3.8",
44 |     ],
45 | )
46 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
 1 | from contextlib import contextmanager
 2 | from io import BytesIO, TextIOWrapper, BufferedIOBase
 3 | import random
 4 | from typing import cast
 5 | from unittest.mock import patch
 6 | import urllib.request
 7 | 
 8 | 
 9 | # Note: the casts of StringIO/BytesIO to BufferedIOBase are only necessary because of
10 | # pycharm bug PY-28155
11 | 
12 | 
13 | def random_text(n=1024):
14 |     return ''.join(chr(random.randint(32, 126)) for _ in range(n))
15 | 
16 | 
17 | class MockStdout(object):
18 |     def __init__(self, name, as_bytes):
19 |         self.bytes_io = BytesIO()
20 |         object.__setattr__(self.bytes_io, 'name', name)
21 |         self.wrapper = TextIOWrapper(cast(BufferedIOBase, self.bytes_io))
22 |         self.wrapper.mode = 'w'
23 |         self.as_bytes = as_bytes
24 | 
25 |     def getvalue(self):
26 |         self.wrapper.flush()
27 |         val = self.bytes_io.getvalue()
28 |         if not self.as_bytes:
29 |             val = val.decode()
30 |         return val
31 | 
32 | 
33 | @contextmanager
34 | def intercept_stdout(as_bytes=False):
35 |     i = MockStdout('<stdout>', as_bytes)
36 |     with patch('sys.stdout', i.wrapper):
37 |         yield i
38 | 
39 | 
40 | @contextmanager
41 | def intercept_stderr(as_bytes=False):
42 |     i = MockStdout('<stderr>', as_bytes)
43 |     with patch('sys.stderr', i.wrapper):
44 |         yield i
45 | 
46 | 
47 | @contextmanager
48 | def intercept_stdin(content, is_bytes=False):
49 |     if not is_bytes:
50 |         content = content.encode()
51 |     i = BytesIO()
52 |     object.__setattr__(i, 'name', '<stdin>')
53 |     i.write(content)
54 |     if not (is_bytes or content.endswith(b'\n')):
55 |         i.write(b'\n')
56 |     i.seek(0)
57 |     i = TextIOWrapper(cast(BufferedIOBase, i))
58 |     i.mode = 'r'
59 |     with patch('sys.stdin', i):
60 |         yield
61 | 
62 | 
63 | def no_internet():
64 |     """Test whether there's no internet connection available.
65 |     """
66 |     try:
67 |         urllib.request.urlopen("https://github.com").info()
68 |         return False
69 |     except:
70 |         return True
71 | 


--------------------------------------------------------------------------------
/tests/foo.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jdidion/xphyle/6bbb79c3cdf680205f7f4fafcf0e6631999a62f2/tests/foo.gz


--------------------------------------------------------------------------------
/tests/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | markers =
3 |     perf: marks performance tests
4 | 


--------------------------------------------------------------------------------
/tests/test_formats.py:
--------------------------------------------------------------------------------
  1 | from unittest import TestCase, skipIf
  2 | import gzip
  3 | import string
  4 | import sys
  5 | from xphyle.formats import *
  6 | from xphyle.paths import TempDir, EXECUTABLE_CACHE
  7 | from . import *
  8 | 
  9 | 
 10 | def get_format(ext):
 11 |     return FORMATS.get_compression_format(FORMATS.guess_compression_format(ext))
 12 | 
 13 | 
 14 | def write_file(fmt, path, use_system, content, mode="wt"):
 15 |     with fmt.open_file(path, mode=mode, use_system=use_system) as f:
 16 |         f.write(content)
 17 | 
 18 | 
 19 | def read_file(fmt, path, use_system, mode="rt"):
 20 |     with fmt.open_file(path, mode=mode, use_system=use_system) as f:
 21 |         return f.read()
 22 | 
 23 | 
 24 | def create_truncated_file(path, fmt):
 25 |     # Random text
 26 |     text = "".join(random.choice(string.ascii_lowercase) for _ in range(200))
 27 |     f = fmt.open_file(path, "w")
 28 |     f.write(text)
 29 |     f.close()
 30 |     f = open(path, "a")
 31 |     f.truncate(os.stat(path).st_size - 10)
 32 |     f.close()
 33 | 
 34 | 
 35 | gz_path = get_format("gz").executable_path
 36 | # TODO: enable executable to be injected so we can test all variants
 37 | no_pigz = gz_path is None or get_format("gz").executable_name != "pigz"
 38 | no_igzip = gz_path is None or get_format("gz").executable_name != "igzip"
 39 | bgz_compress_path = get_format("bgz").compress_path
 40 | bgz_decompress_path = get_format("bgz").decompress_path
 41 | bz_path = get_format("bz2").executable_path
 42 | no_pbzip2 = bz_path is None or get_format("bz2").executable_name != "pbzip2"
 43 | xz_path = get_format("xz").executable_path
 44 | zstd_path = get_format("zstd").executable_path
 45 | 
 46 | 
 47 | class ThreadsTests(TestCase):
 48 |     def test_threads(self):
 49 |         threads = ThreadsVar(default_value=2)
 50 |         threads.update(None)
 51 |         assert 2 == threads.threads
 52 |         threads.update(False)
 53 |         assert 1 == threads.threads
 54 |         threads.update(0)
 55 |         assert 1 == threads.threads
 56 |         import multiprocessing
 57 | 
 58 |         threads.update(True)
 59 |         assert multiprocessing.cpu_count() == threads.threads
 60 |         threads.update(4)
 61 |         assert 4 == threads.threads
 62 | 
 63 | 
 64 | class CompressionTests(TestCase):
 65 |     def tearDown(self):
 66 |         EXECUTABLE_CACHE.cache = {}
 67 |         THREADS.update(1)
 68 | 
 69 |     def test_list_formats(self):
 70 |         self.assertSetEqual(
 71 |             {"gzip", "bgzip", "bz2", "lzma", "zstd", "brotli"},
 72 |             set(FORMATS.list_compression_formats()),
 73 |         )
 74 |         self.assertSetEqual(
 75 |             {"gzip", "gz", "pigz", "igzip"}, set(get_format("gzip").aliases)
 76 |         )
 77 | 
 78 |     def test_list_extensions(self):
 79 |         self.assertSetEqual(
 80 |             {
 81 |                 ".gz",
 82 |                 ".bgz",
 83 |                 ".bz2",
 84 |                 ".bzip",
 85 |                 ".bzip2",
 86 |                 ".xz",
 87 |                 ".lzma",
 88 |                 ".7z",
 89 |                 ".7zip",
 90 |                 ".zst",
 91 |                 ".br"
 92 |             },
 93 |             set(FORMATS.list_extensions(True)),
 94 |         )
 95 | 
 96 |     def test_guess_format(self):
 97 |         assert "gzip" == FORMATS.guess_compression_format("gz")
 98 |         assert "gzip" == FORMATS.guess_compression_format(".gz")
 99 |         assert "gzip" == FORMATS.guess_compression_format("foo.gz")
100 | 
101 |     def test_invalid_format(self):
102 |         self.assertIsNone(FORMATS.guess_compression_format("foo"))
103 |         with self.assertRaises(ValueError):
104 |             FORMATS.get_compression_format("foo")
105 | 
106 |     def test_get_format_from_mime_type(self):
107 |         self.assertEqual("gzip", FORMATS.get_format_for_mime_type("application/gz"))
108 |         self.assertEqual("bz2", FORMATS.get_format_for_mime_type("application/bz2"))
109 |         self.assertEqual("lzma", FORMATS.get_format_for_mime_type("application/lzma"))
110 | 
111 |     # TODO: need a way to force selection of a specific executable to properly
112 |     # test all possible scenarios
113 | 
114 |     def _test_format(self, fmt):
115 |         assert fmt.default_compresslevel == fmt._get_compresslevel(None)
116 |         assert fmt.compresslevel_range[0] == fmt._get_compresslevel(-1)
117 |         assert fmt.compresslevel_range[1] == fmt._get_compresslevel(100)
118 | 
119 |     @skipIf(gz_path is None, "'gzip' not available")
120 |     def test_gzip(self):
121 |         gz = get_format("gz")
122 |         self._test_format(gz)
123 |         assert gz.default_ext == "gz"
124 |         self.assertEqual(
125 |             gz.get_command("c", compresslevel=5), [str(gz_path), "-5", "-c"]
126 |         )
127 |         self.assertEqual(
128 |             gz.get_command("c", "foo.bar", compresslevel=5),
129 |             [str(gz_path), "-5", "-c", "foo.bar"],
130 |         )
131 |         self.assertEqual(gz.get_command("d"), [str(gz_path), "-d", "-c"])
132 |         self.assertEqual(
133 |             gz.get_command("d", "foo.gz"), [str(gz_path), "-d", "-c", "foo.gz"]
134 |         )
135 | 
136 |     @skipIf(no_pigz, "'pigz' not available")
137 |     def test_pigz(self):
138 |         THREADS.update(2)
139 |         gz = get_format("gz")
140 |         assert gz.default_ext == "gz"
141 |         self.assertEqual(
142 |             gz.get_command("c", compresslevel=5), [str(gz_path), "-5", "-c", "-p", "2"]
143 |         )
144 |         self.assertEqual(
145 |             gz.get_command("c", "foo.bar", compresslevel=5),
146 |             [str(gz_path), "-5", "-c", "-p", "2", "foo.bar"],
147 |         )
148 |         self.assertEqual(gz.get_command("d"), [str(gz_path), "-d", "-c"])
149 |         self.assertEqual(
150 |             gz.get_command("d", "foo.gz"), [str(gz_path), "-d", "-c", "foo.gz"]
151 |         )
152 | 
153 |     @skipIf(no_igzip, "'igzip' not available")
154 |     def test_igzip(self):
155 |         THREADS.update(2)
156 |         gz = get_format("gz")
157 |         assert gz.default_ext == "gz"
158 |         self.assertEqual(
159 |             gz.get_command("c", compresslevel=2), [str(gz_path), "-2", "-c", "-T", "2"]
160 |         )
161 |         self.assertEqual(
162 |             gz.get_command("c", "foo.bar", compresslevel=2),
163 |             [str(gz_path), "-2", "-c", "-T", "2", "foo.bar"],
164 |         )
165 |         self.assertEqual(gz.get_command("d"), [str(gz_path), "-d", "-c"])
166 |         self.assertEqual(
167 |             gz.get_command("d", "foo.gz"), [str(gz_path), "-d", "-c", "foo.gz"]
168 |         )
169 | 
170 |     @skipIf(bgz_compress_path is None, "'bgzip' not available")
171 |     def test_bgzip_compress(self):
172 |         THREADS.update(2)
173 |         bgz = get_format("bgz")
174 |         assert bgz.default_ext == "bgz"
175 |         self.assertEqual(
176 |             bgz.get_command("c"), [str(bgz_compress_path), "-l", "4", "-c", "-@", "2"]
177 |         )
178 |         self.assertEqual(
179 |             bgz.get_command("c", "foo.bar", compresslevel=5),
180 |             [str(bgz_compress_path), "-l", "5", "-c", "-@", "2", "foo.bar"],
181 |         )
182 | 
183 |     @skipIf(bgz_decompress_path is None, "'gzip/pigz' not available")
184 |     def test_bgzip_decompress(self):
185 |         THREADS.update(2)
186 |         bgz = get_format("bgz")
187 |         if bgz.decompress_name == "pigz":
188 |             self.assertEqual(
189 |                 bgz.get_command("d"), [str(bgz_decompress_path), "-d", "-c"]
190 |             )
191 |             self.assertEqual(
192 |                 bgz.get_command("d", PurePath("foo.gz")),
193 |                 [str(bgz_decompress_path), "-d", "-c", "foo.gz"],
194 |             )
195 |             self.assertEqual(
196 |                 bgz.get_command("d", PurePath("foo.bar")),
197 |                 [str(bgz_decompress_path), "-d", "-c", "-S", ".bar", "foo.bar"],
198 |             )
199 |         else:
200 |             self.assertEqual(
201 |                 bgz.get_command("d"), [str(bgz_decompress_path), "-d", "-c"]
202 |             )
203 |             self.assertEqual(
204 |                 bgz.get_command("d", PurePath("foo.gz")),
205 |                 [str(bgz_decompress_path), "-d", "-c", "foo.gz"],
206 |             )
207 |             self.assertEqual(
208 |                 bgz.get_command("d", PurePath("foo.bar")),
209 |                 [str(bgz_decompress_path), "-d", "-c", "-S", ".bar", "foo.bar"],
210 |             )
211 | 
212 |     @skipIf(bz_path is None, "'bzip2' not available")
213 |     def test_bzip2(self):
214 |         bz = get_format("bz2")
215 |         self._test_format(bz)
216 |         assert bz.default_ext == "bz2"
217 |         self.assertEqual(
218 |             bz.get_command("c", compresslevel=5), [str(bz_path), "-5", "-z", "-c"]
219 |         )
220 |         self.assertEqual(
221 |             bz.get_command("c", "foo.bar", compresslevel=5),
222 |             [str(bz_path), "-5", "-z", "-c", "foo.bar"],
223 |         )
224 |         self.assertEqual(bz.get_command("d"), [str(bz_path), "-d", "-c"])
225 |         self.assertEqual(
226 |             bz.get_command("d", "foo.bz2"), [str(bz_path), "-d", "-c", "foo.bz2"]
227 |         )
228 | 
229 |     @skipIf(no_pbzip2, "'pbzip2' not available")
230 |     def test_pbzip2(self):
231 |         THREADS.update(2)
232 |         bz = get_format("bz2")
233 |         assert bz.default_ext == "bz2"
234 |         self.assertEqual(
235 |             bz.get_command("c", compresslevel=5),
236 |             [str(bz_path), "-5", "-z", "-c", "-p2"],
237 |         )
238 |         self.assertEqual(
239 |             bz.get_command("c", "foo.bar", compresslevel=5),
240 |             [str(bz_path), "-5", "-z", "-c", "-p2", "foo.bar"],
241 |         )
242 |         self.assertEqual(bz.get_command("d"), [str(bz_path), "-d", "-c", "-p2"])
243 |         self.assertEqual(
244 |             bz.get_command("d", "foo.bz2"), [str(bz_path), "-d", "-c", "-p2", "foo.bz2"]
245 |         )
246 | 
247 |     @skipIf(xz_path is None, "'xz' not available")
248 |     def test_lzma(self):
249 |         xz = get_format("xz")
250 |         self._test_format(xz)
251 |         assert xz.default_ext == "xz"
252 |         self.assertEqual(
253 |             xz.get_command("c", compresslevel=5), [str(xz_path), "-5", "-z", "-c"]
254 |         )
255 |         self.assertEqual(
256 |             xz.get_command("c", "foo.bar", compresslevel=5),
257 |             [str(xz_path), "-5", "-z", "-c", "foo.bar"],
258 |         )
259 |         self.assertEqual(xz.get_command("d"), [str(xz_path), "-d", "-c"])
260 |         self.assertEqual(
261 |             xz.get_command("d", "foo.xz"), [str(xz_path), "-d", "-c", "foo.xz"]
262 |         )
263 |         # Test with threads
264 |         THREADS.update(2)
265 |         self.assertEqual(
266 |             xz.get_command("c", compresslevel=5),
267 |             [str(xz_path), "-5", "-z", "-c", "-T", "2"],
268 |         )
269 |         self.assertEqual(
270 |             xz.get_command("c", "foo.bar", compresslevel=5),
271 |             [str(xz_path), "-5", "-z", "-c", "-T", "2", "foo.bar"],
272 |         )
273 |         self.assertEqual(xz.get_command("d"), [str(xz_path), "-d", "-c", "-T", "2"])
274 | 
275 |     @skipIf(zstd_path is None, "'zstd' not available")
276 |     def test_zstd(self):
277 |         zstd = get_format("zstd")
278 |         self._test_format(zstd)
279 |         assert zstd.default_ext == "zst"
280 |         self.assertEqual(
281 |             zstd.get_command("c", compresslevel=5),
282 |             [str(zstd_path), "-5", "-c", "--single-thread"],
283 |         )
284 |         self.assertEqual(
285 |             zstd.get_command("c", "foo.bar", compresslevel=5),
286 |             [str(zstd_path), "-5", "-c", "--single-thread", "foo.bar"],
287 |         )
288 |         self.assertEqual(
289 |             zstd.get_command("d"), [str(zstd_path), "-d", "-c", "--single-thread"]
290 |         )
291 |         self.assertEqual(
292 |             zstd.get_command("d", "foo.xz"),
293 |             [str(zstd_path), "-d", "-c", "--single-thread", "foo.xz"],
294 |         )
295 |         # Test with threads
296 |         THREADS.update(3)
297 |         self.assertEqual(
298 |             zstd.get_command("c", compresslevel=5), [str(zstd_path), "-5", "-c", "-T2"]
299 |         )
300 |         self.assertEqual(
301 |             zstd.get_command("c", "foo.bar", compresslevel=5),
302 |             [str(zstd_path), "-5", "-c", "-T2", "foo.bar"],
303 |         )
304 |         self.assertEqual(zstd.get_command("d"), [str(zstd_path), "-d", "-c", "-T2"])
305 | 
306 | 
307 | class FileTests(TestCase):
308 |     def setUp(self):
309 |         self.root = TempDir()
310 | 
311 |     def tearDown(self):
312 |         self.root.close()
313 | 
314 |     def test_invalid(self):
315 |         with self.assertRaises(ValueError):
316 |             get_format("gz").open_file(Path("foo"), "n")
317 | 
318 |     def write_read_file(self, ext, use_system, mode="t", content=None):
319 |         if content is None:
320 |             content = random_text()  # generate 1 kb of random text
321 |             if mode == "b":
322 |                 content = b"".join(c.encode() for c in content)
323 |         path = self.root.make_file(suffix=ext)
324 |         fmt = get_format(ext)
325 |         write_file(fmt, path, use_system, content, "w" + mode)
326 |         in_text = read_file(fmt, path, use_system, "r" + mode)
327 |         assert content == in_text
328 | 
329 |     def test_write_read_bytes_python(self):
330 |         for fmt in (".gz", ".bz2", ".xz"):
331 |             with self.subTest(fmt=fmt):
332 |                 self.write_read_file(fmt, False, "b")
333 | 
334 |     def test_write_read_text_python(self):
335 |         for fmt in (".gz", ".bz2", ".xz"):
336 |             with self.subTest(fmt=fmt):
337 |                 self.write_read_file(fmt, False, "t")
338 | 
339 |     # These tests will be skipped if the required system-level executables
340 |     # are not available
341 | 
342 |     @skipIf(gz_path is None, "'gzip' not available")
343 |     def test_system_gzip(self):
344 |         self.write_read_file(".gz", True)
345 | 
346 |     @skipIf(gz_path is None, "'gzip' not available")
347 |     def test_iter_system(self):
348 |         path = self.root.make_file(suffix=".gz")
349 |         text = "line1\nline2\nline3"
350 |         fmt = get_format(".gz")
351 |         # Have to open in bytes mode, or it will get wrapped in a
352 |         # TextBuffer, which does not use the underlying __iter__
353 |         with fmt.open_file(path, mode="wb", ext=".gz", use_system=True) as f:
354 |             f.write(text.encode())
355 |         with fmt.open_file(path, mode="rb", ext=".gz", use_system=True) as f:
356 |             lines = list(line.rstrip().decode() for line in iter(f))
357 |         self.assertListEqual(lines, ["line1", "line2", "line3"])
358 | 
359 |     @skipIf(bz_path is None, "'bzip2' not available")
360 |     def test_system_bzip(self):
361 |         self.write_read_file(".bz2", True)
362 | 
363 |     @skipIf(xz_path is None, "'xz' not available")
364 |     def test_system_lzma(self):
365 |         self.write_read_file(".xz", True)
366 | 
367 |     @skipIf(zstd_path is None, "'zstd' not available")
368 |     def test_system_zstd(self):
369 |         self.write_read_file(".zst", True)
370 | 
371 |     def test_compress_path(self):
372 |         b = (True, False) if gz_path else (False,)
373 |         for use_system in b:
374 |             with self.subTest(use_system=use_system):
375 |                 path = self.root.make_file()
376 |                 with open(path, "wt") as o:
377 |                     o.write("foo")
378 |                 fmt = get_format(".gz")
379 |                 dest = fmt.compress_file(path, use_system=use_system)
380 |                 gzfile = Path(str(path) + ".gz")
381 |                 assert dest == gzfile
382 |                 self.assertTrue(os.path.exists(path))
383 |                 self.assertTrue(os.path.exists(gzfile))
384 |                 with gzip.open(gzfile, "rt") as i:
385 |                     assert i.read() == "foo"
386 | 
387 |                 path = self.root.make_file()
388 |                 with open(path, "wt") as o:
389 |                     o.write("foo")
390 |                 gzfile = Path(str(path) + ".bar")
391 |                 fmt = get_format(".gz")
392 |                 dest = fmt.compress_file(
393 |                     path, gzfile, keep=False, use_system=use_system
394 |                 )
395 |                 assert dest == gzfile
396 |                 self.assertFalse(os.path.exists(path))
397 |                 self.assertTrue(os.path.exists(gzfile))
398 |                 with gzip.open(gzfile, "rt") as i:
399 |                     assert i.read() == "foo"
400 | 
401 |     def test_compress_file(self):
402 |         b = (True, False) if gz_path else (False,)
403 |         for use_system in b:
404 |             with self.subTest(use_system=use_system):
405 |                 path = self.root.make_file()
406 |                 with open(path, "wt") as o:
407 |                     o.write("foo")
408 |                 with open(path, "rb") as i:
409 |                     fmt = get_format(".gz")
410 |                     dest = fmt.compress_file(i, use_system=use_system)
411 |                 gzfile = Path(str(path) + ".gz")
412 |                 assert dest == gzfile
413 |                 self.assertTrue(os.path.exists(gzfile))
414 |                 with gzip.open(gzfile, "rt") as i:
415 |                     assert i.read() == "foo"
416 | 
417 |                 path = self.root.make_file()
418 |                 with open(path, "wt") as o:
419 |                     o.write("foo")
420 |                 gzfile = Path(str(path) + ".bar")
421 |                 with open(path, "rb") as i:
422 |                     fmt = get_format(".gz")
423 |                     dest = fmt.compress_file(
424 |                         i, gzfile, keep=False, use_system=use_system
425 |                     )
426 |                 assert dest == gzfile
427 |                 self.assertFalse(os.path.exists(path))
428 |                 self.assertTrue(os.path.exists(gzfile))
429 |                 with gzip.open(gzfile, "rt") as i:
430 |                     assert i.read() == "foo"
431 | 
432 |     def test_decompress_path_error(self):
433 |         path = self.root.make_file()
434 |         with gzip.open(path, "wt") as o:
435 |             o.write("foo")
436 |         with self.assertRaises(Exception):
437 |             fmt = get_format(".gz")
438 |             fmt.decompress_file(path)
439 | 
440 |     def test_decompress_path(self):
441 |         b = (True, False) if gz_path else (False,)
442 |         for use_system in b:
443 |             with self.subTest(use_system=use_system):
444 |                 path = self.root.make_file()
445 |                 gzfile = Path(str(path) + ".gz")
446 |                 with gzip.open(gzfile, "wt") as o:
447 |                     o.write("foo")
448 |                 fmt = get_format(".gz")
449 |                 dest = fmt.decompress_file(gzfile, use_system=use_system)
450 |                 assert dest == path
451 |                 self.assertTrue(os.path.exists(path))
452 |                 self.assertTrue(os.path.exists(gzfile))
453 |                 with open(path, "rt") as i:
454 |                     assert i.read() == "foo"
455 | 
456 |                 path = self.root.make_file()
457 |                 gzfile = Path(str(path) + ".gz")
458 |                 with gzip.open(gzfile, "wt") as o:
459 |                     o.write("foo")
460 |                 fmt = get_format(".gz")
461 |                 dest = fmt.decompress_file(
462 |                     gzfile, path, keep=False, use_system=use_system
463 |                 )
464 |                 assert dest == path
465 |                 self.assertTrue(os.path.exists(path))
466 |                 self.assertFalse(os.path.exists(gzfile))
467 |                 with open(path, "rt") as i:
468 |                     assert i.read() == "foo"
469 | 
470 |     def test_decompress_file(self):
471 |         b = (True, False) if gz_path else (False,)
472 |         for use_system in b:
473 |             with self.subTest(use_system=use_system):
474 |                 path = self.root.make_file()
475 |                 gzfile = Path(str(path) + ".gz")
476 |                 with gzip.open(gzfile, "wt") as o:
477 |                     o.write("foo")
478 |                 with open(gzfile, "rb") as i:
479 |                     fmt = get_format(".gz")
480 |                     dest = fmt.decompress_file(i, use_system=use_system)
481 |                 assert Path(dest) == path
482 |                 self.assertTrue(os.path.exists(path))
483 |                 self.assertTrue(os.path.exists(gzfile))
484 |                 with open(path, "rt") as i:
485 |                     assert i.read() == "foo"
486 | 
487 |                 with gzip.open(gzfile, "wt") as o:
488 |                     o.write("foo")
489 |                 dest = self.root.make_file()
490 |                 with open(gzfile, "rb") as i, open(dest, "wb") as o:
491 |                     fmt = get_format(".gz")
492 |                     fmt.decompress_file(source=i, dest=o, use_system=use_system)
493 |                 self.assertTrue(os.path.exists(dest))
494 |                 self.assertTrue(os.path.exists(gzfile))
495 |                 with open(dest, "rt") as i:
496 |                     assert i.read() == "foo"
497 | 
498 |                 path = self.root.make_file()
499 |                 gzfile = Path(str(path) + ".bar")
500 |                 with gzip.open(gzfile, "wt") as o:
501 |                     o.write("foo")
502 |                 with open(gzfile, "rb") as i:
503 |                     fmt = get_format(".gz")
504 |                     dest = fmt.decompress_file(
505 |                         i, path, keep=False, use_system=use_system
506 |                     )
507 |                 assert dest == path
508 |                 self.assertFalse(os.path.exists(gzfile))
509 |                 self.assertTrue(os.path.exists(path))
510 |                 with open(path, "rt") as i:
511 |                     assert i.read() == "foo"
512 | 
513 |     # Disable this test in python 3.3
514 |     @skipIf(sys.version_info[:2] <= (3, 3), "Incompatible test")
515 |     def test_truncated_gz(self):
516 |         fmt = get_format(".gz")
517 |         for use_system in (True, False):
518 |             with self.subTest(use_system=use_system):
519 |                 path = self.root.make_path()
520 |                 gzfile = Path(str(path) + ".gz")
521 |                 create_truncated_file(gzfile, fmt)
522 |                 with self.assertRaises(IOError):
523 |                     fmt.decompress_file(gzfile, use_system=use_system)
524 | 
525 | 
526 | class StringTests(TestCase):
527 |     def test_compress(self):
528 |         for ext in (".gz", ".bz2", ".xz"):
529 |             with self.subTest(ext=ext):
530 |                 fmt = get_format(ext)
531 |                 _bytes = random_text().encode()
532 |                 compressed = fmt.compress(_bytes)
533 |                 decompressed = fmt.decompress(compressed)
534 |                 assert _bytes == decompressed
535 | 
536 |     def test_compress_string(self):
537 |         for ext in (".gz", ".bz2", ".xz"):
538 |             with self.subTest(ext=ext):
539 |                 fmt = get_format(ext)
540 |                 text = random_text()
541 |                 compressed = fmt.compress_string(text)
542 |                 decompressed = fmt.decompress_string(compressed)
543 |                 assert text == decompressed
544 | 
545 |     def test_compress_iterable(self):
546 |         for ext in (".gz", ".bz2", ".xz"):
547 |             with self.subTest(ext=ext):
548 |                 fmt = get_format(ext)
549 |                 strings = ["line1", "line2", "line3"]
550 |                 compressed = fmt.compress_iterable(strings, delimiter=b"|")
551 |                 decompressed = fmt.decompress_string(compressed)
552 |                 self.assertListEqual(strings, decompressed.split("|"))
553 | 
554 | 
555 | class UncompressedSizeTests(TestCase):
556 |     @skipIf(gz_path is None, "'gzip' not available")
557 |     def test_get_uncompressed_size(self):
558 |         for ext in (".gz", ".xz"):
559 |             with self.subTest(ext=ext):
560 |                 with TempDir() as temp:
561 |                     raw = temp.make_file(contents=random_text(1000))
562 |                     compressed = temp.make_file(suffix=ext)
563 |                     fmt = get_format(ext)
564 |                     fmt.compress_file(raw, compressed)
565 |                     assert 1000 == fmt.uncompressed_size(compressed)
566 | 


--------------------------------------------------------------------------------
/tests/test_paths.py:
--------------------------------------------------------------------------------
  1 | from unittest import TestCase
  2 | import subprocess
  3 | from xphyle.paths import *
  4 | 
  5 | 
  6 | class TempDirTests(TestCase):
  7 |     def test_descriptor(self):
  8 |         with self.assertRaises(ValueError):
  9 |             TempPathDescriptor(path_type='d', contents='foo')
 10 |         with self.assertRaises(IOError):
 11 |             _ = TempPathDescriptor().absolute_path
 12 |         with TempDir(permissions='rwx') as temp:
 13 |             f = temp.make_file(name='foo', permissions=None)
 14 |             f.unlink()
 15 |             assert temp[f].set_permissions('r') is None
 16 |         with TempDir(permissions='rwx') as temp:
 17 |             f = temp.make_file(name='foo', permissions=None)
 18 |             assert Path('foo') in temp
 19 |             assert temp[f].exists
 20 |             assert Path('foo') == temp[f].relative_path
 21 |             assert temp.absolute_path / 'foo' == temp[f].absolute_path
 22 |             assert PermissionSet('rwx') == temp[f].permissions
 23 |             assert PermissionSet('r') == temp[f].set_permissions('r')
 24 |             with self.assertRaises(PermissionError):
 25 |                 open(f, 'w')
 26 |         with TempDir(permissions='rwx') as temp:
 27 |             desc = TempPathDescriptor(
 28 |                 name='foo', path_type='f', parent=temp)
 29 |             assert Path('foo') == desc.relative_path
 30 |             assert temp.absolute_path / 'foo' == desc.absolute_path
 31 | 
 32 |     def test_context_manager(self):
 33 |         with TempDir() as temp:
 34 |             with open(temp.make_file(name='foo'), 'wt') as o:
 35 |                 o.write('foo')
 36 |         assert not temp.absolute_path.exists()
 37 | 
 38 |     def test_dir(self):
 39 |         temp = TempDir()
 40 |         foo = temp.make_directory(name='foo')
 41 |         assert foo == temp.absolute_path / 'foo'
 42 |         bar = temp.make_directory(name='bar', parent=foo)
 43 |         assert bar == temp.absolute_path / 'foo' / 'bar'
 44 |         assert (temp.absolute_path / 'foo' / 'bar').exists()
 45 |         temp.close()
 46 |         assert not temp.absolute_path.exists()
 47 |         # make sure trying to close again doesn't raise error
 48 |         temp.close()
 49 | 
 50 |     def test_tree(self):
 51 |         temp = TempDir()
 52 |         foo = temp.make_directory(name='foo')
 53 |         bar = temp.make_directory(name='bar', parent=foo)
 54 |         f = temp.make_file(name='baz', parent=bar)
 55 |         assert f == temp.absolute_path / 'foo' / 'bar' / 'baz'
 56 |         temp.close()
 57 |         assert not f.exists()
 58 | 
 59 |     def test_mode(self):
 60 |         # with self.assertRaises(IOError):
 61 |         #    with TempDir(permissions=None) as temp:
 62 |         #        _ = temp.mode
 63 |         with TempDir('r') as temp:
 64 |             # Raises error because the tempdir is read-only
 65 |             with self.assertRaises(PermissionError):
 66 |                 temp.make_file(name='bar')
 67 |         # Should be able to create the tempdir with existing read-only files
 68 |         with TempDir(
 69 |                 'r', [TempPathDescriptor(name='foo', contents='foo')]) as d:
 70 |             assert d.absolute_path.exists()
 71 |             assert (d.absolute_path / 'foo').exists()
 72 |             with open(d.absolute_path / 'foo', 'rt') as i:
 73 |                 assert 'foo' == i.read()
 74 | 
 75 |     def test_fifo(self):
 76 |         with TempDir() as temp:
 77 |             with self.assertRaises(Exception):
 78 |                 _ = temp.make_fifo(contents='foo')
 79 |             path = temp.make_fifo()
 80 |             p = subprocess.Popen('echo foo > {}'.format(path), shell=True)
 81 |             with open(path, 'rt') as i:
 82 |                 assert i.read() == 'foo\n'
 83 |             p.communicate()
 84 | 
 85 | 
 86 | class PathTests(TestCase):
 87 |     def setUp(self):
 88 |         self.root = TempDir()
 89 | 
 90 |     def tearDown(self):
 91 |         self.root.close()
 92 |         EXECUTABLE_CACHE.cache.clear()
 93 | 
 94 |     def test_get_set_permissions(self):
 95 |         path = self.root.make_file(permissions='rw')
 96 |         assert PermissionSet('rw') == get_permissions(path)
 97 |         set_permissions(path, 'wx')
 98 |         assert PermissionSet('wx') == get_permissions(path)
 99 | 
100 |     def test_check_access_std(self):
101 |         check_access(STDIN_OR_STDOUT, 'r')
102 |         check_access(STDIN_OR_STDOUT, 'w')
103 |         check_access(STDIN, 'r')
104 |         check_access(STDOUT, 'w')
105 |         check_access(STDERR, 'w')
106 |         with self.assertRaises(IOError):
107 |             check_access(STDOUT, 'x')
108 |         with self.assertRaises(IOError):
109 |             check_access(STDERR, 'r')
110 | 
111 |     def test_check_access_file(self):
112 |         path = self.root.make_file(permissions='rwx')
113 |         check_access(path, 'r')
114 |         check_access(path, 'w')
115 |         check_access(path, 'x')
116 | 
117 |     def test_set_permissions(self):
118 |         path = self.root.make_file()
119 |         with self.assertRaises(ValueError):
120 |             set_permissions(path, 'z')
121 |         set_permissions(path, 'r')
122 |         with self.assertRaises(IOError):
123 |             check_access(path, 'w')
124 | 
125 |     def test_no_permissions(self):
126 |         with self.assertRaises(IOError):
127 |             path = self.root.make_file(permissions='r')
128 |             check_access(path, 'w')
129 | 
130 |     def test_abspath_std(self):
131 |         assert abspath(STDOUT) == STDOUT
132 |         assert abspath(STDERR) == STDERR
133 | 
134 |     def test_abspath_home(self):
135 |         home = os.path.expanduser("~")
136 |         assert abspath(Path('~/foo')) == Path(home) / 'foo'
137 | 
138 |     def test_abspath_rel(self):
139 |         cwd = os.getcwd()
140 |         assert abspath(Path('foo')) == Path(cwd) / 'foo'
141 | 
142 |     def test_get_root(self):
143 |         # Need to do a different test for posix vs windows
144 |         if os.sep == '/':
145 |             assert '/' == get_root()
146 |             assert '/' == get_root(PosixPath('/foo/bar/baz'))
147 |         else:
148 |             script_drive = os.path.splitdrive(sys.executable)[0]
149 |             assert script_drive == get_root()
150 |             assert 'C:\\' == get_root(WindowsPath('C:\\foo\\bar\\baz'))
151 | 
152 |     def test_split_path(self):
153 |         parent = self.root.make_directory()
154 |         assert split_path(parent / 'foo', keep_seps=False) == (parent, 'foo')
155 |         assert split_path(parent / 'foo.tar.gz', keep_seps=False) == \
156 |             (parent, 'foo', 'tar', 'gz')
157 |         assert split_path(parent / 'foo.tar.gz', keep_seps=True) == \
158 |             (parent, 'foo', '.tar', '.gz')
159 | 
160 |     def test_filename(self):
161 |         assert filename(Path('/path/to/foo.tar.gz')) == 'foo'
162 | 
163 |     def test_convert_std_placeholder(self):
164 |         assert STDIN == convert_std_placeholder("-", "r")
165 |         assert STDOUT == convert_std_placeholder("-", "w")
166 |         assert STDERR == convert_std_placeholder("_", "w")
167 |         assert "foo" == convert_std_placeholder("foo")
168 | 
169 |     def test_resolve_std(self):
170 |         assert STDOUT == resolve_path(STDOUT)
171 |         assert STDERR == resolve_path(STDERR)
172 | 
173 |     def test_resolve_file(self):
174 |         path = self.root.make_file()
175 |         assert abspath(path) == resolve_path(path)
176 | 
177 |     def test_resolve_with_parent(self):
178 |         self.root.make_directory(name='foo')
179 |         path = self.root.make_file(parent=self.root[Path('foo')])
180 |         name = path.name
181 |         parent = path.parent
182 |         assert path == resolve_path(Path(name), parent)
183 | 
184 |     def test_resolve_missing(self):
185 |         with self.assertRaises(IOError):
186 |             resolve_path(Path('foo'))
187 | 
188 |     def test_check_readable_file(self):
189 |         readable = self.root.make_file(permissions='r')
190 |         non_readable = self.root.make_file(permissions='w')
191 |         directory = self.root.make_directory()
192 |         check_readable_file(readable)
193 |         with self.assertRaises(IOError):
194 |             check_readable_file(non_readable)
195 |         with self.assertRaises(IOError):
196 |             check_readable_file(Path('foo'))
197 |         with self.assertRaises(IOError):
198 |             check_readable_file(directory)
199 |         assert safe_check_readable_file(readable)
200 |         assert safe_check_readable_file(non_readable) is None
201 | 
202 |     def test_check_writable_file(self):
203 |         writable = self.root.make_file(permissions='w')
204 |         non_writable = self.root.make_file(permissions='r')
205 |         check_writable_file(writable)
206 |         with self.assertRaises(IOError):
207 |             check_writable_file(non_writable)
208 |         parent = self.root.make_directory()
209 |         check_writable_file(parent / 'foo')
210 |         subdir_path = parent / 'bar' / 'foo'
211 |         check_writable_file(subdir_path)
212 |         assert subdir_path.parent.exists()
213 |         with self.assertRaises(IOError):
214 |             parent = self.root.make_directory(permissions='r')
215 |             check_writable_file(parent / 'foo')
216 |         assert safe_check_writable_file(writable)
217 |         assert safe_check_writable_file(non_writable) is None
218 | 
219 |     def test_check_path_std(self):
220 |         check_path(STDIN_OR_STDOUT, 'f', 'r')
221 |         check_path(STDIN_OR_STDOUT, 'f', 'w')
222 |         check_path(STDIN, 'f', 'r')
223 |         check_path(STDOUT, 'f', 'w')
224 |         check_path(STDERR, 'f', 'w')
225 |         with self.assertRaises(IOError):
226 |             check_path(STDIN, 'f', 'w')
227 |         with self.assertRaises(IOError):
228 |             check_path(STDOUT, 'f', 'r')
229 |         with self.assertRaises(IOError):
230 |             check_path(STDERR, 'f', 'r')
231 |         with self.assertRaises(IOError):
232 |             check_path(STDOUT, 'd', 'r')
233 | 
234 |     def test_safe_checks(self):
235 |         path = self.root.make_file(permissions='r')
236 |         assert safe_check_path(path, 'f', 'r')
237 |         assert not safe_check_path(path, 'd', 'r')
238 |         assert not safe_check_path(path, 'f', 'w')
239 | 
240 |     def test_find(self):
241 |         level1 = self.root.make_directory()
242 |         level2 = self.root.make_directory(prefix='foo', parent=level1)
243 |         paths = self.root.make_empty_files(3, prefix='bar', parent=level2)
244 | 
245 |         # recursive
246 |         x = find(level1, 'foo.*', 'd', recursive=True)
247 |         assert 1 == len(x)
248 |         assert level2 == x[0]
249 |         y = find(level1, 'bar.*', 'f', recursive=True)
250 |         assert 3 == len(y)
251 |         assert sorted(paths) == sorted(y)
252 | 
253 |         # non-recursive
254 |         x = find(level1, 'foo.*', 'd', recursive=False)
255 |         assert 1 == len(x)
256 |         assert level2 == x[0]
257 |         y = find(level1, 'bar.*', 'f', recursive=False)
258 |         assert 0 == len(y)
259 | 
260 |         # absolute match
261 |         x = find(
262 |             level1, os.path.join(str(level1), 'foo.*', 'bar.*'), 'f',
263 |             recursive=True)
264 |         assert 3 == len(x)
265 |         assert sorted(paths) == sorted(x)
266 | 
267 |         # fifo
268 |         path = self.root.make_fifo(prefix='baz', parent=level1)
269 |         x = find(level1, 'baz.*', '|')
270 |         assert 1 == len(x)
271 |         assert path == x[0]
272 | 
273 |     def test_find_with_matches(self):
274 |         level1 = self.root.make_directory()
275 |         level2 = self.root.make_directory(prefix='foo', parent=level1)
276 |         path = self.root.make_path(name='bar123', parent=level2)
277 |         result = cast(Sequence[Tuple[PurePath, Match]], find(
278 |             level1, 'bar(.*)', 'f', recursive=True, return_matches=True))
279 |         assert 1 == len(result)
280 |         assert path == result[0][0]
281 |         assert '123' == result[0][1].group(1)
282 | 
283 |     def test_get_executable_path(self):
284 |         exe = self.root.make_file(suffix=".exe")
285 |         exe_path = EXECUTABLE_CACHE.get_path(exe)
286 |         assert exe_path is not None
287 |         assert exe_path == EXECUTABLE_CACHE.get_path(exe.name)
288 |         EXECUTABLE_CACHE.cache.clear()
289 |         EXECUTABLE_CACHE.add_search_path(exe.parent)
290 |         assert exe_path == EXECUTABLE_CACHE.get_path(exe.name)
291 |         # TODO: how to test this fully, since we can't be sure of what
292 |         # executables will be available on the installed system?
293 | 
294 |     def test_resolve_exe(self):
295 |         exe = self.root.make_file(suffix=".exe")
296 |         exe_name = exe.name
297 |         path = EXECUTABLE_CACHE.resolve_exe([exe_name])
298 |         assert path is None
299 |         EXECUTABLE_CACHE.cache.clear()
300 |         EXECUTABLE_CACHE.add_search_path(exe.parent)
301 |         path = EXECUTABLE_CACHE.resolve_exe([exe_name])
302 |         assert path is not None
303 |         assert exe == path[0]
304 | 
305 |     def test_pathvar(self):
306 |         pv = StrPathVar('id', pattern='[A-Z0-9_]+', default='ABC123')
307 |         assert 'ABC123' == pv(None)
308 | 
309 |         pv = StrPathVar('id', pattern='[A-Z0-9_]+', optional=True)
310 |         assert '' == pv(None)
311 | 
312 |         pv = StrPathVar('id', pattern='[A-Z0-9_]+')
313 |         with self.assertRaises(ValueError):
314 |             pv(None)
315 | 
316 |     def test_filespec(self):
317 |         null = FileSpec()
318 |         assert '{file}' == null.template
319 |         assert 'file' in null.path_vars
320 | 
321 |         path = self.root.make_file(name='ABC123.txt')
322 |         base = path.name
323 | 
324 |         spec = FileSpec(
325 |             StrPathVar('id', pattern=r'[A-Z0-9_]+', invalid=('XYZ999',)),
326 |             StrPathVar('ext', pattern=r'[^\.]+', valid=('txt', 'exe')),
327 |             template='{id}.{ext}')
328 | 
329 |         # get a single file
330 |         pathinst = spec(id='ABC123', ext='txt')
331 |         assert path_inst(base, dict(id='ABC123', ext='txt')) == pathinst
332 |         assert 'ABC123' == pathinst['id']
333 |         assert 'txt' == pathinst['ext']
334 | 
335 |         with self.assertRaises(ValueError):
336 |             spec(id='abc123', ext='txt')
337 | 
338 |         with self.assertRaises(ValueError):
339 |             spec(id='ABC123', ext='foo')
340 | 
341 |         with self.assertRaises(ValueError):
342 |             spec(id='XYZ999', ext='txt')
343 | 
344 |         pathinst = spec.parse(path, fullpath=True)
345 |         assert path_inst(path.name, dict(id='ABC123', ext='txt')) == pathinst
346 | 
347 |         path2 = self.root.make_file(name='abc123.txt')
348 |         with self.assertRaises(ValueError):
349 |             spec.parse(path2)
350 | 
351 |         all_paths = spec.find(self.root.absolute_path)
352 |         assert 1 == len(all_paths)
353 |         assert path_inst(path, dict(id='ABC123', ext='txt')) == all_paths[0]
354 | 
355 |     def test_dirspec(self):
356 |         null = DirSpec()
357 |         assert '{dir}' == null.template
358 |         assert 'dir' in null.path_vars
359 | 
360 |         level1 = self.root.make_directory(name='ABC123')
361 |         level2 = self.root.make_directory(parent=level1, name='AAA')
362 |         base = level1.parent
363 | 
364 |         spec = DirSpec(
365 |             PathPathVar('root'),
366 |             StrPathVar('subdir', pattern='[A-Z0-9_]+', invalid=('XYZ999',)),
367 |             StrPathVar('leaf', pattern='[^_]+', valid=('AAA', 'BBB')),
368 |             template=os.path.join('{root}', '{subdir}', '{leaf}'))
369 | 
370 |         # get a single dir
371 |         pathinst = spec(root=base, subdir='ABC123', leaf='AAA')
372 |         assert \
373 |             path_inst(level2, dict(root=base, subdir='ABC123', leaf='AAA')) == \
374 |             pathinst
375 |         assert base == pathinst['root']
376 |         assert 'ABC123' == pathinst['subdir']
377 |         assert 'AAA' == pathinst['leaf']
378 | 
379 |         with self.assertRaises(ValueError):
380 |             spec(root=base, subdir='abc123', leaf='AAA')
381 | 
382 |         with self.assertRaises(ValueError):
383 |             spec(root=base, subdir='ABC123', leaf='CCC')
384 | 
385 |         with self.assertRaises(ValueError):
386 |             spec(root=base, subdir='XYZ999', leaf='AAA')
387 | 
388 |         pathinst = spec.parse(level2)
389 |         assert \
390 |             path_inst(level2, dict(root=base, subdir='ABC123', leaf='AAA')) == \
391 |             pathinst
392 | 
393 |         path = self.root.make_file(parent=level2)
394 |         pathinst = spec.parse(path, fullpath=True)
395 |         assert \
396 |             path_inst(level2, dict(root=base, subdir='ABC123', leaf='AAA')) == \
397 |             pathinst
398 | 
399 |         path2 = self.root.make_directory(name='abc123')
400 |         with self.assertRaises(ValueError):
401 |             spec.parse(path2)
402 | 
403 |         all_paths = spec.find(base, recursive=True)
404 |         assert 1 == len(all_paths)
405 |         assert \
406 |             path_inst(level2, dict(root=base, subdir='ABC123', leaf='AAA')) == \
407 |             all_paths[0]
408 | 
409 |     def test_pathspec(self):
410 |         level1 = self.root.make_directory(name='ABC123')
411 |         level2 = self.root.make_directory(parent=level1, name='AAA')
412 |         path = self.root.make_file(parent=level2, name='FFF555.txt')
413 |         base = level1.parent
414 | 
415 |         spec = PathSpec(
416 |             DirSpec(
417 |                 PathPathVar('root'),
418 |                 StrPathVar('subdir', pattern=r'[A-Z0-9_]+', invalid=('XYZ999',)),
419 |                 StrPathVar('leaf', pattern=r'[^_]+', valid=('AAA', 'BBB')),
420 |                 template=os.path.join('{root}', '{subdir}', '{leaf}')),
421 |             FileSpec(
422 |                 StrPathVar('id', pattern=r'[A-Z0-9_]+', invalid=('ABC123',)),
423 |                 StrPathVar('ext', pattern=r'[^\.]+', valid=('txt', 'exe')),
424 |                 template='{id}.{ext}'))
425 | 
426 |         path_var_values = dict(root=base, subdir='ABC123', leaf='AAA',
427 |                                id='FFF555', ext='txt')
428 |         pathinst = spec(**path_var_values)
429 |         assert path_inst(path, path_var_values) == pathinst
430 |         assert base == pathinst['root']
431 |         assert 'ABC123' == pathinst['subdir']
432 |         assert 'AAA' == pathinst['leaf']
433 |         assert 'FFF555' == pathinst['id']
434 |         assert 'txt' == pathinst['ext']
435 | 
436 |         fail1 = dict(path_var_values)
437 |         # should fail because expecting all caps
438 |         fail1['id'] = 'abc123'
439 |         with self.assertRaises(ValueError):
440 |             spec(**fail1)
441 | 
442 |         fail2 = dict(path_var_values)
443 |         # should fail because foo is not in the valid list
444 |         fail2['ext'] = 'foo'
445 |         with self.assertRaises(ValueError):
446 |             spec(**fail2)
447 | 
448 |         fail3 = dict(path_var_values)
449 |         # should fail because ABC123 is in the invalid list
450 |         fail3['id'] = 'ABC123'
451 |         with self.assertRaises(ValueError):
452 |             spec(**fail3)
453 | 
454 |         pathinst = spec.parse(path)
455 |         assert path_inst(path, path_var_values) == pathinst
456 | 
457 |         path2 = self.root.make_file(parent=level2, name='fff555.txt')
458 |         with self.assertRaises(ValueError):
459 |             spec.parse(path2)
460 | 
461 |         all_paths = spec.find(base, recursive=True)
462 |         assert 1 == len(all_paths)
463 |         assert path_inst(path, path_var_values) == all_paths[0]
464 | 
465 |         # make sure it works with plain paths
466 |         spec = PathSpec(
467 |             level2,
468 |             FileSpec(
469 |                 StrPathVar('id', pattern=r'[A-Z0-9_]+', invalid=('ABC123',)),
470 |                 StrPathVar('ext', pattern=r'[^\.]+', valid=('txt', 'exe')),
471 |                 template='{id}.{ext}'))
472 |         assert path_inst(path, dict(id='FFF555', ext='txt')) == spec.parse(path)
473 |         with self.assertRaises(ValueError):
474 |             bad_path = Path(get_root()) / 'foo' / 'bar' / path.name
475 |             spec.parse(bad_path)
476 | 
477 |         spec = PathSpec(
478 |             DirSpec(
479 |                 PathPathVar('root'),
480 |                 StrPathVar('subdir', pattern='[A-Z0-9_]+', invalid=('XYZ999',)),
481 |                 StrPathVar('leaf', pattern='[^_]+', valid=('AAA', 'BBB')),
482 |                 template=os.path.join('{root}', '{subdir}', '{leaf}')),
483 |             path.name)
484 |         assert \
485 |             path_inst(path, dict(root=base, subdir='ABC123', leaf='AAA')) == \
486 |             spec.parse(path)
487 | 
488 |         spec = PathSpec(level2, path.name)
489 |         all_paths = spec.find()
490 |         assert 1 == len(all_paths)
491 |         assert path_inst(path) == all_paths[0]
492 | 
493 |     def test_default_search(self):
494 |         spec = FileSpec(
495 |             StrPathVar('id', pattern=r'[A-Z0-9_]+', invalid=('XYZ999',)),
496 |             StrPathVar('ext', pattern=r'[^\.]+', valid=('txt', 'exe')),
497 |             template='{id}.{ext}')
498 |         with self.assertRaises(ValueError):
499 |             spec.find()
500 | 
501 |         level1 = self.root.make_directory(name='ABC123')
502 |         level2 = self.root.make_directory(parent=level1, name='AAA')
503 |         base = level1.parent
504 | 
505 |         spec = DirSpec(
506 |             StrPathVar('subdir', pattern='[A-Z0-9_]+', invalid=('XYZ999',)),
507 |             StrPathVar('leaf', pattern='[^_]+', valid=('AAA', 'BBB')),
508 |             template=os.path.join(base, '{subdir}', '{leaf}'))
509 | 
510 |         all_paths = spec.find(recursive=True)
511 |         assert 1 == len(all_paths)
512 |         assert \
513 |             path_inst(level2, dict(subdir='ABC123', leaf='AAA')) == \
514 |             all_paths[0]
515 | 
516 |     def test_pathspec_default_search(self):
517 |         path = self.root.make_file(name='FFF555.txt')
518 |         base = path.parent
519 | 
520 |         spec = PathSpec(
521 |             DirSpec(template=str(base)),
522 |             FileSpec(
523 |                 StrPathVar('id', pattern=r'[A-Z0-9_]+', invalid=('ABC123',)),
524 |                 StrPathVar('ext', pattern=r'[^\.]+', valid=('txt', 'exe')),
525 |                 template='{id}.{ext}'))
526 | 
527 |         all_paths = spec.find()
528 |         assert 1 == len(all_paths)
529 |         assert path_inst(path, dict(id='FFF555', ext='txt')) == all_paths[0]
530 | 


--------------------------------------------------------------------------------
/tests/test_performance.py:
--------------------------------------------------------------------------------
  1 | """Self-contained performance tests.
  2 | """
  3 | from bisect import bisect
  4 | import gzip
  5 | from itertools import accumulate
  6 | from random import random, randint
  7 | import time
  8 | from xphyle.utils import read_lines
  9 | from xphyle.paths import TempDir
 10 | import pytest
 11 | 
 12 | 
 13 | class TimeKeeper:
 14 |     def __init__(self, msg, **kwargs):
 15 |         self.msg = msg
 16 |         self.msg_args = kwargs
 17 |         self.duration = 0
 18 | 
 19 |     def __enter__(self):
 20 |         self.start = time.clock()
 21 |         return self
 22 | 
 23 |     def __exit__(self, exception_type, exception_value, traceback):
 24 |         self.stop = time.clock()
 25 |         self.duration = self.stop - self.start
 26 |         print(self.msg.format(
 27 |             duration=self.duration,
 28 |             **self.msg_args))
 29 | 
 30 | 
 31 | def choices(population, weights=None, *, cum_weights=None, k=1):
 32 |     """Return a k sized list of population elements chosen with replacement.
 33 |     If the relative weights or cumulative weights are not specified,
 34 |     the selections are made with equal probability.
 35 | 
 36 |     This function is borrowed from the python 3.6 'random' package.
 37 |     """
 38 |     if cum_weights is None:
 39 |         if weights is None:
 40 |             _int = int
 41 |             total = len(population)
 42 |             return [population[_int(random() * total)] for _ in range(k)]
 43 |         cum_weights = list(accumulate(weights))
 44 |     elif weights is not None:
 45 |         raise TypeError('Cannot specify both weights and cumulative weights')
 46 |     if len(cum_weights) != len(population):
 47 |         raise ValueError('The number of weights does not match the population')
 48 |     total = cum_weights[-1]
 49 |     return [population[bisect(cum_weights, random() * total)] for _ in range(k)]
 50 | 
 51 | 
 52 | def perftest(name, text_generator, num_iter=10):
 53 |     # generate a big text
 54 |     msg = """
 55 |     Timing of {iter} {name} tests with total size {size:,d} characters and 
 56 |     use_system = {use_system}: {duration:0.2f} sec"""
 57 |     total_size = 0
 58 | 
 59 |     with TempDir() as root:
 60 |         paths = tuple(
 61 |             root.make_file(suffix='.gz')
 62 |             for _ in range(num_iter))
 63 |         for path in paths:
 64 |             txt = text_generator()
 65 |             total_size += len(txt)
 66 |             with gzip.open(path, 'wt') as out:
 67 |                 out.write(txt)
 68 | 
 69 |         with TimeKeeper(
 70 |                 msg, name=name, iter=num_iter, size=total_size,
 71 |                 use_system=None):
 72 |             for path in paths:
 73 |                 list(gzip.open(path))
 74 | 
 75 |         for use_system in (True, False):
 76 |             with TimeKeeper(
 77 |                     msg, name=name, iter=num_iter, size=total_size,
 78 |                     use_system=use_system):
 79 |                 for path in paths:
 80 |                     list(read_lines(path, use_system=use_system))
 81 | 
 82 | 
 83 | @pytest.mark.perf
 84 | def test_lorem_ipsum():
 85 |     from lorem.text import TextLorem
 86 |     generate_lorem = TextLorem(prange=(500, 1000), trange=(500, 1000))
 87 |     return perftest('lorem ipsum', generate_lorem.text)
 88 | 
 89 | 
 90 | @pytest.mark.perf
 91 | def test_fastq():
 92 |     def generate_fastq(seqlen=100):
 93 |         num_records = randint(100000, 500000)
 94 |         qualspace = list(chr(i + 33) for i in range(60))
 95 | 
 96 |         def rand_seq():
 97 |             return "".join(choices(['A', 'C', 'G', 'T'], k=seqlen))
 98 | 
 99 |         def rand_qual():
100 |             return "".join(choices(qualspace, k=seqlen))
101 | 
102 |         return "\n".join(
103 |             "\n".join((
104 |                 "read{}".format(i),
105 |                 rand_seq(),
106 |                 '+',
107 |                 rand_qual()))
108 |             for i in range(num_records))
109 |     return perftest('fastq', generate_fastq)
110 | 


--------------------------------------------------------------------------------
/tests/test_progress.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase
 2 | from . import *
 3 | import xphyle
 4 | from xphyle.paths import TempDir
 5 | from xphyle.progress import ITERABLE_PROGRESS, PROCESS_PROGRESS
 6 | from xphyle.utils import *
 7 | 
 8 | 
 9 | class MockProgress(object):
10 |     def __call__(self, itr, desc, size):
11 |         self.desc = desc
12 |         self.size = size
13 |         i = 0
14 |         for i, item in enumerate(itr, 1):
15 |             yield item
16 |         self.count = i
17 | 
18 | 
19 | class ProgressTests(TestCase):
20 |     def setUp(self):
21 |         self.root = TempDir()
22 |         xphyle.configure(progress=False)
23 |     
24 |     def tearDown(self):
25 |         self.root.close()
26 |         ITERABLE_PROGRESS.enabled = False
27 |         ITERABLE_PROGRESS.wrapper = None
28 |         PROCESS_PROGRESS.enabled = False
29 |         PROCESS_PROGRESS.wrapper = None
30 |     
31 |     def test_progress(self):
32 |         progress = MockProgress()
33 |         xphyle.configure(progress=True, progress_wrapper=progress)
34 |         path = self.root.make_file()
35 |         with open(path, 'wt') as o:
36 |             for i in range(100):
37 |                 o.write(random_text())
38 |         compress_file(
39 |             path, compression='gz', use_system=False)
40 |         assert 100 == progress.count
41 |     
42 |     def test_progress_delmited(self):
43 |         progress = MockProgress()
44 |         xphyle.configure(progress=True, progress_wrapper=progress)
45 |         path = self.root.make_file()
46 |         with open(path, 'wt') as o:
47 |             for i in range(100):
48 |                 o.write('row\t{}\n'.format(i))
49 |         rows = list(read_delimited(path))
50 |         assert 100 == len(rows)
51 |         assert 100 == progress.count
52 |     
53 |     def test_iter_stream(self):
54 |         progress = MockProgress()
55 |         xphyle.configure(progress=True, progress_wrapper=progress)
56 |         with intercept_stdin('foo\nbar\nbaz'):
57 |             with xopen(
58 |                     STDIN, 'rt', context_wrapper=True, compression=False) as o:
59 |                 lines = list(o)
60 |                 self.assertListEqual(['foo\n', 'bar\n', 'baz\n'], lines)
61 |         assert 3 == progress.count
62 | 


--------------------------------------------------------------------------------
/tests/test_types.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase
 2 | from xphyle.types import *
 3 | 
 4 | 
 5 | class TypeTests(TestCase):
 6 |     def test_mode_access(self):
 7 |         for t in ("READ", "READWRITE", "TRUNCATE_READWRITE"):
 8 |             self.assertTrue(ModeAccess[t].readable)
 9 |         for t in ("WRITE", "READWRITE", "TRUNCATE_READWRITE", "APPEND", "EXCLUSIVE"):
10 |             self.assertTrue(ModeAccess[t].writable)
11 | 
12 |     def test_file_mode(self):
13 |         for f in (
14 |             FileMode(),
15 |             FileMode("rt"),
16 |             FileMode(access="r"),
17 |             FileMode(coding="t"),
18 |             FileMode(access=ModeAccess.READ),
19 |             FileMode(coding=ModeCoding.TEXT),
20 |             FileMode(access="r", coding="t"),
21 |             FileMode(access=ModeAccess.READ, coding="t"),
22 |             FileMode(access="r", coding=ModeCoding.TEXT),
23 |             FileMode(access=ModeAccess.READ, coding=ModeCoding.TEXT),
24 |         ):
25 |             self.assertEqual(ModeAccess.READ, f.access)
26 |             self.assertEqual(ModeCoding.TEXT, f.coding)
27 |             self.assertTrue(f.readable)
28 |             self.assertFalse(f.writable)
29 |             self.assertTrue(f.text)
30 |             self.assertFalse(f.binary)
31 |             self.assertTrue("rt" in f)
32 |             self.assertFalse("b" in f)
33 |             self.assertTrue(ModeAccess.READ in f)
34 |             self.assertTrue(ModeCoding.TEXT in f)
35 |             self.assertEqual("rt", f.value)
36 |             self.assertEqual("rt", str(f))
37 |         with self.assertRaises(ValueError):
38 |             FileMode("rz")
39 | 
40 |     def test_permissions(self):
41 |         self.assertEqual(os.R_OK, Permission.READ.os_flag)
42 |         self.assertEqual(os.W_OK, Permission.WRITE.os_flag)
43 |         self.assertEqual(os.X_OK, Permission.EXECUTE.os_flag)
44 |         self.assertEqual(stat.S_IREAD, Permission.READ.stat_flag)
45 |         self.assertEqual(stat.S_IWRITE, Permission.WRITE.stat_flag)
46 |         self.assertEqual(stat.S_IEXEC, Permission.EXECUTE.stat_flag)
47 | 
48 |     def test_permission_set(self):
49 |         for a in (
50 |             PermissionSet("rwx"),
51 |             PermissionSet(("r", "w", "x")),
52 |             PermissionSet(7),
53 |             PermissionSet((1, 2, 4)),
54 |             PermissionSet((Permission.READ, Permission.WRITE, Permission.EXECUTE)),
55 |         ):
56 |             self.assertEqual(7, a.os_flags)
57 |             self.assertEqual(448, a.stat_flags)
58 |             self.assertEqual("rwx", "".join(f.value for f in a))
59 |             self.assertEqual("rwx", str(a))
60 |             for char in "rwx":
61 |                 self.assertTrue(char in a)
62 |                 self.assertTrue(Permission(char) in a)
63 | 
64 |         a = PermissionSet()
65 |         a.add(ModeAccess.READ)
66 |         a.add(ModeAccess.WRITE)
67 |         self.assertEqual("rw", str(a))
68 | 
69 |     def test_cache(self):
70 |         fm1 = FileMode("rt")
71 |         fm2 = FileMode("rt")
72 |         fm3 = FileMode("tr")
73 |         self.assertEqual(fm1, fm2)
74 |         self.assertEqual(fm1, fm3)
75 |         self.assertEqual(id(fm1), id(fm2))
76 |         self.assertNotEqual(id(fm1), id(fm3))
77 | 
78 |         perm1 = PermissionSet("rw")
79 |         perm2 = PermissionSet("rw")
80 |         perm3 = PermissionSet("wr")
81 |         self.assertEqual(perm1, perm2)
82 |         self.assertEqual(perm1, perm3)
83 |         self.assertEqual(id(perm1), id(perm2))
84 |         self.assertNotEqual(id(perm1), id(perm3))
85 | 


--------------------------------------------------------------------------------
/tests/test_urls.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase
 2 | from xphyle.urls import *
 3 | from xphyle.paths import *
 4 | 
 5 | 
 6 | good_url = 'https://github.com/jdidion/xphyle/blob/master/tests/foo.gz?raw=True'
 7 | bad_url = 'foo'
 8 | 
 9 | 
10 | class TestURLs(TestCase):
11 |     def test_parse(self):
12 |         self.assertEqual(
13 |             tuple(parse_url(good_url)),
14 |             ('https', 'github.com',
15 |              '/jdidion/xphyle/blob/master/tests/foo.gz',
16 |              '', 'raw=True', ''))
17 |         self.assertIsNone(parse_url(bad_url))
18 | 
19 |     def test_open_invalid(self):
20 |         self.assertIsNone(open_url(bad_url))
21 | 
22 |     def test_get_url_file_name(self):
23 |         with TempDir() as temp:
24 |             path = abspath(temp.make_file(name='foo.txt'))
25 |             url = open_url(path.as_uri())
26 |             assert get_url_file_name(url) == str(path)
27 |         # TODO: need to find a reliable compressed file URL with a
28 |         # Content-Disposition, or figure out how to mock one up
29 | 
30 |     def test_mime_types(self):
31 |         # TODO: need to find a reliable compressed file URL with a MIME type,
32 |         # or figure out how to mock one up
33 |         pass
34 | 


--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
  1 | from unittest import TestCase
  2 | from . import *
  3 | import gzip
  4 | import bz2
  5 | from xphyle.formats import THREADS
  6 | from xphyle.paths import TempDir, EXECUTABLE_CACHE
  7 | from xphyle.progress import ITERABLE_PROGRESS, PROCESS_PROGRESS
  8 | from xphyle.utils import *
  9 | 
 10 | 
 11 | class UtilsTests(TestCase):
 12 |     def setUp(self):
 13 |         self.root = TempDir()
 14 |         self.system_args = sys.argv
 15 | 
 16 |     def tearDown(self):
 17 |         self.root.close()
 18 |         ITERABLE_PROGRESS.enabled = False
 19 |         ITERABLE_PROGRESS.wrapper = None
 20 |         PROCESS_PROGRESS.enabled = False
 21 |         PROCESS_PROGRESS.wrapper = None
 22 |         THREADS.update(1)
 23 |         EXECUTABLE_CACHE.reset_search_path()
 24 |         EXECUTABLE_CACHE.cache = {}
 25 | 
 26 |     def test_read_lines(self):
 27 |         self.assertListEqual(list(read_lines(Path('foobar'), errors=False)), [])
 28 | 
 29 |         path = self.root.make_file()
 30 |         with open(path, 'wt') as o:
 31 |             o.write("1\n2\n3")
 32 |         self.assertListEqual(
 33 |             list(read_lines(path)),
 34 |             ['1', '2', '3'])
 35 |         self.assertListEqual(
 36 |             list(read_lines(path, convert=int)),
 37 |             [1, 2, 3])
 38 | 
 39 |     def test_read_chunked(self):
 40 |         self.assertListEqual([], list(read_bytes(Path('foobar'), errors=False)))
 41 |         path = self.root.make_file()
 42 |         with open(path, 'wt') as o:
 43 |             o.write("1234567890")
 44 |         chunks = list(read_bytes(path, 3))
 45 |         self.assertListEqual([b'123', b'456', b'789', b'0'], chunks)
 46 | 
 47 |     def test_write_lines(self):
 48 |         linesep_len = len(os.linesep)
 49 |         path = self.root.make_file()
 50 |         assert 3 == write_lines(['foo'], path, linesep=None)
 51 |         assert list(read_lines(path)) == ['foo']
 52 |         path = self.root.make_file()
 53 |         self.assertEqual(
 54 |             9 + (2*linesep_len),
 55 |             write_lines(('foo', 'bar', 'baz'), path, linesep=None))
 56 |         self.assertEqual(
 57 |             list(read_lines(path)),
 58 |             ['foo', 'bar', 'baz'])
 59 |         path = self.root.make_file()
 60 |         self.assertEqual(
 61 |             11, write_lines(('foo', 'bar', 'baz'), path, linesep='|'))
 62 |         assert list(read_lines(path)) == ['foo|bar|baz']
 63 |         path = self.root.make_file(permissions='r')
 64 |         assert -1 == write_lines(['foo'], path, errors=False)
 65 | 
 66 |     def test_write_bytes(self):
 67 |         path = self.root.make_file()
 68 |         linesep_len = len(os.linesep)
 69 |         assert 3 == write_bytes([b'foo'], path)
 70 |         assert list(read_bytes(path)) == [b'foo']
 71 |         path = self.root.make_file()
 72 |         assert 9 + (2*linesep_len) == \
 73 |             write_bytes(('foo', 'bar', 'baz'), path, sep=None)
 74 |         self.assertEqual(
 75 |             os.linesep.encode().join((b'foo', b'bar', b'baz')),
 76 |             b''.join(read_bytes(path)))
 77 |         path = self.root.make_file(permissions='r')
 78 |         assert -1 == write_bytes([b'foo'], path, errors=False)
 79 | 
 80 |     def test_read_dict(self):
 81 |         path = self.root.make_file()
 82 |         with open(path, 'wt') as o:
 83 |             o.write("# This is a comment\n")
 84 |             o.write("foo=1\n")
 85 |             o.write("bar=2\n")
 86 |         d = read_dict(path, convert=int, ordered=True)
 87 |         assert len(d) == 2
 88 |         assert d['foo'] == 1
 89 |         assert d['bar'] == 2
 90 |         assert list(d.items()) == [('foo', 1), ('bar', 2)]
 91 | 
 92 |     def test_write_dict(self):
 93 |         path = self.root.make_file()
 94 |         write_dict(OrderedDict([('foo', 1), ('bar', 2)]), path, linesep=None)
 95 |         assert list(read_lines(path)) == ['foo=1', 'bar=2']
 96 | 
 97 |     def test_tsv(self):
 98 |         assert [] == list(read_delimited(Path('foobar'), errors=False))
 99 | 
100 |         path = self.root.make_file()
101 |         with open(path, 'wt') as o:
102 |             o.write('a\tb\tc\n')
103 |             o.write('1\t2\t3\n')
104 |             o.write('4\t5\t6\n')
105 | 
106 |         with self.assertRaises(ValueError):
107 |             list(read_delimited(path, header=False, converters='int'))
108 |         with self.assertRaises(ValueError):
109 |             list(read_delimited(
110 |                 path, header=False, converters=int, row_type='dict',
111 |                 yield_header=False))
112 | 
113 |         assert [
114 |             ['a', 'b', 'c'],
115 |             [1, 2, 3],
116 |             [4, 5, 6]
117 |         ] == list(read_delimited(
118 |             path, header=True, converters=int))
119 |         assert [
120 |             ['a', 'b', 'c'],
121 |             (1, 2, 3),
122 |             (4, 5, 6)
123 |         ] == list(read_delimited(
124 |             path, header=True, converters=int, row_type='tuple'))
125 |         assert [
126 |             ['a', 'b', 'c'],
127 |             (1, 2, 3),
128 |             (4, 5, 6)
129 |         ] == list(read_delimited(
130 |             path, header=True, converters=int, row_type=tuple))
131 |         assert [
132 |             dict(a=1, b=2, c=3),
133 |             dict(a=4, b=5, c=6)
134 |         ] == list(read_delimited(
135 |             path, header=True, converters=int, row_type='dict',
136 |             yield_header=False))
137 | 
138 |     def test_tsv_dict(self):
139 |         path = self.root.make_file()
140 |         with open(path, 'wt') as o:
141 |             o.write('id\ta\tb\tc\n')
142 |             o.write('row1\t1\t2\t3\n')
143 |             o.write('row2\t4\t5\t6\n')
144 | 
145 |         with self.assertRaises(ValueError):
146 |             read_delimited_as_dict(path, key='id', header=False)
147 |         with self.assertRaises(ValueError):
148 |             read_delimited_as_dict(path, key=None, header=False)
149 | 
150 |         assert dict(
151 |                 row1=['row1', 1, 2, 3],
152 |                 row2=['row2', 4, 5, 6]
153 |         ) == read_delimited_as_dict(
154 |             path, key=0, header=True, converters=(str, int, int, int))
155 |         assert dict(
156 |                 row1=['row1', 1, 2, 3],
157 |                 row2=['row2', 4, 5, 6]
158 |         ) == read_delimited_as_dict(
159 |             path, key='id', header=True, converters=(str, int, int, int))
160 | 
161 |         with open(path, 'wt') as o:
162 |             o.write('a\tb\tc\n')
163 |             o.write('1\t2\t3\n')
164 |             o.write('4\t5\t6\n')
165 | 
166 |         assert dict(
167 |             row1=[1, 2, 3],
168 |             row4=[4, 5, 6]
169 |         ) == read_delimited_as_dict(
170 |             path, key=lambda row: 'row{}'.format(row[0]),
171 |             header=True, converters=int)
172 | 
173 |     def test_tsv_dict_dups(self):
174 |         path = self.root.make_file()
175 |         with open(path, 'wt') as o:
176 |             o.write('id\ta\tb\tc\n')
177 |             o.write('row1\t1\t2\t3\n')
178 |             o.write('row1\t4\t5\t6\n')
179 | 
180 |         with self.assertRaises(Exception):
181 |             read_delimited_as_dict(
182 |                 path, key='id', header=True, converters=(str, int, int, int))
183 | 
184 |     def test_compress_file_no_dest(self):
185 |         path = self.root.make_file()
186 | 
187 |         with self.assertRaises(ValueError):
188 |             compress_file(path, compression=True, keep=True)
189 | 
190 |         with open(path, 'wt') as o:
191 |             o.write('foo')
192 |         gzfile = compress_file(path, compression='gz', keep=False)
193 |         assert gzfile == Path(str(path) + '.gz')
194 |         assert not path.exists()
195 |         assert gzfile.exists()
196 |         with gzip.open(gzfile, 'rt') as i:
197 |             assert i.read() == 'foo'
198 | 
199 |     def test_compress_fileobj(self):
200 |         path = self.root.make_file()
201 |         with open(path, 'wt') as o:
202 |             o.write('foo')
203 | 
204 |         f = open(path, 'rb')
205 |         try:
206 |             gzfile = compress_file(f, compression='gz')
207 |             assert gzfile == Path(str(path) + '.gz')
208 |             assert path.exists()
209 |             assert gzfile.exists()
210 |             with gzip.open(gzfile, 'rt') as i:
211 |                 assert i.read() == 'foo'
212 |         finally:
213 |             f.close()
214 | 
215 |         gzpath = Path(str(path) + '.gz')
216 |         gzfile = gzip.open(gzpath, 'w')
217 |         try:
218 |             assert gzpath == compress_file(path, gzfile, compression=True)
219 |         finally:
220 |             gzfile.close()
221 |         assert path.exists()
222 |         assert gzpath.exists()
223 |         with gzip.open(gzpath, 'rt') as i:
224 |             assert i.read() == 'foo'
225 | 
226 |     def test_compress_file_no_compression(self):
227 |         path = self.root.make_file()
228 |         with open(path, 'wt') as o:
229 |             o.write('foo')
230 |         gzfile = Path(str(path) + '.gz')
231 |         gzfile2 = compress_file(path, gzfile, keep=True)
232 |         assert gzfile == gzfile2
233 |         assert path.exists()
234 |         assert gzfile.exists()
235 |         with gzip.open(gzfile, 'rt') as i:
236 |             assert i.read() == 'foo'
237 | 
238 |     def test_decompress_file(self):
239 |         path = self.root.make_file()
240 |         gzfile = Path(str(path) + '.gz')
241 |         with gzip.open(gzfile, 'wt') as o:
242 |             o.write('foo')
243 | 
244 |         path2 = decompress_file(gzfile, keep=True)
245 |         assert path == path2
246 |         assert path.exists()
247 |         assert gzfile.exists()
248 |         with open(path, 'rt') as i:
249 |             assert i.read() == 'foo'
250 | 
251 |         with open(gzfile, 'rb') as i:
252 |             path2 = decompress_file(i, keep=True)
253 |             assert path == path2
254 |             assert path.exists()
255 |             assert gzfile.exists()
256 |             with open(path, 'rt') as j:
257 |                 assert j.read() == 'foo'
258 | 
259 |     def test_decompress_file_compression(self):
260 |         path = self.root.make_file()
261 |         gzfile = Path(str(path) + '.foo')
262 |         with gzip.open(gzfile, 'wt') as o:
263 |             o.write('foo')
264 |         with self.assertRaises(ValueError):
265 |             decompress_file(gzfile)
266 |         path2 = decompress_file(gzfile, compression='gz', keep=False)
267 |         assert path == path2
268 |         assert path.exists()
269 |         assert not gzfile.exists()
270 |         with open(path, 'rt') as i:
271 |             assert i.read() == 'foo'
272 | 
273 |     def test_transcode(self):
274 |         path = self.root.make_file()
275 |         gzfile = Path(str(path) + '.gz')
276 |         with gzip.open(gzfile, 'wt') as o:
277 |             o.write('foo')
278 |         bzfile = Path(str(path) + '.bz2')
279 |         transcode_file(gzfile, bzfile)
280 |         with bz2.open(bzfile, 'rt') as i:
281 |             assert 'foo' == i.read()
282 | 
283 |     def test_uncompressed_size(self):
284 |         for ext in ('.gz', '.xz'):
285 |             with self.subTest(ext):
286 |                 raw = self.root.make_file(contents=random_text(1000))
287 |                 compressed = self.root.make_file(suffix=ext)
288 |                 compress_file(raw, compressed)
289 |                 assert 1000 == uncompressed_size(compressed)
290 | 
291 |     def test_exec_process(self):
292 |         inp = self.root.make_file(suffix='.gz')
293 |         with gzip.open(inp, 'wt') as o:
294 |             o.write('foo')
295 |         out = self.root.make_file(suffix='.gz')
296 |         exec_process('cat', stdin=inp, stdout=out)
297 |         with gzip.open(out, 'rt') as o:
298 |             assert 'foo' == o.read()
299 | 
300 |     def test_linecount(self):
301 |         assert -1 == linecount(Path('foobar'), errors=False)
302 |         path = self.root.make_file()
303 |         with open(path, 'wt') as o:
304 |             for i in range(100):
305 |                 o.write(random_text())
306 |                 if i != 99:
307 |                     o.write('\n')
308 |         with self.assertRaises(ValueError):
309 |             linecount(path, buffer_size=-1)
310 |         with self.assertRaises(ValueError):
311 |             linecount(path, mode='wb')
312 |         assert 100 == linecount(path)
313 | 
314 |     def test_linecount_empty(self):
315 |         path = self.root.make_file()
316 |         assert 0 == linecount(path)
317 | 
318 |     def test_file_manager(self):
319 |         paths12 = dict(
320 |             path1=self.root.make_empty_files(1)[0],
321 |             path2=self.root.make_empty_files(1)[0])
322 |         with FileManager(paths12, mode='wt') as f:
323 |             paths34 = self.root.make_empty_files(2)
324 |             for p in paths34:
325 |                 f.add(p, mode='wt')
326 |                 self.assertTrue(p in f)
327 |                 self.assertFalse(f[p].closed)
328 |             path5 = self.root.make_file()
329 |             path5_fh = open(path5, 'wt')
330 |             f.add(path5_fh)
331 |             path6 = self.root.make_file()
332 |             f['path6'] = path6
333 |             assert path6 == f.get_path('path6')
334 |             all_paths = list(paths12.values()) + list(paths34) + [path5, path6]
335 |             self.assertListEqual(all_paths, f.paths)
336 |             assert len(f) == 6
337 |             for key, fh in f.iter_files():
338 |                 self.assertFalse(fh.closed)
339 |             assert f['path2'] is not None
340 |             assert f.get('path2') is not None
341 |             assert f['path6'] == f.get(5)
342 |             with self.assertRaises(KeyError):
343 |                 _ = f['foo']
344 |             assert f.get('foo') is None
345 |         assert len(f) == 6
346 |         for key, fh in f.iter_files():
347 |             self.assertTrue(fh.closed)
348 | 
349 |     def test_file_manager_dup_files(self):
350 |         f = FileManager()
351 |         path = self.root.make_file()
352 |         f.add(path)
353 |         with self.assertRaises(ValueError):
354 |             f.add(path)
355 | 
356 |     def test_compress_on_close(self):
357 |         path = self.root.make_file()
358 |         compressor = CompressOnClose(compression='gz')
359 |         with FileWrapper(path, 'wt') as wrapper:
360 |             wrapper.register_listener('close', compressor)
361 |             wrapper.write('foo')
362 |         gzfile = Path(str(path) + '.gz')
363 |         assert gzfile == compressor.compressed_path
364 |         self.assertTrue(os.path.exists(gzfile))
365 |         with gzip.open(gzfile, 'rt') as i:
366 |             assert i.read() == 'foo'
367 | 
368 |     def test_move_on_close(self):
369 |         path = self.root.make_file()
370 |         dest = self.root.make_file()
371 |         with FileWrapper(path, 'wt') as wrapper:
372 |             wrapper.register_listener('close', MoveOnClose(dest=dest))
373 |             wrapper.write('foo')
374 |         self.assertFalse(os.path.exists(path))
375 |         self.assertTrue(os.path.exists(dest))
376 |         with open(dest, 'rt') as i:
377 |             assert i.read() == 'foo'
378 | 
379 |     def test_remove_on_close(self):
380 |         path = self.root.make_file()
381 |         with FileWrapper(path, 'wt') as wrapper:
382 |             wrapper.register_listener('close', RemoveOnClose())
383 |             wrapper.write('foo')
384 |         self.assertFalse(os.path.exists(path))
385 | 
386 |         path = self.root.make_file()
387 |         with FileWrapper(open(path, 'wt')) as wrapper:
388 |             wrapper.register_listener('close', RemoveOnClose())
389 |             wrapper.write('foo')
390 |         self.assertFalse(os.path.exists(path))
391 | 
392 |     def test_fileinput(self):
393 |         file1 = self.root.make_file(suffix='.gz')
394 |         with gzip.open(file1, 'wt') as o:
395 |             o.write('foo\nbar\n')
396 |         with textinput(file1) as i:
397 |             lines = list(i)
398 |             self.assertListEqual(['foo\n', 'bar\n'], lines)
399 |         file2 = self.root.make_file(suffix='.gz')
400 |         with gzip.open(file2, 'wt') as o:
401 |             o.write('baz\n')
402 |         with textinput((file1, file2)) as i:
403 |             lines = list(i)
404 |             self.assertListEqual(['foo\n', 'bar\n', 'baz\n'], lines)
405 |         with textinput([('key1', file1), ('key2', file2)]) as i:
406 |             assert i.filekey is None
407 |             assert i.filename is None
408 |             assert i.lineno == 0
409 |             assert i.filelineno == 0
410 | 
411 |             assert next(i) == 'foo\n'
412 |             assert i.filekey == 'key1'
413 |             assert i.filename == file1
414 |             assert i.lineno == 1
415 |             assert i.filelineno == 1
416 | 
417 |             assert next(i) == 'bar\n'
418 |             assert i.filekey == 'key1'
419 |             assert i.filename == file1
420 |             assert i.lineno == 2
421 |             assert i.filelineno == 2
422 | 
423 |             assert next(i) == 'baz\n'
424 |             assert i.filekey == 'key2'
425 |             assert i.filename == file2
426 |             assert i.lineno == 3
427 |             assert i.filelineno == 1
428 | 
429 |     def test_pending(self):
430 |         file1 = self.root.make_file(suffix='.gz')
431 |         with gzip.open(file1, 'wt') as o:
432 |             o.write('foo\nbar\n')
433 |         f = FileInput(char_mode=TextMode)
434 |         self.assertTrue(f._pending)
435 |         f.add(file1)
436 |         list(f)
437 |         self.assertTrue(f.finished)
438 |         self.assertFalse(f._pending)
439 |         file2 = self.root.make_file(suffix='.gz')
440 |         with gzip.open(file2, 'wt') as o:
441 |             o.write('baz\n')
442 |         f.add(file2)
443 |         self.assertTrue(f._pending)
444 |         self.assertFalse(f.finished)
445 |         assert 'baz\n' == f.readline()
446 |         assert '' == f.readline()
447 |         with self.assertRaises(StopIteration):
448 |             next(f)
449 |         self.assertTrue(f.finished)
450 |         self.assertFalse(f._pending)
451 | 
452 |     def test_fileinput_defaults(self):
453 |         path = self.root.make_file()
454 |         with open(path, 'wt') as o:
455 |             o.write('foo\nbar\n')
456 |         sys.argv = [self.system_args[0], path]
457 |         self.assertEqual(
458 |             ['foo\n', 'bar\n'],
459 |             list(textinput()))
460 |         sys.argv = []
461 |         with intercept_stdin('foo\n'):
462 |             lines = list(textinput([STDIN]))
463 |             assert 1 == len(lines)
464 |             assert 'foo\n' == lines[0]
465 |         with intercept_stdin(b'foo\nbar\n', is_bytes=True):
466 |             assert [b'foo\n', b'bar\n'] == list(byteinput())
467 | 
468 |     def test_single_fileoutput(self):
469 |         file1 = self.root.make_file(suffix='.gz')
470 |         with textoutput(file1) as o:
471 |             o.writelines(('foo', 'bar', 'baz'))
472 |         with gzip.open(file1, 'rt') as i:
473 |             assert 'foo\nbar\nbaz\n' == i.read()
474 | 
475 |     def test_tee_fileoutput(self):
476 |         file1 = self.root.make_file(suffix='.gz')
477 |         file2 = self.root.make_file()
478 |         with self.assertRaises(ValueError):
479 |             textoutput((file1, file2), access='z')
480 |         with textoutput((file1, file2)) as o:
481 |             o.writelines(('foo', 'bar', 'baz'))
482 |         with gzip.open(file1, 'rt') as i:
483 |             assert 'foo\nbar\nbaz\n' == i.read()
484 |         with open(file2, 'rt') as i:
485 |             assert 'foo\nbar\nbaz\n' == i.read()
486 | 
487 |     def test_tee_fileoutput_binary(self):
488 |         file1 = self.root.make_file(suffix='.gz')
489 |         file2 = self.root.make_file()
490 |         with byteoutput(
491 |                 (file1, file2),
492 |                 file_output_type=TeeFileOutput) as o:
493 |             o.writelines((b'foo', b'bar', b'baz'))
494 |         with gzip.open(file1, 'rb') as i:
495 |             assert b'foo\nbar\nbaz\n' == i.read()
496 |         with open(file2, 'rb') as i:
497 |             assert b'foo\nbar\nbaz\n' == i.read()
498 | 
499 |         with textoutput((file1, file2), file_output_type=TeeFileOutput) as o:
500 |             o.writelines((b'foo', b'bar', b'baz'))
501 |         with gzip.open(file1, 'rt') as i:
502 |             assert 'foo\nbar\nbaz\n' == i.read()
503 |         with open(file2, 'rt') as i:
504 |             assert 'foo\nbar\nbaz\n' == i.read()
505 | 
506 |         with byteoutput((file1, file2), file_output_type=TeeFileOutput) as o:
507 |             o.writelines(('foo', b'bar', b'baz'))
508 |         with gzip.open(file1, 'rb') as i:
509 |             assert b'foo\nbar\nbaz\n' == i.read()
510 |         with open(file2, 'rb') as i:
511 |             assert b'foo\nbar\nbaz\n' == i.read()
512 | 
513 |     def test_tee_fileoutput_no_newline(self):
514 |         file1 = self.root.make_file(suffix='.gz')
515 |         file2 = self.root.make_file()
516 |         with textoutput((file1, file2)) as o:
517 |             o.writeline('foo')
518 |             o.writeline('bar')
519 |             assert 2 == o.num_lines
520 |         with gzip.open(file1, 'rb') as i:
521 |             assert b'foo\nbar\n' == i.read()
522 |         with open(file2, 'rb') as i:
523 |             assert b'foo\nbar\n' == i.read()
524 | 
525 |     def test_fileoutput_stdout(self):
526 |         path = self.root.make_file()
527 |         sys.argv = [self.system_args, path]
528 |         with textoutput() as o:
529 |             o.writelines(('foo', 'bar', 'baz'))
530 |         with open(path, 'rt') as i:
531 |             assert 'foo\nbar\nbaz\n' == i.read()
532 |         sys.argv = []
533 |         with intercept_stdout(True) as outbuf:
534 |             with byteoutput() as o:
535 |                 o.writelines((b'foo', b'bar', b'baz'))
536 |             assert b'foo\nbar\nbaz\n' == outbuf.getvalue()
537 | 
538 |     def test_cycle_fileoutput(self):
539 |         file1 = self.root.make_file(suffix='.gz')
540 |         file2 = self.root.make_file()
541 |         with textoutput((file1, file2), file_output_type=CycleFileOutput) as o:
542 |             o.writelines(('foo', 'bar', 'baz'))
543 |         with gzip.open(file1, 'rt') as i:
544 |             assert 'foo\nbaz\n' == i.read()
545 |         with open(file2, 'rt') as i:
546 |             assert 'bar\n' == i.read()
547 | 
548 |     def test_ncycle_fileoutput(self):
549 |         file1 = self.root.make_file(suffix='.gz')
550 |         file2 = self.root.make_file()
551 |         with textoutput(
552 |                 (file1, file2), lines_per_file=2,
553 |                 file_output_type=NCycleFileOutput) as o:
554 |             o.writelines(('foo', 'bar', 'baz', 'blorf', 'bing'))
555 |         with gzip.open(file1, 'rt') as i:
556 |             assert 'foo\nbar\nbing\n' == i.read()
557 |         with open(file2, 'rt') as i:
558 |             assert 'baz\nblorf\n' == i.read()
559 | 
560 |     def test_rolling_fileoutput(self):
561 |         path = str(self.root.make_file())
562 |         with RollingFileOutput(
563 |                 path + '{index}.txt', char_mode=TextMode, linesep=os.linesep,
564 |                 lines_per_file=3) as out:
565 |             for i in range(6):
566 |                 out.write(str(i))
567 |         with open(path + '0.txt', 'rt') as infile:
568 |             assert '0\n1\n2\n' == infile.read()
569 |         with open(path + '1.txt', 'rt') as infile:
570 |             assert '3\n4\n5\n' == infile.read()
571 | 
572 |     def test_fileoutput_with_header(self):
573 |         path = str(self.root.make_file())
574 |         with textoutput(
575 |                 path + '{index}.txt', file_output_type=RollingFileOutput,
576 |                 header="number\n", lines_per_file=3) as out:
577 |             for i in range(6):
578 |                 out.write(str(i))
579 |         with open(path + '0.txt', 'rt') as infile:
580 |             assert 'number\n0\n1\n2\n' == infile.read()
581 |         with open(path + '1.txt', 'rt') as infile:
582 |             assert 'number\n3\n4\n5\n' == infile.read()
583 | 
584 |     def test_rolling_fileoutput_write(self):
585 |         path = str(self.root.make_file())
586 |         with textoutput(
587 |                 path + '{index}.txt', file_output_type=RollingFileOutput,
588 |                 lines_per_file=3) as out:
589 |             for i in range(6):
590 |                 out.write(i, False)
591 |             for ch in ('a', 'b', 'c'):
592 |                 out.write(ch, False)
593 |             out.write("d\ne\nf")
594 |         with open(path + '0.txt', 'rt') as infile:
595 |             assert '0\n1\n2\n' == infile.read()
596 |         with open(path + '1.txt', 'rt') as infile:
597 |             assert '3\n4\n5\n' == infile.read()
598 |         with open(path + '2.txt', 'rt') as infile:
599 |             assert 'a\nb\nc\n' == infile.read()
600 |         with open(path + '3.txt', 'rt') as infile:
601 |             assert 'd\ne\nf\n' == infile.read()
602 | 
603 |     def test_pattern_file_output(self):
604 |         path = self.root.make_file()
605 | 
606 |         def get_tokens(line):
607 |             return dict(zip(('a', 'b'), line.split(' ')))
608 | 
609 |         with textoutput(
610 |                 str(path) + '{a}.{b}.txt',
611 |                 file_output_type=PatternFileOutput,
612 |                 token_func=get_tokens) as out:
613 |             for a in range(2):
614 |                 for b in range(2):
615 |                     out.writeline(f'{a} {b}')
616 | 
617 |         for a in range(2):
618 |             for b in range(2):
619 |                 with open(str(path) + f'{a}.{b}.txt', 'rt') as infile:
620 |                     assert f'{a} {b}\n' == infile.read()
621 | 


--------------------------------------------------------------------------------
/tests/test_xphyle.py:
--------------------------------------------------------------------------------
  1 | from unittest import TestCase, skipIf
  2 | from . import *
  3 | import gzip
  4 | from io import BytesIO, IOBase
  5 | from xphyle import *
  6 | from xphyle.paths import TempDir, STDIN, STDOUT, STDERR, EXECUTABLE_CACHE
  7 | from xphyle.progress import ITERABLE_PROGRESS, PROCESS_PROGRESS
  8 | from xphyle.formats import THREADS
  9 | from xphyle.types import EventType
 10 | 
 11 | 
 12 | # Note: the casts of StringIO/BytesIO to IOBase are only necessary because of
 13 | # pycharm bug PY-28155
 14 | 
 15 | 
 16 | class XphyleTests(TestCase):
 17 |     def setUp(self):
 18 |         self.root = TempDir()
 19 | 
 20 |     def tearDown(self):
 21 |         self.root.close()
 22 |         ITERABLE_PROGRESS.enabled = False
 23 |         ITERABLE_PROGRESS.wrapper = None
 24 |         PROCESS_PROGRESS.enabled = False
 25 |         PROCESS_PROGRESS.wrapper = None
 26 |         THREADS.update(1)
 27 |         EXECUTABLE_CACHE.reset_search_path()
 28 |         EXECUTABLE_CACHE.cache = {}
 29 | 
 30 |     def test_configure(self):
 31 |         def wrapper(a, b, c) -> Iterable:
 32 |             return []
 33 | 
 34 |         configure(
 35 |             progress=True,
 36 |             progress_wrapper=wrapper,
 37 |             system_progress=True,
 38 |             system_progress_wrapper="foo",
 39 |             threads=2,
 40 |             executable_path=[Path("foo")],
 41 |         )
 42 | 
 43 |         assert wrapper == ITERABLE_PROGRESS.wrapper
 44 |         assert ("foo",) == PROCESS_PROGRESS.wrapper
 45 |         assert 2 == THREADS.threads
 46 |         assert Path("foo") in EXECUTABLE_CACHE.search_path
 47 | 
 48 |         configure(threads=False)
 49 |         assert 1 == THREADS.threads
 50 | 
 51 |         import multiprocessing
 52 | 
 53 |         configure(threads=True)
 54 |         assert multiprocessing.cpu_count() == THREADS.threads
 55 | 
 56 |     def test_guess_format(self):
 57 |         with self.assertRaises(ValueError):
 58 |             guess_file_format(STDOUT)
 59 |         with self.assertRaises(ValueError):
 60 |             guess_file_format(STDERR)
 61 |         path = self.root.make_file(suffix=".gz")
 62 |         with gzip.open(path, "wt") as o:
 63 |             o.write("foo")
 64 |         assert guess_file_format(path) == "gzip"
 65 |         path = self.root.make_file()
 66 |         with gzip.open(path, "wt") as o:
 67 |             o.write("foo")
 68 |         assert guess_file_format(path) == "gzip"
 69 | 
 70 |     def test_open_(self):
 71 |         path = self.root.make_file(contents="foo")
 72 |         with self.assertRaises(ValueError):
 73 |             with open_(path, wrap_fileobj=False):
 74 |                 pass
 75 |         with open_(path, compression=False) as fh:
 76 |             assert fh.read() == "foo"
 77 |         with open_(path, compression=False) as fh:
 78 |             assert next(fh) == "foo"
 79 |         with open(path) as fh:
 80 |             with open_(fh, compression=False, context_wrapper=True) as fh2:
 81 |                 self.assertTrue(isinstance(fh2, FileLikeWrapper))
 82 |                 assert fh2.read() == "foo"
 83 |         with open(path) as fh3:
 84 |             with open_(fh, wrap_fileobj=False, context_wrapper=True):
 85 |                 self.assertFalse(isinstance(fh3, FileLikeWrapper))
 86 | 
 87 |     def test_open_safe(self):
 88 |         with self.assertRaises(IOError):
 89 |             with open_("foobar", mode="r", errors=True) as _:
 90 |                 pass
 91 |         with self.assertRaises(ValueError):
 92 |             with open_(cast(IOBase, None), mode="r", errors=True) as _:
 93 |                 pass
 94 |         with open_("foobar", mode="r", errors=False) as fh:
 95 |             self.assertIsNone(fh)
 96 |         with open_(cast(IOBase, None), mode="r", errors=False) as fh:
 97 |             self.assertIsNone(fh)
 98 | 
 99 |     def test_xopen_invalid(self):
100 |         # invalid mode
101 |         with self.assertRaises(ValueError):
102 |             xopen("foo", "z")
103 |         with self.assertRaises(ValueError):
104 |             xopen("foo", "rz")
105 |         with self.assertRaises(ValueError):
106 |             xopen("foo", "rU", newline="\n")
107 |         with self.assertRaises(ValueError):
108 |             xopen(STDOUT, "w", compression=True)
109 |         with self.assertRaises(ValueError):
110 |             xopen("foo.bar", "w", compression=True)
111 |         with self.assertRaises(ValueError):
112 |             xopen("foo", file_type=FileType.STDIO)
113 |         with self.assertRaises(ValueError):
114 |             xopen(STDOUT, file_type=FileType.LOCAL)
115 |         with self.assertRaises(ValueError):
116 |             xopen("foo", file_type=FileType.URL)
117 |         with self.assertRaises(IOError):
118 |             xopen("http://foo.com", file_type=FileType.LOCAL)
119 |         with self.assertRaises(ValueError):
120 |             xopen("xyz", file_type=FileType.FILELIKE)
121 |         path = self.root.make_file(contents="foo")
122 |         with open(path, "r") as fh:
123 |             with self.assertRaises(ValueError):
124 |                 xopen(fh, "w")
125 |             f = xopen(fh, context_wrapper=True)
126 |             assert "r" == f.mode
127 |         f = xopen(path, context_wrapper=True)
128 |         f.close()
129 |         with self.assertRaises(IOError):
130 |             with f:
131 |                 pass
132 |         with self.assertRaises(ValueError):
133 |             with open(path, "rt") as fh:
134 |                 xopen(fh, "rt", compression=True)
135 |         # can't guess compression without a name
136 |         with self.assertRaises(ValueError):
137 |             b = BytesIO()
138 |             b.mode = "wb"
139 |             xopen(cast(IOBase, b), "wt")
140 |         # can't read from stderr
141 |         with self.assertRaises(ValueError):
142 |             xopen(STDERR, "rt")
143 | 
144 |     def test_xopen_std(self):
145 |         # Try stdin
146 |         with intercept_stdin("foo\n"):
147 |             with xopen("-", "r", context_wrapper=True, compression=False) as i:
148 |                 content = i.read()
149 |                 assert content == "foo\n"
150 |         with intercept_stdin("foo\n"):
151 |             with xopen(STDIN, "r", context_wrapper=True, compression=False) as i:
152 |                 content = i.read()
153 |                 assert content == "foo\n"
154 |         # Try stdout
155 |         with intercept_stdout() as i:
156 |             with xopen("-", "w", context_wrapper=True, compression=False) as o:
157 |                 o.write("foo")
158 |             assert i.getvalue() == "foo"
159 |         with intercept_stdout() as i:
160 |             with xopen(STDOUT, "w", context_wrapper=True, compression=False) as o:
161 |                 o.write("foo")
162 |             assert i.getvalue() == "foo"
163 |         # Try stderr
164 |         with intercept_stderr() as i:
165 |             with xopen("_", "w", context_wrapper=True, compression=False) as o:
166 |                 o.write("foo")
167 |             assert i.getvalue() == "foo"
168 |         with intercept_stderr() as i:
169 |             with xopen(STDERR, "w", context_wrapper=True, compression=False) as o:
170 |                 o.write("foo")
171 |             assert i.getvalue() == "foo"
172 | 
173 |         # Try binary
174 |         with intercept_stdout(True) as i:
175 |             with xopen(STDOUT, "wb", context_wrapper=True, compression=False) as o:
176 |                 o.write(b"foo")
177 |             assert i.getvalue() == b"foo"
178 | 
179 |         # Try compressed
180 |         with intercept_stdout(True) as i:
181 |             with xopen(STDOUT, "wt", context_wrapper=True, compression="gz") as o:
182 |                 assert cast(StdWrapper, o).compression == "gzip"
183 |                 o.write("foo")
184 |             assert gzip.decompress(i.getvalue()) == b"foo"
185 | 
186 |     def test_xopen_compressed_stream(self):
187 |         # Try autodetect compressed
188 |         with intercept_stdin(gzip.compress(b"foo\n"), is_bytes=True):
189 |             with xopen(STDIN, "rt", compression=True, context_wrapper=True) as i:
190 |                 assert cast(StdWrapper, i).compression == "gzip"
191 |                 assert i.read() == "foo\n"
192 | 
193 |     def test_xopen_file(self):
194 |         with self.assertRaises(IOError):
195 |             xopen("foobar", "r")
196 |         path = self.root.make_file(suffix=".gz")
197 |         with xopen(path, "rU", context_wrapper=True) as i:
198 |             assert "rt" == i.mode
199 |         with xopen(path, "w", compression=True, context_wrapper=True) as o:
200 |             assert cast(FileLikeWrapper, o).compression == "gzip"
201 |             o.write("foo")
202 |         with gzip.open(path, "rt") as i:
203 |             assert i.read() == "foo"
204 |         with self.assertRaises(ValueError):
205 |             with xopen(path, "rt", compression="bz2", validate=True):
206 |                 pass
207 |         existing_file = self.root.make_file(contents="abc")
208 |         with xopen(existing_file, "wt", overwrite=True) as out:
209 |             out.write("def")
210 |         with self.assertRaises(ValueError):
211 |             with xopen(existing_file, "wt", overwrite=False):
212 |                 pass
213 | 
214 |     def test_xopen_fileobj(self):
215 |         path = self.root.make_file(suffix=".gz")
216 |         with open(path, "wb") as out1:
217 |             with open_(out1, "wt") as out2:
218 |                 out2.write("foo")
219 |             assert not out1.closed
220 |         with gzip.open(path, "rt") as i:
221 |             assert "foo" == i.read()
222 | 
223 |     def test_xopen_mmap(self):
224 |         path = self.root.make_file(suffix=".gz")
225 |         with xopen(
226 |             path,
227 |             "w",
228 |             compression=True,
229 |             context_wrapper=True,
230 |             use_system=False,
231 |             memory_map=True,
232 |         ) as o:
233 |             # since we are opening an empty file, memory mapping will fail
234 |             assert not cast(FileWrapper, o).memory_mapped
235 |             o.write("foo")
236 |         with open(path, "rb") as inp:
237 |             with xopen(
238 |                 inp,
239 |                 "r",
240 |                 compression=True,
241 |                 context_wrapper=True,
242 |                 use_system=False,
243 |                 memory_map=True,
244 |             ) as i:
245 |                 assert cast(FileWrapper, i).memory_mapped
246 |                 assert i.read() == "foo"
247 | 
248 |     def test_xopen_buffer(self):
249 |         buf = BytesIO(b"foo")
250 |         f = xopen(cast(IOBase, buf), "rb")
251 |         assert b"foo" == f.read(3)
252 |         with self.assertRaises(ValueError):
253 |             xopen(cast(IOBase, buf), "wb")
254 | 
255 |         with open_(str) as buf:
256 |             buf.write("foo")
257 |         assert "foo" == buf.getvalue()
258 | 
259 |         with open_(bytes) as buf:
260 |             buf.write(b"foo")
261 |         assert b"foo" == buf.getvalue()
262 | 
263 |         # with compression
264 |         with self.assertRaises(ValueError):
265 |             with open_(bytes, compression=True):
266 |                 pass
267 |         with self.assertRaises(ValueError):
268 |             with open_(str, compression="gzip"):
269 |                 pass
270 | 
271 |         with open_(bytes, mode="wt", compression="gzip") as buf:
272 |             buf.write("foo")
273 |         assert b"foo" == gzip.decompress(buf.getvalue())
274 | 
275 |         # from string/bytes
276 |         with self.assertRaises(ValueError):
277 |             xopen("foo", "wt", file_type=FileType.BUFFER)
278 |         with self.assertRaises(ValueError):
279 |             xopen("foo", "rb", file_type=FileType.BUFFER)
280 |         with open_("foo", file_type=FileType.BUFFER, context_wrapper=True) as buf:
281 |             assert "foo" == buf.read()
282 | 
283 |         with self.assertRaises(ValueError):
284 |             xopen(b"foo", "rt", file_type=FileType.BUFFER)
285 |         with open_(b"foo", file_type=FileType.BUFFER, context_wrapper=True) as buf:
286 |             assert b"foo" == buf.read()
287 | 
288 |     @skipIf(no_internet(), "No internet connection")
289 |     def test_xopen_url(self):
290 |         badurl = "http://google.com/__badurl__"
291 |         with self.assertRaises(ValueError):
292 |             xopen(badurl)
293 |         url = "https://github.com/jdidion/xphyle/blob/master/tests/foo.gz?raw=True"
294 |         with self.assertRaises(ValueError):
295 |             xopen(url, "w")
296 |         with open_(url, "rt") as i:
297 |             assert "gzip" == i.compression
298 |             assert "foo\n" == i.read()
299 | 
300 |     def test_open_process(self):
301 |         with self.assertRaises(ValueError):
302 |             xopen("|cat", "wt", allow_subprocesses=False)
303 |         with open_("|cat", "wt") as p:
304 |             p.write("foo\n")
305 |         assert b"foo\n" == p.stdout
306 | 
307 |     def test_peek(self):
308 |         path = self.root.make_file()
309 |         with self.assertRaises(IOError):
310 |             with open_(path, "w") as o:
311 |                 o.peek()
312 |         path = self.root.make_file(contents="foo")
313 |         with open_(path, "rb") as i:
314 |             assert b"f" == i.peek(1)
315 |             assert b"foo" == next(i)
316 |         with open_(path, "rt") as i:
317 |             assert "f" == i.peek(1)
318 |             assert "foo" == next(i)
319 |         with intercept_stdin("foo"):
320 |             with open_(STDIN, validate=False, compression=False) as i:
321 |                 assert "f" == i.peek(1)
322 |                 assert "foo\n" == next(i)
323 | 
324 |     def test_seek(self):
325 |         path = self.root.make_file(contents="foo")
326 |         with open_(path, "rb") as i:
327 |             i.seek(1)
328 |             assert b"o" == i.peek(1)
329 | 
330 |     def test_truncate(self):
331 |         path = self.root.make_file(contents="foo")
332 |         with open_(path, "r+") as i:
333 |             i.truncate(1)
334 |             assert i.read() == "f"
335 | 
336 |     def test_event_listeners(self):
337 |         class MockEventListener(EventListener):
338 |             def __init__(self):
339 |                 super().__init__()
340 |                 self.executed = False
341 | 
342 |             def execute(self, file_wrapper: FileLikeWrapper, **kwargs):
343 |                 self.executed = True
344 | 
345 |         std_listener: MockEventListener = MockEventListener()
346 |         with intercept_stdin("foo"):
347 |             f = xopen(STDIN, context_wrapper=True)
348 |             try:
349 |                 cast(EventManager, f).register_listener(EventType.CLOSE, std_listener)
350 |             finally:
351 |                 f.close()
352 |             self.assertTrue(std_listener.executed)
353 | 
354 |         file_listener: MockEventListener = MockEventListener()
355 |         path = self.root.make_file()
356 |         f = xopen(path, "w", context_wrapper=True)
357 |         try:
358 |             cast(EventManager, f).register_listener(EventType.CLOSE, file_listener)
359 |         finally:
360 |             f.close()
361 |         self.assertTrue(file_listener.executed)
362 | 
363 |     def test_process(self):
364 |         with Process("cat", stdin=PIPE, stdout=PIPE, stderr=PIPE) as p:
365 |             self.assertIsNotNone(p.get_writer())
366 |             self.assertIsNotNone(p.get_reader("stdout"))
367 |             self.assertIsNotNone(p.get_reader("stderr"))
368 |             self.assertFalse(p.seekable())
369 |             assert (p.stdout, p.stderr) == p.get_readers()
370 |             p.write(b"foo\n")
371 |             p.flush()
372 |         assert b"foo\n" == p.stdout
373 |         self.assertFalse(p.stderr)
374 | 
375 |         # wrap pipes
376 |         with Process(("zcat", "-cd"), stdin=PIPE, stdout=PIPE) as p:
377 |             self.assertTrue(p.readable())
378 |             self.assertTrue(p.writable())
379 |             with self.assertRaises(ValueError):
380 |                 p.is_wrapped("foo")
381 |             with self.assertRaises(ValueError):
382 |                 p.wrap_pipes(foo=dict(mode="wt"))
383 |             p.wrap_pipes(stdin=dict(mode="wt", compression="gzip"))
384 |             self.assertTrue(p.is_wrapped("stdin"))
385 |             p.write("foo")
386 |         assert b"foo" == p.stdout
387 | 
388 |     def test_process_with_files(self):
389 |         inp = self.root.make_file(suffix=".gz")
390 |         with gzip.open(inp, "wt") as o:
391 |             o.write("foo")
392 |         out = self.root.make_file(suffix=".gz")
393 |         with self.assertRaises(OSError):
394 |             with gzip.open(inp, "rt") as o, open(out, "wt") as i:
395 |                 with Process("cat", stdin=o, stdout=i) as p:
396 |                     p.wrap_pipes(stdin=dict(mode="wt"))
397 |         with gzip.open(out, "rt") as i:
398 |             assert "foo" == i.read()
399 |         with popen(("echo", "abc\n123"), stdout=PIPE) as p:
400 |             self.assertListEqual([b"abc\n", b"123\n"], list(line for line in p))
401 |         with popen(("echo", "abc\n123"), stdout=PIPE) as p:
402 |             assert b"abc\n" == next(p)
403 |             assert b"123\n" == next(p)
404 |         with popen(("echo", "abc\n123"), stdout=(PIPE, "rt")) as p:
405 |             assert "abc\n" == next(p)
406 |             assert "123\n" == next(p)
407 | 
408 |     def test_process_invalid(self):
409 |         with self.assertRaises(ValueError):
410 |             xopen("|cat", "wt", compression=True)
411 | 
412 |     def test_process_read(self):
413 |         with Process(("echo", "foo"), stdout=PIPE) as p:
414 |             assert b"foo\n" == p.read()
415 |         with open_("|echo foo", "rt") as p:
416 |             assert "foo\n" == p.read()
417 | 
418 |     def test_process_communicate(self):
419 |         with Process("cat", stdin=PIPE, stdout=PIPE, stderr=PIPE) as p:
420 |             self.assertTupleEqual((b"foo\n", b""), p.communicate(b"foo\n"))
421 | 
422 |     def test_process_del(self):
423 |         class MockProcessListener(EventListener):
424 |             def __init__(self):
425 |                 super().__init__()
426 |                 self.executed = False
427 | 
428 |             def execute(self, process: Process, **kwargs) -> None:
429 |                 self.executed = True
430 | 
431 |         listener: MockProcessListener = MockProcessListener()
432 |         p = Process("cat", stdin=PIPE, stdout=PIPE)
433 |         p.register_listener(EventType.CLOSE, listener)
434 |         del p
435 |         self.assertTrue(listener.executed)
436 | 
437 |     def test_process_close(self):
438 |         p = Process("cat", stdin=PIPE, stdout=PIPE)
439 |         self.assertFalse(p.closed)
440 |         p.close()
441 |         self.assertTrue(p.closed)
442 |         self.assertIsNone(p.close1(raise_on_error=False))
443 |         with self.assertRaises(IOError):
444 |             p.close1(raise_on_error=True)
445 | 
446 |     def test_process_close_hung(self):
447 |         p = Process(("sleep", "5"))
448 |         with self.assertRaises(Exception):
449 |             p.close1(timeout=1, terminate=False)
450 |         p = Process(("sleep", "5"))
451 |         p.close1(timeout=1, terminate=True)
452 |         self.assertTrue(p.closed)
453 | 
454 |     def test_process_error(self):
455 |         p = popen(("exit", "2"), shell=True)
456 |         with self.assertRaises(IOError):
457 |             p.close1(raise_on_error=True)
458 |         self.assertFalse(p.returncode == 0)
459 | 


--------------------------------------------------------------------------------
/xphyle/progress.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """Common interface to enable operations to be wrapped in a progress bar.
  3 | By default, pokrok is used for python-level operations and pv for system-level
  4 | operations.
  5 | """
  6 | from os import PathLike
  7 | import shlex
  8 | from subprocess import Popen, PIPE
  9 | from typing import Iterable, Union, Callable, Tuple, Sequence, Optional
 10 | from pokrok import progress_iter
 11 | from xphyle.paths import EXECUTABLE_CACHE, check_path
 12 | from xphyle.types import PathType, Permission, FileLike
 13 | 
 14 | 
 15 | # Python-level progress wrapper
 16 | 
 17 | 
 18 | class IterableProgress:
 19 |     """Manages the python-level wrapper.
 20 | 
 21 |     Args:
 22 |         default_wrapper: Callable (typically a class) that returns a Callable
 23 |             with the signature of ``wrap``.
 24 |     """
 25 | 
 26 |     def __init__(self, default_wrapper: Callable = progress_iter) -> None:
 27 |         self.enabled = False
 28 |         self.wrapper: Optional[Callable[..., Iterable]] = None
 29 |         self.default_wrapper = default_wrapper
 30 | 
 31 |     def update(
 32 |         self,
 33 |         enable: Optional[bool] = None,
 34 |         wrapper: Optional[Callable[..., Iterable]] = None,
 35 |     ) -> None:
 36 |         """Enable the python progress bar and/or set a new wrapper.
 37 | 
 38 |         Args:
 39 |             enable: Whether to enable use of a progress wrapper.
 40 |             wrapper: A callable that takes three arguments, itr, desc, size,
 41 |                 and returns an iterable.
 42 |         """
 43 |         if enable is not None:
 44 |             self.enabled = enable
 45 | 
 46 |         if wrapper:
 47 |             self.wrapper = wrapper
 48 |         elif self.enabled and not self.wrapper:
 49 |             try:
 50 |                 self.wrapper = self.default_wrapper()
 51 |             except ImportError as err:
 52 |                 raise ValueError(
 53 |                     "Could not create default python wrapper; valid wrapper "
 54 |                     "must be specified"
 55 |                 ) from err
 56 | 
 57 |     def wrap(
 58 |         self, itr: Iterable, desc: Optional[str] = None, size: Optional[int] = None
 59 |     ) -> Iterable:
 60 |         """Wrap an iterable in a progress bar.
 61 | 
 62 |         Args:
 63 |             itr: The Iterable to wrap.
 64 |             desc: Optional description.
 65 |             size: Optional max value of the progress bar.
 66 | 
 67 |         Returns:
 68 |             The wrapped Iterable.
 69 |         """
 70 |         if self.enabled:
 71 |             return self.wrapper(itr, desc=desc, size=size)
 72 |         else:
 73 |             return itr
 74 | 
 75 | 
 76 | ITERABLE_PROGRESS = IterableProgress()
 77 | 
 78 | 
 79 | # System-level progress wrapper
 80 | 
 81 | 
 82 | def system_progress_command(
 83 |     exe: Union[str, PathLike], *args, require: bool = False
 84 | ) -> Tuple:  # pragma: no-cover
 85 |     """Resolve a system-level progress bar command.
 86 | 
 87 |     Args:
 88 |         exe: The executable name or absolute path.
 89 |         args: A list of additional command line arguments.
 90 |         require: Whether to raise an exception if the command does not exist.
 91 | 
 92 |     Returns:
 93 |         A tuple of (executable_path, *args).
 94 |     """
 95 |     executable_path = EXECUTABLE_CACHE.get_path(exe)
 96 |     if executable_path is not None:
 97 |         check_path(executable_path, PathType.FILE, Permission.EXECUTE)
 98 |     elif require:
 99 |         raise IOError("pv is not available on the path")
100 |     return (executable_path,) + tuple(args)
101 | 
102 | 
103 | def pv_command(require: bool = False) -> Tuple:  # pragma: no-cover
104 |     """Default system wrapper command.
105 |     """
106 |     return system_progress_command("pv", "-pre", require=require)
107 | 
108 | 
109 | class ProcessProgress:
110 |     """Manage the system-level progress wrapper.
111 | 
112 |     Args:
113 |         default_wrapper: Callable that returns the argument list for the
114 |             default wrapper command.
115 |     """
116 | 
117 |     def __init__(self, default_wrapper: Callable = pv_command) -> None:
118 |         self.enabled = False
119 |         self.wrapper: Optional[Sequence[str]] = None
120 |         self.default_wrapper = default_wrapper
121 | 
122 |     def update(
123 |         self,
124 |         enable: Optional[bool] = None,
125 |         wrapper: Optional[Union[str, Sequence[str]]] = None,
126 |     ) -> None:
127 |         """Enable the python system progress bar and/or set the wrapper
128 |         command.
129 | 
130 |         Args:
131 |             enable: Whether to enable use of a progress wrapper.
132 |             wrapper: A command string or sequence of command arguments.
133 |         """
134 |         if enable is not None:
135 |             self.enabled = enable
136 | 
137 |         if wrapper:
138 |             if isinstance(wrapper, str):
139 |                 self.wrapper = tuple(shlex.split(wrapper))
140 |             else:
141 |                 self.wrapper = wrapper
142 |         elif self.enabled and not self.wrapper:
143 |             try:
144 |                 self.wrapper = self.default_wrapper()
145 |             except IOError as err:
146 |                 raise ValueError(
147 |                     "Could not create default system wrapper; valid wrapper "
148 |                     "must be specified"
149 |                 ) from err
150 | 
151 |     def wrap(
152 |         self, cmd: Sequence[str], stdin: FileLike, stdout: FileLike, **kwargs
153 |     ) -> Popen:  # pragma: no-cover
154 |         """Pipe a system command through a progress bar program.
155 | 
156 |         For the process to be wrapped, one of ``stdin``, ``stdout`` must not be
157 |         None.
158 | 
159 |         Args:
160 |             cmd: Command arguments.
161 |             stdin: File-like object to read into the process stdin, or None to
162 |                 use `PIPE`.
163 |             stdout: File-like object to write from the process stdout, or None
164 |                 to use `PIPE`.
165 |             kwargs: Additional arguments to pass to Popen.
166 | 
167 |         Returns:
168 |             Open process.
169 |         """
170 |         if not self.enabled or (stdin is None and stdout is None):
171 |             return Popen(cmd, stdin=stdin, stdout=stdout, **kwargs)
172 | 
173 |         if stdin is not None:
174 |             proc1 = Popen(self.wrapper, stdin=stdin, stdout=PIPE)
175 |             proc2 = Popen(cmd, stdin=proc1.stdout, stdout=stdout)
176 |         else:
177 |             proc1 = Popen(cmd, stdout=PIPE)
178 |             proc2 = Popen(self.wrapper, stdin=proc1.stdout, stdout=stdout)
179 |         proc1.stdout.close()
180 |         return proc2
181 | 
182 | 
183 | PROCESS_PROGRESS = ProcessProgress()
184 | 
185 | 
186 | # Misc functions
187 | 
188 | 
189 | def iter_file_chunked(fileobj: FileLike, chunksize: int = 1024) -> Iterable:
190 |     """Returns a progress bar-wrapped iterator over a file that reads
191 |     fixed-size chunks.
192 | 
193 |     Args:
194 |         fileobj: A file-like object.
195 |         chunksize: The maximum size in bytes of each chunk.
196 | 
197 |     Returns:
198 |         An iterable over the chunks of the file.
199 |     """
200 | 
201 |     def _itr():
202 |         while True:
203 |             data = fileobj.read(chunksize)
204 |             if data:
205 |                 yield data
206 |             else:
207 |                 break
208 | 
209 |     name = None
210 |     if hasattr(fileobj, "name"):
211 |         name = getattr(fileobj, "name")
212 | 
213 |     return ITERABLE_PROGRESS.wrap(_itr(), desc=name)
214 | 


--------------------------------------------------------------------------------
/xphyle/types.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """Type checking support. Defines commonly used types.
  3 | """
  4 | # pylint: disable=wildcard-import, unused-wildcard-import, import-error, invalid-name
  5 | from abc import ABCMeta, abstractmethod
  6 | import collections
  7 | from enum import Enum
  8 | from io import IOBase, UnsupportedOperation
  9 | import os
 10 | from pathlib import PurePath
 11 | import stat
 12 | from typing import (
 13 |     Dict,
 14 |     Sequence,
 15 |     List,
 16 |     Tuple,
 17 |     Set,
 18 |     Iterator,
 19 |     Iterable,
 20 |     Text,
 21 |     Union,
 22 |     Any,
 23 |     IO,
 24 |     Pattern,
 25 |     TypeVar,
 26 |     cast,
 27 | )
 28 | 
 29 | 
 30 | class ModeAccess(Enum):
 31 |     """Enumeration of the access modes allowed when opening files.
 32 | 
 33 |     See Also:
 34 |         https://docs.python.org/3/library/functions.html#open
 35 |     """
 36 | 
 37 |     READ = "r"
 38 |     """Read from file."""
 39 |     WRITE = "w"
 40 |     """Write to file, overwriting any existing file."""
 41 |     READWRITE = "r+"
 42 |     """Open file for reading and writing."""
 43 |     TRUNCATE_READWRITE = "w+"
 44 |     """Open file for reading and writing, first truncating the file to 0."""
 45 |     APPEND = "a"
 46 |     """Create file if it doesn't exist, else append to existing file."""
 47 |     EXCLUSIVE = "x"
 48 |     """Exclusive write (fails if file already exists)."""
 49 | 
 50 |     @property
 51 |     def readable(self):
 52 |         """Whether this is readable mode."""
 53 |         return any(char in self.value for char in ("r", "+"))
 54 | 
 55 |     @property
 56 |     def writable(self):
 57 |         """Whether this is writable mode."""
 58 |         return any(char in self.value for char in ("w", "+", "a", "x"))
 59 | 
 60 |     @property
 61 |     def readwritable(self) -> bool:
 62 |         return "+" in self.value
 63 | 
 64 | 
 65 | ModeAccessArg = Union[str, ModeAccess]
 66 | 
 67 | 
 68 | class ModeCoding(Enum):
 69 |     """Enumeration of file open modes (text or binary).
 70 | 
 71 |     See Also:
 72 |         https://docs.python.org/3/library/functions.html#open
 73 |     """
 74 | 
 75 |     TEXT = "t"
 76 |     """Text mode."""
 77 |     BINARY = "b"
 78 |     """Binary mode."""
 79 | 
 80 | 
 81 | ModeCodingArg = Union[str, ModeCoding]
 82 | 
 83 | 
 84 | FILE_MODE_CACHE: Dict[Tuple[str, ModeAccessArg, ModeCodingArg], "FileMode"] = {}
 85 | """Cache of FileMode objects."""
 86 | 
 87 | 
 88 | class FileMode(object):
 89 |     """Definition of a file mode as composed of a :class:`ModeAccess` and a
 90 |     :class:`ModeCoding`.
 91 | 
 92 |     Args:
 93 |         mode: Specify the mode as a string; mutually exclusive with `access`
 94 |             and `coding`.
 95 |         access: The file access mode (default: :attribute:`ModeAccess.READ`).
 96 |         coding: The file open mode (default: :attribute:`ModeCoding.TEXT`).
 97 |     """
 98 | 
 99 |     def __new__(
100 |         cls,
101 |         mode: str = None,
102 |         access: ModeAccessArg = None,
103 |         coding: ModeCodingArg = None,
104 |     ) -> "FileMode":
105 |         key = (mode, access, coding)
106 |         if key not in FILE_MODE_CACHE:
107 |             FILE_MODE_CACHE[key] = super().__new__(cls)
108 |         return FILE_MODE_CACHE[key]
109 | 
110 |     def __init__(
111 |         self,
112 |         mode: str = None,
113 |         access: ModeAccessArg = None,
114 |         coding: ModeCodingArg = None,
115 |     ) -> None:
116 |         if mode:
117 |             access_val = None
118 |             access_char = None
119 |             update = False
120 |             coding_val = None
121 |             for c in mode:
122 |                 if c in "rwax" and access_char is None:
123 |                     access_char = c
124 |                 elif c == "+":
125 |                     update = True
126 |                 elif c in "bt" and coding_val is None:
127 |                     coding_val = ModeCoding(c)
128 |                 elif c == "U" and coding_val is None:
129 |                     coding_val = ModeCoding.TEXT
130 |                 else:
131 |                     raise ValueError(f"Invalid characters in mode string: {mode}")
132 | 
133 |             if access_char is not None:
134 |                 if update:
135 |                     access_val = ModeAccess(access_char + "+")
136 |                 else:
137 |                     access_val = ModeAccess(access_char)
138 |         else:
139 |             if isinstance(access, str):
140 |                 access_val = ModeAccess(access)
141 |             else:
142 |                 access_val = cast(ModeAccess, access)
143 |             if isinstance(coding, str):
144 |                 coding_val = ModeCoding(coding)
145 |             else:
146 |                 coding_val = cast(ModeCoding, coding)
147 | 
148 |         self.access = access_val or ModeAccess.READ
149 |         self.coding = coding_val or ModeCoding.TEXT
150 |         self.value = "{}{}".format(self.access.value, self.coding.value)
151 | 
152 |         if mode:
153 |             diff = set(mode) - set(str(self) + "U")
154 |             if diff:
155 |                 raise ValueError(
156 |                     "Invalid characters in mode string: {}".format("".join(diff))
157 |                 )
158 | 
159 |     @property
160 |     def readable(self):
161 |         """Whether this is readable mode."""
162 |         return self.access.readable
163 | 
164 |     @property
165 |     def writable(self):
166 |         """Whether this is writable mode."""
167 |         return self.access.writable
168 | 
169 |     @property
170 |     def readwritable(self):
171 |         """Whether this is read+write mode."""
172 |         return self.access.readwritable
173 | 
174 |     @property
175 |     def binary(self):
176 |         """Whether this is binary mode."""
177 |         return self.coding == ModeCoding.BINARY
178 | 
179 |     def as_binary(self):
180 |         """Converts this mode to binary"""
181 |         if self.coding == ModeCoding.BINARY:
182 |             return self
183 |         else:
184 |             return FileMode(access=self.access, coding=ModeCoding.BINARY)
185 | 
186 |     @property
187 |     def text(self):
188 |         """Whether this is text mode."""
189 |         return self.coding == ModeCoding.TEXT
190 | 
191 |     def as_text(self):
192 |         """Converts this mode to text"""
193 |         if self.coding == ModeCoding.TEXT:
194 |             return self
195 |         else:
196 |             return FileMode(access=self.access, coding=ModeCoding.TEXT)
197 | 
198 |     def __contains__(self, value: Union[str, ModeAccess, ModeCoding]) -> bool:
199 |         if isinstance(value, ModeAccess):
200 |             return self.access == value
201 |         elif isinstance(value, ModeCoding):
202 |             return self.coding == value
203 |         else:
204 |             for v in cast(str, value):
205 |                 if v not in self.access.value and v not in self.coding.value:
206 |                     return False
207 |             return True
208 | 
209 |     def __eq__(self, other):
210 |         return (
211 |             isinstance(other, FileMode)
212 |             and self.access == other.access
213 |             and self.coding == other.coding
214 |         )
215 | 
216 |     def __repr__(self):
217 |         return self.value
218 | 
219 | 
220 | OS_ALIASES = dict(r=os.R_OK, w=os.W_OK, x=os.X_OK, t=0)
221 | """Dictionary mapping mode characters to :module:`os` flags"""
222 | 
223 | 
224 | STAT_ALIASES = dict(
225 |     r=stat.S_IREAD,
226 |     w=stat.S_IWRITE,
227 |     x=stat.S_IEXEC,
228 |     t=stat.S_ISVTX,
229 |     f=stat.S_IFREG,
230 |     d=stat.S_IFDIR,
231 |     fifo=stat.S_IFIFO,
232 | )
233 | """Dictionary mapping mode characters to :module:`stat` flags"""
234 | 
235 | 
236 | class Permission(Enum):
237 |     """Enumeration of file permission flags ('r', 'w', 'x', 't'). Note that
238 |     this isn't a full enumeration of all flags, just those pertaining to the
239 |     permissions of the current user.
240 |     """
241 | 
242 |     READ = "r"
243 |     """Read; alias of :attribute:`stat.S_IREAD` and :attribute:`os.R_OK`."""
244 |     WRITE = "w"
245 |     """Write; alias of :attribute:`stat.S_IWRITE and :attribute:`os.W_OK``."""
246 |     EXECUTE = "x"
247 |     """Execute; alias of :attribute:`stat.S_IEXEC` and :attribute:`os.X_OK`."""
248 |     STICKY = "t"
249 |     """The sticky bit, alias of :attribute:`stat.S_ISVTX`."""
250 | 
251 |     @property
252 |     def stat_flag(self):
253 |         """Returns the :module:`stat` flag."""
254 |         return STAT_ALIASES[self.value]
255 | 
256 |     @property
257 |     def os_flag(self):
258 |         """Returns the :module:`os` flag."""
259 |         return OS_ALIASES[self.value]
260 | 
261 | 
262 | PermissionArg = Union[str, int, Permission, ModeAccess]
263 | """Types from which an Permission can be inferred."""
264 | 
265 | 
266 | PERMISSION_SET_CACHE: Dict[
267 |     Union[PermissionArg, Iterable[PermissionArg]], "PermissionSet"
268 | ] = {}
269 | 
270 | 
271 | class PermissionSet(object):
272 |     """A set of :class:`Permission`s.
273 | 
274 |     Args:
275 |         flags: Sequence of flags as string ('r', 'w', 'x'), int,
276 |             :class:`ModeAccess`, or :class:`Permission`.
277 |     """
278 | 
279 |     def __new__(
280 |         cls, flags: Union[PermissionArg, Iterable[PermissionArg]] = None
281 |     ) -> "PermissionSet":
282 |         if flags not in PERMISSION_SET_CACHE:
283 |             PERMISSION_SET_CACHE[flags] = super().__new__(cls)
284 |         return PERMISSION_SET_CACHE[flags]
285 | 
286 |     def __init__(
287 |         self, flags: Union[PermissionArg, Iterable[PermissionArg]] = None
288 |     ) -> None:
289 |         self.flags: Set[Permission] = set()
290 |         if flags:
291 |             if isinstance(flags, str) or is_iterable(flags):
292 |                 self.update(cast(Iterable[PermissionArg], flags))
293 |             else:
294 |                 self.add(cast(Union[int, Permission, ModeAccess], flags))
295 | 
296 |     def add(self, flag: PermissionArg) -> None:
297 |         """Add a permission.
298 | 
299 |         Args:
300 |             flag: Permission to add.
301 |         """
302 |         if isinstance(flag, str):
303 |             self.flags.add(Permission(flag))
304 |         elif isinstance(flag, int):
305 |             for f in Permission:
306 |                 if (f.stat_flag & flag) or (f.os_flag & flag):
307 |                     self.flags.add(f)
308 |         elif isinstance(flag, ModeAccess):
309 |             if flag.readable:
310 |                 self.add(Permission.READ)
311 |             if flag.writable:
312 |                 self.add(Permission.WRITE)
313 |         else:
314 |             self.flags.add(flag)
315 | 
316 |     def update(self, flags: Union["PermissionSet", Iterable[PermissionArg]]) -> None:
317 |         """Add all flags in `flags` to this `PermissionSet`.
318 | 
319 |         Args:
320 |             flags: Flags to add.
321 |         """
322 |         for flag in flags:
323 |             self.add(flag)
324 | 
325 |     @property
326 |     def stat_flags(self) -> int:
327 |         """Returns the binary OR of the :module:`stat` flags corresponding to
328 |         the flags in this `PermissionSet`.
329 |         """
330 |         flags = 0
331 |         for f in self.flags:
332 |             flags |= f.stat_flag
333 |         return flags
334 | 
335 |     @property
336 |     def os_flags(self) -> int:
337 |         """Returns the binary OR of the :module:`os` flags corresponding to
338 |         the flags in this `PermissionSet`.
339 |         """
340 |         flags = 0
341 |         for f in self.flags:
342 |             flags |= f.os_flag
343 |         return flags
344 | 
345 |     def __iter__(self) -> Iterable[Permission]:
346 |         """Iterate over flags in the same order they appear in
347 |         :class:`Permission`.
348 |         """
349 |         for f in Permission:
350 |             if f in self.flags:
351 |                 yield f
352 | 
353 |     def __eq__(self, other):
354 |         return isinstance(other, PermissionSet) and self.flags == other.flags
355 | 
356 |     def __contains__(self, access_flag: PermissionArg) -> bool:
357 |         if isinstance(access_flag, str):
358 |             access_flag = Permission(access_flag)
359 |         return access_flag in self.flags
360 | 
361 |     def __repr__(self) -> str:
362 |         return "".join(f.value for f in Permission if f in self.flags)
363 | 
364 | 
365 | class FileType(Enum):
366 |     """Enumeration of types of files that can be opened by
367 |     :method:`xphyle.xopen`.
368 |     """
369 | 
370 |     STDIO = "std"
371 |     """One of stdin/stdout/stderr."""
372 |     LOCAL = "local"
373 |     """A file on the local computer."""
374 |     URL = "url"
375 |     """A URL; schema must be recognized by :module:`urllib`."""
376 |     PROCESS = "ps"
377 |     """A system command to be executed in a subprocess."""
378 |     FILELIKE = "filelike"
379 |     """An object that implements the methods in
380 |     :class:`xphyle.types.FileLikeInterface`."""
381 |     BUFFER = "buffer"
382 |     """A StringIO or BytesIO."""
383 | 
384 | 
385 | class EventType(Enum):
386 |     """Enumeration of event types that can be registered on an
387 |     :class:`EventManager`.
388 |     """
389 | 
390 |     CLOSE = "close"
391 | 
392 | 
393 | AnyChar = Union[bytes, Text]
394 | """Similar to AnyStr, but specifies that strings must be unicode."""
395 | 
396 | 
397 | class FileLikeInterface(IO, Iterable[AnyChar], metaclass=ABCMeta):
398 |     """This is a marker interface for classes that implement methods (listed
399 |     below) to make them behave like python file objects. Provides a subset of
400 |     methods from typing.io.IO, plus next() and __iter__.
401 | 
402 |     See Also:
403 |         https://docs.python.org/3/tutorial/inputoutput.html#methods-of-file-objects
404 |     """
405 | 
406 |     @abstractmethod
407 |     def next(self) -> AnyChar:
408 |         pass
409 | 
410 | 
411 | # noinspection PyTypeChecker
412 | class FileLikeBase(FileLikeInterface):
413 |     def flush(self) -> None:
414 |         pass
415 | 
416 |     def close(self) -> None:
417 |         pass
418 | 
419 |     def readable(self) -> bool:
420 |         return False
421 | 
422 |     def read(self, n: int = -1) -> AnyChar:
423 |         raise UnsupportedOperation()
424 | 
425 |     def readline(self, hint: int = -1) -> AnyChar:
426 |         raise UnsupportedOperation()
427 | 
428 |     def readlines(self, sizehint: int = -1) -> List[AnyChar]:
429 |         raise UnsupportedOperation()
430 | 
431 |     def writable(self) -> bool:
432 |         return False
433 | 
434 |     def write(self, string: AnyChar) -> int:
435 |         raise UnsupportedOperation()
436 | 
437 |     def writelines(self, lines: Iterable[AnyChar]) -> None:
438 |         raise UnsupportedOperation()
439 | 
440 |     # noinspection PyTypeChecker
441 |     def seek(self, offset, whence: int = 0) -> int:
442 |         if self.seekable():
443 |             raise UnsupportedOperation()
444 |         else:
445 |             raise ValueError("Cannot call seek on a non-seekable object")
446 | 
447 |     def seekable(self) -> bool:
448 |         return False
449 | 
450 |     def tell(self) -> int:
451 |         if self.seekable():
452 |             raise UnsupportedOperation()
453 |         else:
454 |             raise ValueError("Cannot call tell on a non-seekable object")
455 | 
456 |     def isatty(self) -> bool:
457 |         return False
458 | 
459 |     def fileno(self) -> int:
460 |         return -1
461 | 
462 |     def truncate(self, size: int = None) -> int:
463 |         if self.seekable():
464 |             raise UnsupportedOperation()
465 |         else:
466 |             raise ValueError("Cannot call truncate on a non-seekable object")
467 | 
468 |     def __enter__(self) -> Any:
469 |         return self
470 | 
471 |     def __exit__(self, exception_type, exception_value, traceback) -> bool:
472 |         self.close()
473 |         return False
474 | 
475 |     def __iter__(self) -> Iterator[AnyChar]:
476 |         raise UnsupportedOperation()
477 | 
478 |     def __next__(self) -> AnyChar:
479 |         raise UnsupportedOperation()
480 | 
481 |     def next(self) -> AnyChar:
482 |         return self.__next__()
483 | 
484 | 
485 | class PathType(Enum):
486 |     """Enumeration of supported path types (file, directory, FIFO)."""
487 | 
488 |     FILE = "f"
489 |     """Path represents a file."""
490 |     DIR = "d"
491 |     """Path represents a directory."""
492 |     FIFO = "|"
493 |     """Path represents a FIFO."""
494 | 
495 | 
496 | FileLike = Union[IO, IOBase, FileLikeInterface]
497 | """File-like object; either a subclass of :class:`io.IOBase` or a
498 | :class:`FileLikeInterface`.
499 | """
500 | 
501 | 
502 | PathLike = Union[os.PathLike, PurePath]
503 | """PurePath is only included because PathLike is not statically assigned as a 
504 | superclass of PurePath in python 3.6."""
505 | 
506 | 
507 | PathOrFile = Union[PathLike, PurePath, FileLike]
508 | """Either a PathLike or FileLike."""
509 | 
510 | 
511 | Range = Tuple[int, int]
512 | """Two-integer tuple representing a range."""
513 | 
514 | 
515 | Regexp = Union[str, Pattern]
516 | """A regular expression string or compiled :class:`re`."""
517 | 
518 | 
519 | CharMode = TypeVar("CharMode", bytes, Text)
520 | """Type representing how data should be handled when read from a file.
521 | If the value is bytes (:attribute:`BinMode`), raw bytes are returned. If the
522 | value is a string (:attribute:`TextMode`), bytes are decoded using the system
523 | default encoding.
524 | """
525 | 
526 | 
527 | BinMode = b"b"
528 | """Value representing binary mode to use for an argument of type CharMode."""
529 | 
530 | 
531 | TextMode = "t"
532 | """Value representing text mode to use for an argument of type CharMode."""
533 | 
534 | 
535 | # Aliases for commonly used compound argument types
536 | 
537 | 
538 | PermissionSetArg = Union[PermissionSet, Sequence[PermissionArg]]
539 | """Sequence of stat flags (string, int, or :class:`Permission`)."""
540 | 
541 | 
542 | ModeArg = Union[str, FileMode]
543 | """A file mode; string, or :class:`FileMode`."""
544 | 
545 | 
546 | PathTypeArg = Union[str, PathType]
547 | """A path type string or :class:`PathType`."""
548 | 
549 | 
550 | EventTypeArg = Union[str, EventType]
551 | """An event type name or :class:`EventType`."""
552 | 
553 | 
554 | CompressionArg = Union[bool, str]
555 | """Compression can be True, False, or the name of a compression format."""
556 | 
557 | 
558 | def is_iterable(obj: Any, include_str: bool = False) -> bool:
559 |     """Test whether an object is iterable.
560 | 
561 |     Args:
562 |         obj: The object to test.
563 |         include_str: Whether a string should be considered an iterable
564 |             (default: False).
565 | 
566 |     Returns:
567 |         True if the object is iterable.
568 |     """
569 |     return isinstance(obj, collections.abc.Iterable) and (
570 |         include_str or not isinstance(obj, str)
571 |     )
572 | 


--------------------------------------------------------------------------------
/xphyle/urls.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """Methods for handling URLs.
  3 | """
  4 | import copy
  5 | import io
  6 | import re
  7 | from typing import Optional
  8 | from http.client import HTTPResponse
  9 | from urllib.error import URLError
 10 | from urllib.parse import ParseResult, urlparse
 11 | from urllib.request import urlopen, Request
 12 | from xphyle.types import Range, Any, cast
 13 | 
 14 | 
 15 | # URLs
 16 | 
 17 | 
 18 | def parse_url(url_string: str) -> Optional[ParseResult]:
 19 |     """Attempts to parse a URL.
 20 |     
 21 |     Args:
 22 |         url_string: String to test.
 23 |     
 24 |     Returns:
 25 |         A 6-tuple, as described in ``urlparse``, or  None if the URL cannot be
 26 |         parsed, or if it lacks a minimum set of attributes. Note that a URL may
 27 |         be valid and still not be openable (for example, if the scheme is
 28 |         recognized by urlopen).
 29 |     """
 30 |     url = urlparse(url_string)
 31 |     if not (url.scheme and (url.netloc or url.path)):
 32 |         return None
 33 |     return url
 34 | 
 35 | 
 36 | def open_url(
 37 |     url_string: str,
 38 |     byte_range: Optional[Range] = None,
 39 |     headers: Optional[dict] = None,
 40 |     **kwargs
 41 | ) -> Any:
 42 |     """Open a URL for reading.
 43 |     
 44 |     Args:
 45 |         url_string: A valid url string.
 46 |         byte_range: Range of bytes to read (start, stop).
 47 |         headers: dict of request headers.
 48 |         kwargs: Additional arguments to pass to `urlopen`.
 49 |     
 50 |     Returns:
 51 |         A response object, or None if the URL is not valid or cannot be opened.
 52 |     
 53 |     Notes:
 54 |         The return value of `urlopen` is only guaranteed to have
 55 |         certain methods, not to be of any specific type, thus the `Any`
 56 |         return type. Furthermore, the response may be wrapped in an
 57 |         `io.BufferedReader` to ensure that a `peek` method is available.
 58 |     """
 59 |     headers = copy.copy(headers) if headers else {}
 60 |     if byte_range:
 61 |         headers["Range"] = "bytes={}-{}".format(*byte_range)
 62 |     try:
 63 |         request = Request(url_string, headers=headers, **kwargs)
 64 |         response = urlopen(request)
 65 |         # HTTPResponse didn't have 'peek' until 3.5
 66 |         if response and not hasattr(response, "peek"):
 67 |             # ISSUE: HTTPResponse inherits BufferedIOBase (rather than
 68 |             # RawIOBase), but for this purpose it's completely compatible
 69 |             # with BufferedReader. Not sure how to make it type-compatible.
 70 |             return io.BufferedReader(cast(HTTPResponse, response))
 71 |         else:
 72 |             return response
 73 |     except (URLError, ValueError):
 74 |         return None
 75 | 
 76 | 
 77 | def get_url_mime_type(response: Any) -> Optional[str]:
 78 |     """If a response object has HTTP-like headers, extract the MIME type
 79 |     from the Content-Type header.
 80 |     
 81 |     Args:
 82 |         response: A response object returned by `open_url`.
 83 |     
 84 |     Returns:
 85 |         The content type, or None if the response lacks a 'Content-Type' header.
 86 |     """
 87 |     if hasattr(response, "headers") and "Content-Type" in response.headers:
 88 |         return response.headers["Content-Type"]
 89 |     return None
 90 | 
 91 | 
 92 | CONTENT_DISPOSITION_RE = re.compile("filename=([^;]+)")
 93 | 
 94 | 
 95 | def get_url_file_name(
 96 |     response: Any, parsed_url: Optional[ParseResult] = None
 97 | ) -> Optional[str]:
 98 |     """If a response object has HTTP-like headers, extract the filename
 99 |     from the Content-Disposition header.
100 |     
101 |     Args:
102 |         response: A response object returned by `open_url`.
103 |         parsed_url: The result of calling `parse_url`.
104 |     
105 |     Returns:
106 |         The file name, or None if it could not be determined.
107 |     """
108 |     if hasattr(response, "headers") and "Content-Disposition" in response.headers:
109 |         match = CONTENT_DISPOSITION_RE.search(response.headers["Content-Disposition"])
110 |         if match:
111 |             return match.group(1)
112 |     if not parsed_url:
113 |         parsed_url = parse_url(response.geturl())
114 |     if parsed_url and hasattr(parsed_url, "path"):
115 |         return parsed_url.path
116 |     return None
117 | 


--------------------------------------------------------------------------------