├── .cruft.json
├── .github
│   ├── CODE_OF_CONDUCT.md
│   ├── CONTRIBUTING.md
│   ├── FUNDING.yml
│   ├── PULL_REQUEST_TEMPLATE.md
│   ├── codecov.yml
│   └── workflows
│       ├── cruft.yml
│       └── tests.yml
├── .gitignore
├── .readthedocs.yml
├── LICENSE
├── README.md
├── docs
│   ├── Makefile
│   └── source
│       ├── cli.rst
│       ├── conf.py
│       ├── index.rst
│       ├── installation.rst
│       ├── usage.rst
│       └── utils.rst
├── pyproject.toml
├── src
│   └── pystow
│       ├── __init__.py
│       ├── __main__.py
│       ├── api.py
│       ├── cache.py
│       ├── cli.py
│       ├── config_api.py
│       ├── constants.py
│       ├── impl.py
│       ├── py.typed
│       ├── utils.py
│       └── version.py
├── tests
│   ├── resources
│   │   ├── test.txt
│   │   ├── test.txt.md5
│   │   ├── test_1.csv
│   │   ├── test_1.json
│   │   ├── test_1.pkl
│   │   ├── test_1.tsv
│   │   ├── test_verbose.txt.md5
│   │   └── test_wrong.txt.md5
│   ├── test_api.py
│   ├── test_caching.py
│   ├── test_config.py
│   ├── test_module.py
│   └── test_utils.py
└── tox.ini
/.cruft.json: -------------------------------------------------------------------------------- 1 | { 2 | "template": "https://github.com/cthoyt/cookiecutter-snekpack", 3 | "commit": "3257f18b9b5dc6922830dea64f9e0ab8b42a40e4", 4 | "checkout": null, 5 | "context": { 6 | "cookiecutter": { 7 | "package_name": "pystow", 8 | "package_name_stylized": "PyStow", 9 | "short_description": "Easily pick a place to store data for your Python code", 10 | "author_name": "Charles Tapley Hoyt", 11 | "author_github": "cthoyt", 12 | "author_email": "cthoyt@gmail.com", 13 | "github_organization_name": "cthoyt", 14 | "github_repository_name": "pystow", 15 | "command_line_interface": true, 16 | "gitlab": false, 17 | "runner": "tox", 18 | "__runner": "tox -e", 19 | "__runner_uv": "--with tox-uv tox -e", 20 | "__runner_pip": "tox tox-uv", 21 | "__runner_install_uv": "uv tool install tox --with tox-uv", 22 | "__runner_install_pip": "python3 -m pip install tox tox-uv", 23 | "__runner_tests": "py", 24 | "__gh_slug": "cthoyt/pystow", 25 | "_template": "https://github.com/cthoyt/cookiecutter-snekpack", 26 | "_commit": "3257f18b9b5dc6922830dea64f9e0ab8b42a40e4" 27 | } 28 | }, 29 | "directory": null 30 | } 31 | -------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, caste, color, religion, or sexual 10 | identity and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | - Demonstrating empathy and kindness toward other people 21 | - Being respectful of differing opinions, viewpoints, and experiences 22 | - Giving and gracefully accepting constructive feedback 23 | - Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | - Focusing on what is best not just for us as individuals, but for the overall 26 | community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | - The use of sexualized language or imagery, and sexual attention or advances of 31 | any kind 32 | - Trolling, insulting or derogatory comments, and personal or political attacks 33 | - Public or private harassment 34 | - Publishing others' private information, such as a physical or email address, 35 | without their explicit permission 36 | - Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | cthoyt@gmail.com. All complaints will be reviewed and investigated promptly and 64 | fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series of 86 | actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. 
Violating these terms may lead to a temporary or permanent 93 | ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within the 113 | community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.1, available at 119 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder][Mozilla CoC]. 123 | 124 | For answers to common questions about this code of conduct, see the FAQ at 125 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 126 | [https://www.contributor-covenant.org/translations][translations]. 127 | 128 | [homepage]: https://www.contributor-covenant.org 129 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html 130 | [Mozilla CoC]: https://github.com/mozilla/diversity 131 | [FAQ]: https://www.contributor-covenant.org/faq 132 | [translations]: https://www.contributor-covenant.org/translations 133 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Contributions to this repository are welcomed and encouraged. 4 | 5 | ## Code Contribution 6 | 7 | This project uses the [GitHub Flow](https://guides.github.com/introduction/flow) 8 | model for code contributions. Follow these steps: 9 | 10 | 1. [Create a fork](https://help.github.com/articles/fork-a-repo) of the upstream 11 | repository at [`cthoyt/pystow`](https://github.com/cthoyt/pystow) on your 12 | GitHub account (or in one of your organizations) 13 | 2. [Clone your fork](https://docs.github.com/en/repositories/creating-and-managing-repositories/cloning-a-repository) 14 | with `git clone https://github.com//pystow.git` 15 | 3. Make and commit changes to your fork with `git commit` 16 | 4. Push changes to your fork with `git push` 17 | 5. Repeat steps 3 and 4 as needed 18 | 6. Submit a pull request back to the upstream repository 19 | 20 | ### Merge Model 21 | 22 | This repository uses 23 | [squash merges](https://docs.github.com/en/github/collaborating-with-pull-requests/incorporating-changes-from-a-pull-request/about-pull-request-merges#squash-and-merge-your-pull-request-commits) 24 | to group all related commits in a given pull request into a single commit upon 25 | acceptance and merge into the main branch. This has several benefits: 26 | 27 | 1. Keeps the commit history on the main branch focused on high-level narrative 28 | 2. 
Enables people to make lots of small commits without worrying about muddying 29 | up the commit history 30 | 3. Commits correspond 1-to-1 with pull requests 31 | 32 | ### Code Style 33 | 34 | This project uses `tox` for running code quality checks. Start by installing it 35 | with `pip install tox tox-uv`. 36 | 37 | This project encourages the use of optional static typing. It uses 38 | [`mypy`](http://mypy-lang.org/) as a type checker. You can check if your code 39 | passes `mypy` with `tox -e mypy`. 40 | 41 | This project uses [`ruff`](https://docs.astral.sh/ruff/) to automatically 42 | enforce a consistent code style. You can apply `ruff format` and other 43 | pre-configured formatters with `tox -e format`. 44 | 45 | This project uses [`ruff`](https://docs.astral.sh/ruff/) and several plugins for 46 | additional checks of documentation style, security issues, good variable 47 | nomenclature, and more (see `pyproject.toml` for a list of Ruff plugins). You 48 | can check if your code passes `ruff check` with `tox -e lint`. 49 | 50 | Each of these checks is run on each commit using GitHub Actions as a continuous 51 | integration service. Passing all of them is required for accepting a 52 | contribution. If you're unsure how to address the feedback from one of these 53 | tools, please say so either in the description of your pull request or in a 54 | comment, and we will help you. 55 | 56 | ### Logging 57 | 58 | Python's built-in `print()` should not be used (except when writing to files); 59 | this is checked by the 60 | [`flake8-print` (T20)](https://docs.astral.sh/ruff/rules/#flake8-print-t20) 61 | plugin to `ruff`. If you're in a command line setting or `main()` function for a 62 | module, you can use `click.echo()`. Otherwise, you can use the built-in `logging` 63 | module by adding `logger = logging.getLogger(__name__)` below the imports at the 64 | top of your file. 65 | 66 | ### Documentation 67 | 68 | All public functions (i.e., not starting with an underscore `_`) must be 69 | documented using the 70 | [sphinx documentation format](https://sphinx-rtd-tutorial.readthedocs.io/en/latest/docstrings.html#the-sphinx-docstring-format). 71 | The [`darglint2`](https://github.com/akaihola/darglint2) tool reports on 72 | functions that are not fully documented. 73 | 74 | This project uses [`sphinx`](https://www.sphinx-doc.org) to automatically build 75 | documentation into a narrative structure. You can check that the documentation 76 | builds properly in an isolated environment with `tox -e docs-test` and actually 77 | build it locally with `tox -e docs`. 78 | 79 | ### Testing 80 | 81 | Functions in this repository should be unit tested. These can be written either 82 | using the `unittest` framework in the `tests/` directory or as embedded 83 | doctests. You can check that the unit tests pass with `tox -e py` and that the 84 | doctests pass with `tox -e doctests`. These tests are required to pass for 85 | accepting a contribution. 86 | 87 | ### Syncing your fork 88 | 89 | If other code is updated before your contribution gets merged, you might need to 90 | resolve conflicts against the main branch. After cloning, you should add the 91 | upstream repository with 92 | 93 | ```shell 94 | $ git remote add cthoyt https://github.com/cthoyt/pystow.git 95 | ``` 96 | 97 | Then, you can merge upstream code into your branch. 
You can also use the GitHub 98 | UI to do this by following 99 | [this tutorial](https://docs.github.com/en/github/collaborating-with-pull-requests/working-with-forks/syncing-a-fork). 100 | 101 | ### Python Version Compatibility 102 | 103 | This project aims to support all versions of Python that have not passed their 104 | end-of-life dates. After end-of-life, the version will be removed from the Trove 105 | classifiers in the `pyproject.toml` and from the GitHub Actions testing 106 | configuration. 107 | 108 | See https://endoflife.date/python for a timeline of Python release and 109 | end-of-life dates. 110 | 111 | ## Acknowledgements 112 | 113 | These code contribution guidelines are derived from the 114 | [cthoyt/cookiecutter-snekpack](https://github.com/cthoyt/cookiecutter-snekpack) 115 | Python package template. They're free to reuse and modify as long as they're 116 | properly acknowledged. 117 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/displaying-a-sponsor-button-in-your-repository 2 | github: 3 | - cthoyt 4 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 14 | 15 | ## Summary 16 | 17 | 18 | -------------------------------------------------------------------------------- /.github/codecov.yml: -------------------------------------------------------------------------------- 1 | # see https://docs.codecov.com/v4.6/docs/codecov-yaml 2 | ignore: 3 | - "src/pystow/__main__.py" 4 | - "src/pystow/cli.py" 5 | -------------------------------------------------------------------------------- /.github/workflows/cruft.yml: -------------------------------------------------------------------------------- 1 | # from https://cruft.github.io/cruft/#automating-updates-with-github-actions 2 | 3 | name: Update repository with Cruft 4 | 5 | permissions: { } 6 | 7 | on: 8 | workflow_dispatch: 9 | schedule: 10 | - cron: "0 2 * * 1" # Every Monday at 2am 11 | 12 | jobs: 13 | update: 14 | permissions: 15 | contents: write 16 | pull-requests: write 17 | runs-on: ubuntu-latest 18 | strategy: 19 | fail-fast: true 20 | matrix: 21 | include: 22 | - add-paths: . 23 | body: Use this to merge the changes to this repository. 24 | branch: cruft/update 25 | commit-message: "chore: accept new Cruft update" 26 | title: New updates detected with Cruft 27 | - add-paths: .cruft.json 28 | body: Use this to reject the changes in this repository. 29 | branch: cruft/reject 30 | commit-message: "chore: reject new Cruft update" 31 | title: Reject new updates detected with Cruft 32 | steps: 33 | - uses: actions/checkout@v3 34 | 35 | - uses: actions/setup-python@v4 36 | with: 37 | python-version: "3.10" 38 | 39 | - name: Install Cruft 40 | run: pip3 install cruft 41 | 42 | - name: Check if update is available 43 | continue-on-error: false 44 | id: check 45 | run: | 46 | CHANGES=0 47 | if [ -f .cruft.json ]; then 48 | if ! 
cruft check; then 49 | CHANGES=1 50 | fi 51 | else 52 | echo "No .cruft.json file" 53 | fi 54 | 55 | echo "has_changes=$CHANGES" >> "$GITHUB_OUTPUT" 56 | 57 | - name: Run update if available 58 | if: steps.check.outputs.has_changes == '1' 59 | run: | 60 | git config --global user.email "you@example.com" 61 | git config --global user.name "GitHub" 62 | 63 | cruft update --skip-apply-ask --refresh-private-variables 64 | git restore --staged . 65 | 66 | - name: Create pull request 67 | if: steps.check.outputs.has_changes == '1' 68 | uses: peter-evans/create-pull-request@v4 69 | with: 70 | token: ${{ secrets.GITHUB_TOKEN }} 71 | add-paths: ${{ matrix.add-paths }} 72 | commit-message: ${{ matrix.commit-message }} 73 | branch: ${{ matrix.branch }} 74 | delete-branch: true 75 | branch-suffix: timestamp 76 | title: ${{ matrix.title }} 77 | body: | 78 | This is an autogenerated PR. ${{ matrix.body }} 79 | 80 | [Cruft](https://cruft.github.io/cruft/) has detected updates from the Cookiecutter repository. 81 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | # This file configures the continuous integration (CI) system on GitHub. 2 | # Introductory materials can be found here: https://docs.github.com/en/actions/learn-github-actions/understanding-github-actions. 3 | # Documentation for editing this file can be found here: https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions 4 | 5 | name: Tests 6 | 7 | # by default, give the GITHUB_TOKEN no permissions 8 | # See https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/controlling-permissions-for-github_token 9 | permissions: { } 10 | 11 | on: 12 | push: 13 | branches: [ main ] 14 | pull_request: 15 | branches: [ main ] 16 | 17 | jobs: 18 | lint: 19 | name: Code Quality 20 | permissions: 21 | # give only read-only access to the contents of the repository 22 | # this is the only permission this job requires, so keep it to the least privilege 23 | # i.e., not to issues, discussions, actions, etc. 24 | contents: read 25 | runs-on: ubuntu-latest 26 | strategy: 27 | matrix: 28 | python-version: [ "3.13", "3.9" ] 29 | tox-command: [ "lint", "pyroma", "mypy" ] 30 | steps: 31 | - uses: actions/checkout@v4 32 | - name: "Install uv" 33 | uses: "astral-sh/setup-uv@v3" 34 | with: 35 | enable-cache: true 36 | cache-dependency-glob: "pyproject.toml" 37 | - name: "Run command" 38 | run: | 39 | uvx -p ${{ matrix.python-version }} --with tox-uv tox -e ${{ matrix.tox-command }} 40 | 41 | docs: 42 | name: Documentation 43 | permissions: 44 | contents: read 45 | runs-on: ubuntu-latest 46 | strategy: 47 | matrix: 48 | # We only test documentation on the latest version 49 | # sphinx 8.0 / sphinx-rtd-theme 3.0 discontinued Python 3.9 support 50 | # a year early, which prompted re-thinking about this. 
51 | python-version: [ "3.13" ] 52 | steps: 53 | - uses: actions/checkout@v4 54 | - name: "Install uv" 55 | uses: "astral-sh/setup-uv@v3" 56 | with: 57 | enable-cache: true 58 | cache-dependency-glob: "pyproject.toml" 59 | - name: Install dependencies 60 | run: | 61 | sudo apt-get install graphviz 62 | - name: Check RST conformity with doc8 63 | run: uvx -p ${{ matrix.python-version }} --with tox-uv tox -e doc8 64 | - name: Check docstring coverage 65 | run: uvx -p ${{ matrix.python-version }} --with tox-uv tox -e docstr-coverage 66 | - name: Check documentation build with Sphinx 67 | run: uvx -p ${{ matrix.python-version }} --with tox-uv tox -e docs-test 68 | 69 | # Check all markdown files are properly formatted 70 | # inspired by https://github.com/astral-sh/uv/blob/98523e2014e9a5c69706623344026d76296e178f/.github/workflows/ci.yml#L67C1-L70C61 71 | - name: Check markdown formatting 72 | run: | 73 | npx --yes prettier --prose-wrap always --check "**/*.md" 74 | 75 | tests: 76 | name: Tests 77 | permissions: 78 | contents: read 79 | runs-on: ${{ matrix.os }} 80 | strategy: 81 | matrix: 82 | os: [ ubuntu-latest ] 83 | python-version: [ "3.13", "3.9" ] 84 | steps: 85 | - uses: actions/checkout@v4 86 | - name: "Install uv" 87 | uses: "astral-sh/setup-uv@v3" 88 | with: 89 | enable-cache: true 90 | cache-dependency-glob: "pyproject.toml" 91 | - name: Test with pytest and generate coverage file 92 | run: 93 | uvx -p ${{ matrix.python-version }} --with tox-uv tox -e py 94 | - name: Run doctests 95 | run: 96 | uvx -p ${{ matrix.python-version }} --with tox-uv tox -e doctests 97 | - name: Upload coverage report to codecov 98 | uses: codecov/codecov-action@v4 99 | if: success() 100 | with: 101 | file: coverage.xml 102 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/macos,linux,windows,python,jupyternotebooks,jetbrains,pycharm,vim,emacs,visualstudiocode,visualstudio 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=macos,linux,windows,python,jupyternotebooks,jetbrains,pycharm,vim,emacs,visualstudiocode,visualstudio 3 | 4 | ### Emacs ### 5 | # -*- mode: gitignore; -*- 6 | *~ 7 | \#*\# 8 | /.emacs.desktop 9 | /.emacs.desktop.lock 10 | *.elc 11 | auto-save-list 12 | tramp 13 | .\#* 14 | 15 | # Org-mode 16 | .org-id-locations 17 | *_archive 18 | 19 | # flymake-mode 20 | *_flymake.* 21 | 22 | # eshell files 23 | /eshell/history 24 | /eshell/lastdir 25 | 26 | # elpa packages 27 | /elpa/ 28 | 29 | # reftex files 30 | *.rel 31 | 32 | # AUCTeX auto folder 33 | /auto/ 34 | 35 | # cask packages 36 | .cask/ 37 | dist/ 38 | 39 | # Flycheck 40 | flycheck_*.el 41 | 42 | # server auth directory 43 | /server/ 44 | 45 | # projectiles files 46 | .projectile 47 | 48 | # directory configuration 49 | .dir-locals.el 50 | 51 | # network security 52 | /network-security.data 53 | 54 | 55 | ### JetBrains ### 56 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 57 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 58 | 59 | # User-specific stuff 60 | .idea/**/workspace.xml 61 | .idea/**/tasks.xml 62 | .idea/**/usage.statistics.xml 63 | .idea/**/dictionaries 64 | .idea/**/shelf 65 | 66 | # AWS User-specific 67 | .idea/**/aws.xml 68 | 69 | # Generated files 70 | .idea/**/contentModel.xml 71 | 72 | # Sensitive or high-churn 
files 73 | .idea/**/dataSources/ 74 | .idea/**/dataSources.ids 75 | .idea/**/dataSources.local.xml 76 | .idea/**/sqlDataSources.xml 77 | .idea/**/dynamic.xml 78 | .idea/**/uiDesigner.xml 79 | .idea/**/dbnavigator.xml 80 | 81 | # Gradle 82 | .idea/**/gradle.xml 83 | .idea/**/libraries 84 | 85 | # Gradle and Maven with auto-import 86 | # When using Gradle or Maven with auto-import, you should exclude module files, 87 | # since they will be recreated, and may cause churn. Uncomment if using 88 | # auto-import. 89 | # .idea/artifacts 90 | # .idea/compiler.xml 91 | # .idea/jarRepositories.xml 92 | # .idea/modules.xml 93 | # .idea/*.iml 94 | # .idea/modules 95 | # *.iml 96 | # *.ipr 97 | 98 | # CMake 99 | cmake-build-*/ 100 | 101 | # Mongo Explorer plugin 102 | .idea/**/mongoSettings.xml 103 | 104 | # File-based project format 105 | *.iws 106 | 107 | # IntelliJ 108 | out/ 109 | 110 | # mpeltonen/sbt-idea plugin 111 | .idea_modules/ 112 | 113 | # JIRA plugin 114 | atlassian-ide-plugin.xml 115 | 116 | # Cursive Clojure plugin 117 | .idea/replstate.xml 118 | 119 | # SonarLint plugin 120 | .idea/sonarlint/ 121 | 122 | # Crashlytics plugin (for Android Studio and IntelliJ) 123 | com_crashlytics_export_strings.xml 124 | crashlytics.properties 125 | crashlytics-build.properties 126 | fabric.properties 127 | 128 | # Editor-based Rest Client 129 | .idea/httpRequests 130 | 131 | # Android studio 3.1+ serialized cache file 132 | .idea/caches/build_file_checksums.ser 133 | 134 | ### JetBrains Patch ### 135 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 136 | 137 | # *.iml 138 | # modules.xml 139 | # .idea/misc.xml 140 | # *.ipr 141 | 142 | # Sonarlint plugin 143 | # https://plugins.jetbrains.com/plugin/7973-sonarlint 144 | .idea/**/sonarlint/ 145 | 146 | # SonarQube Plugin 147 | # https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin 148 | .idea/**/sonarIssues.xml 149 | 150 | # Markdown Navigator plugin 151 | # https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced 152 | .idea/**/markdown-navigator.xml 153 | .idea/**/markdown-navigator-enh.xml 154 | .idea/**/markdown-navigator/ 155 | 156 | # Cache file creation bug 157 | # See https://youtrack.jetbrains.com/issue/JBR-2257 158 | .idea/$CACHE_FILE$ 159 | 160 | # CodeStream plugin 161 | # https://plugins.jetbrains.com/plugin/12206-codestream 162 | .idea/codestream.xml 163 | 164 | ### JupyterNotebooks ### 165 | # gitignore template for Jupyter Notebooks 166 | # website: http://jupyter.org/ 167 | 168 | .ipynb_checkpoints 169 | */.ipynb_checkpoints/* 170 | 171 | # IPython 172 | profile_default/ 173 | ipython_config.py 174 | 175 | # Remove previous ipynb_checkpoints 176 | # git rm -r .ipynb_checkpoints/ 177 | 178 | ### Linux ### 179 | 180 | # temporary files which can be created if a process still has a handle open of a deleted file 181 | .fuse_hidden* 182 | 183 | # KDE directory preferences 184 | .directory 185 | 186 | # Linux trash folder which might appear on any partition or disk 187 | .Trash-* 188 | 189 | # .nfs files are created when an open file is removed but is still being accessed 190 | .nfs* 191 | 192 | ### macOS ### 193 | # General 194 | .DS_Store 195 | .AppleDouble 196 | .LSOverride 197 | 198 | # Icon must end with two \r 199 | Icon 200 | 201 | 202 | # Thumbnails 203 | ._* 204 | 205 | # Files that might appear in the root of a volume 206 | .DocumentRevisions-V100 207 | .fseventsd 208 | .Spotlight-V100 209 | .TemporaryItems 210 | .Trashes 211 | .VolumeIcon.icns 212 | 
.com.apple.timemachine.donotpresent 213 | 214 | # Directories potentially created on remote AFP share 215 | .AppleDB 216 | .AppleDesktop 217 | Network Trash Folder 218 | Temporary Items 219 | .apdisk 220 | 221 | ### PyCharm ### 222 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 223 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 224 | 225 | # User-specific stuff 226 | 227 | # AWS User-specific 228 | 229 | # Generated files 230 | 231 | # Sensitive or high-churn files 232 | 233 | # Gradle 234 | 235 | # Gradle and Maven with auto-import 236 | # When using Gradle or Maven with auto-import, you should exclude module files, 237 | # since they will be recreated, and may cause churn. Uncomment if using 238 | # auto-import. 239 | # .idea/artifacts 240 | # .idea/compiler.xml 241 | # .idea/jarRepositories.xml 242 | # .idea/modules.xml 243 | # .idea/*.iml 244 | # .idea/modules 245 | # *.iml 246 | # *.ipr 247 | 248 | # CMake 249 | 250 | # Mongo Explorer plugin 251 | 252 | # File-based project format 253 | 254 | # IntelliJ 255 | 256 | # mpeltonen/sbt-idea plugin 257 | 258 | # JIRA plugin 259 | 260 | # Cursive Clojure plugin 261 | 262 | # SonarLint plugin 263 | 264 | # Crashlytics plugin (for Android Studio and IntelliJ) 265 | 266 | # Editor-based Rest Client 267 | 268 | # Android studio 3.1+ serialized cache file 269 | 270 | ### PyCharm Patch ### 271 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 272 | 273 | # *.iml 274 | # modules.xml 275 | # .idea/misc.xml 276 | # *.ipr 277 | 278 | # Sonarlint plugin 279 | # https://plugins.jetbrains.com/plugin/7973-sonarlint 280 | 281 | # SonarQube Plugin 282 | # https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin 283 | 284 | # Markdown Navigator plugin 285 | # https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced 286 | 287 | # Cache file creation bug 288 | # See https://youtrack.jetbrains.com/issue/JBR-2257 289 | 290 | # CodeStream plugin 291 | # https://plugins.jetbrains.com/plugin/12206-codestream 292 | 293 | ### Python ### 294 | # Byte-compiled / optimized / DLL files 295 | __pycache__/ 296 | *.py[cod] 297 | *$py.class 298 | 299 | # C extensions 300 | *.so 301 | 302 | # Distribution / packaging 303 | .Python 304 | build/ 305 | develop-eggs/ 306 | downloads/ 307 | eggs/ 308 | .eggs/ 309 | lib/ 310 | lib64/ 311 | parts/ 312 | sdist/ 313 | var/ 314 | wheels/ 315 | share/python-wheels/ 316 | *.egg-info/ 317 | .installed.cfg 318 | *.egg 319 | MANIFEST 320 | 321 | # PyInstaller 322 | # Usually these files are written by a python script from a template 323 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
324 | *.manifest 325 | *.spec 326 | 327 | # Installer logs 328 | pip-log.txt 329 | pip-delete-this-directory.txt 330 | 331 | # Unit test / coverage reports 332 | htmlcov/ 333 | .tox/ 334 | .nox/ 335 | .coverage 336 | .coverage.* 337 | .cache 338 | nosetests.xml 339 | coverage.xml 340 | *.cover 341 | *.py,cover 342 | .hypothesis/ 343 | .pytest_cache/ 344 | cover/ 345 | 346 | # Translations 347 | *.mo 348 | *.pot 349 | 350 | # Django stuff: 351 | *.log 352 | local_settings.py 353 | db.sqlite3 354 | db.sqlite3-journal 355 | 356 | # Flask stuff: 357 | instance/ 358 | .webassets-cache 359 | 360 | # Scrapy stuff: 361 | .scrapy 362 | 363 | # Sphinx documentation 364 | docs/_build/ 365 | docs/build 366 | docs/source/api 367 | 368 | # PyBuilder 369 | .pybuilder/ 370 | target/ 371 | 372 | # Jupyter Notebook 373 | 374 | # IPython 375 | 376 | # pyenv 377 | # For a library or package, you might want to ignore these files since the code is 378 | # intended to run in multiple environments; otherwise, check them in: 379 | # .python-version 380 | 381 | # pipenv 382 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 383 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 384 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 385 | # install all needed dependencies. 386 | #Pipfile.lock 387 | 388 | # poetry 389 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 390 | # This is especially recommended for binary packages to ensure reproducibility, and is more 391 | # commonly ignored for libraries. 392 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 393 | #poetry.lock 394 | 395 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 396 | __pypackages__/ 397 | 398 | # Celery stuff 399 | celerybeat-schedule 400 | celerybeat.pid 401 | 402 | # SageMath parsed files 403 | *.sage.py 404 | 405 | # Environments 406 | .env 407 | .venv 408 | env/ 409 | venv/ 410 | ENV/ 411 | env.bak/ 412 | venv.bak/ 413 | 414 | # Spyder project settings 415 | .spyderproject 416 | .spyproject 417 | 418 | # Rope project settings 419 | .ropeproject 420 | 421 | # mkdocs documentation 422 | /site 423 | 424 | # mypy 425 | .mypy_cache/ 426 | .dmypy.json 427 | dmypy.json 428 | 429 | # Pyre type checker 430 | .pyre/ 431 | 432 | # pytype static type analyzer 433 | .pytype/ 434 | 435 | # Cython debug symbols 436 | cython_debug/ 437 | 438 | # PyCharm 439 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 440 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 441 | # and can be added to the global gitignore or merged into this file. For a more nuclear 442 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
443 | #.idea/ 444 | 445 | ### Vim ### 446 | # Swap 447 | [._]*.s[a-v][a-z] 448 | !*.svg # comment out if you don't need vector files 449 | [._]*.sw[a-p] 450 | [._]s[a-rt-v][a-z] 451 | [._]ss[a-gi-z] 452 | [._]sw[a-p] 453 | 454 | # Session 455 | Session.vim 456 | Sessionx.vim 457 | 458 | # Temporary 459 | .netrwhist 460 | # Auto-generated tag files 461 | tags 462 | # Persistent undo 463 | [._]*.un~ 464 | 465 | ### VisualStudioCode ### 466 | .vscode/* 467 | !.vscode/settings.json 468 | !.vscode/tasks.json 469 | !.vscode/launch.json 470 | !.vscode/extensions.json 471 | !.vscode/*.code-snippets 472 | 473 | # Local History for Visual Studio Code 474 | .history/ 475 | 476 | # Built Visual Studio Code Extensions 477 | *.vsix 478 | 479 | ### VisualStudioCode Patch ### 480 | # Ignore all local history of files 481 | .history 482 | .ionide 483 | 484 | # Support for Project snippet scope 485 | 486 | ### Windows ### 487 | # Windows thumbnail cache files 488 | Thumbs.db 489 | Thumbs.db:encryptable 490 | ehthumbs.db 491 | ehthumbs_vista.db 492 | 493 | # Dump file 494 | *.stackdump 495 | 496 | # Folder config file 497 | [Dd]esktop.ini 498 | 499 | # Recycle Bin used on file shares 500 | $RECYCLE.BIN/ 501 | 502 | # Windows Installer files 503 | *.cab 504 | *.msi 505 | *.msix 506 | *.msm 507 | *.msp 508 | 509 | # Windows shortcuts 510 | *.lnk 511 | 512 | ### VisualStudio ### 513 | ## Ignore Visual Studio temporary files, build results, and 514 | ## files generated by popular Visual Studio add-ons. 515 | ## 516 | ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore 517 | 518 | # User-specific files 519 | *.rsuser 520 | *.suo 521 | *.user 522 | *.userosscache 523 | *.sln.docstates 524 | 525 | # User-specific files (MonoDevelop/Xamarin Studio) 526 | *.userprefs 527 | 528 | # Mono auto generated files 529 | mono_crash.* 530 | 531 | # Build results 532 | [Dd]ebug/ 533 | [Dd]ebugPublic/ 534 | [Rr]elease/ 535 | [Rr]eleases/ 536 | x64/ 537 | x86/ 538 | [Ww][Ii][Nn]32/ 539 | [Aa][Rr][Mm]/ 540 | [Aa][Rr][Mm]64/ 541 | bld/ 542 | [Bb]in/ 543 | [Oo]bj/ 544 | [Ll]og/ 545 | [Ll]ogs/ 546 | 547 | # Visual Studio 2015/2017 cache/options directory 548 | .vs/ 549 | # Uncomment if you have tasks that create the project's static files in wwwroot 550 | #wwwroot/ 551 | 552 | # Visual Studio 2017 auto generated files 553 | Generated\ Files/ 554 | 555 | # MSTest test Results 556 | [Tt]est[Rr]esult*/ 557 | [Bb]uild[Ll]og.* 558 | 559 | # NUnit 560 | *.VisualState.xml 561 | TestResult.xml 562 | nunit-*.xml 563 | 564 | # Build Results of an ATL Project 565 | [Dd]ebugPS/ 566 | [Rr]eleasePS/ 567 | dlldata.c 568 | 569 | # Benchmark Results 570 | BenchmarkDotNet.Artifacts/ 571 | 572 | # .NET Core 573 | project.lock.json 574 | project.fragment.lock.json 575 | artifacts/ 576 | 577 | # ASP.NET Scaffolding 578 | ScaffoldingReadMe.txt 579 | 580 | # StyleCop 581 | StyleCopReport.xml 582 | 583 | # Files built by Visual Studio 584 | *_i.c 585 | *_p.c 586 | *_h.h 587 | *.ilk 588 | *.meta 589 | *.obj 590 | *.iobj 591 | *.pch 592 | *.pdb 593 | *.ipdb 594 | *.pgc 595 | *.pgd 596 | *.rsp 597 | *.sbr 598 | *.tlb 599 | *.tli 600 | *.tlh 601 | *.tmp 602 | *.tmp_proj 603 | *_wpftmp.csproj 604 | *.tlog 605 | *.vspscc 606 | *.vssscc 607 | .builds 608 | *.pidb 609 | *.svclog 610 | *.scc 611 | 612 | # Chutzpah Test files 613 | _Chutzpah* 614 | 615 | # Visual C++ cache files 616 | ipch/ 617 | *.aps 618 | *.ncb 619 | *.opendb 620 | *.opensdf 621 | *.sdf 622 | *.cachefile 623 | *.VC.db 624 | *.VC.VC.opendb 625 | 626 | # Visual 
Studio profiler 627 | *.psess 628 | *.vsp 629 | *.vspx 630 | *.sap 631 | 632 | # Visual Studio Trace Files 633 | *.e2e 634 | 635 | # TFS 2012 Local Workspace 636 | $tf/ 637 | 638 | # Guidance Automation Toolkit 639 | *.gpState 640 | 641 | # ReSharper is a .NET coding add-in 642 | _ReSharper*/ 643 | *.[Rr]e[Ss]harper 644 | *.DotSettings.user 645 | 646 | # TeamCity is a build add-in 647 | _TeamCity* 648 | 649 | # DotCover is a Code Coverage Tool 650 | *.dotCover 651 | 652 | # AxoCover is a Code Coverage Tool 653 | .axoCover/* 654 | !.axoCover/settings.json 655 | 656 | # Coverlet is a free, cross platform Code Coverage Tool 657 | coverage*.json 658 | coverage*.xml 659 | coverage*.info 660 | 661 | # Visual Studio code coverage results 662 | *.coverage 663 | *.coveragexml 664 | 665 | # NCrunch 666 | _NCrunch_* 667 | .*crunch*.local.xml 668 | nCrunchTemp_* 669 | 670 | # MightyMoose 671 | *.mm.* 672 | AutoTest.Net/ 673 | 674 | # Web workbench (sass) 675 | .sass-cache/ 676 | 677 | # Installshield output folder 678 | [Ee]xpress/ 679 | 680 | # DocProject is a documentation generator add-in 681 | DocProject/buildhelp/ 682 | DocProject/Help/*.HxT 683 | DocProject/Help/*.HxC 684 | DocProject/Help/*.hhc 685 | DocProject/Help/*.hhk 686 | DocProject/Help/*.hhp 687 | DocProject/Help/Html2 688 | DocProject/Help/html 689 | 690 | # Click-Once directory 691 | publish/ 692 | 693 | # Publish Web Output 694 | *.[Pp]ublish.xml 695 | *.azurePubxml 696 | # Note: Comment the next line if you want to checkin your web deploy settings, 697 | # but database connection strings (with potential passwords) will be unencrypted 698 | *.pubxml 699 | *.publishproj 700 | 701 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 702 | # checkin your Azure Web App publish settings, but sensitive information contained 703 | # in these scripts will be unencrypted 704 | PublishScripts/ 705 | 706 | # NuGet Packages 707 | *.nupkg 708 | # NuGet Symbol Packages 709 | *.snupkg 710 | # The packages folder can be ignored because of Package Restore 711 | **/[Pp]ackages/* 712 | # except build/, which is used as an MSBuild target. 
713 | !**/[Pp]ackages/build/ 714 | # Uncomment if necessary however generally it will be regenerated when needed 715 | #!**/[Pp]ackages/repositories.config 716 | # NuGet v3's project.json files produces more ignorable files 717 | *.nuget.props 718 | *.nuget.targets 719 | 720 | # Microsoft Azure Build Output 721 | csx/ 722 | *.build.csdef 723 | 724 | # Microsoft Azure Emulator 725 | ecf/ 726 | rcf/ 727 | 728 | # Windows Store app package directories and files 729 | AppPackages/ 730 | BundleArtifacts/ 731 | Package.StoreAssociation.xml 732 | _pkginfo.txt 733 | *.appx 734 | *.appxbundle 735 | *.appxupload 736 | 737 | # Visual Studio cache files 738 | # files ending in .cache can be ignored 739 | *.[Cc]ache 740 | # but keep track of directories ending in .cache 741 | !?*.[Cc]ache/ 742 | 743 | # Others 744 | ClientBin/ 745 | ~$* 746 | *.dbmdl 747 | *.dbproj.schemaview 748 | *.jfm 749 | *.pfx 750 | *.publishsettings 751 | orleans.codegen.cs 752 | 753 | # Including strong name files can present a security risk 754 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 755 | #*.snk 756 | 757 | # Since there are multiple workflows, uncomment next line to ignore bower_components 758 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 759 | #bower_components/ 760 | 761 | # RIA/Silverlight projects 762 | Generated_Code/ 763 | 764 | # Backup & report files from converting an old project file 765 | # to a newer Visual Studio version. Backup files are not needed, 766 | # because we have git ;-) 767 | _UpgradeReport_Files/ 768 | Backup*/ 769 | UpgradeLog*.XML 770 | UpgradeLog*.htm 771 | ServiceFabricBackup/ 772 | *.rptproj.bak 773 | 774 | # SQL Server files 775 | *.mdf 776 | *.ldf 777 | *.ndf 778 | 779 | # Business Intelligence projects 780 | *.rdl.data 781 | *.bim.layout 782 | *.bim_*.settings 783 | *.rptproj.rsuser 784 | *- [Bb]ackup.rdl 785 | *- [Bb]ackup ([0-9]).rdl 786 | *- [Bb]ackup ([0-9][0-9]).rdl 787 | 788 | # Microsoft Fakes 789 | FakesAssemblies/ 790 | 791 | # GhostDoc plugin setting file 792 | *.GhostDoc.xml 793 | 794 | # Node.js Tools for Visual Studio 795 | .ntvs_analysis.dat 796 | node_modules/ 797 | 798 | # Visual Studio 6 build log 799 | *.plg 800 | 801 | # Visual Studio 6 workspace options file 802 | *.opt 803 | 804 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 805 | *.vbw 806 | 807 | # Visual Studio 6 auto-generated project file (contains which files were open etc.) 
808 | *.vbp 809 | 810 | # Visual Studio 6 workspace and project file (working project files containing files to include in project) 811 | *.dsw 812 | *.dsp 813 | 814 | # Visual Studio 6 technical files 815 | 816 | # Visual Studio LightSwitch build output 817 | **/*.HTMLClient/GeneratedArtifacts 818 | **/*.DesktopClient/GeneratedArtifacts 819 | **/*.DesktopClient/ModelManifest.xml 820 | **/*.Server/GeneratedArtifacts 821 | **/*.Server/ModelManifest.xml 822 | _Pvt_Extensions 823 | 824 | # Paket dependency manager 825 | .paket/paket.exe 826 | paket-files/ 827 | 828 | # FAKE - F# Make 829 | .fake/ 830 | 831 | # CodeRush personal settings 832 | .cr/personal 833 | 834 | # Python Tools for Visual Studio (PTVS) 835 | *.pyc 836 | 837 | # Cake - Uncomment if you are using it 838 | # tools/** 839 | # !tools/packages.config 840 | 841 | # Tabs Studio 842 | *.tss 843 | 844 | # Telerik's JustMock configuration file 845 | *.jmconfig 846 | 847 | # BizTalk build output 848 | *.btp.cs 849 | *.btm.cs 850 | *.odx.cs 851 | *.xsd.cs 852 | 853 | # OpenCover UI analysis results 854 | OpenCover/ 855 | 856 | # Azure Stream Analytics local run output 857 | ASALocalRun/ 858 | 859 | # MSBuild Binary and Structured Log 860 | *.binlog 861 | 862 | # NVidia Nsight GPU debugger configuration file 863 | *.nvuser 864 | 865 | # MFractors (Xamarin productivity tool) working folder 866 | .mfractor/ 867 | 868 | # Local History for Visual Studio 869 | .localhistory/ 870 | 871 | # Visual Studio History (VSHistory) files 872 | .vshistory/ 873 | 874 | # BeatPulse healthcheck temp database 875 | healthchecksdb 876 | 877 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 878 | MigrationBackup/ 879 | 880 | # Ionide (cross platform F# VS Code tools) working folder 881 | .ionide/ 882 | 883 | # Fody - auto-generated XML schema 884 | FodyWeavers.xsd 885 | 886 | # VS Code files for those working on multiple tools 887 | *.code-workspace 888 | 889 | # Local History for Visual Studio Code 890 | 891 | # Windows Installer files from build outputs 892 | 893 | # JetBrains Rider 894 | *.sln.iml 895 | 896 | ### VisualStudio Patch ### 897 | # Additional files built by Visual Studio 898 | 899 | # End of https://www.toptal.com/developers/gitignore/api/macos,linux,windows,python,jupyternotebooks,jetbrains,pycharm,vim,emacs,visualstudiocode,visualstudio 900 | 901 | scratch/ 902 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | version: 2 6 | 7 | sphinx: 8 | # Path to your Sphinx configuration file, required as of 9 | # https://about.readthedocs.com/blog/2024/12/deprecate-config-files-without-sphinx-or-mkdocs-config/ 10 | configuration: docs/source/conf.py 11 | 12 | # Set the version of Python and other tools you might need 13 | build: 14 | os: ubuntu-22.04 15 | apt_packages: 16 | - graphviz 17 | tools: 18 | python: "3.12" 19 | 20 | # adapted from uv recipe at https://docs.readthedocs.io/en/stable/build-customization.html#install-dependencies-with-uv 21 | # and comment at https://github.com/readthedocs/readthedocs.org/issues/11289#issuecomment-2103832834 22 | commands: 23 | - asdf plugin add uv 24 | - asdf install uv latest 25 | - asdf global uv latest 26 | - uv venv $READTHEDOCS_VIRTUALENV_PATH 27 | - VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH 
UV_PREVIEW=1 uv pip install .[docs,rdf,pandas,xml,aws] 28 | - python -m sphinx -T -b html -d docs/_build/doctrees -D language=en docs/source $READTHEDOCS_OUTPUT/html 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Charles Tapley Hoyt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | # PyStow 3 | 4 | [logo and badges: Tests, PyPI, PyPI - Python Version, PyPI - License, Documentation Status, Codecov status, Cookiecutter template from @cthoyt, Ruff, Contributor Covenant, DOI] 26 | 

27 | 28 | 👜 Easily pick a place to store data for your Python code 29 | 30 | ## 💪 Getting Started 31 | 32 | Get a directory for your application. 33 | 34 | ```python 35 | import pystow 36 | 37 | # Get a directory (as a pathlib.Path) for ~/.data/pykeen 38 | pykeen_directory = pystow.join('pykeen') 39 | 40 | # Get a subdirectory (as a pathlib.Path) for ~/.data/pykeen/experiments 41 | pykeen_experiments_directory = pystow.join('pykeen', 'experiments') 42 | 43 | # You can go as deep as you want 44 | pykeen_deep_directory = pystow.join('pykeen', 'experiments', 'a', 'b', 'c') 45 | ``` 46 | 47 | If you reuse the same directory structure a lot, you can save it in a module: 48 | 49 | ```python 50 | import pystow 51 | 52 | pykeen_module = pystow.module("pykeen") 53 | 54 | # Access the module's directory with .base 55 | assert pystow.join("pykeen") == pystow.module("pykeen").base 56 | 57 | # Get a subdirectory (as a pathlib.Path) for ~/.data/pykeen/experiments 58 | pykeen_experiments_directory = pykeen_module.join('experiments') 59 | 60 | # You can go as deep as you want past the original "pykeen" module 61 | pykeen_deep_directory = pykeen_module.join('experiments', 'a', 'b', 'c') 62 | ``` 63 | 64 | Get a file path for your application by adding the `name` keyword argument. This 65 | is made explicit so PyStow knows which parent directories to automatically 66 | create. This works with `pystow` or any module you create with `pystow.module`. 67 | 68 | ```python 69 | import pystow 70 | 71 | # Get a file path (as a pathlib.Path) for ~/.data/indra/database/database.tsv 72 | indra_database_path = pystow.join('indra', 'database', name='database.tsv') 73 | ``` 74 | 75 | Ensure a file from the internet is available in your application's directory: 76 | 77 | ```python 78 | import pystow 79 | 80 | url = 'https://raw.githubusercontent.com/pykeen/pykeen/master/src/pykeen/datasets/nations/test.txt' 81 | path = pystow.ensure('pykeen', 'datasets', 'nations', url=url) 82 | ``` 83 | 84 | Ensure a tabular data file from the internet and load it for usage (requires 85 | `pip install pandas`): 86 | 87 | ```python 88 | import pystow 89 | import pandas as pd 90 | 91 | url = 'https://raw.githubusercontent.com/pykeen/pykeen/master/src/pykeen/datasets/nations/test.txt' 92 | df: pd.DataFrame = pystow.ensure_csv('pykeen', 'datasets', 'nations', url=url) 93 | ``` 94 | 95 | Ensure a comma-separated tabular data file from the internet and load it for 96 | usage (requires `pip install pandas`): 97 | 98 | ```python 99 | import pystow 100 | import pandas as pd 101 | 102 | url = 'https://raw.githubusercontent.com/cthoyt/pystow/main/tests/resources/test_1.csv' 103 | df: pd.DataFrame = pystow.ensure_csv('pykeen', 'datasets', 'nations', url=url, read_csv_kwargs=dict(sep=",")) 104 | ``` 105 | 106 | Ensure an RDF file from the internet and load it for usage (requires 107 | `pip install rdflib`): 108 | 109 | ```python 110 | import pystow 111 | import rdflib 112 | 113 | url = 'https://ftp.expasy.org/databases/rhea/rdf/rhea.rdf.gz' 114 | rdf_graph: rdflib.Graph = pystow.ensure_rdf('rhea', url=url) 115 | ``` 116 | 117 | Also see `pystow.ensure_excel()`, `pystow.ensure_rdf()`, 118 | `pystow.ensure_zip_df()`, and `pystow.ensure_tar_df()`. 
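For example, a minimal sketch of using `pystow.ensure_zip_df()` to download a
zip archive and load a tabular file from inside it in one step (the URL and
inner path below are hypothetical placeholders, and `pip install pandas` is
required):

```python
import pandas as pd
import pystow

# hypothetical URL of a zip archive that contains a TSV file
url = 'https://example.org/datasets/nations.zip'
# the path inside the archive to the tabular file you want
inner_path = 'nations/train.txt'
df: pd.DataFrame = pystow.ensure_zip_df('pykeen', 'datasets', url=url, inner_path=inner_path)
```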
119 | 120 | If your data comes with a lot of different files in an archive, you can ensure 121 | the archive is downloaded and get specific files from it: 122 | 123 | ```python 124 | import numpy as np 125 | import pystow 126 | 127 | url = "https://cloud.enterprise.informatik.uni-leipzig.de/index.php/s/LHPbMCre7SLqajB/download/MultiKE_D_Y_15K_V1.zip" 128 | # the path inside the archive to the file you want 129 | inner_path = "MultiKE/D_Y_15K_V1/721_5fold/1/20210219183115/ent_embeds.npy" 130 | with pystow.ensure_open_zip("kiez", url=url, inner_path=inner_path) as file: 131 | emb = np.load(file) 132 | ``` 133 | 134 | Also see `pystow.module.ensure_open_lzma()`, 135 | `pystow.module.ensure_open_tarfile()`, and `pystow.module.ensure_open_gz()`. 136 | 137 | ## ⚙️️ Configuration 138 | 139 | By default, data is stored in the `$HOME/.data` directory, and the 140 | `<app>` app will create the `$HOME/.data/<app>` folder. 141 | 142 | If you want to use a folder name other than `.data` inside the home 143 | directory, you can set the `PYSTOW_NAME` environment variable. For example, if 144 | you set `PYSTOW_NAME=mydata`, then the following code for the `pykeen` app will 145 | create the `$HOME/mydata/pykeen/` directory: 146 | 147 | ```python 148 | import os 149 | import pystow 150 | 151 | # Only for demonstration purposes. You should set environment 152 | # variables either with your .bashrc or in the command line REPL. 153 | os.environ['PYSTOW_NAME'] = 'mydata' 154 | 155 | # Get a directory (as a pathlib.Path) for ~/mydata/pykeen 156 | pykeen_directory = pystow.join('pykeen') 157 | ``` 158 | 159 | If you want to specify a completely custom directory that isn't relative to your 160 | home directory, you can set the `PYSTOW_HOME` environment variable. For example, 161 | if you set `PYSTOW_HOME=/usr/local/`, then the following code for the `pykeen` 162 | app will create the `/usr/local/pykeen/` directory: 163 | 164 | ```python 165 | import os 166 | import pystow 167 | 168 | # Only for demonstration purposes. You should set environment 169 | # variables either with your .bashrc or in the command line REPL. 170 | os.environ['PYSTOW_HOME'] = '/usr/local/' 171 | 172 | # Get a directory (as a pathlib.Path) for /usr/local/pykeen 173 | pykeen_directory = pystow.join('pykeen') 174 | ``` 175 | 176 | Note: if you set `PYSTOW_HOME`, then `PYSTOW_NAME` is disregarded. 177 | 178 | ### X Desktop Group (XDG) Compatibility 179 | 180 | While PyStow's main goal is to make application data less opaque and less 181 | hidden, some users might want to use the 182 | [XDG specifications](http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html) 183 | for storing their app data. 184 | 185 | If you set the environment variable `PYSTOW_USE_APPDIRS` to `true` or `True`, 186 | then the [`appdirs`](https://pypi.org/project/appdirs/) package will be used to 187 | choose the base directory based on the `user data dir` option. This can still be 188 | overridden by `PYSTOW_HOME`. 
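For example, a minimal sketch in the same style as the examples above; note
that the exact base directory chosen by `appdirs` is platform-dependent (the
`~/.local/share` path in the comment assumes Linux):

```python
import os
import pystow

# Only for demonstration purposes. You should set environment
# variables either with your .bashrc or in the command line REPL.
os.environ['PYSTOW_USE_APPDIRS'] = 'true'

# Get a directory (as a pathlib.Path) under the platform's user data
# directory, e.g., ~/.local/share/pykeen on Linux
pykeen_directory = pystow.join('pykeen')
```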
189 | 190 | ## 🚀 Installation 191 | 192 | The most recent release can be installed from 193 | [PyPI](https://pypi.org/project/pystow/) with uv: 194 | 195 | ```console 196 | $ uv pip install pystow 197 | ``` 198 | 199 | or with pip: 200 | 201 | ```console 202 | $ python3 -m pip install pystow 203 | ``` 204 | 205 | The most recent code and data can be installed directly from GitHub with uv: 206 | 207 | ```console 208 | $ uv --preview pip install git+https://github.com/cthoyt/pystow.git 209 | ``` 210 | 211 | or with pip: 212 | 213 | ```console 214 | $ UV_PREVIEW=1 python3 -m pip install git+https://github.com/cthoyt/pystow.git 215 | ``` 216 | 217 | Note that this requires `UV_PREVIEW` mode to be enabled until the uv build 218 | backend becomes a stable feature. 219 | 220 | ## 👐 Contributing 221 | 222 | Contributions, whether filing an issue, making a pull request, or forking, are 223 | appreciated. See 224 | [CONTRIBUTING.md](https://github.com/cthoyt/pystow/blob/master/.github/CONTRIBUTING.md) 225 | for more information on getting involved. 226 | 227 | ## 👋 Attribution 228 | 229 | ### ⚖️ License 230 | 231 | The code in this package is licensed under the MIT License. 232 | 233 | ### 🍪 Cookiecutter 234 | 235 | This package was created with 236 | [@audreyfeldroy](https://github.com/audreyfeldroy)'s 237 | [cookiecutter](https://github.com/cookiecutter/cookiecutter) package using 238 | [@cthoyt](https://github.com/cthoyt)'s 239 | [cookiecutter-snekpack](https://github.com/cthoyt/cookiecutter-snekpack) 240 | template. 241 | 242 | ## 🛠️ For Developers 243 | 244 | 
245 | 246 | 247 | The final section of the README is for those who want to get involved by making a 248 | code contribution. 249 | 250 | ### Development Installation 251 | 252 | To install in development mode, use the following: 253 | 254 | ```console 255 | $ git clone https://github.com/cthoyt/pystow.git 256 | $ cd pystow 257 | $ uv --preview pip install -e . 258 | ``` 259 | 260 | Alternatively, install using pip: 261 | 262 | ```console 263 | $ UV_PREVIEW=1 python3 -m pip install -e . 264 | ``` 265 | 266 | Note that this requires `UV_PREVIEW` mode to be enabled until the uv build 267 | backend becomes a stable feature. 268 | 269 | ### Updating Package Boilerplate 270 | 271 | This project uses `cruft` to keep boilerplate (i.e., configuration, contribution 272 | guidelines, documentation configuration) up-to-date with the upstream 273 | cookiecutter package. Install cruft with either `uv tool install cruft` or 274 | `python3 -m pip install cruft`, then run: 275 | 276 | ```console 277 | $ cruft update 278 | ``` 279 | 280 | More info on Cruft's update command is available 281 | [here](https://github.com/cruft/cruft?tab=readme-ov-file#updating-a-project). 282 | 283 | ### 🥼 Testing 284 | 285 | After cloning the repository and installing `tox` with 286 | `uv tool install tox --with tox-uv` or `python3 -m pip install tox tox-uv`, the 287 | unit tests in the `tests/` folder can be run reproducibly with: 288 | 289 | ```console 290 | $ tox -e py 291 | ``` 292 | 293 | Additionally, these tests are automatically re-run with each commit in a 294 | [GitHub Action](https://github.com/cthoyt/pystow/actions?query=workflow%3ATests). 295 | 296 | ### 📖 Building the Documentation 297 | 298 | The documentation can be built locally using the following: 299 | 300 | ```console 301 | $ git clone https://github.com/cthoyt/pystow.git 302 | $ cd pystow 303 | $ tox -e docs 304 | $ open docs/build/html/index.html 305 | ``` 306 | 307 | The documentation build automatically installs the package as well as the `docs` extra 308 | specified in the [`pyproject.toml`](pyproject.toml). `sphinx` plugins like 309 | `texext` can be added there. Additionally, they need to be added to the 310 | `extensions` list in [`docs/source/conf.py`](docs/source/conf.py). 311 | 312 | The documentation can be deployed to [ReadTheDocs](https://readthedocs.io) using 313 | [this guide](https://docs.readthedocs.io/en/stable/intro/import-guide.html). The 314 | [`.readthedocs.yml`](.readthedocs.yml) YAML file contains all the configuration 315 | you'll need. You can also set up continuous integration on GitHub to check not 316 | only that Sphinx can build the documentation in an isolated environment (i.e., 317 | with `tox -e docs-test`) but also that 318 | [ReadTheDocs can build it too](https://docs.readthedocs.io/en/stable/pull-requests.html). 319 | 320 | #### Configuring ReadTheDocs 321 | 322 | 1. Log in to ReadTheDocs with your GitHub account to install the integration at 323 | https://readthedocs.org/accounts/login/?next=/dashboard/ 324 | 2. Import your project by navigating to https://readthedocs.org/dashboard/import 325 | then clicking the plus icon next to your repository 326 | 3. You can rename the repository on the next screen using a more stylized name 327 | (e.g., with spaces and capital letters) 328 | 4. Click next, and you're good to go! 
329 | 
330 | ### 📦 Making a Release
331 | 
332 | #### Configuring Zenodo
333 | 
334 | [Zenodo](https://zenodo.org) is a long-term archival system that assigns a DOI
335 | to each release of your package.
336 | 
337 | 1. Log in to Zenodo via GitHub with this link:
338 |    https://zenodo.org/oauth/login/github/?next=%2F. This brings you to a page
339 |    that lists all of your organizations and asks you to approve installing the
340 |    Zenodo app on GitHub. Click "grant" next to any organizations you want to
341 |    enable the integration for, then click the big green "approve" button. This
342 |    step only needs to be done once.
343 | 2. Navigate to https://zenodo.org/account/settings/github/, which lists all of
344 |    your GitHub repositories (both in your username and any organizations you
345 |    enabled). Click the on/off toggle for any relevant repositories. When you
346 |    make a new repository, you'll have to come back to this page.
347 | 
348 | After these steps, you're ready to go! After you make a release on GitHub (steps
349 | for this are below), you can navigate to
350 | https://zenodo.org/account/settings/github/repository/cthoyt/pystow to see the
351 | DOI for the release and a link to the Zenodo record for it.
352 | 
353 | #### Registering with the Python Package Index (PyPI)
354 | 
355 | You only have to do the following steps once.
356 | 
357 | 1. Register for an account on the
358 |    [Python Package Index (PyPI)](https://pypi.org/account/register)
359 | 2. Navigate to https://pypi.org/manage/account and make sure you have verified
360 |    your email address. A verification email might not have been sent by default,
361 |    so you might have to click the "options" dropdown next to your address to get
362 |    to the "re-send verification email" button
363 | 3. 2-Factor authentication has been required for PyPI since the end of 2023 (see this
364 |    [blog post from PyPI](https://blog.pypi.org/posts/2023-05-25-securing-pypi-with-2fa/)).
365 |    This means you have to first issue account recovery codes, then set up
366 |    2-factor authentication
367 | 4. Issue an API token from https://pypi.org/manage/account/token
368 | 
369 | #### Configuring your machine's connection to PyPI
370 | 
371 | You have to do the following steps once per machine.
372 | 
373 | ```console
374 | $ uv tool install keyring
375 | $ keyring set https://upload.pypi.org/legacy/ __token__
376 | $ keyring set https://test.pypi.org/legacy/ __token__
377 | ```
378 | 
379 | Note that this replaces the older workflow that used a `.pypirc` file.
380 | 
381 | #### Uploading to PyPI
382 | 
383 | After installing the package in development mode and installing `tox` with
384 | `uv tool install tox --with tox-uv` or `python3 -m pip install tox tox-uv`, run
385 | the following from the console:
386 | 
387 | ```console
388 | $ tox -e finish
389 | ```
390 | 
391 | This script does the following:
392 | 
393 | 1. Uses [bump-my-version](https://github.com/callowayproject/bump-my-version) to
394 |    switch the version number in the `pyproject.toml`, `CITATION.cff`,
395 |    `src/pystow/version.py`, and [`docs/source/conf.py`](docs/source/conf.py) to
396 |    not have the `-dev` suffix
397 | 2. Packages the code in both a tar archive and a wheel using
398 |    [`uv build`](https://docs.astral.sh/uv/guides/publish/#building-your-package)
399 | 3. Uploads to PyPI using
400 |    [`uv publish`](https://docs.astral.sh/uv/guides/publish/#publishing-your-package)
401 | 4. Pushes to GitHub. You'll need to make a release associated with the commit where the
402 |    version was bumped.
403 | 5. 
Bumps the version to the next patch. If you made big changes and want to bump
404 |    the minor version instead, you can run `tox -e bumpversion -- minor` afterwards.
405 | 
406 | #### Releasing on GitHub
407 | 
408 | 1. Navigate to https://github.com/cthoyt/pystow/releases/new to draft a new
409 |    release
410 | 2. Click the "Choose a Tag" dropdown and select the tag corresponding to the
411 |    release you just made
412 | 3. Click the "Generate Release Notes" button to get a quick outline of recent
413 |    changes. Modify the title and description as you see fit
414 | 4. Click the big green "Publish Release" button
415 | 
416 | This will trigger Zenodo to assign a DOI to your release as well.
417 | 
418 | 
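For those who prefer the command line, the release can also be drafted with the
[GitHub CLI](https://cli.github.com). This is an optional alternative to the web
flow above (not part of the `tox -e finish` workflow), and `v0.7.1` is a
hypothetical placeholder for the tag that was created when the version was bumped:

```console
$ gh release create v0.7.1 --generate-notes
```

Publishing the release this way likewise triggers Zenodo to assign a DOI.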
419 | 
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | 
3 | # You can set these variables from the command line.
4 | SPHINXOPTS    =
5 | SPHINXBUILD   = sphinx-build
6 | SPHINXPROJ    = PyStow
7 | SOURCEDIR     = source
8 | BUILDDIR      = build
9 | 
10 | # Put it first so that "make" without argument is like "make help".
11 | help:
12 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
13 | 
14 | .PHONY: help Makefile
15 | 
16 | # Catch-all target: route all unknown targets to Sphinx using the new
17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
18 | %: Makefile
19 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/docs/source/cli.rst:
--------------------------------------------------------------------------------
1 | Command Line Interface
2 | ======================
3 | pystow automatically installs the command :code:`pystow`. See
4 | :code:`pystow --help` for usage details.
5 | 
6 | .. click:: pystow.cli:main
7 |    :prog: pystow
8 |    :show-nested:
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | """
2 | Configuration file for the Sphinx documentation builder.
3 | 
4 | This file only contains a selection of the most common options. For a
5 | full list see the documentation:
6 | http://www.sphinx-doc.org/en/master/config
7 | 
8 | -- Path setup --------------------------------------------------------------
9 | 
10 | If extensions (or modules to document with autodoc) are in another directory,
11 | add these directories to ``sys.path`` here. If the directory is relative to the
12 | documentation root, use ``os.path.abspath`` to make it absolute, like shown here.
13 | """
14 | 
15 | import os
16 | import re
17 | import sys
18 | from datetime import date
19 | 
20 | sys.path.insert(0, os.path.abspath("../../src"))
21 | 
22 | # -- Project information -----------------------------------------------------
23 | 
24 | project = "pystow"
25 | copyright = f"{date.today().year}, Charles Tapley Hoyt"
26 | author = "Charles Tapley Hoyt"
27 | 
28 | # The full version, including alpha/beta/rc tags.
29 | release = "0.7.1-dev"
30 | 
31 | # The short X.Y version.
32 | parsed_version = re.match(
33 |     r"(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?:-(?P<release>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P<build>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?",
34 |     release,
35 | )
36 | version = parsed_version.expand(r"\g<major>.\g<minor>.\g<patch>")
37 | 
38 | if parsed_version.group("release"):
39 |     tags.add("prerelease")  # noqa:F821
40 | 
41 | 
42 | # See https://about.readthedocs.com/blog/2024/07/addons-by-default/
43 | # Define the canonical URL if you are using a custom domain on Read the Docs
44 | html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "")
45 | 
46 | # See https://about.readthedocs.com/blog/2024/07/addons-by-default/
47 | # Tell Jinja2 templates the build is running on Read the Docs
48 | if os.environ.get("READTHEDOCS", "") == "True":
49 |     if "html_context" not in globals():
50 |         html_context = {}
51 |     html_context["READTHEDOCS"] = True
52 | 
53 | 
54 | # -- General configuration ---------------------------------------------------
55 | 
56 | # If your documentation needs a minimal Sphinx version, state it here.
57 | #
58 | # needs_sphinx = '1.0'
59 | 
60 | # If true, the current module name will be prepended to all description
61 | # unit titles (such as .. function::).
62 | add_module_names = False
63 | 
64 | # A list of prefixes that are ignored when creating the module index. (new in Sphinx 0.6)
65 | modindex_common_prefix = ["pystow."]
66 | 
67 | # Add any Sphinx extension module names here, as strings. They can be
68 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
69 | # ones.
70 | extensions = [
71 |     "sphinx.ext.autosummary",
72 |     "sphinx.ext.autodoc",
73 |     "sphinx.ext.coverage",
74 |     "sphinx.ext.intersphinx",
75 |     "sphinx.ext.todo",
76 |     "sphinx.ext.mathjax",
77 |     "sphinx.ext.viewcode",
78 |     "sphinx_automodapi.automodapi",
79 |     "sphinx_automodapi.smart_resolver",
80 | ]
81 | 
82 | 
83 | extensions.append("sphinx_click.ext")
84 | 
85 | 
86 | # generate autosummary pages
87 | autosummary_generate = True
88 | 
89 | # Add any paths that contain templates here, relative to this directory.
90 | templates_path = ["_templates"]
91 | 
92 | # The suffix(es) of source filenames.
93 | # You can specify multiple suffixes as a list of strings:
94 | #
95 | # source_suffix = ['.rst', '.md']
96 | source_suffix = {
97 |     ".rst": "restructuredtext",
98 | }
99 | 
100 | # The master toctree document.
101 | master_doc = "index"
102 | 
103 | # The language for content autogenerated by Sphinx. Refer to documentation
104 | # for a list of supported languages.
105 | #
106 | # This is also used if you do content translation via gettext catalogs.
107 | # Usually you set "language" from the command line for these cases.
108 | language = "en"
109 | 
110 | # List of patterns, relative to source directory, that match files and
111 | # directories to ignore when looking for source files.
112 | # This pattern also affects html_static_path and html_extra_path.
113 | exclude_patterns = []
114 | 
115 | # The name of the Pygments (syntax highlighting) style to use.
116 | pygments_style = "sphinx"
117 | 
118 | # -- Options for HTML output -------------------------------------------------
119 | 
120 | # The theme to use for HTML and HTML Help pages. See the documentation for
121 | # a list of builtin themes.
122 | #
123 | html_theme = "sphinx_rtd_theme"
124 | 
125 | # Theme options are theme-specific and customize the look and feel of a theme
126 | # further. For a list of options available for each theme, see the
127 | # documentation.
128 | #
129 | # html_theme_options = {}
130 | 
131 | # Add any paths that contain custom static files (such as style sheets) here,
132 | # relative to this directory. They are copied after the builtin static files,
133 | # so a file named "default.css" will overwrite the builtin "default.css".
134 | # html_static_path = ['_static']
135 | 
136 | # Custom sidebar templates, must be a dictionary that maps document names
137 | # to template names.
138 | #
139 | # The default sidebars (for documents that don't match any pattern) are
140 | # defined by the theme itself. Builtin themes are using these templates by
141 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
142 | # 'searchbox.html']``.
143 | #
144 | # html_sidebars = {}
145 | 
146 | # The name of an image file (relative to this directory) to place at the top
147 | # of the sidebar.
148 | #
149 | if os.path.exists("logo.png"):
150 |     html_logo = "logo.png"
151 | 
152 | # -- Options for HTMLHelp output ---------------------------------------------
153 | 
154 | # Output file base name for HTML help builder.
155 | htmlhelp_basename = "pystow_doc"
156 | 
157 | # -- Options for LaTeX output ------------------------------------------------
158 | 
159 | # latex_elements = {
160 | #     The paper size ('letterpaper' or 'a4paper').
161 | #
162 | #     'papersize': 'letterpaper',
163 | #
164 | #     The font size ('10pt', '11pt' or '12pt').
165 | #
166 | #     'pointsize': '10pt',
167 | #
168 | #     Additional stuff for the LaTeX preamble.
169 | #
170 | #     'preamble': '',
171 | #
172 | #     Latex figure (float) alignment
173 | #
174 | #     'figure_align': 'htbp',
175 | # }
176 | 
177 | # Grouping the document tree into LaTeX files. List of tuples
178 | # (source start file, target name, title,
179 | #  author, documentclass [howto, manual, or own class]).
180 | # latex_documents = [
181 | #     (
182 | #         master_doc,
183 | #         'pystow.tex',
184 | #         'PyStow Documentation',
185 | #         author,
186 | #         'manual',
187 | #     ),
188 | # ]
189 | 
190 | # -- Options for manual page output ------------------------------------------
191 | 
192 | # One entry per manual page. List of tuples
193 | # (source start file, name, description, authors, manual section).
194 | man_pages = [
195 |     (
196 |         master_doc,
197 |         "pystow",
198 |         "PyStow Documentation",
199 |         [author],
200 |         1,
201 |     ),
202 | ]
203 | 
204 | # -- Options for Texinfo output ----------------------------------------------
205 | 
206 | # Grouping the document tree into Texinfo files. List of tuples
207 | # (source start file, target name, title, author,
208 | #  dir menu entry, description, category)
209 | texinfo_documents = [
210 |     (
211 |         master_doc,
212 |         "pystow",
213 |         "PyStow Documentation",
214 |         author,
215 |         "Charles Tapley Hoyt",
216 |         "Easily pick a place to store data for your Python code",
217 |         "Miscellaneous",
218 |     ),
219 | ]
220 | 
221 | # -- Options for Epub output -------------------------------------------------
222 | 
223 | # Bibliographic Dublin Core info.
224 | # epub_title = project
225 | 
226 | # The unique identifier of the text. This can be an ISBN
227 | # or the project homepage.
228 | #
229 | # epub_identifier = ''
230 | 
231 | # A unique identification for the text.
232 | #
233 | # epub_uid = ''
234 | 
235 | # A list of files that should not be packed into the epub file.
236 | # epub_exclude_files = ['search.html']
237 | 
238 | # -- Extension configuration -------------------------------------------------
239 | 
240 | # -- Options for intersphinx extension ---------------------------------------
241 | 
242 | # Example configuration for intersphinx: refer to the Python standard library.
243 | # Note: don't add trailing slashes, since sphinx adds "/objects.inv" to the end
244 | intersphinx_mapping = {
245 |     "python": ("https://docs.python.org/3", None),
246 |     "rdflib": ("https://rdflib.readthedocs.io/en/latest", None),
247 |     "pandas": ("https://pandas.pydata.org/docs", None),
248 |     "sklearn": ("https://scikit-learn.org/stable", None),
249 |     "numpy": ("https://numpy.org/doc/stable", None),
250 |     "scipy": ("https://docs.scipy.org/doc/scipy", None),
251 | }
252 | 
253 | autoclass_content = "both"
254 | 
255 | # Don't sort alphabetically, explained at:
256 | # https://stackoverflow.com/questions/37209921/python-how-not-to-sort-sphinx-output-in-alphabetical-order
257 | autodoc_member_order = "bysource"
258 | 
259 | todo_include_todos = True
260 | todo_emit_warnings = True
261 | 
262 | # Output SVG inheritance diagrams
263 | graphviz_output_format = "svg"
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | PyStow |release| Documentation
2 | ==============================
3 | If you've ever written the following few lines of code, :mod:`pystow` is for you:
4 | 
5 | .. code-block:: python
6 | 
7 |     import os
8 |     home = os.path.expanduser('~')
9 |     project_name = 'adeft'
10 |     envvar_name = f'{project_name.upper()}_HOME'
11 |     if envvar_name in os.environ:
12 |         ADEFT_HOME = os.environ[envvar_name]
13 |     else:
14 |         ADEFT_HOME = os.path.join(home, f'.{project_name}')
15 |     os.makedirs(ADEFT_HOME, exist_ok=True)
16 | 
17 | Many projects (let's use `Adeft `_ as an example) create a folder in the home
18 | directory as a dot-file such as ``$HOME/.adeft``. I found that I had so many of these that I started grouping
19 | them inside a ``$HOME/.data`` folder. It's also the case that every time you create one of these folders,
20 | you need to ensure its existence.
21 | 
22 | :mod:`pystow` takes care of these things. You can replace the previous code with:
23 | 
24 | .. code-block:: python
25 | 
26 |     import pystow
27 |     ADEFT_HOME = pystow.join('adeft')
28 | 
29 | First, it takes the name of the module, uppercases it, and appends ``_HOME`` to it (e.g., ``ADEFT_HOME``),
30 | then looks in the environment. If this variable is available, it uses that as the directory. It ensures it
31 | exists, then returns a :class:`pathlib.Path` pointing to it.
32 | 
33 | If ``ADEFT_HOME`` (or more generally, ``<MODULE NAME>_HOME``) is not available in the environment, it picks the
34 | path as ``$HOME/.data/<module name>``. Normally, ``$HOME`` is specified by your OS. However, if you want to
35 | pick another location to stick the data, you can override the use of ``$HOME`` by setting ``$PYSTOW_HOME`` in
36 | the environment.
37 | 
38 | If you want to go more directories deep inside the adeft default directory, you can just keep using more
39 | positional arguments (the same semantics as :func:`os.path.join`). These directories automatically
40 | get created as well.
41 | 
42 | .. code-block:: python
43 | 
44 |     >>> import pystow
45 |     >>> from pathlib import Path
46 |     >>> # already set somewhere
47 |     >>> __version__ = ...
48 |     >>> ADEFT_VERSION_HOME: Path = pystow.join('adeft', __version__)
49 | 
50 | .. 
toctree::
51 |    :maxdepth: 2
52 |    :caption: Getting Started
53 |    :name: start
54 | 
55 |    installation
56 |    usage
57 |    utils
58 |    cli
59 | 
60 | Indices and Tables
61 | ------------------
62 | * :ref:`genindex`
63 | * :ref:`modindex`
64 | * :ref:`search`
--------------------------------------------------------------------------------
/docs/source/installation.rst:
--------------------------------------------------------------------------------
1 | Installation
2 | ============
3 | The most recent release can be installed from
4 | `PyPI <https://pypi.org/project/pystow/>`_ with uv:
5 | 
6 | .. code-block:: console
7 | 
8 |     $ uv pip install pystow
9 | 
10 | or with pip:
11 | 
12 | .. code-block:: console
13 | 
14 |     $ python3 -m pip install pystow
15 | 
16 | Installing from git
17 | -------------------
18 | The most recent code and data can be installed directly from GitHub with uv:
19 | 
20 | .. code-block:: console
21 | 
22 |     $ uv --preview pip install git+https://github.com/cthoyt/pystow.git
23 | 
24 | or with pip:
25 | 
26 | .. code-block:: console
27 | 
28 |     $ UV_PREVIEW=1 python3 -m pip install git+https://github.com/cthoyt/pystow.git
29 | 
30 | .. note::
31 | 
32 |     The ``UV_PREVIEW`` environment variable is required to be
33 |     set until the uv build backend becomes a stable feature.
34 | 
35 | Installing for development
36 | --------------------------
37 | To install in development mode with uv:
38 | 
39 | .. code-block:: console
40 | 
41 |     $ git clone https://github.com/cthoyt/pystow.git
42 |     $ cd pystow
43 |     $ uv --preview pip install -e .
44 | 
45 | or with pip:
46 | 
47 | .. code-block:: console
48 | 
49 |     $ UV_PREVIEW=1 python3 -m pip install -e .
50 | 
51 | Configuration
52 | =============
53 | By default, data is stored in the ``$HOME/.data`` directory. By default, the ``<module name>`` app will create the
54 | ``$HOME/.data/<module name>`` folder.
55 | 
56 | If you want to use an alternate folder name to ``.data`` inside the home directory, you can set the ``PYSTOW_NAME``
57 | environment variable. For example, if you set ``PYSTOW_NAME=mydata``, then the following code for the ``pykeen`` app
58 | will create the ``$HOME/mydata/pykeen/`` directory:
59 | 
60 | .. code-block:: python
61 | 
62 |     import os
63 |     import pystow
64 | 
65 |     # Only for demonstration purposes. You should set environment
66 |     # variables either with your .bashrc or in the command line REPL.
67 |     os.environ['PYSTOW_NAME'] = 'mydata'
68 | 
69 |     # Get a directory (as a pathlib.Path) for ~/mydata/pykeen
70 |     pykeen_directory = pystow.join('pykeen')
71 | 
72 | 
73 | If you want to specify a completely custom directory that isn't relative to your home directory, you can set
74 | the ``PYSTOW_HOME`` environment variable. For example, if you set ``PYSTOW_HOME=/usr/local/``, then the following code
75 | for the ``pykeen`` app will create the ``/usr/local/pykeen/`` directory:
76 | 
77 | .. code-block:: python
78 | 
79 |     import os
80 |     import pystow
81 | 
82 |     # Only for demonstration purposes. You should set environment
83 |     # variables either with your .bashrc or in the command line REPL.
84 |     os.environ['PYSTOW_HOME'] = '/usr/local/'
85 | 
86 |     # Get a directory (as a pathlib.Path) for /usr/local/pykeen
87 |     pykeen_directory = pystow.join('pykeen')
88 | 
89 | 
90 | .. warning:: If you set ``PYSTOW_HOME``, then ``PYSTOW_NAME`` is disregarded.
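Because ``PYSTOW_HOME`` takes precedence, setting both variables has the same effect as setting
``PYSTOW_HOME`` alone. The following minimal sketch (reusing the illustrative ``pykeen`` app from
above) demonstrates the precedence:

.. code-block:: python

    import os
    import pystow

    # Only for demonstration purposes. You should set environment
    # variables either with your .bashrc or in the command line REPL.
    os.environ['PYSTOW_NAME'] = 'mydata'  # disregarded, because...
    os.environ['PYSTOW_HOME'] = '/usr/local/'  # ...this takes precedence

    # Get a directory (as a pathlib.Path) for /usr/local/pykeen,
    # *not* /usr/local/mydata/pykeen and *not* $HOME/mydata/pykeen
    pykeen_directory = pystow.join('pykeen')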
91 | 92 | X Desktop Group (XDG) Compatibility 93 | ----------------------------------- 94 | While PyStow's main goal is to make application data less opaque and less 95 | hidden, some users might want to use the 96 | `XDG specifications `_ 97 | for storing their app data. 98 | 99 | If you set the environment variable ``PYSTOW_USE_APPDIRS`` to ``true`` or ``True``, then the 100 | `appdirs `_ package will be used to choose 101 | the base directory based on the ``user data dir`` option. 102 | 103 | .. warning:: If you use this setting, make sure you first do ``pip install appdirs`` 104 | 105 | .. note:: This can still be overridden by ``PYSTOW_HOME``. 106 | -------------------------------------------------------------------------------- /docs/source/usage.rst: -------------------------------------------------------------------------------- 1 | Usage 2 | ===== 3 | .. automodapi:: pystow 4 | :no-inheritance-diagram: 5 | :no-heading: 6 | :headings: -- 7 | :skip: Module 8 | :no-main-docstr: 9 | 10 | .. automodapi:: pystow.impl 11 | :no-inheritance-diagram: 12 | :no-heading: 13 | :headings: -- 14 | :no-main-docstr: 15 | -------------------------------------------------------------------------------- /docs/source/utils.rst: -------------------------------------------------------------------------------- 1 | Utilities 2 | ========= 3 | .. automodapi:: pystow.utils 4 | :no-inheritance-diagram: 5 | :no-heading: 6 | :headings: -- 7 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["uv_build>=0.6.6,<1.0"] 3 | build-backend = "uv_build" 4 | 5 | [project] 6 | name = "pystow" 7 | version = "0.7.1-dev" 8 | description = "Easily pick a place to store data for your Python code" 9 | readme = "README.md" 10 | authors = [ 11 | { name = "Charles Tapley Hoyt", email = "cthoyt@gmail.com" } 12 | ] 13 | maintainers = [ 14 | { name = "Charles Tapley Hoyt", email = "cthoyt@gmail.com" } 15 | ] 16 | 17 | # See https://packaging.python.org/en/latest/guides/writing-pyproject-toml/#classifiers 18 | # Search tags using the controlled vocabulary at https://pypi.org/classifiers 19 | classifiers = [ 20 | "Development Status :: 5 - Production/Stable", 21 | "Environment :: Console", 22 | "Intended Audience :: Developers", 23 | "License :: OSI Approved :: MIT License", 24 | "Operating System :: OS Independent", 25 | "Framework :: Pytest", 26 | "Framework :: tox", 27 | "Framework :: Sphinx", 28 | "Natural Language :: English", 29 | "Programming Language :: Python", 30 | "Programming Language :: Python :: 3.9", 31 | "Programming Language :: Python :: 3.10", 32 | "Programming Language :: Python :: 3.11", 33 | "Programming Language :: Python :: 3.12", 34 | "Programming Language :: Python :: 3.13", 35 | "Programming Language :: Python :: 3 :: Only", 36 | "Typing :: Typed", 37 | ] 38 | keywords = [ 39 | "snekpack", # please keep this keyword to credit the cookiecutter-snekpack template 40 | "cookiecutter", 41 | "caching", 42 | "file management" 43 | ] 44 | 45 | # License Information. 
46 | # See PEP-639 at https://peps.python.org/pep-0639/#add-license-files-key 47 | license-files = [ 48 | "LICENSE", 49 | ] 50 | 51 | requires-python = ">=3.9" 52 | dependencies = [ 53 | "click", 54 | "requests", 55 | "tqdm", 56 | "typing-extensions", 57 | ] 58 | 59 | [project.optional-dependencies] 60 | tests = [ 61 | "pytest", 62 | "coverage[toml]", 63 | "requests_file", 64 | ] 65 | docs = [ 66 | "sphinx>=8", 67 | "sphinx-rtd-theme>=3.0", 68 | "sphinx-click", 69 | "sphinx_automodapi", 70 | ] 71 | rdf = [ 72 | "rdflib", 73 | ] 74 | xml = [ 75 | "lxml", 76 | ] 77 | pandas = [ 78 | "pandas", 79 | ] 80 | aws = [ 81 | "boto3", 82 | ] 83 | 84 | # See https://packaging.python.org/en/latest/guides/writing-pyproject-toml/#urls 85 | # and also https://packaging.python.org/en/latest/specifications/well-known-project-urls/ 86 | [project.urls] 87 | "Bug Tracker" = "https://github.com/cthoyt/pystow/issues" 88 | Homepage = "https://github.com/cthoyt/pystow" 89 | Repository = "https://github.com/cthoyt/pystow.git" 90 | Documentation = "https://pystow.readthedocs.io" 91 | Funding = "https://github.com/sponsors/cthoyt" 92 | 93 | [project.scripts] 94 | pystow = "pystow.cli:main" 95 | 96 | [tool.cruft] 97 | skip = [ 98 | "**/__init__.py", 99 | "tests/*" 100 | ] 101 | 102 | # MyPy, see https://mypy.readthedocs.io/en/stable/config_file.html 103 | [tool.mypy] 104 | plugins = [ 105 | ] 106 | 107 | # Doc8, see https://doc8.readthedocs.io/en/stable/readme.html#ini-file-usage 108 | [tool.doc8] 109 | max-line-length = 120 110 | 111 | # Pytest, see https://docs.pytest.org/en/stable/reference/customize.html#pyproject-toml 112 | [tool.pytest.ini_options] 113 | markers = [ 114 | "slow: marks tests as slow (deselect with '-m \"not slow\"')", 115 | ] 116 | 117 | # Coverage, see https://coverage.readthedocs.io/en/latest/config.html 118 | [tool.coverage.run] 119 | branch = true 120 | source = [ 121 | "pystow", 122 | ] 123 | omit = [ 124 | "tests/*", 125 | "docs/*", 126 | ] 127 | 128 | [tool.coverage.paths] 129 | source = [ 130 | "src/pystow", 131 | ".tox/*/lib/python*/site-packages/pystow", 132 | ] 133 | 134 | [tool.coverage.report] 135 | show_missing = true 136 | exclude_lines = [ 137 | "pragma: no cover", 138 | "raise NotImplementedError", 139 | "if __name__ == \"__main__\":", 140 | "if TYPE_CHECKING:", 141 | "def __str__", 142 | "def __repr__", 143 | ] 144 | 145 | [tool.ruff] 146 | line-length = 100 147 | extend-include = ["*.ipynb"] 148 | 149 | [tool.ruff.lint] 150 | # See https://docs.astral.sh/ruff/rules 151 | extend-select = [ 152 | "F", # pyflakes 153 | "E", # pycodestyle errors 154 | "W", # pycodestyle warnings 155 | "C90", # mccabe 156 | "I", # isort 157 | "UP", # pyupgrade 158 | "D", # pydocstyle 159 | "DOC", # pydoclint 160 | "B", # bugbear 161 | "S", # bandit 162 | "T20", # print 163 | "N", # pep8 naming 164 | "ERA", # eradicate commented out code 165 | "NPY", # numpy checks 166 | "RUF", # ruff rules 167 | "C4", # comprehensions 168 | ] 169 | ignore = [ 170 | "D105", # Missing docstring in magic method 171 | "E203", # Black conflicts with the following 172 | "S301", # yolo pickle 173 | "S320", # yolo lxml 174 | ] 175 | 176 | 177 | # See https://docs.astral.sh/ruff/settings/#per-file-ignores 178 | [tool.ruff.lint.per-file-ignores] 179 | # Ignore security issues in the version.py, which are inconsistent 180 | "src/pystow/version.py" = ["S603", "S607"] 181 | # Ignore commented out code in Sphinx configuration file 182 | "docs/source/conf.py" = ["ERA001"] 183 | # Prints are okay in notebooks 184 | 
"notebooks/**/*.ipynb" = ["T201"] 185 | 186 | [tool.ruff.lint.pydocstyle] 187 | convention = "pep257" 188 | 189 | [tool.ruff.lint.isort] 190 | relative-imports-order = "closest-to-furthest" 191 | known-third-party = [ 192 | "tqdm", 193 | ] 194 | known-first-party = [ 195 | "pystow", 196 | "tests", 197 | ] 198 | 199 | [tool.ruff.format] 200 | # see https://docs.astral.sh/ruff/settings/#format_docstring-code-format 201 | docstring-code-format = true 202 | 203 | [tool.bumpversion] 204 | current_version = "0.7.1-dev" 205 | parse = "(?P\\d+)\\.(?P\\d+)\\.(?P\\d+)(?:-(?P[0-9A-Za-z-]+(?:\\.[0-9A-Za-z-]+)*))?(?:\\+(?P[0-9A-Za-z-]+(?:\\.[0-9A-Za-z-]+)*))?" 206 | serialize = [ 207 | "{major}.{minor}.{patch}-{release}+{build}", 208 | "{major}.{minor}.{patch}+{build}", 209 | "{major}.{minor}.{patch}-{release}", 210 | "{major}.{minor}.{patch}", 211 | ] 212 | commit = true 213 | tag = false 214 | 215 | [tool.bumpversion.parts.release] 216 | optional_value = "production" 217 | first_value = "dev" 218 | values = [ 219 | "dev", 220 | "production", 221 | ] 222 | 223 | [[tool.bumpversion.files]] 224 | filename = "pyproject.toml" 225 | search = "version = \"{current_version}\"" 226 | replace = "version = \"{new_version}\"" 227 | 228 | [[tool.bumpversion.files]] 229 | filename = "docs/source/conf.py" 230 | search = "release = \"{current_version}\"" 231 | replace = "release = \"{new_version}\"" 232 | 233 | [[tool.bumpversion.files]] 234 | filename = "src/pystow/version.py" 235 | search = "VERSION = \"{current_version}\"" 236 | replace = "VERSION = \"{new_version}\"" 237 | -------------------------------------------------------------------------------- /src/pystow/__init__.py: -------------------------------------------------------------------------------- 1 | """PyStow: Easily pick a place to store data for your python package.""" 2 | 3 | from .api import ( 4 | dump_df, 5 | dump_json, 6 | dump_pickle, 7 | dump_rdf, 8 | dump_xml, 9 | ensure, 10 | ensure_csv, 11 | ensure_custom, 12 | ensure_excel, 13 | ensure_from_google, 14 | ensure_from_s3, 15 | ensure_gunzip, 16 | ensure_json, 17 | ensure_json_bz2, 18 | ensure_open, 19 | ensure_open_bz2, 20 | ensure_open_gz, 21 | ensure_open_lzma, 22 | ensure_open_sqlite, 23 | ensure_open_sqlite_gz, 24 | ensure_open_tarfile, 25 | ensure_open_zip, 26 | ensure_pickle, 27 | ensure_pickle_gz, 28 | ensure_rdf, 29 | ensure_tar_df, 30 | ensure_tar_xml, 31 | ensure_untar, 32 | ensure_xml, 33 | ensure_zip_df, 34 | ensure_zip_np, 35 | join, 36 | joinpath_sqlite, 37 | load_df, 38 | load_json, 39 | load_pickle, 40 | load_pickle_gz, 41 | load_rdf, 42 | load_xml, 43 | module, 44 | open, 45 | open_gz, 46 | ) 47 | from .config_api import ConfigError, get_config, write_config 48 | from .impl import Module, VersionHint 49 | from .utils import ensure_readme 50 | 51 | __all__ = [ 52 | "ConfigError", 53 | "Module", 54 | "VersionHint", 55 | "dump_df", 56 | "dump_json", 57 | "dump_pickle", 58 | "dump_rdf", 59 | "dump_xml", 60 | "ensure", 61 | "ensure_csv", 62 | "ensure_custom", 63 | "ensure_excel", 64 | "ensure_from_google", 65 | "ensure_from_s3", 66 | "ensure_gunzip", 67 | "ensure_json", 68 | "ensure_json_bz2", 69 | "ensure_open", 70 | "ensure_open_bz2", 71 | "ensure_open_gz", 72 | "ensure_open_lzma", 73 | "ensure_open_sqlite", 74 | "ensure_open_sqlite_gz", 75 | "ensure_open_tarfile", 76 | "ensure_open_zip", 77 | "ensure_pickle", 78 | "ensure_pickle_gz", 79 | "ensure_rdf", 80 | "ensure_tar_df", 81 | "ensure_tar_xml", 82 | "ensure_untar", 83 | "ensure_xml", 84 | "ensure_zip_df", 85 | 
"ensure_zip_np", 86 | "get_config", 87 | "join", 88 | "joinpath_sqlite", 89 | "load_df", 90 | "load_json", 91 | "load_pickle", 92 | "load_pickle_gz", 93 | "load_rdf", 94 | "load_xml", 95 | "module", 96 | "open", 97 | "open_gz", 98 | "write_config", 99 | ] 100 | 101 | ensure_readme() 102 | 103 | del ensure_readme 104 | -------------------------------------------------------------------------------- /src/pystow/__main__.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | 3 | """Command line interface for PyStow.""" 4 | 5 | from .cli import main 6 | 7 | if __name__ == "__main__": 8 | main() 9 | -------------------------------------------------------------------------------- /src/pystow/cache.py: -------------------------------------------------------------------------------- 1 | """Utilities for caching files.""" 2 | 3 | from __future__ import annotations 4 | 5 | import functools 6 | import json 7 | import logging 8 | import pickle 9 | from abc import ABC, abstractmethod 10 | from collections.abc import MutableMapping 11 | from pathlib import Path 12 | from typing import ( 13 | TYPE_CHECKING, 14 | Any, 15 | Callable, 16 | Generic, 17 | TypeVar, 18 | Union, 19 | cast, 20 | ) 21 | 22 | if TYPE_CHECKING: 23 | import pandas as pd 24 | 25 | __all__ = [ 26 | # Classes 27 | "Cached", 28 | "CachedCollection", 29 | "CachedDataFrame", 30 | "CachedJSON", 31 | "CachedPickle", 32 | # Types 33 | "Getter", 34 | ] 35 | 36 | logger = logging.getLogger(__name__) 37 | 38 | JSONType = Union[ 39 | dict[str, Any], 40 | list[Any], 41 | ] 42 | 43 | X = TypeVar("X") 44 | Getter = Callable[[], X] 45 | 46 | 47 | class Cached(Generic[X], ABC): 48 | """Caching decorator.""" 49 | 50 | def __init__( 51 | self, 52 | path: str | Path, 53 | *, 54 | force: bool = False, 55 | cache: bool = True, 56 | ) -> None: 57 | """Instantiate the decorator. 58 | 59 | :param path: The path to the cache for the file 60 | :param cache: Should caching be done? Defaults to true, turn off for debugging purposes 61 | :param force: Should a pre-existing file be disregared/overwritten? 62 | """ 63 | self.path = Path(path) 64 | self.force = force 65 | self.cache = cache 66 | 67 | def __call__(self, func: Getter[X]) -> Getter[X]: 68 | """Apply this instance as a decorator. 69 | 70 | :param func: The function to wrap 71 | :return: A wrapped function 72 | """ 73 | 74 | @functools.wraps(func) 75 | def _wrapped() -> X: 76 | if not self.cache: 77 | return func() 78 | 79 | if self.path.is_file() and not self.force: 80 | return self.load() 81 | logger.debug("no cache found at %s", self.path) 82 | rv = func() 83 | logger.debug("writing cache to %s", self.path) 84 | self.dump(rv) 85 | return rv 86 | 87 | return _wrapped 88 | 89 | @abstractmethod 90 | def load(self) -> X: 91 | """Load data from the cache (typically by opening a file at the given path).""" 92 | 93 | @abstractmethod 94 | def dump(self, rv: X) -> None: 95 | """Dump data to the cache (typically by opening a file at the given path). 96 | 97 | :param rv: The data to dump 98 | """ 99 | 100 | 101 | class CachedJSON(Cached[JSONType]): 102 | """Make a function lazily cache its return value as JSON.""" 103 | 104 | def load(self) -> JSONType: 105 | """Load data from the cache as JSON. 106 | 107 | :returns: A python object with JSON-like data from the cache 108 | """ 109 | with open(self.path) as file: 110 | return cast(JSONType, json.load(file)) 111 | 112 | def dump(self, rv: JSONType) -> None: 113 | """Dump data to the cache as JSON. 
114 | 
115 |         :param rv: The JSON data to dump
116 |         """
117 |         with open(self.path, "w") as file:
118 |             json.dump(rv, file, indent=2)
119 | 
120 | 
121 | class CachedPickle(Cached[Any]):
122 |     """Make a function lazily cache its return value as a pickle."""
123 | 
124 |     def load(self) -> Any:
125 |         """Load data from the cache as a pickle.
126 | 
127 |         :returns: A python object loaded from the cache
128 |         """
129 |         with open(self.path, "rb") as file:
130 |             return pickle.load(file)
131 | 
132 |     def dump(self, rv: Any) -> None:
133 |         """Dump data to the cache as a pickle.
134 | 
135 |         :param rv: The arbitrary python object to dump
136 |         """
137 |         with open(self.path, "wb") as file:
138 |             pickle.dump(rv, file, protocol=pickle.HIGHEST_PROTOCOL)
139 | 
140 | 
141 | class CachedCollection(Cached[list[str]]):
142 |     """Make a function lazily cache its return value as a file."""
143 | 
144 |     def load(self) -> list[str]:
145 |         """Load data from the cache as a list of strings.
146 | 
147 |         :returns: A list of strings loaded from the cache
148 |         """
149 |         with open(self.path) as file:
150 |             return [line.strip() for line in file]
151 | 
152 |     def dump(self, rv: list[str]) -> None:
153 |         """Dump data to the cache as a list of strings.
154 | 
155 |         :param rv: The list of strings to dump
156 |         """
157 |         with open(self.path, "w") as file:
158 |             for line in rv:
159 |                 print(line, file=file)
160 | 
161 | 
162 | class CachedDataFrame(Cached["pd.DataFrame"]):
163 |     """Make a function lazily cache its return value as a dataframe."""
164 | 
165 |     def __init__(
166 |         self,
167 |         path: str | Path,
168 |         cache: bool = True,
169 |         force: bool = False,
170 |         sep: str | None = None,
171 |         dtype: Any | None = None,
172 |         read_csv_kwargs: MutableMapping[str, Any] | None = None,
173 |     ) -> None:
174 |         """Instantiate the decorator.
175 | 
176 |         :param path: The path to the cache for the file
177 |         :param force: Should a pre-existing file be disregarded/overwritten?
178 |         :param sep: The separator. Defaults to TSV, since this is the only reasonable default.
179 |         :param dtype: A shortcut for setting the dtype
180 |         :param read_csv_kwargs: Additional kwargs to pass to :func:`pd.read_csv`.
181 |         :raises ValueError: if sep is given as a kwarg and also in ``read_csv_kwargs``.
182 |         """
183 |         super().__init__(path=path, cache=cache, force=force)
184 |         self.read_csv_kwargs = read_csv_kwargs or {}
185 |         if "sep" not in self.read_csv_kwargs:
186 |             self.sep = sep or "\t"
187 |         elif sep is not None:
188 |             raise ValueError("sep given both as a keyword argument and in read_csv_kwargs")
189 |         else:
190 |             self.sep = self.read_csv_kwargs.pop("sep")
191 |         if dtype is not None:
192 |             if "dtype" in self.read_csv_kwargs:
193 |                 raise ValueError("dtype given both as a keyword argument and in read_csv_kwargs")
194 |             self.read_csv_kwargs["dtype"] = dtype
195 |         self.read_csv_kwargs.setdefault("keep_default_na", False)
196 | 
197 |     def load(self) -> pd.DataFrame:
198 |         """Load data from the cache as a dataframe.
199 | 
200 |         :returns: A dataframe loaded from the cache.
201 |         """
202 |         import pandas as pd
203 | 
204 |         return pd.read_csv(
205 |             self.path,
206 |             sep=self.sep,
207 |             **self.read_csv_kwargs,
208 |         )
209 | 
210 |     def dump(self, rv: pd.DataFrame) -> None:
211 |         """Dump data to the cache as a dataframe.
212 | 
213 |         :param rv: The dataframe to dump
214 |         """
215 |         rv.to_csv(self.path, sep=self.sep, index=False)
216 | 
--------------------------------------------------------------------------------
/src/pystow/cli.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # flake8: noqa
3 | # type: ignore
4 | 
5 | """Command line interface for PyStow."""
6 | 
7 | from __future__ import annotations
8 | 
9 | import os
10 | from typing import Optional, Sequence
11 | 
12 | import click
13 | 
14 | 
15 | @click.group()
16 | def main() -> None:
17 |     """Run the PyStow CLI."""
18 | 
19 | 
20 | @main.command()
21 | @click.argument("keys", nargs=-1)
22 | @click.option("--name")
23 | def join(keys: Sequence[str], name: Optional[str]):
24 |     """Print a joined directory path."""
25 |     from . import api
26 | 
27 |     click.echo(api.join(*keys, name=name))
28 | 
29 | 
30 | @main.command()
31 | @click.argument("keys", nargs=-1)
32 | def ls(keys: Sequence[str]):
33 |     """List a directory."""
34 |     from . import api
35 | 
36 |     directory = api.join(*keys)
37 |     _ls(directory)
38 | 
39 | 
40 | @main.command()
41 | @click.argument("keys", nargs=-1)
42 | @click.option("--url", required=True)
43 | @click.option("--name")
44 | @click.option("--force", is_flag=True)
45 | def ensure(keys: Sequence[str], url: str, name: Optional[str], force: bool):
46 |     """Ensure a file is downloaded."""
47 |     from . import api
48 | 
49 |     path = api.ensure(*keys, url=url, name=name, force=force)
50 |     _ls(path.parent)
51 | 
52 | 
53 | def _ls(directory):
54 |     command = f"ls -al {directory}"
55 |     click.secho(f"[pystow] {command}", fg="cyan", bold=True)
56 |     os.system(command)  # noqa:S605
57 | 
58 | 
59 | @main.command(name="set")
60 | @click.argument("module")
61 | @click.argument("key")
62 | @click.argument("value")
63 | def set_config(module: str, key: str, value: str):
64 |     """Set a configuration value."""
65 |     from .config_api import write_config
66 | 
67 |     write_config(module, key, value)
68 | 
69 | 
70 | if __name__ == "__main__":
71 |     main()
72 | 
--------------------------------------------------------------------------------
/src/pystow/config_api.py:
--------------------------------------------------------------------------------
1 | """Configuration handling."""
2 | 
3 | from __future__ import annotations
4 | 
5 | import os
6 | from configparser import ConfigParser
7 | from functools import lru_cache
8 | from pathlib import Path
9 | from textwrap import dedent
10 | from typing import Any, Callable, TypeVar
11 | 
12 | from .utils import getenv_path
13 | 
14 | __all__ = [
15 |     "get_config",
16 |     "write_config",
17 | ]
18 | 
19 | X = TypeVar("X")
20 | 
21 | CONFIG_NAME_ENVVAR = "PYSTOW_CONFIG_NAME"
22 | CONFIG_HOME_ENVVAR = "PYSTOW_CONFIG_HOME"
23 | CONFIG_NAME_DEFAULT = ".config"
24 | 
25 | 
26 | class ConfigError(ValueError):
27 |     """Raised when configuration cannot be looked up."""
28 | 
29 |     def __init__(self, module: str, key: str):
30 |         """Initialize the configuration error.
31 | 
32 |         :param module: Name of the module, e.g., ``bioportal``
33 |         :param key: Name of the key inside the module, e.g., ``api_key``
34 |         """
35 |         self.module = module
36 |         self.key = key
37 | 
38 |     def __str__(self) -> str:
39 |         path = get_home().joinpath(self.module).with_suffix(".ini")
40 |         return dedent(
41 |             f"""\
42 |             Could not look up {self.module}/{self.key} and no default given.
43 | 
44 |             This can be solved with one of the following:
45 | 
46 |             1. 
Set the {self.module.upper()}_{self.key.upper()} environment variable
47 | 
48 |                - Windows, via GUI: https://www.computerhope.com/issues/ch000549.htm
49 |                - Windows, via CLI: https://learn.microsoft.com/en-us/windows-server/administration/windows-commands/set_1
50 |                - Mac OS: https://apple.stackexchange.com/questions/106778/how-do-i-set-environment-variables-on-os-x
51 |                - Linux: https://www.freecodecamp.org/news/how-to-set-an-environment-variable-in-linux/
52 | 
53 |             2. Use the PyStow CLI from the command line to
54 |                set the configuration like so:
55 | 
56 |                    $ pystow set {self.module} {self.key} <value>
57 | 
58 |                This creates an INI file in {path}
59 |                with the configuration in the right place.
60 | 
61 |             3. Create/edit an INI file in {path} and manually
62 |                fill it in by 1) creating a section inside it called [{self.module}]
63 |                and 2) setting a value for {self.key} = <value> that looks like:
64 | 
65 |                    # {path}
66 |                    [{self.module}]
67 |                    {self.key} = <value>
68 | 
69 |             See https://github.com/cthoyt/pystow#%EF%B8%8F%EF%B8%8F-configuration for more information.
70 |             """
71 |         )
72 | 
73 | 
74 | def get_name() -> str:
75 |     """Get the config home directory name.
76 | 
77 |     :returns: The name of the pystow home directory, either loaded from
78 |         the :data:`CONFIG_NAME_ENVVAR` environment variable or given by the default
79 |         value :data:`CONFIG_NAME_DEFAULT`.
80 |     """
81 |     return os.getenv(CONFIG_NAME_ENVVAR, default=CONFIG_NAME_DEFAULT)
82 | 
83 | 
84 | def get_home(ensure_exists: bool = True) -> Path:
85 |     """Get the config home directory.
86 | 
87 |     :param ensure_exists: If true, ensures the directory is created
88 |     :returns: A path object representing the pystow home directory, as one of:
89 | 
90 |         1. :data:`CONFIG_HOME_ENVVAR` environment variable or
91 |         2. The default directory constructed in the user's home directory plus what's
92 |            returned by :func:`get_name`.
93 |     """
94 |     default = Path.home().joinpath(get_name()).expanduser()
95 |     return getenv_path(CONFIG_HOME_ENVVAR, default, ensure_exists=ensure_exists)
96 | 
97 | 
98 | @lru_cache(maxsize=1)
99 | def _get_cfp(module: str) -> ConfigParser:
100 |     cfp = ConfigParser()
101 |     directory = get_home()
102 | 
103 |     # If a multi-part module was given like "zenodo:sandbox",
104 |     # then only look for the first part "zenodo" as the file name
105 |     if ":" in module:
106 |         module = module.split(":", 1)[0]
107 | 
108 |     filenames = [
109 |         os.path.join(directory, "config.cfg"),
110 |         os.path.join(directory, "config.ini"),
111 |         os.path.join(directory, "pystow.cfg"),
112 |         os.path.join(directory, "pystow.ini"),
113 |         os.path.join(directory, f"{module}.cfg"),
114 |         os.path.join(directory, f"{module}.ini"),
115 |         os.path.join(directory, module, f"{module}.cfg"),
116 |         os.path.join(directory, module, f"{module}.ini"),
117 |         os.path.join(directory, module, "conf.ini"),
118 |         os.path.join(directory, module, "config.ini"),
119 |         os.path.join(directory, module, "conf.cfg"),
120 |         os.path.join(directory, module, "config.cfg"),
121 |     ]
122 |     cfp.read(filenames)
123 |     return cfp
124 | 
125 | 
126 | def get_config(
127 |     module: str,
128 |     key: str,
129 |     *,
130 |     passthrough: X | None = None,
131 |     default: X | None = None,
132 |     dtype: type[X] | None = None,
133 |     raise_on_missing: bool = False,
134 | ) -> Any:
135 |     """Get a configuration value.
136 | 
137 |     :param module: Name of the module (e.g., ``pybel``) to get configuration for
138 |     :param key: Name of the key (e.g., ``connection``)
139 |     :param passthrough: If this is not none, it is returned directly (after optional type casting)
140 |     :param default: If the environment and configuration files don't contain anything,
141 |         this is returned.
142 |     :param dtype: The datatype to parse out. Can either be :func:`int`, :func:`float`,
143 |         :func:`bool`, or :func:`str`. If none, defaults to :func:`str`.
144 |     :param raise_on_missing: If true, will raise a value error if no data is found and no default
145 |         is given
146 |     :returns: The config value or the default.
147 |     :raises ConfigError: If ``raise_on_missing`` conditions are met
148 |     """
149 |     if passthrough is not None:
150 |         return _cast(passthrough, dtype)
151 |     rv = os.getenv(f"{module.upper()}_{key.upper()}")
152 |     if rv is not None:
153 |         return _cast(rv, dtype)
154 |     rv = _get_cfp(module).get(module, key, fallback=None)
155 |     if rv is None:
156 |         if default is None and raise_on_missing:
157 |             raise ConfigError(module=module, key=key)
158 |         return default
159 |     return _cast(rv, dtype)
160 | 
161 | 
162 | def _cast(rv: Any, dtype: None | Callable[..., Any]) -> Any:
163 |     if not isinstance(rv, str):  # if it's not a string, it doesn't need munging
164 |         return rv
165 |     if dtype in (None, str):  # no munging necessary
166 |         return rv
167 |     if dtype in (int, float):
168 |         return dtype(rv)
169 |     if dtype is bool:
170 |         if rv.lower() in ("t", "true", "yes", "1", 1, True):
171 |             return True
172 |         elif rv.lower() in ("f", "false", "no", "0", 0, False):
173 |             return False
174 |         else:
175 |             raise ValueError(f"value cannot be coerced into bool: {rv}")
176 |     raise TypeError(f"dtype is invalid: {dtype}")
177 | 
178 | 
179 | def write_config(module: str, key: str, value: str) -> None:
180 |     """Write a configuration value.
181 | 
182 |     :param module: The name of the app (e.g., ``indra``)
183 |     :param key: The key of the configuration in the app
184 |     :param value: The value of the configuration in the app
185 |     """
186 |     _get_cfp.cache_clear()
187 |     cfp = ConfigParser()
188 | 
189 |     # If there's a multi-part module such as "zenodo:sandbox",
190 |     # then write to zenodo.ini with section [zenodo:sandbox]
191 |     fname = module.split(":", 1)[0] if ":" in module else module
192 | 
193 |     path = get_home().joinpath(fname).with_suffix(".ini")
194 |     cfp.read(path)
195 | 
196 |     # If the file did not exist, then this section will be empty
197 |     # and running set() would raise a configparser.NoSectionError.
198 |     if not cfp.has_section(module):
199 |         cfp.add_section(module)
200 | 
201 |     # Note that the section duplicates the file name
202 |     cfp.set(section=module, option=key, value=value)
203 | 
204 |     with path.open("w") as file:
205 |         cfp.write(file)
206 | 
--------------------------------------------------------------------------------
/src/pystow/constants.py:
--------------------------------------------------------------------------------
1 | """PyStow constants."""
2 | 
3 | from __future__ import annotations
4 | 
5 | from collections.abc import Generator
6 | from io import StringIO
7 | from textwrap import dedent
8 | from typing import IO, Any, Callable
9 | 
10 | __all__ = [
11 |     "JSON",
12 |     "PYSTOW_HOME_ENVVAR",
13 |     "PYSTOW_NAME_DEFAULT",
14 |     "PYSTOW_NAME_ENVVAR",
15 |     "PYSTOW_USE_APPDIRS",
16 |     "README_TEXT",
17 |     "Opener",
18 |     "Provider",
19 | ]
20 | 
21 | PYSTOW_NAME_ENVVAR = "PYSTOW_NAME"
22 | PYSTOW_HOME_ENVVAR = "PYSTOW_HOME"
23 | PYSTOW_USE_APPDIRS = "PYSTOW_USE_APPDIRS"
24 | PYSTOW_NAME_DEFAULT = ".data"
25 | README_TEXT = dedent(
26 |     """\
27 |     # PyStow Data Directory
28 | 
29 |     This directory is used by [`pystow`](https://github.com/cthoyt/pystow) as a
30 |     reproducible location to store and access data.
31 | 
32 |     ### ⚙️️ Configuration
33 | 
34 |     By default, data is stored in the `$HOME/.data` directory. By default, the `<module name>`
35 |     app will create the `$HOME/.data/<module name>` folder.
36 | 
37 |     If you want to use an alternate folder name to `.data` inside the home directory,
38 |     you can set the `PYSTOW_NAME` environment variable. For example, if you set
39 |     `PYSTOW_NAME=mydata`, then the following code for the `pykeen` app will
40 |     create the `$HOME/mydata/pykeen/` directory:
41 | 
42 |     ```python
43 |     import os
44 |     import pystow
45 | 
46 |     # Only for demonstration purposes. You should set environment
47 |     # variables either with your .bashrc or in the command line REPL.
48 |     os.environ['PYSTOW_NAME'] = 'mydata'
49 | 
50 |     # Get a directory (as a pathlib.Path) for ~/mydata/pykeen
51 |     pykeen_directory = pystow.join('pykeen')
52 |     ```
53 | 
54 |     If you want to specify a completely custom directory that isn't relative to
55 |     your home directory, you can set the `PYSTOW_HOME` environment variable. For
56 |     example, if you set `PYSTOW_HOME=/usr/local/`, then the following code for
57 |     the `pykeen` app will create the `/usr/local/pykeen/` directory:
58 | 
59 |     ```python
60 |     import os
61 |     import pystow
62 | 
63 |     # Only for demonstration purposes. You should set environment
64 |     # variables either with your .bashrc or in the command line REPL.
65 |     os.environ['PYSTOW_HOME'] = '/usr/local/'
66 | 
67 |     # Get a directory (as a pathlib.Path) for /usr/local/pykeen
68 |     pykeen_directory = pystow.join('pykeen')
69 |     ```
70 | 
71 |     Note: if you set `PYSTOW_HOME`, then `PYSTOW_NAME` is disregarded.
72 | """ 73 | ) 74 | 75 | Opener = Generator[StringIO, None, None] 76 | BytesOpener = Generator[IO[bytes], None, None] 77 | JSON = Any 78 | Provider = Callable[..., None] 79 | -------------------------------------------------------------------------------- /src/pystow/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cthoyt/pystow/80249d83c684cb15ce05b0c83e10d45c22b966d7/src/pystow/py.typed -------------------------------------------------------------------------------- /src/pystow/utils.py: -------------------------------------------------------------------------------- 1 | """Utilities.""" 2 | 3 | from __future__ import annotations 4 | 5 | import contextlib 6 | import csv 7 | import gzip 8 | import hashlib 9 | import logging 10 | import lzma 11 | import os 12 | import pickle 13 | import shutil 14 | import tarfile 15 | import tempfile 16 | import typing 17 | import urllib.error 18 | import zipfile 19 | from collections.abc import Collection, Generator, Iterable, Iterator, Mapping 20 | from functools import partial 21 | from io import BytesIO, StringIO 22 | from pathlib import Path, PurePosixPath 23 | from subprocess import check_output 24 | from typing import ( 25 | TYPE_CHECKING, 26 | Any, 27 | Literal, 28 | NamedTuple, 29 | TextIO, 30 | cast, 31 | ) 32 | from urllib.parse import urlparse 33 | from urllib.request import urlretrieve 34 | from uuid import uuid4 35 | 36 | import requests 37 | from tqdm.auto import tqdm 38 | from typing_extensions import TypeAlias 39 | 40 | from .constants import ( 41 | PYSTOW_HOME_ENVVAR, 42 | PYSTOW_NAME_DEFAULT, 43 | PYSTOW_NAME_ENVVAR, 44 | PYSTOW_USE_APPDIRS, 45 | README_TEXT, 46 | ) 47 | 48 | if TYPE_CHECKING: 49 | import _csv 50 | 51 | import botocore.client 52 | import lxml.etree 53 | import numpy.typing 54 | import pandas 55 | import rdflib 56 | 57 | __all__ = [ 58 | "DownloadBackend", 59 | "Hash", 60 | "HexDigestError", 61 | "HexDigestMismatch", 62 | "UnexpectedDirectory", 63 | "UnexpectedDirectoryError", 64 | "download", 65 | "download_from_google", 66 | "download_from_s3", 67 | "get_base", 68 | "get_commit", 69 | "get_df_io", 70 | "get_hashes", 71 | "get_hexdigests_remote", 72 | "get_home", 73 | "get_name", 74 | "get_np_io", 75 | "get_offending_hexdigests", 76 | "getenv_path", 77 | "gunzip", 78 | "mkdir", 79 | "mock_envvar", 80 | "mock_home", 81 | "n", 82 | "name_from_s3_key", 83 | "name_from_url", 84 | "path_to_sqlite", 85 | "raise_on_digest_mismatch", 86 | "read_rdf", 87 | "read_tarfile_csv", 88 | "read_tarfile_xml", 89 | "read_zip_np", 90 | "read_zipfile_csv", 91 | "read_zipfile_rdf", 92 | "read_zipfile_xml", 93 | "safe_open", 94 | "safe_open_writer", 95 | "write_lzma_csv", 96 | "write_pickle_gz", 97 | "write_tarfile_csv", 98 | "write_zipfile_csv", 99 | "write_zipfile_np", 100 | "write_zipfile_xml", 101 | ] 102 | 103 | logger = logging.getLogger(__name__) 104 | 105 | #: Represents an available backend for downloading 106 | DownloadBackend: TypeAlias = Literal["urllib", "requests"] 107 | 108 | #: This type alias uses a stub-only constructor, meaning that 109 | #: hashlib._Hash isn't actually part of the code, but MyPy injects it 110 | #: so we can do type checking 111 | Hash: TypeAlias = "hashlib._Hash" 112 | 113 | 114 | class HexDigestMismatch(NamedTuple): 115 | """Contains information about a hexdigest mismatch.""" 116 | 117 | #: the name of the algorithm 118 | name: str 119 | #: the observed/actual hexdigest, encoded as a string 120 | actual: str 121 | #: the expected 
hexdigest, encoded as a string 122 | expected: str 123 | 124 | 125 | class HexDigestError(ValueError): 126 | """Thrown if the hashsums do not match expected hashsums.""" 127 | 128 | def __init__(self, offending_hexdigests: Collection[HexDigestMismatch]): 129 | """Instantiate the exception. 130 | 131 | :param offending_hexdigests: The result from :func:`get_offending_hexdigests` 132 | """ 133 | self.offending_hexdigests = offending_hexdigests 134 | 135 | def __str__(self) -> str: 136 | return "\n".join( 137 | ( 138 | "Hexdigest of downloaded file does not match the expected ones!", 139 | *( 140 | f"\t{name} actual: {actual} vs. expected: {expected}" 141 | for name, actual, expected in self.offending_hexdigests 142 | ), 143 | ) 144 | ) 145 | 146 | 147 | class UnexpectedDirectoryError(FileExistsError): 148 | """Thrown if a directory path is given where file path should have been.""" 149 | 150 | def __init__(self, path: Path): 151 | """Instantiate the exception. 152 | 153 | :param path: The path to a directory that should have been a file. 154 | """ 155 | self.path = path 156 | 157 | def __str__(self) -> str: 158 | return f"got directory instead of file: {self.path}" 159 | 160 | 161 | #: Backwards compatible name 162 | UnexpectedDirectory = UnexpectedDirectoryError 163 | 164 | 165 | def get_hexdigests_remote( 166 | hexdigests_remote: Mapping[str, str] | None, hexdigests_strict: bool = False 167 | ) -> Mapping[str, str]: 168 | """Process hexdigests via URLs. 169 | 170 | :param hexdigests_remote: 171 | The expected hexdigests as (algorithm_name, url to file with expected hex digest) pairs. 172 | :param hexdigests_strict: 173 | Set this to `False` to stop automatically checking for the `algorithm(filename)=hash` format 174 | :returns: 175 | A mapping of algorithms to hexdigests 176 | """ 177 | rv = {} 178 | for key, url in (hexdigests_remote or {}).items(): 179 | text = requests.get(url, timeout=15).text 180 | if not hexdigests_strict and "=" in text: 181 | text = text.rsplit("=", 1)[-1].strip() 182 | rv[key] = text 183 | return rv 184 | 185 | 186 | def get_offending_hexdigests( 187 | path: str | Path, 188 | chunk_size: int | None = None, 189 | hexdigests: Mapping[str, str] | None = None, 190 | hexdigests_remote: Mapping[str, str] | None = None, 191 | hexdigests_strict: bool = False, 192 | ) -> Collection[HexDigestMismatch]: 193 | """ 194 | Check a file for hash sums. 195 | 196 | :param path: 197 | The file path. 198 | :param chunk_size: 199 | The chunk size for reading the file. 200 | :param hexdigests: 201 | The expected hexdigests as (algorithm_name, expected_hex_digest) pairs. 202 | :param hexdigests_remote: 203 | The expected hexdigests as (algorithm_name, url to file with expected hexdigest) pairs. 204 | :param hexdigests_strict: 205 | Set this to false to stop automatically checking for the `algorithm(filename)=hash` format 206 | 207 | :return: 208 | A collection of observed / expected hexdigests where the digests do not match. 
209 |     """
210 |     hexdigests = dict(
211 |         **(hexdigests or {}),
212 |         **get_hexdigests_remote(hexdigests_remote, hexdigests_strict=hexdigests_strict),
213 |     )
214 | 
215 |     # If there aren't any keys in the combined dictionaries,
216 |     # then there won't be any mismatches
217 |     if not hexdigests:
218 |         return []
219 | 
220 |     logger.info(f"Checking hash sums for file: {path}")
221 | 
222 |     # instantiate algorithms
223 |     algorithms = get_hashes(path=path, names=set(hexdigests), chunk_size=chunk_size)
224 | 
225 |     # Compare digests
226 |     mismatches = []
227 |     for alg, expected_digest in hexdigests.items():
228 |         observed_digest = algorithms[alg].hexdigest()
229 |         if observed_digest != expected_digest:
230 |             logger.error(f"{alg} expected {expected_digest} but got {observed_digest}.")
231 |             mismatches.append(HexDigestMismatch(alg, observed_digest, expected_digest))
232 |         else:
233 |             logger.debug(f"Successfully checked with {alg}.")
234 | 
235 |     return mismatches
236 | 
237 | 
238 | def get_hashes(
239 |     path: str | Path,
240 |     names: Iterable[str],
241 |     *,
242 |     chunk_size: int | None = None,
243 | ) -> Mapping[str, Hash]:
244 |     """Calculate digests for several hash algorithms over a file in a single pass.
245 | 
246 |     :param path: The file path.
247 |     :param names: Names of the hash algorithms in :mod:`hashlib`
248 |     :param chunk_size: The chunk size for reading the file.
249 | 
250 |     :return:
251 |         A mapping from algorithm name to its :mod:`hashlib` hash object
252 |     """
253 |     path = Path(path).resolve()
254 |     if chunk_size is None:
255 |         chunk_size = 64 * 2**10
256 | 
257 |     # instantiate hash algorithms
258 |     algorithms: Mapping[str, Hash] = {name: hashlib.new(name) for name in names}
259 | 
260 |     # calculate hash sums of file incrementally
261 |     buffer = memoryview(bytearray(chunk_size))
262 |     with path.open("rb", buffering=0) as file:
263 |         for this_chunk_size in iter(lambda: file.readinto(buffer), 0):
264 |             for alg in algorithms.values():
265 |                 alg.update(buffer[:this_chunk_size])
266 | 
267 |     return algorithms
268 | 
269 | 
270 | def raise_on_digest_mismatch(
271 |     *,
272 |     path: Path,
273 |     hexdigests: Mapping[str, str] | None = None,
274 |     hexdigests_remote: Mapping[str, str] | None = None,
275 |     hexdigests_strict: bool = False,
276 | ) -> None:
277 |     """Raise a HexDigestError if the digests do not match.
278 | 
279 |     :param path:
280 |         The file path.
281 |     :param hexdigests:
282 |         The expected hexdigests as (algorithm_name, expected_hex_digest) pairs.
283 |     :param hexdigests_remote:
284 |         The expected hexdigests as (algorithm_name, url to file with expected hexdigest) pairs.
285 |     :param hexdigests_strict:
286 |         Set this to false to stop automatically checking for the `algorithm(filename)=hash` format
287 | 
288 |     :raises HexDigestError: if there are any offending hex digests
289 | 
290 |     """
291 |     offending_hexdigests = get_offending_hexdigests(
292 |         path=path,
293 |         hexdigests=hexdigests,
294 |         hexdigests_remote=hexdigests_remote,
295 |         hexdigests_strict=hexdigests_strict,
296 |     )
297 |     if offending_hexdigests:
298 |         raise HexDigestError(offending_hexdigests)
299 | 
300 | 
301 | class TqdmReportHook(tqdm):  # type:ignore
302 |     """A custom progress bar that can be used with urllib.
303 | 
304 |     Based on https://gist.github.com/leimao/37ff6e990b3226c2c9670a2cd1e4a6f5
305 |     """
306 | 
307 |     def update_to(
308 |         self,
309 |         blocks: int = 1,
310 |         block_size: int = 1,
311 |         total_size: int | None = None,
312 |     ) -> None:
313 |         """Update the internal state based on a urllib report hook.
314 | 
315 |         :param blocks: Number of blocks transferred so far
316 |         :param block_size: Size of each block (in tqdm units)
317 |         :param total_size: Total size (in tqdm units). If None (the default), the total remains unchanged.
318 |         """
319 |         if total_size is not None:
320 |             self.total = total_size
321 |         self.update(blocks * block_size - self.n)  # will also set self.n = blocks * block_size
322 | 
323 | 
324 | def download(
325 |     url: str,
326 |     path: str | Path,
327 |     force: bool = True,
328 |     clean_on_failure: bool = True,
329 |     backend: DownloadBackend = "urllib",
330 |     hexdigests: Mapping[str, str] | None = None,
331 |     hexdigests_remote: Mapping[str, str] | None = None,
332 |     hexdigests_strict: bool = False,
333 |     progress_bar: bool = True,
334 |     tqdm_kwargs: Mapping[str, Any] | None = None,
335 |     **kwargs: Any,
336 | ) -> None:
337 |     """Download a file from a given URL.
338 | 
339 |     :param url: URL to download
340 |     :param path: Path to download the file to
341 |     :param force: If false and the file already exists, will not re-download.
342 |     :param clean_on_failure: If true, will delete the file on any exception raised during download
343 |     :param backend: The downloader to use. Choose 'urllib' or 'requests'
344 |     :param hexdigests:
345 |         The expected hexdigests as (algorithm_name, expected_hex_digest) pairs.
346 |     :param hexdigests_remote:
347 |         The expected hexdigests as (algorithm_name, url to file with expected hexdigest) pairs.
348 |     :param hexdigests_strict:
349 |         Set this to `True` to disable automatic detection of the `algorithm(filename)=hash` format
350 |     :param progress_bar:
351 |         Set to true to show a progress bar while downloading
352 |     :param tqdm_kwargs:
353 |         Override the default arguments passed to :class:`tqdm.tqdm` when progress_bar is True.
354 |     :param kwargs:
355 |         The keyword arguments to pass to :func:`urllib.request.urlretrieve`
356 |         or to `requests.get` depending on the backend chosen. If using the 'requests' backend,
357 |         `stream` is set to True by default.
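
    Example (a sketch; the URL, destination, and digest below are hypothetical):

    >>> download(  # doctest: +SKIP
    ...     url="https://example.com/test.tsv",
    ...     path="test.tsv",
    ...     backend="requests",
    ...     hexdigests={"md5": "0" * 32},
    ... )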
358 | 359 | :raises Exception: Thrown if an error besides a keyboard interrupt is thrown during download 360 | :raises KeyboardInterrupt: If a keyboard interrupt is thrown during download 361 | :raises UnexpectedDirectory: If a directory is given for the ``path`` argument 362 | :raises ValueError: If an invalid backend is chosen 363 | :raises DownloadError: If an error occurs during download 364 | """ 365 | path = Path(path).resolve() 366 | 367 | if path.is_dir(): 368 | raise UnexpectedDirectoryError(path) 369 | if path.is_file() and not force: 370 | raise_on_digest_mismatch( 371 | path=path, 372 | hexdigests=hexdigests, 373 | hexdigests_remote=hexdigests_remote, 374 | hexdigests_strict=hexdigests_strict, 375 | ) 376 | logger.debug("did not re-download %s from %s", path, url) 377 | return 378 | 379 | _tqdm_kwargs = { 380 | "unit": "B", 381 | "unit_scale": True, 382 | "unit_divisor": 1024, 383 | "miniters": 1, 384 | "disable": not progress_bar, 385 | "desc": f"Downloading {path.name}", 386 | "leave": False, 387 | } 388 | if tqdm_kwargs: 389 | _tqdm_kwargs.update(tqdm_kwargs) 390 | 391 | try: 392 | if backend == "urllib": 393 | logger.info("downloading with urllib from %s to %s", url, path) 394 | with TqdmReportHook(**_tqdm_kwargs) as t: 395 | try: 396 | urlretrieve(url, path, reporthook=t.update_to, **kwargs) # noqa:S310 397 | except urllib.error.URLError as e: 398 | raise DownloadError(backend, url, path, e) from e 399 | elif backend == "requests": 400 | kwargs.setdefault("stream", True) 401 | try: 402 | # see https://requests.readthedocs.io/en/master/user/quickstart/#raw-response-content 403 | # pattern from https://stackoverflow.com/a/39217788/5775947 404 | with requests.get(url, **kwargs) as response, path.open("wb") as file: # noqa:S113 405 | logger.info( 406 | "downloading (stream=%s) with requests from %s to %s", 407 | kwargs["stream"], 408 | url, 409 | path, 410 | ) 411 | # Solution for progress bar from https://stackoverflow.com/a/63831344/5775947 412 | total_size = int(response.headers.get("Content-Length", 0)) 413 | # Decompress if needed 414 | response.raw.read = partial( # type:ignore[method-assign] 415 | response.raw.read, decode_content=True 416 | ) 417 | with tqdm.wrapattr( 418 | response.raw, "read", total=total_size, **_tqdm_kwargs 419 | ) as fsrc: 420 | shutil.copyfileobj(fsrc, file) 421 | except requests.exceptions.ConnectionError as e: 422 | raise DownloadError(backend, url, path, e) from e 423 | else: 424 | raise ValueError(f'Invalid backend: {backend}. Use "requests" or "urllib".') 425 | except (Exception, KeyboardInterrupt): 426 | if clean_on_failure: 427 | _unlink(path) 428 | raise 429 | 430 | raise_on_digest_mismatch( 431 | path=path, 432 | hexdigests=hexdigests, 433 | hexdigests_remote=hexdigests_remote, 434 | hexdigests_strict=hexdigests_strict, 435 | ) 436 | 437 | 438 | class DownloadError(OSError): 439 | """An error that wraps information from a requests or urllib download failure.""" 440 | 441 | def __init__( 442 | self, 443 | backend: DownloadBackend, 444 | url: str, 445 | path: Path, 446 | exc: urllib.error.URLError | requests.exceptions.ConnectionError, 447 | ) -> None: 448 | """Initialize the error. 
449 | 450 | :param backend: The backend used 451 | :param url: The url that failed to download 452 | :param path: The path that was supposed to be downloaded to 453 | :param exc: The exception raised 454 | """ 455 | self.backend = backend 456 | self.url = url 457 | self.path = path 458 | self.exc = exc 459 | # TODO parse out HTTP error code, if possible 460 | 461 | def __str__(self) -> str: 462 | return f"Failed with {self.backend} to download {self.url} to {self.path}" 463 | 464 | 465 | def name_from_url(url: str) -> str: 466 | """Get the filename from the end of the URL. 467 | 468 | :param url: A URL 469 | :return: The name of the file at the end of the URL 470 | """ 471 | parse_result = urlparse(url) 472 | path = PurePosixPath(parse_result.path) 473 | name = path.name 474 | return name 475 | 476 | 477 | def base_from_gzip_name(name: str) -> str: 478 | """Get the base name for a file after stripping the gz ending. 479 | 480 | :param name: The name of the gz file 481 | :returns: The cleaned name of the file, with no gz ending 482 | :raises ValueError: if the file does not end with ".gz" 483 | """ 484 | if not name.endswith(".gz"): 485 | raise ValueError(f"Name does not end with .gz: {name}") 486 | return name[: -len(".gz")] 487 | 488 | 489 | def name_from_s3_key(key: str) -> str: 490 | """Get the filename from the S3 key. 491 | 492 | :param key: A S3 path 493 | :returns: The name of the file 494 | """ 495 | return key.split("/")[-1] 496 | 497 | 498 | def mkdir(path: Path, ensure_exists: bool = True) -> None: 499 | """Make a directory (or parent directory if a file is given) if flagged with ``ensure_exists``. 500 | 501 | :param path: The path to a directory 502 | :param ensure_exists: 503 | Should the directories leading to the path be created if they don't already exist? 504 | """ 505 | if ensure_exists: 506 | path.mkdir(exist_ok=True, parents=True) 507 | 508 | 509 | @contextlib.contextmanager 510 | def mock_envvar(envvar: str, value: str) -> Iterator[None]: 511 | """Mock the environment variable then delete it after the test is over. 512 | 513 | :param envvar: The environment variable to mock 514 | :param value: The value to temporarily put in the environment variable 515 | during this mock. 516 | :yield: None, since this just mocks the environment variable for the 517 | time being. 518 | """ 519 | original_value = os.environ.get(envvar) 520 | os.environ[envvar] = value 521 | yield 522 | if original_value is None: 523 | del os.environ[envvar] 524 | else: 525 | os.environ[envvar] = original_value 526 | 527 | 528 | @contextlib.contextmanager 529 | def mock_home() -> Iterator[Path]: 530 | """Mock the PyStow home environment variable, yields the directory name. 531 | 532 | :yield: The path to the temporary directory. 533 | """ 534 | with tempfile.TemporaryDirectory() as directory: 535 | with mock_envvar(PYSTOW_HOME_ENVVAR, directory): 536 | yield Path(directory) 537 | 538 | 539 | def getenv_path(envvar: str, default: Path, ensure_exists: bool = True) -> Path: 540 | """Get an environment variable representing a path, or use the default. 541 | 542 | :param envvar: The environmental variable name to check 543 | :param default: 544 | The default path to return if the environmental variable is not set 545 | :param ensure_exists: 546 | Should the directories leading to the path be created if they don't already exist? 547 | :return: A path either specified by the environmental variable or by the default. 
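
    Example (a minimal sketch; the variable name and paths below are
    hypothetical):

    >>> import os
    >>> from pathlib import Path
    >>> os.environ["MY_APP_HOME"] = "/tmp/my_app"  # doctest: +SKIP
    >>> getenv_path("MY_APP_HOME", Path.home() / "my_app")  # doctest: +SKIP
    PosixPath('/tmp/my_app')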
548 | """ 549 | rv = Path(os.getenv(envvar, default=default)).expanduser() 550 | mkdir(rv, ensure_exists=ensure_exists) 551 | return rv 552 | 553 | 554 | def n() -> str: 555 | """Get a random string for testing. 556 | 557 | :returns: A random string for testing purposes. 558 | """ 559 | return str(uuid4()) 560 | 561 | 562 | def get_df_io(df: pandas.DataFrame, sep: str = "\t", index: bool = False, **kwargs: Any) -> BytesIO: 563 | """Get the dataframe as bytes. 564 | 565 | :param df: A dataframe 566 | :param sep: The separator in the dataframe. Overrides Pandas default to use a tab. 567 | :param index: Should the index be output? Overrides the Pandas default to be false. 568 | :param kwargs: Additional kwargs to pass to :func:`pandas.DataFrame.to_csv`. 569 | :return: A bytes object that can be used as a file. 570 | """ 571 | sio = StringIO() 572 | df.to_csv(sio, sep=sep, index=index, **kwargs) 573 | sio.seek(0) 574 | bio = BytesIO(sio.read().encode("utf-8")) 575 | return bio 576 | 577 | 578 | def get_np_io(arr: numpy.typing.ArrayLike, **kwargs: Any) -> BytesIO: 579 | """Get the numpy object as bytes. 580 | 581 | :param arr: Array-like 582 | :param kwargs: Additional kwargs to pass to :func:`numpy.save`. 583 | :return: A bytes object that can be used as a file. 584 | """ 585 | import numpy as np 586 | 587 | bio = BytesIO() 588 | np.save(bio, arr, **kwargs) 589 | bio.seek(0) 590 | return bio 591 | 592 | 593 | def write_pickle_gz( 594 | obj: Any, 595 | path: str | Path, 596 | **kwargs: Any, 597 | ) -> None: 598 | """Write an object to a gzipped pickle. 599 | 600 | :param obj: The object to write 601 | :param path: The path of the file to write to 602 | :param kwargs: 603 | Additional kwargs to pass to :func:`pickle.dump` 604 | """ 605 | with gzip.open(path, mode="wb") as file: 606 | pickle.dump(obj, file, **kwargs) 607 | 608 | 609 | def write_lzma_csv( 610 | df: pandas.DataFrame, 611 | path: str | Path, 612 | sep: str = "\t", 613 | index: bool = False, 614 | **kwargs: Any, 615 | ) -> None: 616 | """Write a dataframe as an lzma-compressed file. 617 | 618 | :param df: A dataframe 619 | :param path: The path to the resulting LZMA compressed dataframe file 620 | :param sep: The separator in the dataframe. Overrides Pandas default to use a tab. 621 | :param index: Should the index be output? Overrides the Pandas default to be false. 622 | :param kwargs: 623 | Additional kwargs to pass to :func:`get_df_io` and transitively 624 | to :func:`pandas.DataFrame.to_csv`. 625 | """ 626 | bytes_io = get_df_io(df, sep=sep, index=index, **kwargs) 627 | with lzma.open(path, "wb") as file: 628 | file.write(bytes_io.read()) 629 | 630 | 631 | def write_zipfile_csv( 632 | df: pandas.DataFrame, 633 | path: str | Path, 634 | inner_path: str, 635 | sep: str = "\t", 636 | index: bool = False, 637 | **kwargs: Any, 638 | ) -> None: 639 | """Write a dataframe to an inner CSV file to a zip archive. 640 | 641 | :param df: A dataframe 642 | :param path: The path to the resulting zip archive 643 | :param inner_path: The path inside the zip archive to write the dataframe 644 | :param sep: The separator in the dataframe. Overrides Pandas default to use a tab. 645 | :param index: Should the index be output? Overrides the Pandas default to be false. 646 | :param kwargs: 647 | Additional kwargs to pass to :func:`get_df_io` and transitively 648 | to :func:`pandas.DataFrame.to_csv`. 
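
    Example round trip with :func:`read_zipfile_csv` (a sketch; the paths
    below are hypothetical):

    >>> import pandas as pd
    >>> df = pd.DataFrame([("a", 1), ("b", 2)], columns=["key", "value"])
    >>> write_zipfile_csv(df, "archive.zip", "inner.tsv")  # doctest: +SKIP
    >>> read_zipfile_csv("archive.zip", "inner.tsv")  # doctest: +SKIP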
649 |     """
650 |     bytes_io = get_df_io(df, sep=sep, index=index, **kwargs)
651 |     with zipfile.ZipFile(file=path, mode="w") as zip_file:
652 |         with zip_file.open(inner_path, mode="w") as file:
653 |             file.write(bytes_io.read())
654 | 
655 | 
656 | def read_zipfile_csv(
657 |     path: str | Path, inner_path: str, sep: str = "\t", **kwargs: Any
658 | ) -> pandas.DataFrame:
659 |     """Read an inner CSV file from a zip archive.
660 | 
661 |     :param path: The path to the zip archive
662 |     :param inner_path: The path inside the zip archive to the dataframe
663 |     :param sep: The separator in the dataframe. Overrides Pandas default to use a tab.
664 |     :param kwargs: Additional kwargs to pass to :func:`pandas.read_csv`.
665 |     :return: A dataframe
666 |     """
667 |     import pandas as pd
668 | 
669 |     with zipfile.ZipFile(file=path) as zip_file:
670 |         with zip_file.open(inner_path) as file:
671 |             return pd.read_csv(file, sep=sep, **kwargs)
672 | 
673 | 
674 | def write_zipfile_xml(
675 |     element_tree: lxml.etree.ElementTree,
676 |     path: str | Path,
677 |     inner_path: str,
678 |     **kwargs: Any,
679 | ) -> None:
680 |     """Write an XML element tree to an inner XML file in a zip archive.
681 | 
682 |     :param element_tree: An XML element tree
683 |     :param path: The path to the resulting zip archive
684 |     :param inner_path: The path inside the zip archive to write the XML file
685 |     :param kwargs: Additional kwargs to pass to :func:`tostring`
686 |     """
687 |     from lxml import etree
688 | 
689 |     kwargs.setdefault("pretty_print", True)
690 |     with zipfile.ZipFile(file=path, mode="w") as zip_file:
691 |         with zip_file.open(inner_path, mode="w") as file:
692 |             file.write(etree.tostring(element_tree, **kwargs))
693 | 
694 | 
695 | def read_zipfile_xml(path: str | Path, inner_path: str, **kwargs: Any) -> lxml.etree.ElementTree:
696 |     """Read an inner XML file from a zip archive.
697 | 
698 |     :param path: The path to the zip archive
699 |     :param inner_path: The path inside the zip archive to the xml file
700 |     :param kwargs: Additional kwargs to pass to :func:`lxml.etree.parse`
701 |     :return: An XML element tree
702 |     """
703 |     from lxml import etree
704 | 
705 |     with zipfile.ZipFile(file=path) as zip_file:
706 |         with zip_file.open(inner_path) as file:
707 |             return etree.parse(file, **kwargs)
708 | 
709 | 
710 | def write_zipfile_np(
711 |     arr: numpy.typing.ArrayLike,
712 |     path: str | Path,
713 |     inner_path: str,
714 |     **kwargs: Any,
715 | ) -> None:
716 |     """Write a numpy array to an inner file in a zip archive.
717 | 
718 |     :param arr: Array-like
719 |     :param path: The path to the resulting zip archive
720 |     :param inner_path: The path inside the zip archive to write the array
721 |     :param kwargs:
722 |         Additional kwargs to pass to :func:`get_np_io` and transitively
723 |         to :func:`numpy.save`.
724 |     """
725 |     bytes_io = get_np_io(arr, **kwargs)
726 |     with zipfile.ZipFile(file=path, mode="w") as zip_file:
727 |         with zip_file.open(inner_path, mode="w") as file:
728 |             file.write(bytes_io.read())
729 | 
730 | 
731 | def read_zip_np(path: str | Path, inner_path: str, **kwargs: Any) -> numpy.typing.ArrayLike:
732 |     """Read an inner numpy array-like from a zip archive.
733 | 
734 |     :param path: The path to the zip archive
735 |     :param inner_path: The path inside the zip archive to the array
736 |     :param kwargs: Additional kwargs to pass to :func:`numpy.load`.
737 |     :return: A numpy array or other object
738 |     """
739 |     import numpy as np
740 | 
741 |     with zipfile.ZipFile(file=path) as zip_file:
742 |         with zip_file.open(inner_path) as file:
743 |             return cast(np.typing.ArrayLike, np.load(file, **kwargs))
744 | 
745 | 
746 | def read_zipfile_rdf(path: str | Path, inner_path: str, **kwargs: Any) -> rdflib.Graph:
747 |     """Read an inner RDF file from a zip archive.
748 | 
749 |     :param path: The path to the zip archive
750 |     :param inner_path: The path inside the zip archive to the RDF file
751 |     :param kwargs: Additional kwargs to pass to :meth:`rdflib.Graph.parse`.
752 |     :return: A graph
753 |     """
754 |     import rdflib
755 | 
756 |     graph = rdflib.Graph()
757 |     with zipfile.ZipFile(file=path) as zip_file:
758 |         with zip_file.open(inner_path) as file:
759 |             graph.parse(file, **kwargs)
760 |     return graph
761 | 
762 | 
763 | def write_tarfile_csv(
764 |     df: pandas.DataFrame,
765 |     path: str | Path,
766 |     inner_path: str,
767 |     sep: str = "\t",
768 |     index: bool = False,
769 |     **kwargs: Any,
770 | ) -> None:
771 |     """Write a dataframe to an inner CSV file in a tar archive.
772 | 
773 |     :param df: A dataframe
774 |     :param path: The path to the resulting tar archive
775 |     :param inner_path: The path inside the tar archive to write the dataframe
776 |     :param sep: The separator in the dataframe. Overrides Pandas default to use a tab.
777 |     :param index: Should the index be output? Overrides the Pandas default to be false.
778 |     :param kwargs:
779 |         Additional kwargs to pass to :func:`get_df_io` and transitively
780 |         to :func:`pandas.DataFrame.to_csv`.
781 |     """
782 |     s = df.to_csv(sep=sep, index=index, **kwargs).encode("utf-8")  # encode first so the size is in bytes
783 |     tarinfo = tarfile.TarInfo(name=inner_path)
784 |     tarinfo.size = len(s)
785 |     with tarfile.TarFile(path, mode="w") as tar_file:
786 |         tar_file.addfile(tarinfo, BytesIO(s))
787 | 
788 | 
789 | def read_tarfile_csv(
790 |     path: str | Path, inner_path: str, sep: str = "\t", **kwargs: Any
791 | ) -> pandas.DataFrame:
792 |     """Read an inner CSV file from a tar archive.
793 | 
794 |     :param path: The path to the tar archive
795 |     :param inner_path: The path inside the tar archive to the dataframe
796 |     :param sep: The separator in the dataframe. Overrides Pandas default to use a tab.
797 |     :param kwargs: Additional kwargs to pass to :func:`pandas.read_csv`.
798 |     :return: A dataframe
799 |     """
800 |     import pandas as pd
801 | 
802 |     with tarfile.open(path) as tar_file:
803 |         with tar_file.extractfile(inner_path) as file:  # type: ignore
804 |             return pd.read_csv(file, sep=sep, **kwargs)
805 | 
806 | 
807 | def read_tarfile_xml(path: str | Path, inner_path: str, **kwargs: Any) -> lxml.etree.ElementTree:
808 |     """Read an inner XML file from a tar archive.
809 | 
810 |     :param path: The path to the tar archive
811 |     :param inner_path: The path inside the tar archive to the xml file
812 |     :param kwargs: Additional kwargs to pass to :func:`lxml.etree.parse`
813 |     :return: An XML element tree
814 |     """
815 |     from lxml import etree
816 | 
817 |     with tarfile.open(path) as tar_file:
818 |         with tar_file.extractfile(inner_path) as file:  # type: ignore
819 |             return etree.parse(file, **kwargs)
820 | 
821 | 
822 | def read_rdf(path: str | Path, **kwargs: Any) -> rdflib.Graph:
823 |     """Read an RDF file with :mod:`rdflib`.
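
    Paths ending in ``.gz`` are transparently opened with :mod:`gzip` before
    parsing. A sketch of usage (the file name and format below are hypothetical):

    >>> graph = read_rdf("ontology.ttl.gz", format="turtle")  # doctest: +SKIP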
824 | 
825 |     :param path: The path to the RDF file
826 |     :param kwargs: Additional kwargs to pass to :func:`rdflib.Graph.parse`
827 |     :return: A parsed RDF graph
828 |     """
829 |     import rdflib
830 | 
831 |     if isinstance(path, str):
832 |         path = Path(path)
833 |     graph = rdflib.Graph()
834 |     with (
835 |         gzip.open(path, "rb") if isinstance(path, Path) and path.suffix == ".gz" else open(path)
836 |     ) as file:
837 |         graph.parse(file, **kwargs)  # type:ignore
838 |     return graph
839 | 
840 | 
841 | def write_sql(df: pandas.DataFrame, name: str, path: str | Path, **kwargs: Any) -> None:
842 |     """Write a dataframe as a SQL table.
843 | 
844 |     :param df: A dataframe
845 |     :param name: The name of the table in the database to write to
846 |     :param path: The path to the SQLite database file
847 |     :param kwargs: Additional keyword arguments to pass to :meth:`pandas.DataFrame.to_sql`
848 |     """
849 |     import sqlite3
850 | 
851 |     with contextlib.closing(sqlite3.connect(path)) as conn:
852 |         df.to_sql(name, conn, **kwargs)
853 | 
854 | 
855 | def get_commit(org: str, repo: str, provider: str = "git") -> str:
856 |     """Get the last commit hash for the given repo.
857 | 
858 |     :param org: The GitHub organization or owner
859 |     :param repo: The GitHub repository name
860 |     :param provider: The method for getting the most recent commit
861 |     :raises ValueError: if an invalid provider is given
862 |     :returns: A commit hash's hex digest as a string
863 |     """
864 |     if provider == "git":
865 |         output = check_output(["git", "ls-remote", f"https://github.com/{org}/{repo}"])  # noqa
866 |         lines = (line.strip().split("\t") for line in output.decode("utf8").splitlines())
867 |         rv = next(line[0] for line in lines if line[1] == "HEAD")
868 |     elif provider == "github":
869 |         res = requests.get(f"https://api.github.com/repos/{org}/{repo}/branches/master", timeout=15)
870 |         res_json = res.json()
871 |         rv = res_json["commit"]["sha"]
872 |     else:
873 |         raise ValueError(f"invalid provider: {provider}")
874 |     return rv
875 | 
876 | 
877 | CHUNK_SIZE = 32768
878 | DOWNLOAD_URL = "https://docs.google.com/uc?export=download"
879 | TOKEN_KEY = "download_warning"  # noqa:S105
880 | 
881 | 
882 | def download_from_google(
883 |     file_id: str,
884 |     path: str | Path,
885 |     force: bool = True,
886 |     clean_on_failure: bool = True,
887 |     hexdigests: Mapping[str, str] | None = None,
888 | ) -> None:
889 |     """Download a file from Google Drive.
890 | 
891 |     Implementation inspired by https://github.com/ndrplz/google-drive-downloader.
892 | 
893 |     :param file_id: The google file identifier
894 |     :param path: The place to write the file
895 |     :param force: If false and the file already exists, will not re-download.
896 |     :param clean_on_failure: If true, will delete the file on any exception raised during download
897 |     :param hexdigests:
898 |         The expected hexdigests as (algorithm_name, expected_hex_digest) pairs.
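
    Example (a sketch; the file identifier and digest below are hypothetical):

    >>> download_from_google(  # doctest: +SKIP
    ...     "1A2B3C4D5E", "data.tsv", hexdigests={"md5": "0" * 32}
    ... )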
899 | 
900 |     :raises Exception: Thrown if an error besides a keyboard interrupt is thrown during download
901 |     :raises KeyboardInterrupt: If a keyboard interrupt is thrown during download
902 |     :raises UnexpectedDirectory: If a directory is given for the ``path`` argument
903 |     """
904 |     path = Path(path).resolve()
905 | 
906 |     if path.is_dir():
907 |         raise UnexpectedDirectoryError(path)
908 |     if path.is_file() and not force:
909 |         raise_on_digest_mismatch(path=path, hexdigests=hexdigests)
910 |         logger.debug("did not re-download %s from Google ID %s", path, file_id)
911 |         return
912 | 
913 |     try:
914 |         with requests.Session() as sess:
915 |             res = sess.get(DOWNLOAD_URL, params={"id": file_id}, stream=True)
916 |             token = _get_confirm_token(res)
917 |             res = sess.get(DOWNLOAD_URL, params={"id": file_id, "confirm": token}, stream=True)
918 |             with path.open("wb") as file:
919 |                 for chunk in tqdm(res.iter_content(CHUNK_SIZE), desc="writing", unit="chunk"):
920 |                     if chunk:  # filter out keep-alive new chunks
921 |                         file.write(chunk)
922 |     except (Exception, KeyboardInterrupt):
923 |         if clean_on_failure:
924 |             _unlink(path)
925 |         raise
926 | 
927 |     raise_on_digest_mismatch(path=path, hexdigests=hexdigests)
928 | 
929 | 
930 | def _get_confirm_token(res: requests.Response) -> str:
931 |     for key, value in res.cookies.items():
932 |         if key.startswith(TOKEN_KEY):
933 |             return value
934 |     raise ValueError(f"no token found with key {TOKEN_KEY} in cookies: {res.cookies}")
935 | 
936 | 
937 | def download_from_s3(
938 |     s3_bucket: str,
939 |     s3_key: str,
940 |     path: str | Path,
941 |     client: None | botocore.client.BaseClient = None,
942 |     client_kwargs: Mapping[str, Any] | None = None,
943 |     download_file_kwargs: Mapping[str, Any] | None = None,
944 |     force: bool = True,
945 |     clean_on_failure: bool = True,
946 | ) -> None:
947 |     """Download a file from S3.
948 | 
949 |     :param s3_bucket: The name of the S3 bucket
950 |     :param s3_key: The key inside the S3 bucket
951 |     :param path: The place to write the file
952 |     :param client:
953 |         A botocore client. If none is given, one will be created automatically
954 |     :param client_kwargs:
955 |         Keyword arguments to be passed to the client on instantiation.
956 |     :param download_file_kwargs:
957 |         Keyword arguments to be passed to :func:`boto3.s3.transfer.S3Transfer.download_file`
958 |     :param force: If false and the file already exists, will not re-download.
959 |     :param clean_on_failure: If true, will delete the file on any exception raised during download
960 | 
961 |     :raises Exception: Thrown if an error besides a keyboard interrupt is thrown during download
962 |     :raises KeyboardInterrupt: If a keyboard interrupt is thrown during download
963 |     :raises UnexpectedDirectory: If a directory is given for the ``path`` argument
964 |     """
965 |     path = Path(path).resolve()
966 | 
967 |     if path.is_dir():
968 |         raise UnexpectedDirectoryError(path)
969 |     if path.is_file() and not force:
970 |         logger.debug("did not re-download %s from %s %s", path, s3_bucket, s3_key)
971 |         return
972 | 
973 |     try:
974 |         import boto3.s3.transfer
975 | 
976 |         if client is None:
977 |             import boto3
978 |             import botocore.client
979 | 
980 |             client_kwargs = {} if client_kwargs is None else dict(client_kwargs)
981 |             client_kwargs.setdefault(
982 |                 "config", botocore.client.Config(signature_version=botocore.UNSIGNED)
983 |             )
984 |             client = boto3.client("s3", **client_kwargs)
985 | 
986 |         download_file_kwargs = {} if download_file_kwargs is None else dict(download_file_kwargs)
987 |         download_file_kwargs.setdefault(
988 |             "Config", boto3.s3.transfer.TransferConfig(use_threads=False)
989 |         )
990 |         client.download_file(s3_bucket, s3_key, path.as_posix(), **download_file_kwargs)
991 |     except (Exception, KeyboardInterrupt):
992 |         if clean_on_failure:
993 |             _unlink(path)
994 |         raise
995 | 
996 | 
997 | def _unlink(path: str | Path) -> None:
998 |     # pathlib.Path.unlink() only gained ``missing_ok`` in Python 3.8, so remove defensively
999 |     try:
1000 |         os.remove(path)
1001 |     except OSError:
1002 |         pass  # if the file can't be deleted then no problem
1003 | 
1004 | 
1005 | def get_name() -> str:
1006 |     """Get the PyStow home directory name.
1007 | 
1008 |     :returns: The name of the pystow home directory, either loaded from
1009 |         the :data:`PYSTOW_NAME_ENVVAR` environment variable or given by the default
1010 |         value :data:`PYSTOW_NAME_DEFAULT`.
1011 |     """
1012 |     return os.getenv(PYSTOW_NAME_ENVVAR, default=PYSTOW_NAME_DEFAULT)
1013 | 
1014 | 
1015 | def use_appdirs() -> bool:
1016 |     """Check if X Desktop Group (XDG) compatibility is requested.
1017 | 
1018 |     :returns: If the :data:`PYSTOW_USE_APPDIRS` environment variable is set to ``true``.
1019 |     """
1020 |     return os.getenv(PYSTOW_USE_APPDIRS) in {"true", "True"}
1021 | 
1022 | 
1023 | def get_home(ensure_exists: bool = True) -> Path:
1024 |     """Get the PyStow home directory.
1025 | 
1026 |     :param ensure_exists: If true, ensures the directory is created
1027 |     :returns: A path object representing the pystow home directory, as one of:
1028 | 
1029 |         1. The :data:`PYSTOW_HOME_ENVVAR` environment variable or
1030 |         2. The user data directory defined by :mod:`appdirs` if the :data:`PYSTOW_USE_APPDIRS`
1031 |            environment variable is set to ``true`` or
1032 |         3. The default directory constructed in the user's home directory plus what's
1033 |            returned by :func:`get_name`.
1034 |     """
1035 |     if use_appdirs():
1036 |         from appdirs import user_data_dir
1037 | 
1038 |         default = Path(user_data_dir())
1039 |     else:
1040 |         default = Path.home() / get_name()
1041 |     return getenv_path(PYSTOW_HOME_ENVVAR, default, ensure_exists=ensure_exists)
1042 | 
1043 | 
1044 | def get_base(key: str, ensure_exists: bool = True) -> Path:
1045 |     """Get the base directory for a module.
1046 | 
1047 |     :param key:
1048 |         The name of the module. No funny characters. The environment variable
1049 |         ``<KEY>_HOME``, where the key is uppercased, is checked first before using
1050 |         the default home directory.
1051 |     :param ensure_exists:
1052 |         Should all directories be created automatically? Defaults to true.
1053 |     :returns:
1054 |         The path to the base directory for the given module
1055 | 
1056 |     :raises ValueError: if the key is invalid (e.g., has a dot in it)
1057 |     """
1058 |     if "." in key:
1059 |         raise ValueError(f"The module should not have a dot in it: {key}")
1060 |     envvar = f"{key.upper()}_HOME"
1061 |     if use_appdirs():
1062 |         from appdirs import user_data_dir
1063 | 
1064 |         default = Path(user_data_dir(appname=key))
1065 |     else:
1066 |         default = get_home(ensure_exists=False) / key
1067 |     return getenv_path(envvar, default, ensure_exists=ensure_exists)
1068 | 
1069 | 
1070 | def ensure_readme() -> None:
1071 |     """Ensure there's a README in the PyStow data directory.
1072 | 
1073 |     :raises PermissionError: If the script calling this function does not have
1074 |         adequate permissions to write a file into the PyStow home directory.
1075 |     """
1076 |     try:
1077 |         readme_path = get_home(ensure_exists=True).joinpath("README.md")
1078 |     except PermissionError as e:
1079 |         raise PermissionError(
1080 |             "PyStow was not able to create its home directory due to a lack of "
1081 |             "permissions. This can happen, e.g., if you're working on a server where you don't "
1082 |             "have full rights. See https://pystow.readthedocs.io/en/latest/installation.html#"
1083 |             "configuration for instructions on choosing a different home folder location for "
1084 |             "PyStow to somewhere where you have write permissions."
1085 |         ) from e
1086 |     if readme_path.is_file():
1087 |         return
1088 |     with readme_path.open("w", encoding="utf8") as file:
1089 |         print(README_TEXT, file=file)
1090 | 
1091 | 
1092 | def path_to_sqlite(path: str | Path) -> str:
1093 |     """Convert a path to a SQLite connection string.
1094 | 
1095 |     :param path: A path to a SQLite database file
1096 |     :returns: A standard connection string to the database
1097 |     """
1098 |     path = Path(path).expanduser().resolve()
1099 |     return f"sqlite:///{path.as_posix()}"
1100 | 
1101 | 
1102 | def gunzip(source: str | Path, target: str | Path) -> None:
1103 |     """Unzip a gzipped source file into the given target.
1104 | 
1105 |     :param source: The path to an input file
1106 |     :param target: The path to an output file
1107 |     """
1108 |     with gzip.open(source, "rb") as in_file, open(target, "wb") as out_file:
1109 |         shutil.copyfileobj(in_file, out_file)
1110 | 
1111 | 
1112 | #: A human-readable flag for whether a file should be read or written.
1113 | Operation: TypeAlias = Literal["read", "write"]
1114 | OPERATION_VALUES: set[str] = set(typing.get_args(Operation))
1115 | 
1116 | #: A human-readable flag for whether a file should be opened in text or binary mode.
1117 | Representation: TypeAlias = Literal["text", "binary"]
1118 | REPRESENTATION_VALUES: set[str] = set(typing.get_args(Representation))
1119 | 
1120 | MODE_MAP: dict[tuple[Operation, Representation], Literal["rt", "wt", "rb", "wb"]] = {
1121 |     ("read", "text"): "rt",
1122 |     ("read", "binary"): "rb",
1123 |     ("write", "text"): "wt",
1124 |     ("write", "binary"): "wb",
1125 | }
1126 | 
1127 | 
1128 | # docstr-coverage:excused `overload`
1129 | @typing.overload
1130 | @contextlib.contextmanager
1131 | def safe_open(
1132 |     path: str | Path, *, operation: Operation = ..., representation: Literal["text"] = "text"
1133 | ) -> Generator[typing.TextIO, None, None]: ...
1134 | 1135 | 1136 | # docstr-coverage:excused `overload` 1137 | @typing.overload 1138 | @contextlib.contextmanager 1139 | def safe_open( 1140 | path: str | Path, *, operation: Operation = ..., representation: Literal["binary"] = "binary" 1141 | ) -> Generator[typing.BinaryIO, None, None]: ... 1142 | 1143 | 1144 | @contextlib.contextmanager 1145 | def safe_open( 1146 | path: str | Path, *, operation: Operation = "read", representation: Representation = "text" 1147 | ) -> Generator[typing.TextIO, None, None] | Generator[typing.BinaryIO, None, None]: 1148 | """Safely open a file for reading or writing text.""" 1149 | if operation not in OPERATION_VALUES: 1150 | raise ValueError( 1151 | f"Invalid operation given: {operation}. Should be one of {OPERATION_VALUES}." 1152 | ) 1153 | if representation not in REPRESENTATION_VALUES: 1154 | raise ValueError( 1155 | f"Invalid representation given: {representation}. " 1156 | f"Should be one of {REPRESENTATION_VALUES}." 1157 | ) 1158 | 1159 | mode = MODE_MAP[operation, representation] 1160 | path = Path(path).expanduser().resolve() 1161 | if path.suffix.endswith(".gz"): 1162 | with gzip.open(path, mode=mode) as file: 1163 | yield file # type:ignore 1164 | else: 1165 | with open(path, mode=mode) as file: 1166 | yield file # type:ignore 1167 | 1168 | 1169 | @contextlib.contextmanager 1170 | def safe_open_writer( 1171 | f: str | Path | TextIO, *, delimiter: str = "\t", **kwargs: Any 1172 | ) -> Generator[_csv._writer, None, None]: 1173 | """Open a CSV writer, wrapping :func:`csv.writer`. 1174 | 1175 | :param f: A path to a file, or an already open text-based IO object 1176 | :param delimiter: The delimiter for writing to CSV 1177 | :param kwargs: Keyword arguments to pass to :func:`csv.writer` 1178 | :yields: A CSV writer object, constructed from :func:`csv.writer` 1179 | """ 1180 | if isinstance(f, (str, Path)): 1181 | with safe_open(f, operation="write", representation="text") as file: 1182 | yield csv.writer(file, delimiter=delimiter, **kwargs) 1183 | else: 1184 | yield csv.writer(f, delimiter=delimiter, **kwargs) 1185 | -------------------------------------------------------------------------------- /src/pystow/version.py: -------------------------------------------------------------------------------- 1 | """Version information for PyStow.""" 2 | 3 | __all__ = [ 4 | "VERSION", 5 | ] 6 | 7 | VERSION = "0.7.1-dev" 8 | -------------------------------------------------------------------------------- /tests/resources/test.txt: -------------------------------------------------------------------------------- 1 | this is a test file 2 | -------------------------------------------------------------------------------- /tests/resources/test.txt.md5: -------------------------------------------------------------------------------- 1 | 4221d002ceb5d3c9e9137e495ceaa647 -------------------------------------------------------------------------------- /tests/resources/test_1.csv: -------------------------------------------------------------------------------- 1 | h1,h2,h3 2 | v1_1,v1_2,v1_3 3 | v2_1,v2_2,v2_3 4 | -------------------------------------------------------------------------------- /tests/resources/test_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "key": "value" 3 | } -------------------------------------------------------------------------------- /tests/resources/test_1.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cthoyt/pystow/80249d83c684cb15ce05b0c83e10d45c22b966d7/tests/resources/test_1.pkl -------------------------------------------------------------------------------- /tests/resources/test_1.tsv: -------------------------------------------------------------------------------- 1 | h1 h2 h3 2 | v1_1 v1_2 v1_3 3 | v2_1 v2_2 v2_3 4 | -------------------------------------------------------------------------------- /tests/resources/test_verbose.txt.md5: -------------------------------------------------------------------------------- 1 | MD5(text.txt)=4221d002ceb5d3c9e9137e495ceaa647 -------------------------------------------------------------------------------- /tests/resources/test_wrong.txt.md5: -------------------------------------------------------------------------------- 1 | yolo -------------------------------------------------------------------------------- /tests/test_api.py: -------------------------------------------------------------------------------- 1 | """Test for API completeness.""" 2 | 3 | from __future__ import annotations 4 | 5 | import inspect 6 | import unittest 7 | from typing import Callable, TypeVar, cast 8 | 9 | import pandas as pd 10 | import rdflib 11 | from lxml import etree 12 | 13 | import pystow 14 | from pystow import Module 15 | 16 | SKIP = {"__init__"} 17 | 18 | X = TypeVar("X") 19 | 20 | 21 | def _df_equal(a: pd.DataFrame, b: pd.DataFrame, msg: str | None = None) -> bool: 22 | return bool(a.values.tolist() == b.values.tolist()) 23 | 24 | 25 | def _rdf_equal(a: rdflib.Graph, b: rdflib.Graph, msg: str | None = None) -> bool: 26 | return {tuple(t) for t in a} == {tuple(t) for t in b} 27 | 28 | 29 | def _etree_equal(a: etree.ElementTree, b: etree.ElementTree, msg: str | None = None) -> bool: 30 | return cast(str, etree.tostring(a)) == cast(str, etree.tostring(b)) 31 | 32 | 33 | class TestExposed(unittest.TestCase): 34 | """Test API exposure.""" 35 | 36 | def setUp(self) -> None: 37 | """Set up the test case.""" 38 | self.addTypeEqualityFunc(pd.DataFrame, _df_equal) # type:ignore[arg-type] 39 | self.addTypeEqualityFunc(rdflib.Graph, _rdf_equal) # type:ignore[arg-type] 40 | self.addTypeEqualityFunc(type(etree.ElementTree()), _etree_equal) # type:ignore[arg-type] 41 | 42 | def assert_io( 43 | self, obj: X, extension: str, dump: Callable[..., None], load: Callable[..., X] 44 | ) -> None: 45 | """Test an object can be dumped and loaded. 
46 | 47 | :param obj: The object to dump 48 | :param extension: The extension to use 49 | :param dump: The dump function 50 | :param load: The load function 51 | """ 52 | name = f"test.{extension}" 53 | path = pystow.join("test", name=name) 54 | if path.is_file(): 55 | path.unlink() 56 | self.assertFalse(path.is_file()) 57 | 58 | dump("test", name=name, obj=obj) 59 | self.assertTrue(path.is_file()) 60 | self.assertEqual(obj, load("test", name=name)) 61 | 62 | def test_exposed(self) -> None: 63 | """Test that all module-level functions also have a counterpart in the top-level API.""" 64 | for name, func in Module.__dict__.items(): 65 | if not inspect.isfunction(func) or name in SKIP: 66 | continue 67 | with self.subTest(name=name): 68 | self.assertIn( 69 | name, 70 | pystow.api.__all__, 71 | msg=f"Module.{name} should be included in from `pystow.api.__all__`.", 72 | ) 73 | self.assertTrue( 74 | hasattr(pystow.api, name), 75 | msg=f"`Module.{name} should be exposed as a top-level " 76 | f"function in `pystow.api`.", 77 | ) 78 | self.assertTrue( 79 | hasattr(pystow, name), 80 | msg=f"`pystow.api.{name}` should be imported in `pystow.__init__`.", 81 | ) 82 | 83 | def test_io(self) -> None: 84 | """Test IO functions.""" 85 | obj = ["a", "b", "c"] 86 | for ext, dump, load in [ 87 | ("json", pystow.dump_json, pystow.load_json), 88 | ("pkl", pystow.dump_pickle, pystow.load_pickle), 89 | ]: 90 | with self.subTest(ext=ext): 91 | self.assert_io(obj, extension=ext, dump=dump, load=load) # type:ignore 92 | 93 | def test_pd_io(self) -> None: 94 | """Test pandas IO.""" 95 | columns = list("abc") 96 | data = [(1, 2, 3), (4, 5, 6)] 97 | df = pd.DataFrame(data, columns=columns) 98 | self.assert_io(df, extension="tsv", load=pystow.load_df, dump=pystow.dump_df) 99 | 100 | def test_rdf_io(self) -> None: 101 | """Test RDFlib IO.""" 102 | graph = rdflib.Graph() 103 | graph.add( 104 | ( 105 | rdflib.URIRef("http://example.com/subject"), 106 | rdflib.URIRef("http://example.com/predicate"), 107 | rdflib.URIRef("http://example.com/object"), 108 | ) 109 | ) 110 | self.assertEqual(1, len(graph)) 111 | self.assert_io(graph, extension="ttl", dump=pystow.dump_rdf, load=pystow.load_rdf) 112 | 113 | def test_xml_io(self) -> None: 114 | """Test XML I/O.""" 115 | root = etree.Element("root") 116 | root.set("interesting", "somewhat") 117 | etree.SubElement(root, "test") 118 | my_tree = etree.ElementTree(root) 119 | self.assert_io(my_tree, extension="xml", dump=pystow.dump_xml, load=pystow.load_xml) 120 | -------------------------------------------------------------------------------- /tests/test_caching.py: -------------------------------------------------------------------------------- 1 | """Tests for caching.""" 2 | 3 | from __future__ import annotations 4 | 5 | import os 6 | import tempfile 7 | import unittest 8 | from pathlib import Path 9 | 10 | from pystow.cache import CachedPickle 11 | 12 | EXPECTED = 5 13 | EXPECTED_2 = 6 14 | 15 | 16 | class TestCache(unittest.TestCase): 17 | """Tests for caches.""" 18 | 19 | def setUp(self) -> None: 20 | """Set up the test case with a temporary directory.""" 21 | self.tmpdir = tempfile.TemporaryDirectory() 22 | self.directory = Path(self.tmpdir.name) 23 | 24 | def tearDown(self) -> None: 25 | """Tear down the test case's temporary directory.""" 26 | self.tmpdir.cleanup() 27 | 28 | def test_cache_exception(self) -> None: 29 | """Test that exceptions aren't swallowed.""" 30 | path = self.directory.joinpath("test.pkl") 31 | 32 | self.assertFalse(path.is_file()) 33 | 34 | 
@CachedPickle(path=path) 35 | def _f1() -> None: 36 | raise NotImplementedError 37 | 38 | self.assertFalse(path.is_file(), msg="function has not been called") 39 | 40 | with self.assertRaises(NotImplementedError): 41 | _f1() 42 | 43 | self.assertFalse( 44 | path.is_file(), 45 | msg="file should not have been created if an exception was thrown by the function", 46 | ) 47 | 48 | def test_cache_pickle(self) -> None: 49 | """Test caching a pickle.""" 50 | path = self.directory.joinpath("test.pkl") 51 | self.assertFalse( 52 | path.is_file(), 53 | msg="the file should not exist at the beginning of the test", 54 | ) 55 | 56 | raise_flag = True 57 | 58 | @CachedPickle(path=path) 59 | def _f1() -> int: 60 | if raise_flag: 61 | raise ValueError 62 | return EXPECTED 63 | 64 | self.assertFalse(path.is_file(), msg="the file should not exist until function is called") 65 | 66 | with self.assertRaises(ValueError): 67 | _f1() 68 | self.assertFalse( 69 | path.is_file(), 70 | msg="the function should throw an exception " 71 | "because of the flag, and no file should be created", 72 | ) 73 | 74 | raise_flag = False 75 | actual = _f1() 76 | self.assertEqual(EXPECTED, actual) 77 | self.assertTrue(path.is_file(), msg="a file should have been created") 78 | 79 | raise_flag = True 80 | actual_2 = _f1() # if raises, the caching mechanism didn't work 81 | self.assertEqual(EXPECTED, actual_2) 82 | self.assertTrue(path.is_file()) 83 | 84 | os.unlink(path) 85 | self.assertFalse(path.is_file()) 86 | with self.assertRaises(ValueError): 87 | _f1() 88 | 89 | @CachedPickle(path=path, force=True) 90 | def _f2() -> int: 91 | return EXPECTED_2 92 | 93 | self.assertEqual(EXPECTED_2, _f2()) # overwrites the file 94 | self.assertEqual(EXPECTED_2, _f1()) 95 | 96 | def test_no_cache(self) -> None: 97 | """Test that no caching happens.""" 98 | path = self.directory.joinpath("test.pkl") 99 | sentinel_value = 5 100 | 101 | self.assertFalse(path.is_file()) 102 | 103 | @CachedPickle(path=path, cache=False) 104 | def _f1() -> int: 105 | return sentinel_value 106 | 107 | self.assertFalse(path.is_file(), msg="function has not been called") 108 | 109 | # check the following twice, just for good measure! 
110 | for _ in range(2): 111 | self.assertEqual(sentinel_value, _f1()) 112 | self.assertFalse( 113 | path.is_file(), 114 | msg="file should not have been created since caching was turned off", 115 | ) 116 | -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | """Test configuration loading.""" 2 | 3 | from __future__ import annotations 4 | 5 | import tempfile 6 | import unittest 7 | from configparser import ConfigParser 8 | from pathlib import Path 9 | from typing import ClassVar 10 | 11 | import pystow 12 | from pystow.config_api import CONFIG_HOME_ENVVAR, _get_cfp 13 | from pystow.utils import mock_envvar 14 | 15 | 16 | class TestConfig(unittest.TestCase): 17 | """Test configuration.""" 18 | 19 | test_section: ClassVar[str] 20 | test_option: ClassVar[str] 21 | test_value: ClassVar[str] 22 | cfp: ClassVar[ConfigParser] 23 | 24 | @classmethod 25 | def setUpClass(cls) -> None: 26 | """Set up the class for testing.""" 27 | cls.test_section = "test" 28 | cls.test_option = "option" 29 | cls.test_value = "value" 30 | cls.cfp = _get_cfp(cls.test_section) 31 | cls.cfp.add_section(cls.test_section) 32 | cls.cfp.set( 33 | section=cls.test_section, 34 | option=cls.test_option, 35 | value=cls.test_value, 36 | ) 37 | 38 | def test_env_cast(self) -> None: 39 | """Test casting works properly when getting from the environment.""" 40 | with mock_envvar("TEST_VAR", "1234"): 41 | self.assertEqual("1234", pystow.get_config("test", "var")) 42 | self.assertEqual("1234", pystow.get_config("test", "var", dtype=str)) 43 | self.assertEqual(1234, pystow.get_config("test", "var", dtype=int)) 44 | with self.assertRaises(ValueError): 45 | pystow.get_config("test", "var", dtype=bool) 46 | with self.assertRaises(TypeError): 47 | pystow.get_config("test", "var", dtype=object) 48 | 49 | def test_get_config(self) -> None: 50 | """Test lookup not existing.""" 51 | self.assertIsNone(pystow.get_config(self.test_section, "key")) 52 | self.assertEqual("1234", pystow.get_config(self.test_section, "key", default="1234")) 53 | 54 | value = "not_value" 55 | self.assertEqual( 56 | value, pystow.get_config(self.test_section, self.test_option, passthrough=value) 57 | ) 58 | 59 | self.assertEqual(1, pystow.get_config(self.test_section, self.test_option, passthrough=1)) 60 | self.assertEqual( 61 | 1, pystow.get_config(self.test_section, self.test_option, passthrough="1", dtype=int) 62 | ) 63 | 64 | self.assertEqual( 65 | True, 66 | pystow.get_config(self.test_section, self.test_option, passthrough="1", dtype=bool), 67 | ) 68 | self.assertEqual( 69 | True, 70 | pystow.get_config(self.test_section, self.test_option, passthrough="yes", dtype=bool), 71 | ) 72 | self.assertEqual( 73 | True, 74 | pystow.get_config(self.test_section, self.test_option, passthrough="Yes", dtype=bool), 75 | ) 76 | self.assertEqual( 77 | True, 78 | pystow.get_config(self.test_section, self.test_option, passthrough="YES", dtype=bool), 79 | ) 80 | self.assertEqual( 81 | True, 82 | pystow.get_config(self.test_section, self.test_option, passthrough="True", dtype=bool), 83 | ) 84 | self.assertEqual( 85 | True, 86 | pystow.get_config(self.test_section, self.test_option, passthrough="TRUE", dtype=bool), 87 | ) 88 | self.assertEqual( 89 | True, 90 | pystow.get_config(self.test_section, self.test_option, passthrough="T", dtype=bool), 91 | ) 92 | self.assertEqual( 93 | True, 94 | pystow.get_config(self.test_section, self.test_option, 
passthrough="t", dtype=bool), 95 | ) 96 | self.assertEqual( 97 | True, 98 | pystow.get_config(self.test_section, self.test_option, passthrough=True, dtype=bool), 99 | ) 100 | self.assertEqual( 101 | True, pystow.get_config(self.test_section, self.test_option, passthrough=1, dtype=bool) 102 | ) 103 | 104 | def test_subsection(self) -> None: 105 | """Test subsections.""" 106 | with tempfile.TemporaryDirectory() as directory, mock_envvar(CONFIG_HOME_ENVVAR, directory): 107 | directory_ = Path(directory) 108 | path = directory_.joinpath("test.ini") 109 | self.assertFalse(path.is_file(), msg="file should not already exist") 110 | 111 | self.assertIsNone(pystow.get_config("test:subtest", "key")) 112 | self.assertFalse(path.is_file(), msg="getting config should not create a file") 113 | 114 | pystow.write_config("test:subtest", "key", "value") 115 | self.assertTrue(path.is_file(), msg=f"{list(directory_.iterdir())}") 116 | 117 | self.assertEqual("value", pystow.get_config("test:subtest", "key")) 118 | -------------------------------------------------------------------------------- /tests/test_module.py: -------------------------------------------------------------------------------- 1 | """Tests for PyStow.""" 2 | 3 | from __future__ import annotations 4 | 5 | import bz2 6 | import contextlib 7 | import itertools as itt 8 | import json 9 | import lzma 10 | import os 11 | import pickle 12 | import shutil 13 | import tempfile 14 | import unittest 15 | from collections.abc import Generator, Mapping 16 | from pathlib import Path 17 | from typing import Any 18 | from unittest import mock 19 | 20 | import pandas as pd 21 | 22 | import pystow 23 | from pystow import join 24 | from pystow.constants import PYSTOW_HOME_ENVVAR, PYSTOW_NAME_ENVVAR 25 | from pystow.impl import Module 26 | from pystow.utils import ( 27 | get_home, 28 | get_name, 29 | mock_envvar, 30 | n, 31 | write_pickle_gz, 32 | write_sql, 33 | write_tarfile_csv, 34 | write_zipfile_csv, 35 | ) 36 | 37 | HERE = Path(__file__).parent.resolve() 38 | RESOURCES = HERE.joinpath("resources") 39 | 40 | TSV_NAME = "test_1.tsv" 41 | TSV_URL = f"{n()}/{TSV_NAME}" 42 | 43 | SQLITE_NAME = "test_1.db" 44 | SQLITE_URL = f"{n()}/{SQLITE_NAME}" 45 | SQLITE_PATH = RESOURCES / SQLITE_NAME 46 | SQLITE_TABLE = "testtable" 47 | 48 | JSON_NAME = "test_1.json" 49 | JSON_URL = f"{n()}/{JSON_NAME}" 50 | JSON_PATH = RESOURCES / JSON_NAME 51 | 52 | PICKLE_NAME = "test_1.pkl" 53 | PICKLE_URL = f"{n()}/{PICKLE_NAME}" 54 | PICKLE_PATH = RESOURCES / PICKLE_NAME 55 | 56 | PICKLE_GZ_NAME = "test_1.pkl.gz" 57 | PICKLE_GZ_URL = f"{n()}/{PICKLE_GZ_NAME}" 58 | PICKLE_GZ_PATH = RESOURCES / PICKLE_GZ_NAME 59 | 60 | JSON_BZ2_NAME = "test_1.json.bz2" 61 | JSON_BZ2_URL = f"{n()}/{JSON_BZ2_NAME}" 62 | JSON_BZ2_PATH = RESOURCES / JSON_BZ2_NAME 63 | 64 | MOCK_FILES: Mapping[str, Path] = { 65 | TSV_URL: RESOURCES / TSV_NAME, 66 | JSON_URL: JSON_PATH, 67 | JSON_BZ2_URL: JSON_BZ2_PATH, 68 | PICKLE_URL: PICKLE_PATH, 69 | PICKLE_GZ_URL: PICKLE_GZ_PATH, 70 | SQLITE_URL: SQLITE_PATH, 71 | } 72 | 73 | TEST_TSV_ROWS = [ 74 | ("h1", "h2", "h3"), 75 | ("v1_1", "v1_2", "v1_3"), 76 | ("v2_1", "v2_2", "v2_3"), 77 | ] 78 | TEST_DF = pd.DataFrame(TEST_TSV_ROWS) 79 | TEST_JSON = {"key": "value"} 80 | 81 | # Make the pickle file 82 | if not PICKLE_PATH.is_file(): 83 | PICKLE_PATH.write_bytes(pickle.dumps(TEST_TSV_ROWS)) 84 | 85 | if not SQLITE_PATH.is_file(): 86 | write_sql(TEST_DF, name=SQLITE_TABLE, path=SQLITE_PATH, index=False) 87 | 88 | if not JSON_PATH.is_file(): 89 | 
JSON_PATH.write_text(json.dumps(TEST_JSON)) 90 | 91 | if not JSON_BZ2_PATH.is_file(): 92 | with bz2.open(JSON_BZ2_PATH, mode="wt") as file: 93 | json.dump(TEST_JSON, file, indent=2) 94 | 95 | 96 | class TestMocks(unittest.TestCase): 97 | """Tests for :mod:`pystow` mocks and context managers.""" 98 | 99 | def test_mock_home(self) -> None: 100 | """Test that home can be properly mocked.""" 101 | name = n() 102 | 103 | with tempfile.TemporaryDirectory() as d: 104 | expected_path = Path(d) / name 105 | self.assertFalse(expected_path.exists()) 106 | 107 | with mock_envvar(PYSTOW_HOME_ENVVAR, expected_path.as_posix()): 108 | self.assertFalse(expected_path.exists()) 109 | self.assertEqual(expected_path, get_home(ensure_exists=False)) 110 | self.assertFalse(expected_path.exists()) 111 | 112 | def test_mock_name(self) -> None: 113 | """Test that the name can be properly mocked.""" 114 | name = n() 115 | 116 | expected_path = Path.home() / name 117 | self.assertFalse(expected_path.exists()) 118 | 119 | with mock_envvar(PYSTOW_NAME_ENVVAR, name): 120 | self.assertEqual(name, get_name()) 121 | 122 | self.assertFalse(expected_path.exists()) 123 | self.assertEqual(expected_path, get_home(ensure_exists=False)) 124 | self.assertFalse(expected_path.exists()) 125 | 126 | 127 | class TestJoin(unittest.TestCase): 128 | """Tests for :mod:`pystow`.""" 129 | 130 | def setUp(self) -> None: 131 | """Set up the test case.""" 132 | self.directory = tempfile.TemporaryDirectory() 133 | 134 | def tearDown(self) -> None: 135 | """Tear down the test case.""" 136 | self.directory.cleanup() 137 | 138 | @contextlib.contextmanager 139 | def mock_directory(self) -> Generator[Path, None, None]: 140 | """Use this test case's temporary directory as a mock environment variable. 141 | 142 | :yield: The mock directory's path 143 | """ 144 | with mock_envvar(PYSTOW_HOME_ENVVAR, self.directory.name): 145 | yield Path(self.directory.name) 146 | 147 | @staticmethod 148 | def mock_download() -> mock._patch_default_new: 149 | """Mock connection to the internet using local resource files. 150 | 151 | :return: A patch object that can be applied to the pystow download function 152 | """ 153 | 154 | def _mock_get_data(url: str, path: str | Path, **_kwargs: Any) -> Path: 155 | return Path(shutil.copy(MOCK_FILES[url], path)) 156 | 157 | return mock.patch("pystow.utils.download", side_effect=_mock_get_data) 158 | 159 | @staticmethod 160 | def mock_download_once(local_path: str | Path) -> mock._patch_default_new: 161 | """Mock connection to the internet using local resource files. 162 | 163 | :param local_path: the path to the file to mock 164 | :return: A patch object that can be applied to the pystow download function 165 | """ 166 | 167 | def _mock_get_data(path: str | Path, **_kwargs: Any) -> Path: 168 | return Path(shutil.copy(local_path, path)) 169 | 170 | return mock.patch("pystow.utils.download", side_effect=_mock_get_data) 171 | 172 | def join(self, *parts: str) -> Path: 173 | """Help join the parts to this test case's temporary directory. 
174 | 175 | :param parts: The file path parts that are joined with this test case's directory 176 | :return: A path to the file 177 | """ 178 | return Path(self.directory.name).joinpath(*parts) 179 | 180 | def test_mock(self) -> None: 181 | """Test that mocking the directory works properly for this test case.""" 182 | with self.mock_directory(): 183 | self.assertEqual(os.getenv(PYSTOW_HOME_ENVVAR), self.directory.name) 184 | 185 | def test_join(self) -> None: 186 | """Test the :func:`pystow.join` function.""" 187 | parts_examples = [ 188 | [n()], 189 | [n(), n()], 190 | [n(), n(), n()], 191 | ] 192 | with self.mock_directory(): 193 | for parts in parts_examples: 194 | with self.subTest(parts=parts): 195 | self.assertEqual(self.join(*parts), join(*parts)) 196 | 197 | def test_join_with_version(self) -> None: 198 | """Test the join function when a version is present.""" 199 | with self.mock_directory(): 200 | key = "key" 201 | version = "v1" 202 | self.assertEqual( 203 | self.join(key, version), 204 | pystow.join(key, version=version), 205 | ) 206 | 207 | parts = [n()] 208 | self.assertEqual( 209 | self.join(key, version, *parts), pystow.join(key, *parts, version=version) 210 | ) 211 | 212 | parts = [n()] 213 | name = "yup.tsv" 214 | self.assertEqual( 215 | self.join(key, version, *parts, name), 216 | pystow.join(key, *parts, version=version, name=name), 217 | ) 218 | 219 | def _version_getter() -> str: 220 | return "v2" 221 | 222 | parts = [n()] 223 | name = "yup.tsv" 224 | self.assertEqual( 225 | self.join(key, _version_getter(), *parts, name), 226 | pystow.join(key, *parts, version=_version_getter, name=name), 227 | ) 228 | 229 | with self.assertRaises(ValueError): 230 | pystow.join(key, version="/") 231 | 232 | def test_ensure(self) -> None: 233 | """Test ensuring various files.""" 234 | write_pickle_gz(TEST_TSV_ROWS, path=PICKLE_GZ_PATH) 235 | 236 | with self.mock_directory(), self.mock_download(): 237 | with self.subTest(type="tsv"): 238 | df = pystow.ensure_csv("test", url=TSV_URL) 239 | self.assertEqual(3, len(df.columns)) 240 | 241 | df2 = pystow.load_df("test", name=TSV_NAME) 242 | self.assertEqual(df.values.tolist(), df2.values.tolist()) 243 | 244 | with self.subTest(type="json"): 245 | j = pystow.ensure_json("test", url=JSON_URL) 246 | self.assertEqual(TEST_JSON, j) 247 | 248 | j2 = pystow.load_json("test", name=JSON_NAME) 249 | self.assertEqual(j, j2) 250 | 251 | with self.subTest(type="pickle"): 252 | p = pystow.ensure_pickle("test", url=PICKLE_URL) 253 | self.assertEqual(3, len(p)) 254 | 255 | p2 = pystow.load_pickle("test", name=PICKLE_NAME) 256 | self.assertEqual(p, p2) 257 | 258 | with self.subTest(type="pickle_gz"): 259 | p = pystow.ensure_pickle_gz("test", url=PICKLE_GZ_URL) 260 | self.assertEqual(3, len(p)) 261 | 262 | p2 = pystow.load_pickle_gz("test", name=PICKLE_GZ_NAME) 263 | self.assertEqual(p, p2) 264 | 265 | with self.subTest(type="json_bz2"): 266 | p = pystow.ensure_json_bz2("test", url=JSON_BZ2_URL) 267 | self.assertEqual(TEST_JSON, p) 268 | 269 | def test_open_fail(self) -> None: 270 | """Test opening a missing file.""" 271 | with self.assertRaises(FileNotFoundError): 272 | with pystow.open("nope", name="nope"): 273 | pass 274 | 275 | with self.assertRaises(FileNotFoundError): 276 | pystow.load_json("nope", name="nope") 277 | 278 | def test_ensure_open_lzma(self) -> None: 279 | """Test opening lzma-encoded files.""" 280 | with tempfile.TemporaryDirectory() as directory, self.mock_directory(): 281 | path = Path(directory) / n() 282 | with 
self.mock_download_once(path): 283 | with lzma.open(path, "wt") as file_1: 284 | for row in TEST_TSV_ROWS: 285 | print(*row, sep="\t", file=file_1) 286 | # FIXME this ignore needs to be removed and addressed 287 | with pystow.ensure_open_lzma("test", url=n()) as file_2: # type: ignore 288 | df = pd.read_csv(file_2, sep="\t") 289 | self.assertEqual(3, len(df.columns)) 290 | 291 | def test_ensure_open_zip(self) -> None: 292 | """Test opening tar-encoded files.""" 293 | with tempfile.TemporaryDirectory() as directory, self.mock_directory(): 294 | path = Path(directory) / n() 295 | inner_path = n() 296 | with self.mock_download_once(path): 297 | write_zipfile_csv(TEST_DF, path, inner_path) 298 | with pystow.ensure_open_zip("test", url=n(), inner_path=inner_path) as file: 299 | df = pd.read_csv(file, sep="\t") 300 | self.assertEqual(3, len(df.columns)) 301 | 302 | def test_ensure_open_tarfile(self) -> None: 303 | """Test opening tarfile-encoded files.""" 304 | with tempfile.TemporaryDirectory() as directory, self.mock_directory(): 305 | path = Path(directory) / n() 306 | inner_path = n() 307 | with self.mock_download_once(path): 308 | write_tarfile_csv(TEST_DF, path, inner_path) 309 | with pystow.ensure_open_tarfile("test", url=n(), inner_path=inner_path) as file: 310 | df = pd.read_csv(file, sep="\t") 311 | self.assertEqual(3, len(df.columns)) 312 | 313 | def test_ensure_module(self) -> None: 314 | """Test that the ``ensure_exist`` argument in :meth:`Module.from_key` works properly.""" 315 | parts_examples = [ 316 | [n()], 317 | [n(), n()], 318 | [n(), n(), n()], 319 | ] 320 | ensure_examples = [False, True] 321 | 322 | for ensure_exists, parts in itt.product(ensure_examples, parts_examples): 323 | with self.subTest(ensure_exists=ensure_exists, parts=parts), self.mock_directory(): 324 | expected_directory = self.join(*parts) 325 | 326 | module = Module.from_key(*parts, ensure_exists=ensure_exists) 327 | 328 | self.assertEqual(expected_directory, module.base) 329 | self.assertIs( 330 | expected_directory.exists(), 331 | ensure_exists, 332 | msg=f"{expected_directory} should{'' if ensure_exists else ' not'} exist.", 333 | ) 334 | 335 | def test_ensure_custom(self) -> None: 336 | """Test ensure with custom provider.""" 337 | with self.mock_directory(): 338 | # create a minimal provider 339 | def touch_file(path: Path, **_kwargs: Any) -> None: 340 | """ 341 | Create a file. 
342 | 343 | :param path: 344 | the file path 345 | :param _kwargs: 346 | ignored keywords 347 | """ 348 | path.touch() 349 | 350 | # wrap to record calls 351 | provider = mock.Mock(wraps=touch_file) 352 | 353 | # the keyword-based parameters for the provider 354 | kwargs: dict[str, Any] = {"a": 4, "c": {0: 1, 5: 7}} 355 | 356 | # call first time 357 | name = n() 358 | path = pystow.ensure_custom("test", name=name, provider=provider, **kwargs) 359 | self.assertTrue(path.is_file()) 360 | # call a second time 361 | path = pystow.ensure_custom("test", name=name, provider=provider, **kwargs) 362 | # ensure that the provider was only called once with the given parameters 363 | provider.assert_called_once_with(path, **kwargs) 364 | 365 | def test_ensure_open_sqlite(self) -> None: 366 | """Test caching SQLite.""" 367 | with self.mock_directory(), self.mock_download(): 368 | with pystow.ensure_open_sqlite("test", url=SQLITE_URL) as conn: 369 | df = pd.read_sql(f"SELECT * from {SQLITE_TABLE}", conn) # noqa:S608 370 | self.assertEqual(3, len(df.columns)) 371 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | """Tests for utilities.""" 2 | 3 | from __future__ import annotations 4 | 5 | import hashlib 6 | import os 7 | import tempfile 8 | import unittest 9 | from pathlib import Path 10 | 11 | import numpy as np 12 | import pandas as pd 13 | import requests 14 | from lxml import etree 15 | from requests_file import FileAdapter 16 | 17 | from pystow.utils import ( 18 | DownloadError, 19 | HexDigestError, 20 | download, 21 | get_hexdigests_remote, 22 | getenv_path, 23 | mkdir, 24 | mock_envvar, 25 | n, 26 | name_from_url, 27 | read_tarfile_csv, 28 | read_zip_np, 29 | read_zipfile_csv, 30 | read_zipfile_xml, 31 | write_tarfile_csv, 32 | write_zipfile_csv, 33 | write_zipfile_np, 34 | write_zipfile_xml, 35 | ) 36 | 37 | HERE = Path(__file__).resolve().parent 38 | TEST_TXT = HERE.joinpath("resources", "test.txt") 39 | TEST_TXT_MD5 = HERE.joinpath("resources", "test.txt.md5") 40 | TEST_TXT_VERBOSE_MD5 = HERE.joinpath("resources", "test_verbose.txt.md5") 41 | TEST_TXT_WRONG_MD5 = HERE.joinpath("resources", "test_wrong.txt.md5") 42 | 43 | skip_on_windows = unittest.skipIf( 44 | os.name == "nt", 45 | reason="Funny stuff happens in requests with a file adapter on windows that adds line breaks", 46 | ) 47 | 48 | 49 | class _Session(requests.sessions.Session): 50 | """A mock session.""" 51 | 52 | def __init__(self) -> None: 53 | """Instantiate the patched session with an additional file adapter.""" 54 | super().__init__() 55 | self.mount("file://", FileAdapter()) 56 | 57 | 58 | requests.sessions.Session = _Session # type: ignore 59 | 60 | 61 | class TestUtils(unittest.TestCase): 62 | """Test utility functions.""" 63 | 64 | def test_name_from_url(self) -> None: 65 | """Test :func:`name_from_url`.""" 66 | data = [ 67 | ("test.tsv", "https://example.com/test.tsv"), 68 | ("test.tsv", "https://example.com/deeper/test.tsv"), 69 | ("test.tsv.gz", "https://example.com/deeper/test.tsv.gz"), 70 | ] 71 | for name, url in data: 72 | with self.subTest(name=name, url=url): 73 | self.assertEqual(name, name_from_url(url)) 74 | 75 | @skip_on_windows 76 | def test_file_values(self) -> None: 77 | """Test encodings.""" 78 | for url, value in [ 79 | (TEST_TXT, "this is a test file\n"), 80 | (TEST_TXT_MD5, "4221d002ceb5d3c9e9137e495ceaa647"), 81 | (TEST_TXT_VERBOSE_MD5, 
"MD5(text.txt)=4221d002ceb5d3c9e9137e495ceaa647"), 82 | (TEST_TXT_WRONG_MD5, "yolo"), 83 | ]: 84 | with self.subTest(name=url.name): 85 | self.assertEqual(value, requests.get(url.as_uri(), timeout=15).text) 86 | 87 | def test_mkdir(self) -> None: 88 | """Test for ensuring a directory.""" 89 | with tempfile.TemporaryDirectory() as directory: 90 | directory_ = Path(directory) 91 | subdirectory = directory_ / "sd1" 92 | self.assertFalse(subdirectory.exists()) 93 | 94 | mkdir(subdirectory, ensure_exists=False) 95 | self.assertFalse(subdirectory.exists()) 96 | 97 | mkdir(subdirectory, ensure_exists=True) 98 | self.assertTrue(subdirectory.exists()) 99 | 100 | def test_mock_envvar(self) -> None: 101 | """Test that environment variables can be mocked properly.""" 102 | name, value = n(), n() 103 | 104 | self.assertNotIn(name, os.environ) 105 | with mock_envvar(name, value): 106 | self.assertIn(name, os.environ) 107 | self.assertEqual(value, os.getenv(name)) 108 | self.assertNotIn(name, os.environ) 109 | 110 | def test_getenv_path(self) -> None: 111 | """Test that :func:`getenv_path` works properly.""" 112 | envvar = n() 113 | 114 | with tempfile.TemporaryDirectory() as directory: 115 | directory_ = Path(directory) 116 | value = directory_ / n() 117 | default = directory_ / n() 118 | 119 | self.assertEqual(default, getenv_path(envvar, default)) 120 | with mock_envvar(envvar, value.as_posix()): 121 | self.assertEqual(value, getenv_path(envvar, default)) 122 | # Check that it goes back 123 | self.assertEqual(default, getenv_path(envvar, default)) 124 | 125 | def test_compressed_io(self) -> None: 126 | """Test that the read/write to compressed folder functions work.""" 127 | rows = [[1, 2], [3, 4], [5, 6]] 128 | columns = ["A", "B"] 129 | df = pd.DataFrame(rows, columns=columns) 130 | inner_path = "okay.tsv" 131 | 132 | data = [ 133 | ("test.zip", write_zipfile_csv, read_zipfile_csv), 134 | ("test.tar.gz", write_tarfile_csv, read_tarfile_csv), 135 | ] 136 | for name, writer, reader in data: 137 | with self.subTest(name=name), tempfile.TemporaryDirectory() as directory: 138 | path = Path(directory) / name 139 | self.assertFalse(path.exists()) 140 | writer(df, path=path, inner_path=inner_path) 141 | self.assertTrue(path.exists()) 142 | new_df = reader(path=path, inner_path=inner_path) 143 | self.assertEqual(list(df.columns), list(new_df.columns)) 144 | self.assertEqual(df.values.tolist(), new_df.values.tolist()) 145 | 146 | def test_xml_io(self) -> None: 147 | """Test that read/write for XML element tree works.""" 148 | root = etree.Element("Doc") 149 | level1 = etree.SubElement(root, "S") 150 | main = etree.SubElement(level1, "Text") 151 | main.text = "Thanks for contributing an answer to Stack Overflow!" 
152 | second = etree.SubElement(level1, "Tokens") 153 | level2 = etree.SubElement(second, "Token", word="low") 154 | 155 | level3 = etree.SubElement(level2, "Morph") 156 | second1 = etree.SubElement(level3, "Lemma") 157 | second1.text = "sdfs" 158 | second1 = etree.SubElement(level3, "info") 159 | second1.text = "qw" 160 | 161 | level4 = etree.SubElement(level3, "Aff") 162 | second1 = etree.SubElement(level4, "Type") 163 | second1.text = "sdfs" 164 | second1 = etree.SubElement(level4, "Suf") 165 | second1.text = "qw" 166 | 167 | tree = etree.ElementTree(root) 168 | inner_path = "okay.tsv" 169 | data = [ 170 | ("test.zip", write_zipfile_xml, read_zipfile_xml), 171 | ] 172 | for name, writer, reader in data: 173 | with self.subTest(name=name), tempfile.TemporaryDirectory() as directory: 174 | path = Path(directory) / name 175 | self.assertFalse(path.exists()) 176 | writer(tree, path=path, inner_path=inner_path) 177 | self.assertTrue(path.exists()) 178 | new_tree = reader(path=path, inner_path=inner_path) 179 | self.assertEqual( 180 | etree.tostring(tree, pretty_print=True), 181 | etree.tostring(new_tree, pretty_print=True), 182 | ) 183 | 184 | def test_numpy_io(self) -> None: 185 | """Test IO with numpy.""" 186 | arr = np.array([[0, 1], [2, 3]]) 187 | inner_path = "okay.npz" 188 | with tempfile.TemporaryDirectory() as directory: 189 | path = Path(directory) / "test.zip" 190 | write_zipfile_np(arr, inner_path=inner_path, path=path) 191 | reloaded_arr = read_zip_np(path=path, inner_path=inner_path) 192 | self.assertTrue(np.array_equal(arr, reloaded_arr)) 193 | 194 | 195 | class TestDownload(unittest.TestCase): 196 | """Tests for downloading.""" 197 | 198 | def setUp(self) -> None: 199 | """Set up a test.""" 200 | self.directory_obj = tempfile.TemporaryDirectory() 201 | self.directory = Path(self.directory_obj.name) 202 | self.bad_url = "https://nope.nope/nope.tsv" 203 | self.path_for_bad_url = self.directory.joinpath("nope.tsv") 204 | 205 | def tearDown(self) -> None: 206 | """Tear down a test.""" 207 | self.directory_obj.cleanup() 208 | 209 | def test_bad_file_error(self) -> None: 210 | """Test that urllib errors are handled properly.""" 211 | with self.assertRaises(DownloadError): 212 | download( 213 | url=self.bad_url, 214 | path=self.path_for_bad_url, 215 | backend="urllib", 216 | ) 217 | self.assertFalse(self.path_for_bad_url.is_file()) 218 | 219 | def test_requests_error_stream(self) -> None: 220 | """Test that requests errors are handled properly.""" 221 | with self.assertRaises(DownloadError): 222 | download( 223 | url=self.bad_url, 224 | path=self.path_for_bad_url, 225 | backend="requests", 226 | stream=True, 227 | ) 228 | self.assertFalse(self.path_for_bad_url.is_file()) 229 | 230 | def test_requests_error_sync(self) -> None: 231 | """Test that requests errors are handled properly.""" 232 | with self.assertRaises(DownloadError): 233 | download( 234 | url=self.bad_url, 235 | path=self.path_for_bad_url, 236 | backend="requests", 237 | stream=False, 238 | ) 239 | self.assertFalse(self.path_for_bad_url.is_file()) 240 | 241 | 242 | class TestHashing(unittest.TestCase): 243 | """Tests for hexdigest checking.""" 244 | 245 | def setUp(self) -> None: 246 | """Set up a test.""" 247 | self.directory = tempfile.TemporaryDirectory() 248 | self.path = Path(self.directory.name).joinpath("test.tsv") 249 | 250 | md5 = hashlib.md5() # noqa: S324 251 | with TEST_TXT.open("rb") as file: 252 | md5.update(file.read()) 253 | self.expected_md5 = md5.hexdigest() 254 | self.mismatching_md5_hexdigest = "yolo" 
255 |         self.assertNotEqual(self.mismatching_md5_hexdigest, self.expected_md5)
256 | 
257 |     def tearDown(self) -> None:
258 |         """Tear down a test."""
259 |         self.directory.cleanup()
260 | 
261 |     def test_hash_success(self) -> None:
262 |         """Test that a download succeeds when the expected hash matches."""
263 |         self.assertFalse(self.path.exists())
264 |         download(
265 |             url=TEST_TXT.as_uri(),
266 |             path=self.path,
267 |             hexdigests={
268 |                 "md5": self.expected_md5,
269 |             },
270 |         )
271 | 
272 |     @skip_on_windows
273 |     def test_hash_remote_success(self) -> None:
274 |         """Test that a download succeeds when the remotely retrieved hash matches."""
275 |         self.assertFalse(self.path.exists())
276 |         download(
277 |             url=TEST_TXT.as_uri(),
278 |             path=self.path,
279 |             hexdigests_remote={
280 |                 "md5": TEST_TXT_MD5.as_uri(),
281 |             },
282 |             hexdigests_strict=True,
283 |         )
284 |         self.assertTrue(self.path.exists())
285 | 
286 |     @skip_on_windows
287 |     def test_hash_remote_verbose_success(self) -> None:
288 |         """Test that a verbose remote hash file passes in non-strict mode."""
289 |         self.assertFalse(self.path.exists())
290 |         download(
291 |             url=TEST_TXT.as_uri(),
292 |             path=self.path,
293 |             hexdigests_remote={
294 |                 "md5": TEST_TXT_VERBOSE_MD5.as_uri(),
295 |             },
296 |             hexdigests_strict=False,
297 |         )
298 |         self.assertTrue(self.path.exists())
299 | 
300 |     def test_hash_remote_verbose_failure(self) -> None:
301 |         """Test that a verbose remote hash file fails in strict mode."""
302 |         self.assertFalse(self.path.exists())
303 |         with self.assertRaises(HexDigestError):
304 |             download(
305 |                 url=TEST_TXT.as_uri(),
306 |                 path=self.path,
307 |                 hexdigests_remote={
308 |                     "md5": TEST_TXT_VERBOSE_MD5.as_uri(),
309 |                 },
310 |                 hexdigests_strict=True,
311 |             )
312 | 
313 |     def test_hash_error(self) -> None:
314 |         """Test that a mismatched hash raises an error on download."""
315 |         self.assertFalse(self.path.exists())
316 |         with self.assertRaises(HexDigestError):
317 |             download(
318 |                 url=TEST_TXT.as_uri(),
319 |                 path=self.path,
320 |                 hexdigests={
321 |                     "md5": self.mismatching_md5_hexdigest,
322 |                 },
323 |             )
324 | 
325 |     def test_hash_remote_error(self) -> None:
326 |         """Test that a mismatched remote hash raises an error on download."""
327 |         self.assertFalse(self.path.exists())
328 |         with self.assertRaises(HexDigestError):
329 |             download(
330 |                 url=TEST_TXT.as_uri(),
331 |                 path=self.path,
332 |                 hexdigests_remote={
333 |                     "md5": TEST_TXT_WRONG_MD5.as_uri(),
334 |                 },
335 |                 hexdigests_strict=True,
336 |             )
337 | 
338 |     def test_override_hash_error(self) -> None:
339 |         """Test that a pre-existing file that fails the hash check raises an error when force=False."""
340 |         self.path.write_text("test file content")
341 | 
342 |         self.assertTrue(self.path.exists())
343 |         with self.assertRaises(HexDigestError):
344 |             download(
345 |                 url=TEST_TXT.as_uri(),
346 |                 path=self.path,
347 |                 hexdigests={
348 |                     "md5": self.expected_md5,
349 |                 },
350 |                 force=False,
351 |             )
352 | 
353 |     def test_override_hash_remote_error(self) -> None:
354 |         """Test that a pre-existing file that fails the remote hash check raises an error when force=False."""
355 |         self.path.write_text("test file content")
356 | 
357 |         self.assertTrue(self.path.exists())
358 |         with self.assertRaises(HexDigestError):
359 |             download(
360 |                 url=TEST_TXT.as_uri(),
361 |                 path=self.path,
362 |                 hexdigests_remote={
363 |                     "md5": TEST_TXT_MD5.as_uri(),
364 |                 },
365 |                 hexdigests_strict=True,
366 |                 force=False,
367 |             )
368 | 
369 |     def test_force(self) -> None:
370 |         """Test that force=True overwrites a file with mismatched contents."""
371 |         # now if force=True it should not bother with the hash check
372 |         self.path.write_text("test file content")
373 | 
374 |         self.assertTrue(self.path.exists())
375 |         download(
376 |             url=TEST_TXT.as_uri(),
377 |             path=self.path,
378 |             hexdigests={
379 |                 "md5": self.expected_md5,
380 |             },
381 |             force=True,
382 |         )
383 | 
384 |     
@skip_on_windows
385 |     def test_remote_force(self) -> None:
386 |         """Test that force=True overwrites a file with mismatched contents, checked against remote hashes."""
387 |         # now if force=True it should not bother with the hash check
388 |         self.path.write_text("test file content")
389 | 
390 |         self.assertTrue(self.path.exists())
391 |         download(
392 |             url=TEST_TXT.as_uri(),
393 |             path=self.path,
394 |             hexdigests_remote={
395 |                 "md5": TEST_TXT_MD5.as_uri(),
396 |             },
397 |             hexdigests_strict=True,
398 |             force=True,
399 |         )
400 | 
401 |     def test_hexdigest_urls(self) -> None:
402 |         """Test getting hex digests from URLs."""
403 |         for url, strict in [
404 |             (TEST_TXT_MD5, True),
405 |             (TEST_TXT_MD5, False),
406 |             (TEST_TXT_VERBOSE_MD5, False),
407 |         ]:
408 |             hexdigests = get_hexdigests_remote(
409 |                 {"md5": url.as_uri()},
410 |                 hexdigests_strict=strict,
411 |             )
412 |             self.assertEqual(
413 |                 "4221d002ceb5d3c9e9137e495ceaa647",
414 |                 hexdigests["md5"],
415 |             )
416 | 
417 |         hexdigests = get_hexdigests_remote(
418 |             {"md5": TEST_TXT_VERBOSE_MD5.as_uri()}, hexdigests_strict=True
419 |         )
420 |         self.assertNotEqual(
421 |             "4221d002ceb5d3c9e9137e495ceaa647",
422 |             hexdigests["md5"],
423 |         )
424 | 
425 |     @unittest.skip(reason="this test hits a live endpoint")
426 |     def test_live(self) -> None:
427 |         """Test live."""
428 |         hexdigests = get_hexdigests_remote(
429 |             {"md5": "https://ftp.ncbi.nlm.nih.gov/pubmed/baseline/pubmed22n0001.xml.gz.md5"},
430 |             hexdigests_strict=False,
431 |         )
432 |         self.assertEqual(
433 |             {
434 |                 "md5": "0f08d8f3947dde1f3bced5e1f450c0da",
435 |             },
436 |             hexdigests,
437 |         )
438 | 
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | # Tox (http://tox.testrun.org/) is a tool for running tests
2 | # in multiple virtualenvs. This configuration file will run the
3 | # test suite on all supported python versions. To use it, "pip install tox"
4 | # and then run "tox" from this directory.
5 | 
6 | [tox]
7 | # To use a PEP 517 build-backend you are required to configure tox to use an isolated_build:
8 | # https://tox.readthedocs.io/en/latest/example/package.html
9 | isolated_build = True
10 | 
11 | # These environments are run in order if you just use `tox`:
12 | envlist =
13 |     # always keep coverage-clean first
14 |     coverage-clean
15 |     # code formatters
16 |     format
17 |     # format-docs
18 |     # Code quality assessment
19 |     pyroma
20 |     lint
21 |     mypy
22 |     # Documentation quality assurance
23 |     doc8
24 |     docstr-coverage
25 |     docs-test
26 |     # the actual tests
27 |     py
28 |     doctests
29 |     # always keep coverage-report last
30 |     coverage-report
31 | 
32 | [testenv:.pkg]
33 | # this special environment configures the build that tox does itself
34 | set_env =
35 |     UV_PREVIEW=1
36 | 
37 | [testenv]
38 | description = Run unit and integration tests.
39 | # Runs on the "tests" directory by default, or passes the positional
40 | # arguments from `tox -e py ...`
41 | commands =
42 |     coverage run -p -m pytest --durations=20 {posargs:tests}
43 |     coverage combine
44 |     coverage xml
45 | extras =
46 |     # See the [project.optional-dependencies] entry in pyproject.toml for "tests"
47 |     tests
48 |     pandas
49 |     rdf
50 |     xml
51 | set_env =
52 |     # this setting gets inherited into all environments, meaning
53 |     # that things that call uv commands don't require a --preview
54 |     UV_PREVIEW=1
55 | 
56 | [testenv:coverage-clean]
57 | description = Remove testing coverage artifacts.
58 | deps = coverage 59 | skip_install = true 60 | commands = coverage erase 61 | 62 | [testenv:doctests] 63 | description = Test that documentation examples run properly. 64 | commands = 65 | # note that the package name is required for discovery 66 | xdoctest -m src/pystow 67 | deps = 68 | xdoctest 69 | pygments 70 | extras = 71 | pandas 72 | 73 | [testenv:treon] 74 | description = Test that notebooks can run to completion 75 | commands = 76 | treon notebooks/ 77 | deps = 78 | treon 79 | 80 | [testenv:format] 81 | description = Format the code in a deterministic way using ruff. Note that ruff check should come before ruff format when using --fix (ref: https://github.com/astral-sh/ruff-pre-commit/blob/main/README.md) 82 | deps = 83 | ruff 84 | skip_install = true 85 | commands = 86 | ruff check --fix 87 | ruff format 88 | 89 | [testenv:format-docs] 90 | description = Run documentation linters. 91 | # note that this doesn't work with sphinx-click 92 | # or any other extension that adds extra directives 93 | deps = 94 | rstfmt 95 | extras = 96 | # See the [project.optional-dependencies] entry in pyproject.toml for "docs" 97 | docs 98 | skip_install = true 99 | commands = 100 | rstfmt docs/source/ 101 | 102 | [testenv:format-markdown] 103 | description = Run markdown formatter. 104 | skip_install = true 105 | allowlist_externals = 106 | npx 107 | commands = 108 | npx --yes prettier --write --prose-wrap always "**/*.md" 109 | 110 | [testenv:lint] 111 | description = Check code quality using ruff and other tools. 112 | skip_install = true 113 | deps = 114 | ruff 115 | commands = 116 | ruff check 117 | ruff format --check 118 | 119 | [testenv:pyroma] 120 | deps = 121 | pygments 122 | pyroma 123 | skip_install = true 124 | commands = pyroma --min=10 . 125 | description = Run the pyroma tool to check the package friendliness of the project. 126 | 127 | [testenv:mypy] 128 | description = Run the mypy tool to check static typing on the project. Installs the package to make sure all type stubs get recognized. 129 | deps = 130 | mypy 131 | types-requests 132 | extras = 133 | pandas 134 | rdf 135 | xml 136 | commands = mypy --install-types --non-interactive --ignore-missing-imports --strict src/ tests/ 137 | 138 | [testenv:doc8] 139 | skip_install = true 140 | deps = 141 | doc8 142 | extras = 143 | docs 144 | commands = 145 | doc8 docs/source/ 146 | description = Run the doc8 tool to check the style of the RST files in the project docs. 147 | 148 | [testenv:docstr-coverage] 149 | description = Run the docstr-coverage tool to check documentation coverage. 150 | skip_install = true 151 | deps = 152 | docstr-coverage 153 | commands = 154 | docstr-coverage src/ tests/ --skip-private --skip-magic 155 | 156 | [testenv:docs] 157 | description = Build the documentation locally, allowing warnings. 158 | extras = 159 | # See the [project.optional-dependencies] entry in pyproject.toml for "docs" 160 | docs 161 | # You might need to add additional extras if your documentation covers it 162 | commands = 163 | python -m sphinx -b html -d docs/build/doctrees docs/source docs/build/html 164 | 165 | [testenv:docs-test] 166 | description = Test building the documentation in an isolated environment. Warnings are considered as errors via -W. 
167 | changedir = docs
168 | extras =
169 |     {[testenv:docs]extras}
170 | commands =
171 |     mkdir -p {envtmpdir}
172 |     cp -r source {envtmpdir}/source
173 |     python -m sphinx -W -b html -d {envtmpdir}/build/doctrees {envtmpdir}/source {envtmpdir}/build/html
174 |     # python -m sphinx -W -b coverage -d {envtmpdir}/build/doctrees {envtmpdir}/source {envtmpdir}/build/coverage
175 |     # cat {envtmpdir}/build/coverage/c.txt
176 |     # cat {envtmpdir}/build/coverage/python.txt
177 | allowlist_externals =
178 |     cp
179 |     cat
180 |     mkdir
181 | 
182 | [testenv:coverage-xml]
183 | deps = coverage[toml]
184 | skip_install = true
185 | commands = coverage xml
186 | 
187 | [testenv:coverage-report]
188 | # TODO this is broken
189 | deps = coverage[toml]
190 | skip_install = true
191 | commands =
192 |     coverage report
193 | 
194 | ####################
195 | # Deployment tools #
196 | ####################
197 | 
198 | [testenv:bumpversion]
199 | description = Bump the version number
200 | commands = bump-my-version bump {posargs}
201 | skip_install = true
202 | passenv = HOME
203 | deps =
204 |     bump-my-version
205 | 
206 | [testenv:bumpversion-release]
207 | description = Remove the -dev tag from the version
208 | commands = bump-my-version bump release --tag
209 | skip_install = true
210 | passenv = HOME
211 | deps =
212 |     bump-my-version
213 | 
214 | [testenv:build]
215 | skip_install = true
216 | deps =
217 |     uv
218 | commands =
219 |     uv build --sdist --wheel --no-build-isolation
220 | 
221 | ############
222 | # Releases #
223 | ############
224 | 
225 | # In order to make a release to PyPI, you'll need to take the following steps:
226 | #
227 | # 1. Navigate to https://pypi.org/account/register/ to register for PyPI
228 | # 2. Navigate to https://pypi.org/manage/account/ and request to re-send a verification email.
229 | #    This is not sent by default, and is required to set up 2-Factor Authentication.
230 | # 3. Get account recovery codes
231 | # 4. Set up 2-Factor Authentication
232 | # 5. Get an API token from https://pypi.org/manage/account/token/
233 | # 6. Install keyring with `uv tool install keyring`
234 | # 7. Add your token to keyring with `keyring set https://upload.pypi.org/legacy/ __token__`
235 | 
236 | [testenv:release]
237 | description = Release the code to PyPI so users can pip install it, using credentials from keyring
238 | skip_install = true
239 | deps =
240 |     {[testenv:build]deps}
241 |     uv
242 |     keyring
243 | commands =
244 |     {[testenv:build]commands}
245 |     uv publish --username __token__ --keyring-provider subprocess --publish-url https://upload.pypi.org/legacy/
246 | 
247 | [testenv:release-via-env]
248 | description = Release the code to PyPI so users can pip install it, using credentials from the environment.
249 | skip_install = true
250 | deps =
251 |     {[testenv:build]deps}
252 |     uv
253 | commands =
254 |     {[testenv:build]commands}
255 |     uv publish --publish-url https://upload.pypi.org/legacy/
256 | passenv =
257 |     UV_PUBLISH_USERNAME
258 |     UV_PUBLISH_PASSWORD
259 | 
260 | [testenv:finish]
261 | description =
262 |     Run a workflow that removes -dev from the version, creates a tagged release on GitHub,
263 |     creates a release on PyPI, and bumps the version again.
264 | skip_install = true 265 | passenv = 266 | HOME 267 | deps = 268 | {[testenv:release]deps} 269 | bump-my-version 270 | commands = 271 | {[testenv:bumpversion-release]commands} 272 | {[testenv:release]commands} 273 | git push --tags 274 | bump-my-version bump patch 275 | git push 276 | allowlist_externals = 277 | git 278 | 279 | ################# 280 | # Test Releases # 281 | ################# 282 | 283 | # In order to test making a release to Test PyPI, you'll need to take the following steps: 284 | # 285 | # 1. Navigate to https://test.pypi.org/account/register/ to register for Test PyPI 286 | # 2. Navigate to https://test.pypi.org/manage/account/ and request to re-send a verification email. 287 | # This is not sent by default, and is required to set up 2-Factor Authentication. 288 | # 3. Get account recovery codes 289 | # 4. Set up 2-Factor Authentication 290 | # 5. Get an API token from https://test.pypi.org/manage/account/token/ 291 | # 6. Install keyring with `uv tool install keyring` 292 | # 7. Add your token to keyring with `keyring set https://test.pypi.org/legacy/ __token__` 293 | 294 | [testenv:testrelease] 295 | description = Release the code to the test PyPI site 296 | skip_install = true 297 | deps = 298 | {[testenv:build]deps} 299 | uv 300 | keyring 301 | commands = 302 | {[testenv:build]commands} 303 | uv publish --username __token__ --keyring-provider subprocess --publish-url https://test.pypi.org/legacy/ 304 | 305 | [testenv:testfinish] 306 | description = 307 | Run a workflow that removes -dev from the version, creates a tagged release on GitHub, 308 | creates a release on Test PyPI, and bumps the version again. 309 | skip_install = true 310 | passenv = 311 | HOME 312 | deps = 313 | {[testenv:testrelease]deps} 314 | bump-my-version 315 | commands = 316 | {[testenv:bumpversion-release]commands} 317 | {[testenv:testrelease]commands} 318 | git push --tags 319 | bump-my-version bump patch 320 | git push 321 | allowlist_externals = 322 | git 323 | --------------------------------------------------------------------------------