├── .coveragerc ├── .gitattributes ├── .github └── workflows │ ├── cancel.yml │ ├── linting.yml │ ├── pythonpublish.yaml │ ├── tests.yml │ └── upstream.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CITATION.cff ├── CODE_OF_CONDUCT.md ├── LICENSE ├── MANIFEST.in ├── README.rst ├── ci ├── environment-3.7.yml ├── environment-3.8.yml └── environment-3.9.yml ├── doc ├── Makefile ├── api.rst ├── conf.py ├── contributing.rst ├── environment.yml ├── index.rst ├── installation.rst ├── make.bat └── tutorial.ipynb ├── readthedocs.yml ├── setup.cfg ├── setup.py ├── versioneer.py └── xhistogram ├── __init__.py ├── _version.py ├── core.py ├── test ├── __init__.py ├── fixtures.py ├── test_chunking.py ├── test_chunking_hypotheses.py ├── test_core.py └── test_xarray.py └── xarray.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | 4 | [report] 5 | exclude_lines = 6 | if self.debug: 7 | pragma: no cover 8 | raise NotImplementedError 9 | if __name__ == .__main__.: 10 | ignore_errors = True 11 | omit = xhistogram/test/* 12 | xhistogram/__init__.py 13 | xhistogram/_version.py 14 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | xhistogram/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.github/workflows/cancel.yml: -------------------------------------------------------------------------------- 1 | name: Cancel 2 | 3 | on: 4 | workflow_run: 5 | workflows: ["Tests", "Upstream", "Linting"] 6 | types: 7 | - requested 8 | 9 | jobs: 10 | cancel: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: styfle/cancel-workflow-action@0.8.0 14 | with: 15 | workflow_id: ${{ github.event.workflow.id }} -------------------------------------------------------------------------------- /.github/workflows/linting.yml: -------------------------------------------------------------------------------- 1 | name: Linting 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | checks: 7 | name: pre-commit hooks 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v2 11 | - uses: actions/setup-python@v2 12 | - uses: pre-commit/action@v2.0.0 13 | -------------------------------------------------------------------------------- /.github/workflows/pythonpublish.yaml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | deploy: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v1 12 | - name: Set up Python 13 | uses: actions/setup-python@v1 14 | with: 15 | python-version: '3.x' 16 | - name: Install dependencies 17 | run: | 18 | python -m pip install --upgrade pip 19 | pip install setuptools setuptools-scm wheel twine 20 | - name: Build and publish 21 | env: 22 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 23 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 24 | run: | 25 | python setup.py sdist bdist_wheel 26 | python setup.py --version 27 | twine check dist/* 28 | twine upload dist/* 29 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: [push, pull_request, workflow_dispatch] 4 | 5 | jobs: 6 | test: 7 | runs-on: ${{ matrix.os }} 8 | strategy: 9 |
fail-fast: false 10 | matrix: 11 | os: [windows-latest, ubuntu-latest, macos-latest] 12 | python-version: [3.7, 3.8, 3.9] 13 | 14 | steps: 15 | - name: Checkout source 16 | uses: actions/checkout@v2 17 | 18 | - name: Setup Conda Environment 19 | uses: conda-incubator/setup-miniconda@v2 20 | with: 21 | miniconda-version: "latest" 22 | python-version: ${{ matrix.python-version }} 23 | environment-file: ci/environment-${{ matrix.python-version }}.yml 24 | activate-environment: xhistogram_test_env 25 | auto-activate-base: false 26 | 27 | - name: Install 28 | shell: bash -l {0} 29 | run: python -m pip install -e . 30 | 31 | - name: List installed packages 32 | shell: bash -l {0} 33 | run: conda list 34 | 35 | - name: Run tests 36 | shell: bash -l {0} 37 | run: python -m pytest --cov=xhistogram --cov-report=xml xhistogram 38 | 39 | - name: Coverage 40 | uses: codecov/codecov-action@v1 41 | -------------------------------------------------------------------------------- /.github/workflows/upstream.yml: -------------------------------------------------------------------------------- 1 | name: Upstream 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | test: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: [3.9] 11 | 12 | steps: 13 | - name: Checkout source 14 | uses: actions/checkout@v2 15 | 16 | - name: Setup Conda Environment 17 | uses: conda-incubator/setup-miniconda@v2 18 | with: 19 | miniconda-version: "latest" 20 | python-version: ${{ matrix.python-version }} 21 | environment-file: ci/environment-${{ matrix.python-version }}.yml 22 | activate-environment: xhistogram_test_env 23 | auto-activate-base: false 24 | 25 | - name: Install upstream packages 26 | shell: bash -l {0} 27 | run: | 28 | python -m pip install git+https://github.com/dask/dask.git 29 | python -m pip install git+https://github.com/pydata/xarray.git 30 | 31 | - name: Install 32 | shell: bash -l {0} 33 | run: python -m pip install -e . 34 | 35 | - name: List installed packages 36 | shell: bash -l {0} 37 | run: conda list 38 | 39 | - name: Run tests 40 | shell: bash -l {0} 41 | run: python -m pytest --cov=xhistogram --cov-report=xml xhistogram 42 | 43 | - name: Coverage 44 | uses: codecov/codecov-action@v1 45 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | doc/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | # notebook 60 | */.ipynb_checkpoints/* 61 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: 22.3.0 4 | hooks: 5 | - id: black 6 | language_version: python3 7 | exclude: versioneer.py 8 | - repo: https://gitlab.com/pycqa/flake8 9 | rev: 3.9.0 10 | hooks: 11 | - id: flake8 12 | language_version: python3 13 | - repo: https://github.com/pycqa/doc8 14 | rev: 0.8.1 15 | hooks: 16 | - id: doc8 17 | files: "^doc/.*.rst" 18 | - repo: https://github.com/pre-commit/mirrors-mypy 19 | rev: v0.812 20 | hooks: 21 | - id: mypy 22 | files: "^xhistogram/" 23 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # This CITATION.cff file was generated with cffinit. 2 | # Visit https://bit.ly/cffinit to generate yours today! 3 | 4 | cff-version: 1.2.0 5 | title: xhistogram 6 | message: >- 7 | "If you use this software, please cite it as 8 | below." 9 | type: software 10 | authors: 11 | - family-names: "Abernathey" 12 | given-names: "Ryan" 13 | orcid: "https://orcid.org/0000-0001-5999-4917" 14 | - family-names: "Squire" 15 | given-names: "Dougie" 16 | orcid: "0000-0003-3271-6874" 17 | - family-names: "Nicholas" 18 | given-names: "Thomas" 19 | orcid: "https://orcid.org/0000-0002-2176-0530" 20 | - family-names: "Bourbeau" 21 | given-names: "James" 22 | orcid: "0000-0003-2164-7789" 23 | - family-names: "Joseph" 24 | given-names: "Gabe" 25 | - family-names: "Spring" 26 | given-names: "Aaron" 27 | orcid: "0000-0003-0216-2241" 28 | - family-names: "Bell" 29 | given-names: "Ray" 30 | orcid: "https://orcid.org/0000-0003-2623-0587" 31 | - family-names: "Bailey" 32 | given-names: "Shanice" 33 | orcid: "0000-0002-8176-9465" 34 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at ryan.abernathey@gmail.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Ryan Abernathey 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include versioneer.py 3 | include xhistogram/_version.py 4 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | xhistogram: Fast, flexible, label-aware histograms for numpy and xarray 2 | ======================================================================= 3 | 4 | |pypi| |conda forge| |tests| |linting| |codecov| |docs| |license| |DOI| 5 | 6 | For more information, including installation instructions, read the full 7 | `xhistogram documentation`_. 8 | 9 | .. _Pangeo: http://pangeo-data.github.io 10 | .. _dask: http://dask.pydata.org 11 | .. _xarray: http://xarray.pydata.org 12 | .. _Arakawa Grids: https://en.wikipedia.org/wiki/Arakawa_grid 13 | .. _xhistogram documentation: https://xhistogram.readthedocs.io/ 14 | 15 | .. |conda forge| image:: https://anaconda.org/conda-forge/xhistogram/badges/version.svg 16 | :target: https://anaconda.org/conda-forge/xhistogram 17 | .. |DOI| image:: https://zenodo.org/badge/178940893.svg 18 | :target: https://zenodo.org/badge/latestdoi/178940893 19 | .. |tests| image:: https://github.com/xgcm/xhistogram/actions/workflows/tests.yml/badge.svg 20 | :target: https://github.com/xgcm/xhistogram/actions/workflows/tests.yml 21 | :alt: tests status 22 | .. |codecov| image:: https://codecov.io/github/xgcm/xhistogram/coverage.svg?branch=master 23 | :target: https://codecov.io/github/xgcm/xhistogram?branch=master 24 | :alt: code coverage 25 | ..
|pypi| image:: https://badge.fury.io/py/xhistogram.svg 26 | :target: https://badge.fury.io/py/xhistogram 27 | :alt: pypi package 28 | .. |docs| image:: http://readthedocs.org/projects/xhistogram/badge/?version=latest 29 | :target: http://xhistogram.readthedocs.org/en/stable/?badge=latest 30 | :alt: documentation status 31 | .. |linting| image:: https://github.com/xgcm/xhistogram/actions/workflows/linting.yml/badge.svg 32 | :target: https://github.com/xgcm/xhistogram/actions/workflows/linting.yml 33 | :alt: linting status 34 | .. |license| image:: https://img.shields.io/github/license/mashape/apistatus.svg 35 | :target: https://github.com/xgcm/xhistogram 36 | :alt: license 37 | -------------------------------------------------------------------------------- /ci/environment-3.7.yml: -------------------------------------------------------------------------------- 1 | name: xhistogram_test_env 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.7 6 | - xarray 7 | - dask-core 8 | - numpy=1.17 9 | - pytest 10 | - hypothesis 11 | - pip 12 | - pip: 13 | - codecov 14 | - pytest-cov 15 | -------------------------------------------------------------------------------- /ci/environment-3.8.yml: -------------------------------------------------------------------------------- 1 | name: xhistogram_test_env 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.8 6 | - xarray 7 | - dask-core 8 | - numpy=1.18 9 | - pytest 10 | - hypothesis 11 | - pip 12 | - pip: 13 | - codecov 14 | - pytest-cov 15 | -------------------------------------------------------------------------------- /ci/environment-3.9.yml: -------------------------------------------------------------------------------- 1 | name: xhistogram_test_env 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.9 6 | - xarray 7 | - dask-core 8 | - numpy 9 | - pytest 10 | - hypothesis 11 | - pip 12 | - pip: 13 | - codecov 14 | - pytest-cov 15 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
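# Note: the variables above can be overridden on the command line. For example, a hypothetical invocation that turns Sphinx warnings into errors (sphinx-build's -W flag) would be: make html SPHINXOPTS="-W"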
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make <target>' where <target> is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/xgcm.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/xgcm.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/xgcm" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/xgcm" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /doc/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | ### 3 | 4 | Core Module 5 | =========== 6 | 7 | .. automodule:: xhistogram.core 8 | :members: 9 | 10 | Xarray Module 11 | ============= 12 | 13 | .. 
automodule:: xhistogram.xarray 14 | :members: 15 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # xhistogram documentation build configuration file, created by 4 | # sphinx-quickstart on Sat Aug 29 00:18:20 2015. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | import xhistogram 18 | 19 | # If extensions (or modules to document with autodoc) are in another directory, 20 | # add these directories to sys.path here. If the directory is relative to the 21 | # documentation root, use os.path.abspath to make it absolute, like shown here. 22 | # sys.path.insert(0, os.path.abspath('.')) 23 | # sys.path.insert(os.path.abspath('..')) 24 | 25 | print(f"python exec: {sys.executable}") 26 | print(f"sys.path: {sys.path}") 27 | 28 | 29 | # -- General configuration ------------------------------------------------ 30 | 31 | # If your documentation needs a minimal Sphinx version, state it here. 32 | # needs_sphinx = '1.0' 33 | 34 | # Add any Sphinx extension module names here, as strings. They can be 35 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 36 | # ones. 37 | extensions = [ 38 | "sphinx.ext.mathjax", 39 | "sphinx.ext.autodoc", 40 | "sphinx.ext.autosummary", 41 | "sphinx.ext.extlinks", 42 | "sphinx.ext.viewcode", 43 | "sphinx.ext.intersphinx", 44 | "numpydoc", 45 | "nbsphinx", 46 | "IPython.sphinxext.ipython_directive", 47 | "IPython.sphinxext.ipython_console_highlighting", 48 | ] 49 | 50 | # never execute notebooks: avoids lots of expensive imports on rtd 51 | # https://nbsphinx.readthedocs.io/en/0.2.14/never-execute.html 52 | # nbsphinx_execute = 'never' 53 | 54 | # give cells 10 minutes to run before timeout 55 | nbsphinx_timeout = 600 56 | 57 | 58 | # http://stackoverflow.com/questions/5599254/how-to-use-sphinxs-autodoc-to-document-a-classs-init-self-method 59 | def skip(app, what, name, obj, skip, options): 60 | if name == "__init__": 61 | return False 62 | return skip 63 | 64 | 65 | def setup(app): 66 | app.connect("autodoc-skip-member", skip) 67 | 68 | 69 | # link to github issues 70 | extlinks = {"issue": ("https://github.com/xgcm/xhistogram/issues/%s", "GH")} 71 | 72 | autosummary_generate = True 73 | numpydoc_class_members_toctree = True 74 | numpydoc_show_class_members = False 75 | 76 | # Add any paths that contain templates here, relative to this directory. 77 | templates_path = ["_templates"] 78 | 79 | # The suffix of source filenames. 80 | source_suffix = ".rst" 81 | 82 | # The encoding of source files. 83 | # source_encoding = 'utf-8-sig' 84 | 85 | # The master toctree document. 86 | master_doc = "index" 87 | 88 | # General information about the project. 89 | project = "xhistogram" 90 | copyright = "2016-2019, xhistogram developers" 91 | 92 | # The version info for the project you're documenting, acts as replacement for 93 | # |version| and |release|, also used in various other places throughout the 94 | # built documents. 95 | # 96 | # The full version, including alpha/beta/rc tags. 97 | release = xhistogram.__version__ 98 | # The short X.Y version. 
99 | version = ".".join(release.split(".")[:2]) 100 | 101 | # The language for content autogenerated by Sphinx. Refer to documentation 102 | # for a list of supported languages. 103 | # language = None 104 | 105 | # There are two options for replacing |today|: either, you set today to some 106 | # non-false value, then it is used: 107 | # today = '' 108 | # Else, today_fmt is used as the format for a strftime call. 109 | # today_fmt = '%B %d, %Y' 110 | 111 | # List of patterns, relative to source directory, that match files and 112 | # directories to ignore when looking for source files. 113 | exclude_patterns = ["_build"] 114 | 115 | # The reST default role (used for this markup: `text`) to use for all 116 | # documents. 117 | # default_role = None 118 | 119 | # If true, '()' will be appended to :func: etc. cross-reference text. 120 | # add_function_parentheses = True 121 | 122 | # If true, the current module name will be prepended to all description 123 | # unit titles (such as .. function::). 124 | # add_module_names = True 125 | 126 | # If true, sectionauthor and moduleauthor directives will be shown in the 127 | # output. They are ignored by default. 128 | # show_authors = False 129 | 130 | # The name of the Pygments (syntax highlighting) style to use. 131 | pygments_style = "sphinx" 132 | 133 | # A list of ignored prefixes for module index sorting. 134 | # modindex_common_prefix = [] 135 | 136 | # If true, keep warnings as "system message" paragraphs in the built documents. 137 | # keep_warnings = False 138 | 139 | 140 | # -- Options for HTML output ---------------------------------------------- 141 | 142 | on_rtd = os.environ.get("READTHEDOCS", None) == "True" 143 | 144 | if not on_rtd: # only import and set the theme if we're building docs locally 145 | import sphinx_rtd_theme 146 | 147 | html_theme = "sphinx_rtd_theme" 148 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 149 | 150 | # The theme to use for HTML and HTML Help pages. See the documentation for 151 | # a list of builtin themes. 152 | # html_theme = 'default' 153 | html_theme = "sphinx_rtd_theme" 154 | 155 | # Theme options are theme-specific and customize the look and feel of a theme 156 | # further. For a list of options available for each theme, see the 157 | # documentation. 158 | # html_theme_options = {} 159 | 160 | # Add any paths that contain custom themes here, relative to this directory. 161 | # html_theme_path = [] 162 | 163 | # The name for this set of Sphinx documents. If None, it defaults to 164 | # "<project> v<release> documentation". 165 | # html_title = None 166 | 167 | # A shorter title for the navigation bar. Default is the same as html_title. 168 | # html_short_title = None 169 | 170 | # The name of an image file (relative to this directory) to place at the top 171 | # of the sidebar. 172 | # html_logo = None 173 | 174 | # The name of an image file (within the static path) to use as favicon of the 175 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 176 | # pixels large. 177 | # html_favicon = None 178 | 179 | # Add any paths that contain custom static files (such as style sheets) here, 180 | # relative to this directory. They are copied after the builtin static files, 181 | # so a file named "default.css" will overwrite the builtin "default.css". 182 | html_static_path = ["_static"] 183 | 184 | # Add any extra paths that contain custom files (such as robots.txt or 185 | # .htaccess) here, relative to this directory.
These files are copied 186 | # directly to the root of the documentation. 187 | # html_extra_path = [] 188 | 189 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 190 | # using the given strftime format. 191 | # html_last_updated_fmt = '%b %d, %Y' 192 | 193 | # If true, SmartyPants will be used to convert quotes and dashes to 194 | # typographically correct entities. 195 | # html_use_smartypants = True 196 | 197 | # Custom sidebar templates, maps document names to template names. 198 | # html_sidebars = {} 199 | 200 | # Additional templates that should be rendered to pages, maps page names to 201 | # template names. 202 | # html_additional_pages = {} 203 | 204 | # If false, no module index is generated. 205 | # html_domain_indices = True 206 | 207 | # If false, no index is generated. 208 | # html_use_index = True 209 | 210 | # If true, the index is split into individual pages for each letter. 211 | # html_split_index = False 212 | 213 | # If true, links to the reST sources are added to the pages. 214 | # html_show_sourcelink = True 215 | 216 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 217 | # html_show_sphinx = True 218 | 219 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 220 | # html_show_copyright = True 221 | 222 | # If true, an OpenSearch description file will be output, and all pages will 223 | # contain a tag referring to it. The value of this option must be the 224 | # base URL from which the finished HTML is served. 225 | # html_use_opensearch = '' 226 | 227 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 228 | # html_file_suffix = None 229 | 230 | # Output file base name for HTML help builder. 231 | htmlhelp_basename = "xhistogramdoc" 232 | 233 | 234 | # -- Options for LaTeX output --------------------------------------------- 235 | 236 | # latex_elements = { 237 | # The paper size ('letterpaper' or 'a4paper'). 238 | # 'papersize': 'letterpaper', 239 | # The font size ('10pt', '11pt' or '12pt'). 240 | # 'pointsize': '10pt', 241 | # Additional stuff for the LaTeX preamble. 242 | # 'preamble': '', 243 | # } 244 | 245 | # Grouping the document tree into LaTeX files. List of tuples 246 | # (source start file, target name, title, 247 | # author, documentclass [howto, manual, or own class]). 248 | latex_documents = [ 249 | ( 250 | "index", 251 | "xhistogram.tex", 252 | "xhistogram Documentation", 253 | "xhistogram developers", 254 | "manual", 255 | ), 256 | ] 257 | 258 | # The name of an image file (relative to this directory) to place at the top of 259 | # the title page. 260 | # latex_logo = None 261 | 262 | # For "manual" documents, if this is true, then toplevel headings are parts, 263 | # not chapters. 264 | # latex_use_parts = False 265 | 266 | # If true, show page references after internal links. 267 | # latex_show_pagerefs = False 268 | 269 | # If true, show URL addresses after external links. 270 | # latex_show_urls = False 271 | 272 | # Documents to append as an appendix to all manuals. 273 | # latex_appendices = [] 274 | 275 | # If false, no module index is generated. 276 | # latex_domain_indices = True 277 | 278 | 279 | # -- Options for manual page output --------------------------------------- 280 | 281 | # One entry per manual page. List of tuples 282 | # (source start file, name, description, authors, manual section). 
283 | man_pages = [ 284 | ("index", "xhistogram", "xhistogram Documentation", ["xhistogram developers"], 1) 285 | ] 286 | 287 | # If true, show URL addresses after external links. 288 | # man_show_urls = False 289 | 290 | 291 | # -- Options for Texinfo output ------------------------------------------- 292 | 293 | # Grouping the document tree into Texinfo files. List of tuples 294 | # (source start file, target name, title, author, 295 | # dir menu entry, description, category) 296 | texinfo_documents = [ 297 | ( 298 | "index", 299 | "xhistogram", 300 | "xhistogram Documentation", 301 | "xhistogram developers", 302 | "xhistogram", 303 | "Fast, flexible, label-aware histograms for numpy and xarray.", 304 | "Miscellaneous", 305 | ), 306 | ] 307 | 308 | # Documents to append as an appendix to all manuals. 309 | # texinfo_appendices = [] 310 | 311 | # If false, no module index is generated. 312 | # texinfo_domain_indices = True 313 | 314 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 315 | # texinfo_show_urls = 'footnote' 316 | 317 | # If true, do not generate a @detailmenu in the "Top" node's menu. 318 | # texinfo_no_detailmenu = False 319 | 320 | 321 | # Example configuration for intersphinx: refer to the Python standard library. 322 | intersphinx_mapping = { 323 | "python": ("https://docs.python.org/3/", None), 324 | "xarray": ("http://xarray.pydata.org/en/stable/", None), 325 | } 326 | -------------------------------------------------------------------------------- /doc/contributing.rst: -------------------------------------------------------------------------------- 1 | Contributor Guide 2 | ================= 3 | 4 | This package is in very early stages. Lots of work is needed. 5 | 6 | You can help out just by using ``xhistogram`` and reporting 7 | `issues <https://github.com/xgcm/xhistogram/issues>`__. 8 | 9 | The following sections cover some general guidelines for maintainers and 10 | contributors wanting to help develop ``xhistogram``. 11 | 12 | 13 | Feature requests, suggestions and bug reports 14 | --------------------------------------------- 15 | 16 | We are eager to hear about any bugs you have found, new features you 17 | would like to see and any other suggestions you may have. Please feel 18 | free to submit these as `issues <https://github.com/xgcm/xhistogram/issues>`__. 19 | 20 | When suggesting features, please make sure to explain in detail how 21 | the proposed feature should work and to keep the scope as narrow as 22 | possible. This makes features easier to implement in small PRs. 23 | 24 | When reporting bugs, please include: 25 | 26 | * Any details about your local setup that might be helpful in 27 | troubleshooting, specifically the Python interpreter version, installed 28 | libraries, and ``xhistogram`` version. 29 | * Detailed steps to reproduce the bug, ideally a `Minimal, Complete and 30 | Verifiable Example <https://stackoverflow.com/help/mcve>`__. 31 | * If possible, a demonstration test that currently fails but should pass 32 | when the bug is fixed. 33 | 34 | 35 | Write documentation 36 | ------------------- 37 | Adding documentation is always helpful. This may include: 38 | 39 | * More complementary documentation. Have you perhaps found something unclear? 40 | * Docstrings. 41 | * Example notebooks of ``xhistogram`` being used in real analyses. 42 | 43 | The ``xhistogram`` documentation is written in reStructuredText. You 44 | can follow the conventions in already written documents. Some helpful guides 45 | can be found 46 | `here `__ and 47 | `here `__.
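For example, a minimal new documentation page might be laid out as follows (a sketch only; the page title and contents here are hypothetical)::

    My New Page
    ===========

    A short description of the topic, followed by a code example::

        from xhistogram.xarray import histogram

New pages need to be listed in the ``toctree`` in ``doc/index.rst`` to appear in the built documentation.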
48 | 49 | When writing and editing documentation, it can be useful to see the resulting 50 | build without having to push to Github. You can build the documentation locally 51 | by running:: 52 | 53 | $ # Install the packages required to build the docs in a conda environment 54 | $ conda env create -f doc/environment.yml 55 | $ conda activate xhistogram_doc_env 56 | $ # Install the latest xhistogram 57 | $ pip install --no-deps -e . 58 | $ cd doc/ 59 | $ make html 60 | 61 | This will build the documentation locally in ``doc/_build/``. You can then open 62 | ``_build/html/index.html`` in your web browser to view the documentation. For 63 | example, if you have ``xdg-open`` installed:: 64 | 65 | $ xdg-open _build/html/index.html 66 | 67 | To lint the reStructuredText documentation files run:: 68 | 69 | $ doc8 doc/*.rst 70 | 71 | 72 | Preparing Pull Requests 73 | ----------------------- 74 | #. Fork the 75 | `xhistogram GitHub repository `__. It's 76 | fine to use ``xhistogram`` as your fork repository name because it will live 77 | under your username. 78 | 79 | #. Clone your fork locally, connect your repository to the upstream (main 80 | project), and create a branch to work on:: 81 | 82 | $ git clone git@github.com:YOUR_GITHUB_USERNAME/xhistogram.git 83 | $ cd xhistogram 84 | $ git remote add upstream git@github.com:xgcm/xhistogram.git 85 | $ git checkout -b your-bugfix-feature-branch-name master 86 | 87 | If you need some help with Git, follow 88 | `this quick start guide `__ 89 | 90 | #. Install dependencies into a new conda environment:: 91 | 92 | $ conda env create -f ci/environment-3.9.yml 93 | $ conda activate xhistogram_test_env 94 | 95 | #. Install xhistogram using the editable flag (meaning any changes you make to 96 | the package will be reflected directly in your environment):: 97 | 98 | $ pip install --no-deps -e . 99 | 100 | #. Start making your edits. Please try to type annotate your additions as 101 | much as possible. Adding type annotations to existing unannotated code is 102 | also very welcome. You can read about Python typing 103 | `here `__. 104 | 105 | #. Break your edits up into reasonably sized commits:: 106 | 107 | $ git commit -a -m "" 108 | $ git push -u 109 | 110 | It can be useful to manually run `pre-commit `_ as you 111 | make your edits. ``pre-commit`` will run checks on the format and typing of 112 | your code and will show you where you need to make changes. This will mean 113 | your code is more likely to pass the CI checks when you push it:: 114 | 115 | $ pip install pre_commit # you only need to do this once 116 | $ pre-commit run --all-files 117 | 118 | #. Run the tests (including those you add to test your edits!):: 119 | 120 | $ pytest xhistogram 121 | 122 | You can also test that your contribution and tests increased the test coverage:: 123 | 124 | $ coverage run --source xhistogram -m py.test 125 | $ coverage report 126 | 127 | #. Add a new entry describing your contribution to the :ref:`release-history` 128 | in ``doc/contributing.rst``. Please try to follow the format of the existing 129 | entries. 130 | 131 | #. Submit a pull request through the GitHub `website `__. 132 | 133 | Note that you can create the Pull Request while you're working on your PR. 134 | The PR will update as you add more commits. ``xhistogram`` developers and 135 | contributors can then review your code and offer suggestions. 136 | 137 | 138 | .. 
_release-history: 139 | 140 | Release History 141 | --------------- 142 | 143 | v0.3.2 144 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 145 | 146 | - Fix bug producing TypeError when `weights` is provided with 147 | `keep_coords=True` :issue:`78`. By 148 | `Dougie Squire `_. 149 | - Raise TypeError when `weights` is a dask array and bin edges are 150 | not explicitly provided :issue:`12`. By 151 | `Dougie Squire `_. 152 | 153 | v0.3.1 154 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 155 | 156 | - Add DOI badge and CITATION.cff. By 157 | `Julius Busecke `_. 158 | 159 | v0.3.0 160 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 161 | 162 | - Add support for histograms over non-float dtypes (e.g. datetime 163 | objects) :issue:`25`. By 164 | `Dougie Squire `_. 165 | - Refactor histogram calculation to use dask.array.blockwise 166 | when input arguments are dask arrays, resulting in significant 167 | performance improvements :issue:`49`. By 168 | `Ryan Abernathey `_, 169 | `Tom Nicholas `_ and 170 | `Gabe Joseph `_. 171 | - Fixed bug with density calculation when NaNs are present :issue:`51`. 172 | By `Dougie Squire `_. 173 | - Implemented various options for users for providing bins to 174 | xhistogram that mimic the numpy histogram API. This included 175 | adding a range argument to the xhistogram API :issue:`13`. 176 | By `Dougie Squire `_. 177 | - Added a function to check if the object passed to xhistogram is an 178 | xarray.DataArray and, if not, throw an error :issue:`14`. 179 | By `Yang Yunyi `_. 180 | 181 | v0.2.0 182 | ~~~~~~ 183 | 184 | - Added FutureWarning for upcoming changes to core API :issue:`13`. 185 | By `Dougie Squire `_. 186 | - Added documentation on how to deal with NaNs in weights :issue:`26`. 187 | By `Shanice Bailey `_. 188 | - Move CI to GitHub Actions :issue:`32`. 189 | By `James Bourbeau `_. 190 | - Add documentation for contributors. 191 | By `Dougie Squire `_. 192 | - Add type checking with mypy :issue:`32`. 193 | By `Dougie Squire `_. 194 | 195 | v0.1.3 196 | ~~~~~~ 197 | 198 | - Update dependencies to exclude incompatible dask version :issue:`27`. 199 | By `Ryan Abernathey `_. 200 | 201 | v0.1.2 202 | ~~~~~~ 203 | 204 | - Aligned definition of ``bins`` with ``numpy.histogram`` :issue:`18`. 205 | By `Dougie Squire `_. 206 | 207 | v0.1.1 208 | ~~~~~~ 209 | 210 | Minor bugfix release 211 | 212 | - Improved documentation examples. 213 | By `Dhruv Balwada `_. 214 | - Fixed issue :issue:`5` related to incorrect dimension order 215 | and dropping of dimension coordinates. 216 | By `Ryan Abernathey `_.
217 | 218 | v0.1 219 | ~~~~ 220 | 221 | First release 222 | -------------------------------------------------------------------------------- /doc/environment.yml: -------------------------------------------------------------------------------- 1 | name: xhistogram_doc_env 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.9 6 | - xarray 7 | - netcdf4 8 | - numpy>=1.16,!=1.20.0,!=1.20.1,!=1.20.2,!=1.20.3 9 | - pytest 10 | - numpydoc 11 | - sphinx 12 | - ipython 13 | - matplotlib 14 | - jupyter_client 15 | - ipykernel 16 | - pandoc 17 | - pip 18 | - doc8 19 | - pip: 20 | - nbsphinx 21 | - sphinx_rtd_theme 22 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | xhistogram: Fast, flexible, label-aware histograms for numpy and xarray 2 | ======================================================================= 3 | 4 | Histograms (a.k.a "binning") are much more than just a visualization tool. 5 | They are the foundation of a wide range of scientific analyses including 6 | [joint] probability distributions and coordinate transformations. 7 | Xhistogram makes it easier to calculate flexible, complex histograms with 8 | multi-dimensional data. It integrates (optionally) with Dask, in order to 9 | scale up to very large datasets, and with Xarray, in order to consume and 10 | produce labelled, annotated data structures. It is useful for a wide range of 11 | scientific tasks. 12 | 13 | 14 | Why a new histogram package? 15 | ---------------------------- 16 | 17 | The main problem with the standard ``histogram`` functions in numpy and 18 | dask is that they automatically act over the entire input array (i.e. they 19 | "flatten" the data). Xhistogram allows you to choose which axes / dimensions 20 | you want to preserve and which you want to flatten. It also allows you to 21 | combine N arbitrary inputs to produce N-dimensional histograms. 22 | A good place to start is the :doc:`tutorial`. 23 | 24 | Contents 25 | -------- 26 | 27 | .. toctree:: 28 | :maxdepth: 1 29 | 30 | installation 31 | tutorial 32 | api 33 | contributing 34 | -------------------------------------------------------------------------------- /doc/installation.rst: -------------------------------------------------------------------------------- 1 | 2 | Installation 3 | ------------ 4 | 5 | Requirements 6 | ^^^^^^^^^^^^ 7 | 8 | xhistogram is compatible with python 3. It requires numpy and, optionally, 9 | xarray. 10 | 11 | Installation from Conda Forge 12 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 13 | 14 | The easiest way to install xhistogram along with its dependencies is via conda 15 | forge:: 16 | 17 | conda install -c conda-forge xhistogram 18 | 19 | 20 | Installation from Pip 21 | ^^^^^^^^^^^^^^^^^^^^^ 22 | 23 | An alternative is to use pip:: 24 | 25 | pip install xhistogram 26 | 27 | This will install the latest release from 28 | `pypi <https://pypi.org/project/xhistogram/>`_. 29 | 30 | Installation from GitHub 31 | ^^^^^^^^^^^^^^^^^^^^^^^^ 32 | 33 | xhistogram is under active development. To obtain the latest development version, 34 | you may clone the `source repository <https://github.com/xgcm/xhistogram>`_ 35 | and install it:: 36 | 37 | git clone https://github.com/xgcm/xhistogram.git 38 | cd xhistogram 39 | python setup.py install 40 | 41 | or simply:: 42 | 43 | pip install git+https://github.com/xgcm/xhistogram.git 44 | 45 | Users are encouraged to `fork `_ 46 | xhistogram and submit issues_ and `pull requests`_. 47 |
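To quickly check that an installation works, you can compute a small histogram with the xarray interface (a minimal sketch mirroring the toy example in the tutorial; the data here are random toy values)::

    import numpy as np
    import xarray as xr
    from xhistogram.xarray import histogram

    # all inputs need a name
    da = xr.DataArray(np.random.randn(100), dims=["x"], name="foo")

    # histogram of da over 19 bins spanning [-4, 4]
    h = histogram(da, bins=[np.linspace(-4, 4, 20)])
    print(h)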
48 | .. _dask: http://dask.pydata.org 49 | .. _xarray: http://xarray.pydata.org 50 | .. _issues: https://github.com/xgcm/xhistogram/issues 51 | .. _`pull requests`: https://github.com/xgcm/xhistogram/pulls 52 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^<target^>` where ^<target^> is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files.
92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\xgcm.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\xgcm.ghc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 
212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /doc/tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Xhistogram Tutorial\n", 8 | "\n", 9 | "Histograms are the foundation of many forms of data analysis.\n", 10 | "The goal of xhistogram is to make it easy to calculate weighted histograms in multiple dimensions over n-dimensional arrays, with control over the axes.\n", 11 | "Xhistogram builds on top of xarray, for automatic coordinates and labels, and dask, for parallel scalability." 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "## Toy Data\n", 19 | "\n", 20 | "We start by showing an example with toy data. First we use xarray to create some random, normally distributed data.\n", 21 | "\n", 22 | "### 1D Histogram" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "collapsed": false, 30 | "jupyter": { 31 | "outputs_hidden": false 32 | } 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "import xarray as xr\n", 37 | "import numpy as np\n", 38 | "%matplotlib inline\n", 39 | "\n", 40 | "nt, nx = 100, 30\n", 41 | "da = xr.DataArray(np.random.randn(nt, nx), dims=['time', 'x'],\n", 42 | " name='foo') # all inputs need a name\n", 43 | "display(da)\n", 44 | "da.plot()" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "By default xhistogram operates on all dimensions of an array, just like numpy. However, it operates on xarray DataArrays, taking labels into account." 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "collapsed": false, 59 | "jupyter": { 60 | "outputs_hidden": false 61 | } 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "from xhistogram.xarray import histogram\n", 66 | "\n", 67 | "bins = np.linspace(-4, 4, 20)\n", 68 | "h = histogram(da, bins=[bins])\n", 69 | "display(h)\n", 70 | "h.plot()" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "**TODO:** \n", 78 | "- Bins needs to be a list; this is annoying, would be good to accept single items\n", 79 | "- The `foo_bin` coordinate is the estimated bin center, not the bounds. 
80 |     "\n",
81 |     "Both of the above need GitHub issues.\n",
82 |     "\n",
83 |     "### Histogram over a single axis"
84 |    ]
85 |   },
86 |   {
87 |    "cell_type": "code",
88 |    "execution_count": null,
89 |    "metadata": {
90 |     "collapsed": false,
91 |     "jupyter": {
92 |      "outputs_hidden": false
93 |     }
94 |    },
95 |    "outputs": [],
96 |    "source": [
97 |     "h_x = histogram(da, bins=[bins], dim=['time'])\n",
98 |     "h_x.plot()"
99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "markdown",
103 |    "metadata": {},
104 |    "source": [
105 |     "**TODO:**\n",
106 |     " - Relax / explain the requirement that `dim` is always a list"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": null,
112 |    "metadata": {
113 |     "collapsed": false,
114 |     "jupyter": {
115 |      "outputs_hidden": false
116 |     }
117 |    },
118 |    "outputs": [],
119 |    "source": [
120 |     "h_x.mean(dim='x').plot()"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "markdown",
125 |    "metadata": {},
126 |    "source": [
127 |     "### Weighted Histogram\n",
128 |     "\n",
129 |     "Weights can be the same shape as the input:"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": null,
135 |    "metadata": {
136 |     "collapsed": false,
137 |     "jupyter": {
138 |      "outputs_hidden": false
139 |     }
140 |    },
141 |    "outputs": [],
142 |    "source": [
143 |     "weights = 0.4 * xr.ones_like(da)\n",
144 |     "histogram(da, bins=[bins], weights=weights)"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "markdown",
149 |    "metadata": {},
150 |    "source": [
151 |     "Or they can rely on xarray broadcasting:"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": null,
157 |    "metadata": {
158 |     "collapsed": false,
159 |     "jupyter": {
160 |      "outputs_hidden": false
161 |     }
162 |    },
163 |    "outputs": [],
164 |    "source": [
165 |     "weights = 0.2 * xr.ones_like(da.x)\n",
166 |     "histogram(da, bins=[bins], weights=weights)"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "markdown",
171 |    "metadata": {},
172 |    "source": [
173 |     "## 2D Histogram\n",
174 |     "\n",
175 |     "Now let's say we have multiple input arrays. We can calculate their joint distribution:"
176 |    ]
177 |   },
178 |   {
179 |    "cell_type": "code",
180 |    "execution_count": null,
181 |    "metadata": {
182 |     "collapsed": false,
183 |     "jupyter": {
184 |      "outputs_hidden": false
185 |     }
186 |    },
187 |    "outputs": [],
188 |    "source": [
189 |     "db = xr.DataArray(np.random.randn(nt, nx), dims=['time', 'x'],\n",
190 |     "                  name='bar') - 2\n",
191 |     "\n",
192 |     "histogram(da, db, bins=[bins, bins]).plot()"
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "markdown",
197 |    "metadata": {},
198 |    "source": [
199 |     "## Real Data Example\n",
200 |     "\n",
201 |     "### Ocean Volume Census in TS Space\n",
202 |     "\n",
203 |     "Here we show how to use xhistogram to do a volume census of the ocean in Temperature-Salinity space."
204 |    ]
205 |   },
206 |   {
207 |    "cell_type": "markdown",
208 |    "metadata": {},
209 |    "source": [
210 |     "First we open the World Ocean Atlas dataset from the OPeNDAP server (http://apdrc.soest.hawaii.edu/dods/public_data/WOA/WOA13/5_deg/annual).\n",
211 |     "\n",
212 |     "Here we read the annual mean Temperature, Salinity, and Oxygen on a 5 degree grid."
213 |    ]
214 |   },
215 |   {
216 |    "cell_type": "code",
217 |    "execution_count": null,
218 |    "metadata": {
219 |     "collapsed": false,
220 |     "jupyter": {
221 |      "outputs_hidden": false
222 |     }
223 |    },
224 |    "outputs": [],
225 |    "source": [
226 |     "# Read WOA using OPeNDAP\n",
227 |     "Temp_url = 'http://apdrc.soest.hawaii.edu:80/dods/public_data/WOA/WOA13/5_deg/annual/temp'\n",
228 |     "Salt_url = 'http://apdrc.soest.hawaii.edu:80/dods/public_data/WOA/WOA13/5_deg/annual/salt'\n",
229 |     "Oxy_url = 'http://apdrc.soest.hawaii.edu:80/dods/public_data/WOA/WOA13/5_deg/annual/doxy'\n",
230 |     "\n",
231 |     "ds = xr.merge([\n",
232 |     "    xr.open_dataset(Temp_url).tmn.load(),\n",
233 |     "    xr.open_dataset(Salt_url).smn.load(),\n",
234 |     "    xr.open_dataset(Oxy_url).omn.load()])\n",
235 |     "ds"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "markdown",
240 |    "metadata": {},
241 |    "source": [
242 |     "Use histogram to bin the data points, using canonical ocean T/S ranges and bin sizes of $0.1^\circ$C and $0.025$ psu. These are similar to the ranges and bin sizes in this review paper on Mode Waters: https://doi.org/10.1016/B978-0-12-391851-2.00009-X ."
243 |    ]
244 |   },
245 |   {
246 |    "cell_type": "code",
247 |    "execution_count": null,
248 |    "metadata": {
249 |     "collapsed": false,
250 |     "jupyter": {
251 |      "outputs_hidden": false
252 |     }
253 |    },
254 |    "outputs": [],
255 |    "source": [
256 |     "sbins = np.arange(31, 38, 0.025)\n",
257 |     "tbins = np.arange(-2, 32, 0.1)"
258 |    ]
259 |   },
260 |   {
261 |    "cell_type": "code",
262 |    "execution_count": null,
263 |    "metadata": {
264 |     "collapsed": false,
265 |     "jupyter": {
266 |      "outputs_hidden": false
267 |     }
268 |    },
269 |    "outputs": [],
270 |    "source": [
271 |     "# histogram of number of data points\n",
273 |     "hTS = histogram(ds.smn, ds.tmn, bins=[sbins, tbins])\n",
274 |     "np.log10(hTS.T).plot(levels=31)"
275 |    ]
276 |   },
277 |   {
278 |    "cell_type": "markdown",
279 |    "metadata": {},
280 |    "source": [
281 |     "However, we would like to do a volume census, which requires the data points to be weighted by the volume of the grid box.\n",
282 |     "\n",
283 |     "\begin{equation}\n",
284 |     "dV = dz\,dx\,dy\n",
285 |     "\end{equation}"
286 |    ]
287 |   },
288 |   {
289 |    "cell_type": "code",
290 |    "execution_count": null,
291 |    "metadata": {
292 |     "collapsed": false,
293 |     "jupyter": {
294 |      "outputs_hidden": false
295 |     }
296 |    },
297 |    "outputs": [],
298 |    "source": [
299 |     "# histogram of number of data points weighted by volume resolution\n",
300 |     "# Note that depth is a non-uniform axis\n",
301 |     "\n",
302 |     "# Create a dz variable\n",
303 |     "dz = np.diff(ds.lev)\n",
304 |     "dz = np.insert(dz, 0, dz[0])\n",
305 |     "dz = xr.DataArray(dz, coords={'lev': ds.lev}, dims='lev')\n",
306 |     "\n",
307 |     "# weight by volume of grid cell (resolution = 5 degrees, 1 degree = 110 km)\n",
308 |     "dVol = dz * (5*110e3) * (5*110e3*np.cos(ds.lat*np.pi/180))\n",
309 |     "\n",
310 |     "# Note: The weights are automatically broadcast to the right size\n",
311 |     "hTSw = histogram(ds.smn, ds.tmn, bins=[sbins, tbins], weights=dVol)\n",
312 |     "np.log10(hTSw.T).plot(levels=31, vmin=11.5, vmax=16, cmap='brg')"
313 |    ]
314 |   },
315 |   {
316 |    "cell_type": "markdown",
317 |    "metadata": {},
318 |    "source": [
319 |     "The ridges of the above plot indicate T/S classes with a lot of volume; some of these correspond to Mode Waters (for example, Eighteen Degree Water with T$\sim18^\circ$C and S$\sim36.5$ psu)."
" 320 | ] 321 | }, 322 | { 323 | "cell_type": "markdown", 324 | "metadata": {}, 325 | "source": [ 326 | "#### Averaging a variable \n", 327 | "\n", 328 | "Next we calculate the mean oxygen value in each TS bin. \n", 329 | "\n", 330 | "\\begin{equation}\n", 331 | "\\overline{A} (m,n) = \\frac{\\sum_{T(x,y,z)=m, S(x,y,z)=n} (A(x,y,z) dV)}{\\sum_{T(x,y,z)=m, S(x,y,z)=n}dV}.\n", 332 | "\\end{equation}" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": null, 338 | "metadata": { 339 | "collapsed": false, 340 | "jupyter": { 341 | "outputs_hidden": false 342 | } 343 | }, 344 | "outputs": [], 345 | "source": [ 346 | "hTSO2 = (histogram(ds.smn.where(~np.isnan(ds.omn)), \n", 347 | " ds.tmn.where(~np.isnan(ds.omn)), \n", 348 | " bins=[sbins, tbins], \n", 349 | " weights=ds.omn.where(~np.isnan(ds.omn))*dVol)/\n", 350 | " histogram(ds.smn.where(~np.isnan(ds.omn)), \n", 351 | " ds.tmn.where(~np.isnan(ds.omn)), \n", 352 | " bins=[sbins, tbins], \n", 353 | " weights=dVol))\n", 354 | "\n", 355 | "(hTSO2.T).plot(vmin=1, vmax=8)" 356 | ] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": {}, 361 | "source": [ 362 | "Some interesting patterns in average oxygen emerge. Convectively ventilated cold water have the highest oxygen and mode waters have relatively high oxygen. Oxygen minimum zones are interspersed in the middle of volumetic ridges (high volume waters). " 363 | ] 364 | }, 365 | { 366 | "cell_type": "markdown", 367 | "metadata": {}, 368 | "source": [ 369 | "**NOTE**: NaNs in weights will make the weighted sum as nan. To avoid this, call `.fillna(0.)` on your weights input data before calling `histogram()`." 370 | ] 371 | }, 372 | { 373 | "cell_type": "markdown", 374 | "metadata": {}, 375 | "source": [ 376 | "## Dask Integration\n", 377 | "\n", 378 | "Should just work, but need examples." 
379 |    ]
380 |   }
381 |  ],
382 |  "metadata": {
383 |   "kernelspec": {
384 |    "display_name": "Python 3",
385 |    "language": "python",
386 |    "name": "python3"
387 |   },
388 |   "language_info": {
389 |    "codemirror_mode": {
390 |     "name": "ipython",
391 |     "version": 3
392 |    },
393 |    "file_extension": ".py",
394 |    "mimetype": "text/x-python",
395 |    "name": "python",
396 |    "nbconvert_exporter": "python",
397 |    "pygments_lexer": "ipython3",
398 |    "version": "3.8.6"
399 |   }
400 |  },
401 |  "nbformat": 4,
402 |  "nbformat_minor": 4
403 | }
404 | 
--------------------------------------------------------------------------------
/readthedocs.yml:
--------------------------------------------------------------------------------
1 | conda:
2 |   file: doc/environment.yml
3 | python:
4 |   version: 3
5 |   setup_py_install: true
6 | 
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [aliases]
2 | test=pytest
3 | 
4 | [versioneer]
5 | VCS = git
6 | style = pep440
7 | versionfile_source = xhistogram/_version.py
8 | versionfile_build = xhistogram/_version.py
9 | tag_prefix = v
10 | parentdir_prefix = xhistogram-
11 | 
12 | [flake8]
13 | exclude = __init__.py,versioneer.py,_version.py
14 | max-line-length = 120
15 | 
16 | [tool:pytest]
17 | addopts = -v --durations=10
18 | 
19 | [mypy]
20 | 
21 | [mypy-dask.*]
22 | ignore_missing_imports = True
23 | [mypy-numpy.*]
24 | ignore_missing_imports = True
25 | [mypy-pandas.*]
26 | ignore_missing_imports = True
27 | 
28 | # Ignore versioneer files
29 | [mypy-xhistogram._version]
30 | ignore_errors = True
31 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import versioneer
3 | from setuptools import setup, find_packages
4 | 
5 | DISTNAME = "xhistogram"
6 | LICENSE = "MIT"
7 | AUTHOR = "xhistogram Developers"
8 | AUTHOR_EMAIL = "rpa@ldeo.columbia.edu"
9 | URL = "https://github.com/xgcm/xhistogram"
10 | CLASSIFIERS = [
11 |     "Development Status :: 4 - Beta",
12 |     "License :: OSI Approved :: MIT License",
13 |     "Operating System :: OS Independent",
14 |     "Intended Audience :: Science/Research",
15 |     "Programming Language :: Python",
16 |     "Programming Language :: Python :: 3",
17 |     "Programming Language :: Python :: 3.7",
18 |     "Programming Language :: Python :: 3.8",
19 |     "Programming Language :: Python :: 3.9",
20 |     "Topic :: Scientific/Engineering",
21 | ]
22 | 
23 | INSTALL_REQUIRES = ["xarray>=0.12.0", "dask[array]>=2.3.0", "numpy>=1.17"]
24 | PYTHON_REQUIRES = ">=3.7"
25 | 
26 | DESCRIPTION = "Fast, flexible, label-aware histograms for numpy and xarray"
27 | 
28 | 
29 | def readme():
30 |     with open("README.rst") as f:
31 |         return f.read()
32 | 
33 | 
34 | setup(
35 |     name=DISTNAME,
36 |     version=versioneer.get_version(),
37 |     cmdclass=versioneer.get_cmdclass(),
38 |     license=LICENSE,
39 |     author=AUTHOR,
40 |     author_email=AUTHOR_EMAIL,
41 |     classifiers=CLASSIFIERS,
42 |     description=DESCRIPTION,
43 |     long_description=readme(),
44 |     install_requires=INSTALL_REQUIRES,
45 |     python_requires=PYTHON_REQUIRES,
46 |     url=URL,
47 |     packages=find_packages(),
48 | )
49 | 
--------------------------------------------------------------------------------
/versioneer.py:
--------------------------------------------------------------------------------
1 | # Version: 0.18
2 | 
3 | """The Versioneer - like a rocketeer, but for versions.
4 | 
5 | The Versioneer
6 | ==============
7 | 
8 | * like a rocketeer, but for versions!
9 | * https://github.com/warner/python-versioneer
10 | * Brian Warner
11 | * License: Public Domain
12 | * Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy
13 | * [![Latest Version]
14 | (https://pypip.in/version/versioneer/badge.svg?style=flat)
15 | ](https://pypi.python.org/pypi/versioneer/)
16 | * [![Build Status]
17 | (https://travis-ci.org/warner/python-versioneer.png?branch=master)
18 | ](https://travis-ci.org/warner/python-versioneer)
19 | 
20 | This is a tool for managing a recorded version number in distutils-based
21 | python projects. The goal is to remove the tedious and error-prone "update
22 | the embedded version string" step from your release process. Making a new
23 | release should be as easy as recording a new tag in your version-control
24 | system, and maybe making new tarballs.
25 | 
26 | 
27 | ## Quick Install
28 | 
29 | * `pip install versioneer` to somewhere in your $PATH
30 | * add a `[versioneer]` section to your setup.cfg (see below)
31 | * run `versioneer install` in your source tree, commit the results
32 | 
33 | ## Version Identifiers
34 | 
35 | Source trees come from a variety of places:
36 | 
37 | * a version-control system checkout (mostly used by developers)
38 | * a nightly tarball, produced by build automation
39 | * a snapshot tarball, produced by a web-based VCS browser, like github's
40 | "tarball from tag" feature
41 | * a release tarball, produced by "setup.py sdist", distributed through PyPI
42 | 
43 | Within each source tree, the version identifier (either a string or a number,
44 | this tool is format-agnostic) can come from a variety of places:
45 | 
46 | * ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows
47 | about recent "tags" and an absolute revision-id
48 | * the name of the directory into which the tarball was unpacked
49 | * an expanded VCS keyword ($Id$, etc)
50 | * a `_version.py` created by some earlier build step
51 | 
52 | For released software, the version identifier is closely related to a VCS
53 | tag. Some projects use tag names that include more than just the version
54 | string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool
55 | needs to strip the tag prefix to extract the version identifier. For
56 | unreleased software (between tags), the version identifier should provide
57 | enough information to help developers recreate the same tree, while also
58 | giving them an idea of roughly how old the tree is (after version 1.2, before
59 | version 1.3). Many VCS systems can report a description that captures this,
60 | for example `git describe --tags --dirty --always` reports things like
61 | "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the
62 | 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has
63 | uncommitted changes).
64 | 
65 | The version identifier is used for multiple purposes:
66 | 
67 | * to allow the module to self-identify its version: `myproject.__version__`
68 | * to choose a name and prefix for a 'setup.py sdist' tarball
69 | 
70 | ## Theory of Operation
71 | 
72 | Versioneer works by adding a special `_version.py` file into your source
73 | tree, where your `__init__.py` can import it. This `_version.py` knows how to
74 | dynamically ask the VCS tool for version information at import time.
75 | 76 | `_version.py` also contains `$Revision$` markers, and the installation 77 | process marks `_version.py` to have this marker rewritten with a tag name 78 | during the `git archive` command. As a result, generated tarballs will 79 | contain enough information to get the proper version. 80 | 81 | To allow `setup.py` to compute a version too, a `versioneer.py` is added to 82 | the top level of your source tree, next to `setup.py` and the `setup.cfg` 83 | that configures it. This overrides several distutils/setuptools commands to 84 | compute the version when invoked, and changes `setup.py build` and `setup.py 85 | sdist` to replace `_version.py` with a small static file that contains just 86 | the generated version data. 87 | 88 | ## Installation 89 | 90 | See [INSTALL.md](./INSTALL.md) for detailed installation instructions. 91 | 92 | ## Version-String Flavors 93 | 94 | Code which uses Versioneer can learn about its version string at runtime by 95 | importing `_version` from your main `__init__.py` file and running the 96 | `get_versions()` function. From the "outside" (e.g. in `setup.py`), you can 97 | import the top-level `versioneer.py` and run `get_versions()`. 98 | 99 | Both functions return a dictionary with different flavors of version 100 | information: 101 | 102 | * `['version']`: A condensed version string, rendered using the selected 103 | style. This is the most commonly used value for the project's version 104 | string. The default "pep440" style yields strings like `0.11`, 105 | `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section 106 | below for alternative styles. 107 | 108 | * `['full-revisionid']`: detailed revision identifier. For Git, this is the 109 | full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". 110 | 111 | * `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the 112 | commit date in ISO 8601 format. This will be None if the date is not 113 | available. 114 | 115 | * `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that 116 | this is only accurate if run in a VCS checkout, otherwise it is likely to 117 | be False or None 118 | 119 | * `['error']`: if the version string could not be computed, this will be set 120 | to a string describing the problem, otherwise it will be None. It may be 121 | useful to throw an exception in setup.py if this is set, to avoid e.g. 122 | creating tarballs with a version string of "unknown". 123 | 124 | Some variants are more useful than others. Including `full-revisionid` in a 125 | bug report should allow developers to reconstruct the exact code being tested 126 | (or indicate the presence of local changes that should be shared with the 127 | developers). `version` is suitable for display in an "about" box or a CLI 128 | `--version` output: it can be easily compared against release notes and lists 129 | of bugs fixed in various releases. 130 | 131 | The installer adds the following text to your `__init__.py` to place a basic 132 | version in `YOURPROJECT.__version__`: 133 | 134 | from ._version import get_versions 135 | __version__ = get_versions()['version'] 136 | del get_versions 137 | 138 | ## Styles 139 | 140 | The setup.cfg `style=` configuration controls how the VCS information is 141 | rendered into a version string. 
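
For example, a minimal `[versioneer]` section in setup.cfg selecting a style
(mirroring the configuration used elsewhere in this repository) looks like:

    [versioneer]
    VCS = git
    style = pep440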
142 | 
143 | The default style, "pep440", produces a PEP440-compliant string, equal to the
144 | un-prefixed tag name for actual releases, and containing an additional "local
145 | version" section with more detail for in-between builds. For Git, this is
146 | TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags
147 | --dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the
148 | tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and
149 | that this commit is two revisions ("+2") beyond the "0.11" tag. For released
150 | software (exactly equal to a known tag), the identifier will only contain the
151 | stripped tag, e.g. "0.11".
152 | 
153 | Other styles are available. See [details.md](details.md) in the Versioneer
154 | source tree for descriptions.
155 | 
156 | ## Debugging
157 | 
158 | Versioneer tries to avoid fatal errors: if something goes wrong, it will tend
159 | to return a version of "0+unknown". To investigate the problem, run `setup.py
160 | version`, which will run the version-lookup code in a verbose mode, and will
161 | display the full contents of `get_versions()` (including the `error` string,
162 | which may help identify what went wrong).
163 | 
164 | ## Known Limitations
165 | 
166 | Some situations are known to cause problems for Versioneer. This details the
167 | most significant ones. More can be found on the Github
168 | [issues page](https://github.com/warner/python-versioneer/issues).
169 | 
170 | ### Subprojects
171 | 
172 | Versioneer has limited support for source trees in which `setup.py` is not in
173 | the root directory (e.g. `setup.py` and `.git/` are *not* siblings). There are
174 | two common reasons why `setup.py` might not be in the root:
175 | 
176 | * Source trees which contain multiple subprojects, such as
177 | [Buildbot](https://github.com/buildbot/buildbot), which contains both
178 | "master" and "slave" subprojects, each with their own `setup.py`,
179 | `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI
180 | distributions (and upload multiple independently-installable tarballs).
181 | * Source trees whose main purpose is to contain a C library, but which also
182 | provide bindings to Python (and perhaps other languages) in subdirectories.
183 | 
184 | Versioneer will look for `.git` in parent directories, and most operations
185 | should get the right version string. However `pip` and `setuptools` have bugs
186 | and implementation details which frequently cause `pip install .` from a
187 | subproject directory to fail to find a correct version string (so it usually
188 | defaults to `0+unknown`).
189 | 
190 | `pip install --editable .` should work correctly. `setup.py install` might
191 | work too.
192 | 
193 | Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in
194 | some later version.
195 | 
196 | [Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking
197 | this issue. The discussion in
198 | [PR #61](https://github.com/warner/python-versioneer/pull/61) describes the
199 | issue from the Versioneer side in more detail.
200 | [pip PR#3176](https://github.com/pypa/pip/pull/3176) and
201 | [pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve
202 | pip to let Versioneer work correctly.
203 | 
204 | Versioneer-0.16 and earlier only looked for a `.git` directory next to the
205 | `setup.cfg`, so subprojects were completely unsupported with those releases.
206 | 207 | ### Editable installs with setuptools <= 18.5 208 | 209 | `setup.py develop` and `pip install --editable .` allow you to install a 210 | project into a virtualenv once, then continue editing the source code (and 211 | test) without re-installing after every change. 212 | 213 | "Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a 214 | convenient way to specify executable scripts that should be installed along 215 | with the python package. 216 | 217 | These both work as expected when using modern setuptools. When using 218 | setuptools-18.5 or earlier, however, certain operations will cause 219 | `pkg_resources.DistributionNotFound` errors when running the entrypoint 220 | script, which must be resolved by re-installing the package. This happens 221 | when the install happens with one version, then the egg_info data is 222 | regenerated while a different version is checked out. Many setup.py commands 223 | cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into 224 | a different virtualenv), so this can be surprising. 225 | 226 | [Bug #83](https://github.com/warner/python-versioneer/issues/83) describes 227 | this one, but upgrading to a newer version of setuptools should probably 228 | resolve it. 229 | 230 | ### Unicode version strings 231 | 232 | While Versioneer works (and is continually tested) with both Python 2 and 233 | Python 3, it is not entirely consistent with bytes-vs-unicode distinctions. 234 | Newer releases probably generate unicode version strings on py2. It's not 235 | clear that this is wrong, but it may be surprising for applications when then 236 | write these strings to a network connection or include them in bytes-oriented 237 | APIs like cryptographic checksums. 238 | 239 | [Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates 240 | this question. 241 | 242 | 243 | ## Updating Versioneer 244 | 245 | To upgrade your project to a new release of Versioneer, do the following: 246 | 247 | * install the new Versioneer (`pip install -U versioneer` or equivalent) 248 | * edit `setup.cfg`, if necessary, to include any new configuration settings 249 | indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. 250 | * re-run `versioneer install` in your source tree, to replace 251 | `SRC/_version.py` 252 | * commit any changed files 253 | 254 | ## Future Directions 255 | 256 | This tool is designed to make it easily extended to other version-control 257 | systems: all VCS-specific components are in separate directories like 258 | src/git/ . The top-level `versioneer.py` script is assembled from these 259 | components by running make-versioneer.py . In the future, make-versioneer.py 260 | will take a VCS name as an argument, and will construct a version of 261 | `versioneer.py` that is specific to the given VCS. It might also take the 262 | configuration arguments that are currently provided manually during 263 | installation by editing setup.py . Alternatively, it might go the other 264 | direction and include code from all supported VCS systems, reducing the 265 | number of intermediate scripts. 266 | 267 | 268 | ## License 269 | 270 | To make Versioneer easier to embed, all its code is dedicated to the public 271 | domain. The `_version.py` that it creates is also in the public domain. 272 | Specifically, both are released under the Creative Commons "Public Domain 273 | Dedication" license (CC0-1.0), as described in 274 | https://creativecommons.org/publicdomain/zero/1.0/ . 
275 | 276 | """ 277 | 278 | from __future__ import print_function 279 | 280 | try: 281 | import configparser 282 | except ImportError: 283 | import ConfigParser as configparser 284 | import errno 285 | import json 286 | import os 287 | import re 288 | import subprocess 289 | import sys 290 | 291 | 292 | class VersioneerConfig: 293 | """Container for Versioneer configuration parameters.""" 294 | 295 | 296 | def get_root(): 297 | """Get the project root directory. 298 | 299 | We require that all commands are run from the project root, i.e. the 300 | directory that contains setup.py, setup.cfg, and versioneer.py . 301 | """ 302 | root = os.path.realpath(os.path.abspath(os.getcwd())) 303 | setup_py = os.path.join(root, "setup.py") 304 | versioneer_py = os.path.join(root, "versioneer.py") 305 | if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): 306 | # allow 'python path/to/setup.py COMMAND' 307 | root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) 308 | setup_py = os.path.join(root, "setup.py") 309 | versioneer_py = os.path.join(root, "versioneer.py") 310 | if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): 311 | err = ( 312 | "Versioneer was unable to run the project root directory. " 313 | "Versioneer requires setup.py to be executed from " 314 | "its immediate directory (like 'python setup.py COMMAND'), " 315 | "or in a way that lets it use sys.argv[0] to find the root " 316 | "(like 'python path/to/setup.py COMMAND')." 317 | ) 318 | raise VersioneerBadRootError(err) 319 | try: 320 | # Certain runtime workflows (setup.py install/develop in a setuptools 321 | # tree) execute all dependencies in a single python process, so 322 | # "versioneer" may be imported multiple times, and python's shared 323 | # module-import table will cache the first one. So we can't use 324 | # os.path.dirname(__file__), as that will find whichever 325 | # versioneer.py was first imported, even in later projects. 326 | me = os.path.realpath(os.path.abspath(__file__)) 327 | me_dir = os.path.normcase(os.path.splitext(me)[0]) 328 | vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) 329 | if me_dir != vsr_dir: 330 | print( 331 | "Warning: build in %s is using versioneer.py from %s" 332 | % (os.path.dirname(me), versioneer_py) 333 | ) 334 | except NameError: 335 | pass 336 | return root 337 | 338 | 339 | def get_config_from_root(root): 340 | """Read the project setup.cfg file to determine Versioneer config.""" 341 | # This might raise EnvironmentError (if setup.cfg is missing), or 342 | # configparser.NoSectionError (if it lacks a [versioneer] section), or 343 | # configparser.NoOptionError (if it lacks "VCS="). See the docstring at 344 | # the top of versioneer.py for instructions on writing your setup.cfg . 
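    # NOTE: SafeConfigParser and readfp() below are deprecated aliases on
    # Python 3 (ConfigParser and read_file() are the modern spellings); they
    # are kept here as shipped with versioneer 0.18.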
345 | setup_cfg = os.path.join(root, "setup.cfg") 346 | parser = configparser.SafeConfigParser() 347 | with open(setup_cfg, "r") as f: 348 | parser.readfp(f) 349 | VCS = parser.get("versioneer", "VCS") # mandatory 350 | 351 | def get(parser, name): 352 | if parser.has_option("versioneer", name): 353 | return parser.get("versioneer", name) 354 | return None 355 | 356 | cfg = VersioneerConfig() 357 | cfg.VCS = VCS 358 | cfg.style = get(parser, "style") or "" 359 | cfg.versionfile_source = get(parser, "versionfile_source") 360 | cfg.versionfile_build = get(parser, "versionfile_build") 361 | cfg.tag_prefix = get(parser, "tag_prefix") 362 | if cfg.tag_prefix in ("''", '""'): 363 | cfg.tag_prefix = "" 364 | cfg.parentdir_prefix = get(parser, "parentdir_prefix") 365 | cfg.verbose = get(parser, "verbose") 366 | return cfg 367 | 368 | 369 | class NotThisMethod(Exception): 370 | """Exception raised if a method is not valid for the current scenario.""" 371 | 372 | 373 | # these dictionaries contain VCS-specific tools 374 | LONG_VERSION_PY = {} 375 | HANDLERS = {} 376 | 377 | 378 | def register_vcs_handler(vcs, method): # decorator 379 | """Decorator to mark a method as the handler for a particular VCS.""" 380 | 381 | def decorate(f): 382 | """Store f in HANDLERS[vcs][method].""" 383 | if vcs not in HANDLERS: 384 | HANDLERS[vcs] = {} 385 | HANDLERS[vcs][method] = f 386 | return f 387 | 388 | return decorate 389 | 390 | 391 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): 392 | """Call the given command(s).""" 393 | assert isinstance(commands, list) 394 | p = None 395 | for c in commands: 396 | try: 397 | dispcmd = str([c] + args) 398 | # remember shell=False, so use git.cmd on windows, not just git 399 | p = subprocess.Popen( 400 | [c] + args, 401 | cwd=cwd, 402 | env=env, 403 | stdout=subprocess.PIPE, 404 | stderr=(subprocess.PIPE if hide_stderr else None), 405 | ) 406 | break 407 | except EnvironmentError: 408 | e = sys.exc_info()[1] 409 | if e.errno == errno.ENOENT: 410 | continue 411 | if verbose: 412 | print("unable to run %s" % dispcmd) 413 | print(e) 414 | return None, None 415 | else: 416 | if verbose: 417 | print("unable to find command, tried %s" % (commands,)) 418 | return None, None 419 | stdout = p.communicate()[0].strip() 420 | if sys.version_info[0] >= 3: 421 | stdout = stdout.decode() 422 | if p.returncode != 0: 423 | if verbose: 424 | print("unable to run %s (error)" % dispcmd) 425 | print("stdout was %s" % stdout) 426 | return None, p.returncode 427 | return stdout, p.returncode 428 | 429 | 430 | LONG_VERSION_PY[ 431 | "git" 432 | ] = ''' 433 | # This file helps to compute a version number in source trees obtained from 434 | # git-archive tarball (such as those provided by githubs download-from-tag 435 | # feature). Distribution tarballs (built by setup.py sdist) and build 436 | # directories (produced by setup.py build) will contain a much shorter file 437 | # that just contains the computed version number. 438 | 439 | # This file is released into the public domain. Generated by 440 | # versioneer-0.18 (https://github.com/warner/python-versioneer) 441 | 442 | """Git implementation of _version.py.""" 443 | 444 | import errno 445 | import os 446 | import re 447 | import subprocess 448 | import sys 449 | 450 | 451 | def get_keywords(): 452 | """Get the keywords needed to look up the version information.""" 453 | # these strings will be replaced by git during git-archive. 
454 | # setup.py/versioneer.py will grep for the variable names, so they must 455 | # each be defined on a line of their own. _version.py will just call 456 | # get_keywords(). 457 | git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" 458 | git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" 459 | git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" 460 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} 461 | return keywords 462 | 463 | 464 | class VersioneerConfig: 465 | """Container for Versioneer configuration parameters.""" 466 | 467 | 468 | def get_config(): 469 | """Create, populate and return the VersioneerConfig() object.""" 470 | # these strings are filled in when 'setup.py versioneer' creates 471 | # _version.py 472 | cfg = VersioneerConfig() 473 | cfg.VCS = "git" 474 | cfg.style = "%(STYLE)s" 475 | cfg.tag_prefix = "%(TAG_PREFIX)s" 476 | cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" 477 | cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" 478 | cfg.verbose = False 479 | return cfg 480 | 481 | 482 | class NotThisMethod(Exception): 483 | """Exception raised if a method is not valid for the current scenario.""" 484 | 485 | 486 | LONG_VERSION_PY = {} 487 | HANDLERS = {} 488 | 489 | 490 | def register_vcs_handler(vcs, method): # decorator 491 | """Decorator to mark a method as the handler for a particular VCS.""" 492 | def decorate(f): 493 | """Store f in HANDLERS[vcs][method].""" 494 | if vcs not in HANDLERS: 495 | HANDLERS[vcs] = {} 496 | HANDLERS[vcs][method] = f 497 | return f 498 | return decorate 499 | 500 | 501 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, 502 | env=None): 503 | """Call the given command(s).""" 504 | assert isinstance(commands, list) 505 | p = None 506 | for c in commands: 507 | try: 508 | dispcmd = str([c] + args) 509 | # remember shell=False, so use git.cmd on windows, not just git 510 | p = subprocess.Popen([c] + args, cwd=cwd, env=env, 511 | stdout=subprocess.PIPE, 512 | stderr=(subprocess.PIPE if hide_stderr 513 | else None)) 514 | break 515 | except EnvironmentError: 516 | e = sys.exc_info()[1] 517 | if e.errno == errno.ENOENT: 518 | continue 519 | if verbose: 520 | print("unable to run %%s" %% dispcmd) 521 | print(e) 522 | return None, None 523 | else: 524 | if verbose: 525 | print("unable to find command, tried %%s" %% (commands,)) 526 | return None, None 527 | stdout = p.communicate()[0].strip() 528 | if sys.version_info[0] >= 3: 529 | stdout = stdout.decode() 530 | if p.returncode != 0: 531 | if verbose: 532 | print("unable to run %%s (error)" %% dispcmd) 533 | print("stdout was %%s" %% stdout) 534 | return None, p.returncode 535 | return stdout, p.returncode 536 | 537 | 538 | def versions_from_parentdir(parentdir_prefix, root, verbose): 539 | """Try to determine the version from the parent directory name. 540 | 541 | Source tarballs conventionally unpack into a directory that includes both 542 | the project name and a version string. 
We will also support searching up 543 | two directory levels for an appropriately named parent directory 544 | """ 545 | rootdirs = [] 546 | 547 | for i in range(3): 548 | dirname = os.path.basename(root) 549 | if dirname.startswith(parentdir_prefix): 550 | return {"version": dirname[len(parentdir_prefix):], 551 | "full-revisionid": None, 552 | "dirty": False, "error": None, "date": None} 553 | else: 554 | rootdirs.append(root) 555 | root = os.path.dirname(root) # up a level 556 | 557 | if verbose: 558 | print("Tried directories %%s but none started with prefix %%s" %% 559 | (str(rootdirs), parentdir_prefix)) 560 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 561 | 562 | 563 | @register_vcs_handler("git", "get_keywords") 564 | def git_get_keywords(versionfile_abs): 565 | """Extract version information from the given file.""" 566 | # the code embedded in _version.py can just fetch the value of these 567 | # keywords. When used from setup.py, we don't want to import _version.py, 568 | # so we do it with a regexp instead. This function is not used from 569 | # _version.py. 570 | keywords = {} 571 | try: 572 | f = open(versionfile_abs, "r") 573 | for line in f.readlines(): 574 | if line.strip().startswith("git_refnames ="): 575 | mo = re.search(r'=\s*"(.*)"', line) 576 | if mo: 577 | keywords["refnames"] = mo.group(1) 578 | if line.strip().startswith("git_full ="): 579 | mo = re.search(r'=\s*"(.*)"', line) 580 | if mo: 581 | keywords["full"] = mo.group(1) 582 | if line.strip().startswith("git_date ="): 583 | mo = re.search(r'=\s*"(.*)"', line) 584 | if mo: 585 | keywords["date"] = mo.group(1) 586 | f.close() 587 | except EnvironmentError: 588 | pass 589 | return keywords 590 | 591 | 592 | @register_vcs_handler("git", "keywords") 593 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 594 | """Get version information from git keywords.""" 595 | if not keywords: 596 | raise NotThisMethod("no keywords at all, weird") 597 | date = keywords.get("date") 598 | if date is not None: 599 | # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant 600 | # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 601 | # -like" string, which we must then edit to make compliant), because 602 | # it's been around since git-1.5.3, and it's too difficult to 603 | # discover which version we're using, or to work around using an 604 | # older one. 605 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 606 | refnames = keywords["refnames"].strip() 607 | if refnames.startswith("$Format"): 608 | if verbose: 609 | print("keywords are unexpanded, not using") 610 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 611 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 612 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 613 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 614 | TAG = "tag: " 615 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 616 | if not tags: 617 | # Either we're using git < 1.8.3, or there really are no tags. We use 618 | # a heuristic: assume all version tags have a digit. The old git %%d 619 | # expansion behaves like git log --decorate=short and strips out the 620 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 621 | # between branches and tags. By ignoring refnames without digits, we 622 | # filter out many common branch names like "release" and 623 | # "stabilization", as well as "HEAD" and "master". 
624 | tags = set([r for r in refs if re.search(r'\d', r)]) 625 | if verbose: 626 | print("discarding '%%s', no digits" %% ",".join(refs - tags)) 627 | if verbose: 628 | print("likely tags: %%s" %% ",".join(sorted(tags))) 629 | for ref in sorted(tags): 630 | # sorting will prefer e.g. "2.0" over "2.0rc1" 631 | if ref.startswith(tag_prefix): 632 | r = ref[len(tag_prefix):] 633 | if verbose: 634 | print("picking %%s" %% r) 635 | return {"version": r, 636 | "full-revisionid": keywords["full"].strip(), 637 | "dirty": False, "error": None, 638 | "date": date} 639 | # no suitable tags, so version is "0+unknown", but full hex is still there 640 | if verbose: 641 | print("no suitable tags, using unknown + full revision id") 642 | return {"version": "0+unknown", 643 | "full-revisionid": keywords["full"].strip(), 644 | "dirty": False, "error": "no suitable tags", "date": None} 645 | 646 | 647 | @register_vcs_handler("git", "pieces_from_vcs") 648 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 649 | """Get version from 'git describe' in the root of the source tree. 650 | 651 | This only gets called if the git-archive 'subst' keywords were *not* 652 | expanded, and _version.py hasn't already been rewritten with a short 653 | version string, meaning we're inside a checked out source tree. 654 | """ 655 | GITS = ["git"] 656 | if sys.platform == "win32": 657 | GITS = ["git.cmd", "git.exe"] 658 | 659 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, 660 | hide_stderr=True) 661 | if rc != 0: 662 | if verbose: 663 | print("Directory %%s not under git control" %% root) 664 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 665 | 666 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 667 | # if there isn't one, this yields HEX[-dirty] (no NUM) 668 | describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", 669 | "--always", "--long", 670 | "--match", "%%s*" %% tag_prefix], 671 | cwd=root) 672 | # --long was added in git-1.5.5 673 | if describe_out is None: 674 | raise NotThisMethod("'git describe' failed") 675 | describe_out = describe_out.strip() 676 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 677 | if full_out is None: 678 | raise NotThisMethod("'git rev-parse' failed") 679 | full_out = full_out.strip() 680 | 681 | pieces = {} 682 | pieces["long"] = full_out 683 | pieces["short"] = full_out[:7] # maybe improved later 684 | pieces["error"] = None 685 | 686 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 687 | # TAG might have hyphens. 688 | git_describe = describe_out 689 | 690 | # look for -dirty suffix 691 | dirty = git_describe.endswith("-dirty") 692 | pieces["dirty"] = dirty 693 | if dirty: 694 | git_describe = git_describe[:git_describe.rindex("-dirty")] 695 | 696 | # now we have TAG-NUM-gHEX or HEX 697 | 698 | if "-" in git_describe: 699 | # TAG-NUM-gHEX 700 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 701 | if not mo: 702 | # unparseable. Maybe git-describe is misbehaving? 
703 | pieces["error"] = ("unable to parse git-describe output: '%%s'" 704 | %% describe_out) 705 | return pieces 706 | 707 | # tag 708 | full_tag = mo.group(1) 709 | if not full_tag.startswith(tag_prefix): 710 | if verbose: 711 | fmt = "tag '%%s' doesn't start with prefix '%%s'" 712 | print(fmt %% (full_tag, tag_prefix)) 713 | pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" 714 | %% (full_tag, tag_prefix)) 715 | return pieces 716 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 717 | 718 | # distance: number of commits since tag 719 | pieces["distance"] = int(mo.group(2)) 720 | 721 | # commit: short hex revision ID 722 | pieces["short"] = mo.group(3) 723 | 724 | else: 725 | # HEX: no tags 726 | pieces["closest-tag"] = None 727 | count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], 728 | cwd=root) 729 | pieces["distance"] = int(count_out) # total number of commits 730 | 731 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 732 | date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], 733 | cwd=root)[0].strip() 734 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 735 | 736 | return pieces 737 | 738 | 739 | def plus_or_dot(pieces): 740 | """Return a + if we don't already have one, else return a .""" 741 | if "+" in pieces.get("closest-tag", ""): 742 | return "." 743 | return "+" 744 | 745 | 746 | def render_pep440(pieces): 747 | """Build up version string, with post-release "local version identifier". 748 | 749 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 750 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 751 | 752 | Exceptions: 753 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 754 | """ 755 | if pieces["closest-tag"]: 756 | rendered = pieces["closest-tag"] 757 | if pieces["distance"] or pieces["dirty"]: 758 | rendered += plus_or_dot(pieces) 759 | rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) 760 | if pieces["dirty"]: 761 | rendered += ".dirty" 762 | else: 763 | # exception #1 764 | rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], 765 | pieces["short"]) 766 | if pieces["dirty"]: 767 | rendered += ".dirty" 768 | return rendered 769 | 770 | 771 | def render_pep440_pre(pieces): 772 | """TAG[.post.devDISTANCE] -- No -dirty. 773 | 774 | Exceptions: 775 | 1: no tags. 0.post.devDISTANCE 776 | """ 777 | if pieces["closest-tag"]: 778 | rendered = pieces["closest-tag"] 779 | if pieces["distance"]: 780 | rendered += ".post.dev%%d" %% pieces["distance"] 781 | else: 782 | # exception #1 783 | rendered = "0.post.dev%%d" %% pieces["distance"] 784 | return rendered 785 | 786 | 787 | def render_pep440_post(pieces): 788 | """TAG[.postDISTANCE[.dev0]+gHEX] . 789 | 790 | The ".dev0" means dirty. Note that .dev0 sorts backwards 791 | (a dirty tree will appear "older" than the corresponding clean one), 792 | but you shouldn't be releasing software with -dirty anyways. 793 | 794 | Exceptions: 795 | 1: no tags. 
0.postDISTANCE[.dev0] 796 | """ 797 | if pieces["closest-tag"]: 798 | rendered = pieces["closest-tag"] 799 | if pieces["distance"] or pieces["dirty"]: 800 | rendered += ".post%%d" %% pieces["distance"] 801 | if pieces["dirty"]: 802 | rendered += ".dev0" 803 | rendered += plus_or_dot(pieces) 804 | rendered += "g%%s" %% pieces["short"] 805 | else: 806 | # exception #1 807 | rendered = "0.post%%d" %% pieces["distance"] 808 | if pieces["dirty"]: 809 | rendered += ".dev0" 810 | rendered += "+g%%s" %% pieces["short"] 811 | return rendered 812 | 813 | 814 | def render_pep440_old(pieces): 815 | """TAG[.postDISTANCE[.dev0]] . 816 | 817 | The ".dev0" means dirty. 818 | 819 | Eexceptions: 820 | 1: no tags. 0.postDISTANCE[.dev0] 821 | """ 822 | if pieces["closest-tag"]: 823 | rendered = pieces["closest-tag"] 824 | if pieces["distance"] or pieces["dirty"]: 825 | rendered += ".post%%d" %% pieces["distance"] 826 | if pieces["dirty"]: 827 | rendered += ".dev0" 828 | else: 829 | # exception #1 830 | rendered = "0.post%%d" %% pieces["distance"] 831 | if pieces["dirty"]: 832 | rendered += ".dev0" 833 | return rendered 834 | 835 | 836 | def render_git_describe(pieces): 837 | """TAG[-DISTANCE-gHEX][-dirty]. 838 | 839 | Like 'git describe --tags --dirty --always'. 840 | 841 | Exceptions: 842 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 843 | """ 844 | if pieces["closest-tag"]: 845 | rendered = pieces["closest-tag"] 846 | if pieces["distance"]: 847 | rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) 848 | else: 849 | # exception #1 850 | rendered = pieces["short"] 851 | if pieces["dirty"]: 852 | rendered += "-dirty" 853 | return rendered 854 | 855 | 856 | def render_git_describe_long(pieces): 857 | """TAG-DISTANCE-gHEX[-dirty]. 858 | 859 | Like 'git describe --tags --dirty --always -long'. 860 | The distance/hash is unconditional. 861 | 862 | Exceptions: 863 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 864 | """ 865 | if pieces["closest-tag"]: 866 | rendered = pieces["closest-tag"] 867 | rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) 868 | else: 869 | # exception #1 870 | rendered = pieces["short"] 871 | if pieces["dirty"]: 872 | rendered += "-dirty" 873 | return rendered 874 | 875 | 876 | def render(pieces, style): 877 | """Render the given version pieces into the requested style.""" 878 | if pieces["error"]: 879 | return {"version": "unknown", 880 | "full-revisionid": pieces.get("long"), 881 | "dirty": None, 882 | "error": pieces["error"], 883 | "date": None} 884 | 885 | if not style or style == "default": 886 | style = "pep440" # the default 887 | 888 | if style == "pep440": 889 | rendered = render_pep440(pieces) 890 | elif style == "pep440-pre": 891 | rendered = render_pep440_pre(pieces) 892 | elif style == "pep440-post": 893 | rendered = render_pep440_post(pieces) 894 | elif style == "pep440-old": 895 | rendered = render_pep440_old(pieces) 896 | elif style == "git-describe": 897 | rendered = render_git_describe(pieces) 898 | elif style == "git-describe-long": 899 | rendered = render_git_describe_long(pieces) 900 | else: 901 | raise ValueError("unknown style '%%s'" %% style) 902 | 903 | return {"version": rendered, "full-revisionid": pieces["long"], 904 | "dirty": pieces["dirty"], "error": None, 905 | "date": pieces.get("date")} 906 | 907 | 908 | def get_versions(): 909 | """Get version information or return default if unable to do so.""" 910 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. 
If we have 911 | # __file__, we can work backwards from there to the root. Some 912 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 913 | # case we can only use expanded keywords. 914 | 915 | cfg = get_config() 916 | verbose = cfg.verbose 917 | 918 | try: 919 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, 920 | verbose) 921 | except NotThisMethod: 922 | pass 923 | 924 | try: 925 | root = os.path.realpath(__file__) 926 | # versionfile_source is the relative path from the top of the source 927 | # tree (where the .git directory might live) to this file. Invert 928 | # this to find the root from __file__. 929 | for i in cfg.versionfile_source.split('/'): 930 | root = os.path.dirname(root) 931 | except NameError: 932 | return {"version": "0+unknown", "full-revisionid": None, 933 | "dirty": None, 934 | "error": "unable to find root of source tree", 935 | "date": None} 936 | 937 | try: 938 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 939 | return render(pieces, cfg.style) 940 | except NotThisMethod: 941 | pass 942 | 943 | try: 944 | if cfg.parentdir_prefix: 945 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 946 | except NotThisMethod: 947 | pass 948 | 949 | return {"version": "0+unknown", "full-revisionid": None, 950 | "dirty": None, 951 | "error": "unable to compute version", "date": None} 952 | ''' 953 | 954 | 955 | @register_vcs_handler("git", "get_keywords") 956 | def git_get_keywords(versionfile_abs): 957 | """Extract version information from the given file.""" 958 | # the code embedded in _version.py can just fetch the value of these 959 | # keywords. When used from setup.py, we don't want to import _version.py, 960 | # so we do it with a regexp instead. This function is not used from 961 | # _version.py. 962 | keywords = {} 963 | try: 964 | f = open(versionfile_abs, "r") 965 | for line in f.readlines(): 966 | if line.strip().startswith("git_refnames ="): 967 | mo = re.search(r'=\s*"(.*)"', line) 968 | if mo: 969 | keywords["refnames"] = mo.group(1) 970 | if line.strip().startswith("git_full ="): 971 | mo = re.search(r'=\s*"(.*)"', line) 972 | if mo: 973 | keywords["full"] = mo.group(1) 974 | if line.strip().startswith("git_date ="): 975 | mo = re.search(r'=\s*"(.*)"', line) 976 | if mo: 977 | keywords["date"] = mo.group(1) 978 | f.close() 979 | except EnvironmentError: 980 | pass 981 | return keywords 982 | 983 | 984 | @register_vcs_handler("git", "keywords") 985 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 986 | """Get version information from git keywords.""" 987 | if not keywords: 988 | raise NotThisMethod("no keywords at all, weird") 989 | date = keywords.get("date") 990 | if date is not None: 991 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant 992 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 993 | # -like" string, which we must then edit to make compliant), because 994 | # it's been around since git-1.5.3, and it's too difficult to 995 | # discover which version we're using, or to work around using an 996 | # older one. 
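        # e.g. "2021-01-01 12:00:00 +0000" -> "2021-01-01T12:00:00+0000"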
997 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 998 | refnames = keywords["refnames"].strip() 999 | if refnames.startswith("$Format"): 1000 | if verbose: 1001 | print("keywords are unexpanded, not using") 1002 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 1003 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 1004 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 1005 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 1006 | TAG = "tag: " 1007 | tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) 1008 | if not tags: 1009 | # Either we're using git < 1.8.3, or there really are no tags. We use 1010 | # a heuristic: assume all version tags have a digit. The old git %d 1011 | # expansion behaves like git log --decorate=short and strips out the 1012 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 1013 | # between branches and tags. By ignoring refnames without digits, we 1014 | # filter out many common branch names like "release" and 1015 | # "stabilization", as well as "HEAD" and "master". 1016 | tags = set([r for r in refs if re.search(r"\d", r)]) 1017 | if verbose: 1018 | print("discarding '%s', no digits" % ",".join(refs - tags)) 1019 | if verbose: 1020 | print("likely tags: %s" % ",".join(sorted(tags))) 1021 | for ref in sorted(tags): 1022 | # sorting will prefer e.g. "2.0" over "2.0rc1" 1023 | if ref.startswith(tag_prefix): 1024 | r = ref[len(tag_prefix) :] 1025 | if verbose: 1026 | print("picking %s" % r) 1027 | return { 1028 | "version": r, 1029 | "full-revisionid": keywords["full"].strip(), 1030 | "dirty": False, 1031 | "error": None, 1032 | "date": date, 1033 | } 1034 | # no suitable tags, so version is "0+unknown", but full hex is still there 1035 | if verbose: 1036 | print("no suitable tags, using unknown + full revision id") 1037 | return { 1038 | "version": "0+unknown", 1039 | "full-revisionid": keywords["full"].strip(), 1040 | "dirty": False, 1041 | "error": "no suitable tags", 1042 | "date": None, 1043 | } 1044 | 1045 | 1046 | @register_vcs_handler("git", "pieces_from_vcs") 1047 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 1048 | """Get version from 'git describe' in the root of the source tree. 1049 | 1050 | This only gets called if the git-archive 'subst' keywords were *not* 1051 | expanded, and _version.py hasn't already been rewritten with a short 1052 | version string, meaning we're inside a checked out source tree. 
1053 | """ 1054 | GITS = ["git"] 1055 | if sys.platform == "win32": 1056 | GITS = ["git.cmd", "git.exe"] 1057 | 1058 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) 1059 | if rc != 0: 1060 | if verbose: 1061 | print("Directory %s not under git control" % root) 1062 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 1063 | 1064 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 1065 | # if there isn't one, this yields HEX[-dirty] (no NUM) 1066 | describe_out, rc = run_command( 1067 | GITS, 1068 | [ 1069 | "describe", 1070 | "--tags", 1071 | "--dirty", 1072 | "--always", 1073 | "--long", 1074 | "--match", 1075 | "%s*" % tag_prefix, 1076 | ], 1077 | cwd=root, 1078 | ) 1079 | # --long was added in git-1.5.5 1080 | if describe_out is None: 1081 | raise NotThisMethod("'git describe' failed") 1082 | describe_out = describe_out.strip() 1083 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 1084 | if full_out is None: 1085 | raise NotThisMethod("'git rev-parse' failed") 1086 | full_out = full_out.strip() 1087 | 1088 | pieces = {} 1089 | pieces["long"] = full_out 1090 | pieces["short"] = full_out[:7] # maybe improved later 1091 | pieces["error"] = None 1092 | 1093 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 1094 | # TAG might have hyphens. 1095 | git_describe = describe_out 1096 | 1097 | # look for -dirty suffix 1098 | dirty = git_describe.endswith("-dirty") 1099 | pieces["dirty"] = dirty 1100 | if dirty: 1101 | git_describe = git_describe[: git_describe.rindex("-dirty")] 1102 | 1103 | # now we have TAG-NUM-gHEX or HEX 1104 | 1105 | if "-" in git_describe: 1106 | # TAG-NUM-gHEX 1107 | mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) 1108 | if not mo: 1109 | # unparseable. Maybe git-describe is misbehaving? 1110 | pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out 1111 | return pieces 1112 | 1113 | # tag 1114 | full_tag = mo.group(1) 1115 | if not full_tag.startswith(tag_prefix): 1116 | if verbose: 1117 | fmt = "tag '%s' doesn't start with prefix '%s'" 1118 | print(fmt % (full_tag, tag_prefix)) 1119 | pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( 1120 | full_tag, 1121 | tag_prefix, 1122 | ) 1123 | return pieces 1124 | pieces["closest-tag"] = full_tag[len(tag_prefix) :] 1125 | 1126 | # distance: number of commits since tag 1127 | pieces["distance"] = int(mo.group(2)) 1128 | 1129 | # commit: short hex revision ID 1130 | pieces["short"] = mo.group(3) 1131 | 1132 | else: 1133 | # HEX: no tags 1134 | pieces["closest-tag"] = None 1135 | count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) 1136 | pieces["distance"] = int(count_out) # total number of commits 1137 | 1138 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 1139 | date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ 1140 | 0 1141 | ].strip() 1142 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 1143 | 1144 | return pieces 1145 | 1146 | 1147 | def do_vcs_install(manifest_in, versionfile_source, ipy): 1148 | """Git-specific installation logic for Versioneer. 1149 | 1150 | For Git, this means creating/changing .gitattributes to mark _version.py 1151 | for export-subst keyword substitution. 
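
    For example, with ``versionfile_source = xhistogram/_version.py`` (as in
    this project's setup.cfg) the line added to .gitattributes is
    ``xhistogram/_version.py export-subst``.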
1152 | """ 1153 | GITS = ["git"] 1154 | if sys.platform == "win32": 1155 | GITS = ["git.cmd", "git.exe"] 1156 | files = [manifest_in, versionfile_source] 1157 | if ipy: 1158 | files.append(ipy) 1159 | try: 1160 | me = __file__ 1161 | if me.endswith(".pyc") or me.endswith(".pyo"): 1162 | me = os.path.splitext(me)[0] + ".py" 1163 | versioneer_file = os.path.relpath(me) 1164 | except NameError: 1165 | versioneer_file = "versioneer.py" 1166 | files.append(versioneer_file) 1167 | present = False 1168 | try: 1169 | f = open(".gitattributes", "r") 1170 | for line in f.readlines(): 1171 | if line.strip().startswith(versionfile_source): 1172 | if "export-subst" in line.strip().split()[1:]: 1173 | present = True 1174 | f.close() 1175 | except EnvironmentError: 1176 | pass 1177 | if not present: 1178 | f = open(".gitattributes", "a+") 1179 | f.write("%s export-subst\n" % versionfile_source) 1180 | f.close() 1181 | files.append(".gitattributes") 1182 | run_command(GITS, ["add", "--"] + files) 1183 | 1184 | 1185 | def versions_from_parentdir(parentdir_prefix, root, verbose): 1186 | """Try to determine the version from the parent directory name. 1187 | 1188 | Source tarballs conventionally unpack into a directory that includes both 1189 | the project name and a version string. We will also support searching up 1190 | two directory levels for an appropriately named parent directory 1191 | """ 1192 | rootdirs = [] 1193 | 1194 | for i in range(3): 1195 | dirname = os.path.basename(root) 1196 | if dirname.startswith(parentdir_prefix): 1197 | return { 1198 | "version": dirname[len(parentdir_prefix) :], 1199 | "full-revisionid": None, 1200 | "dirty": False, 1201 | "error": None, 1202 | "date": None, 1203 | } 1204 | else: 1205 | rootdirs.append(root) 1206 | root = os.path.dirname(root) # up a level 1207 | 1208 | if verbose: 1209 | print( 1210 | "Tried directories %s but none started with prefix %s" 1211 | % (str(rootdirs), parentdir_prefix) 1212 | ) 1213 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 1214 | 1215 | 1216 | SHORT_VERSION_PY = """ 1217 | # This file was generated by 'versioneer.py' (0.18) from 1218 | # revision-control system data, or from the parent directory name of an 1219 | # unpacked source archive. Distribution tarballs contain a pre-generated copy 1220 | # of this file. 
1221 | 1222 | import json 1223 | 1224 | version_json = ''' 1225 | %s 1226 | ''' # END VERSION_JSON 1227 | 1228 | 1229 | def get_versions(): 1230 | return json.loads(version_json) 1231 | """ 1232 | 1233 | 1234 | def versions_from_file(filename): 1235 | """Try to determine the version from _version.py if present.""" 1236 | try: 1237 | with open(filename) as f: 1238 | contents = f.read() 1239 | except EnvironmentError: 1240 | raise NotThisMethod("unable to read _version.py") 1241 | mo = re.search( 1242 | r"version_json = '''\n(.*)''' # END VERSION_JSON", contents, re.M | re.S 1243 | ) 1244 | if not mo: 1245 | mo = re.search( 1246 | r"version_json = '''\r\n(.*)''' # END VERSION_JSON", contents, re.M | re.S 1247 | ) 1248 | if not mo: 1249 | raise NotThisMethod("no version_json in _version.py") 1250 | return json.loads(mo.group(1)) 1251 | 1252 | 1253 | def write_to_version_file(filename, versions): 1254 | """Write the given version number to the given _version.py file.""" 1255 | os.unlink(filename) 1256 | contents = json.dumps(versions, sort_keys=True, indent=1, separators=(",", ": ")) 1257 | with open(filename, "w") as f: 1258 | f.write(SHORT_VERSION_PY % contents) 1259 | 1260 | print("set %s to '%s'" % (filename, versions["version"])) 1261 | 1262 | 1263 | def plus_or_dot(pieces): 1264 | """Return a + if we don't already have one, else return a .""" 1265 | if "+" in pieces.get("closest-tag", ""): 1266 | return "." 1267 | return "+" 1268 | 1269 | 1270 | def render_pep440(pieces): 1271 | """Build up version string, with post-release "local version identifier". 1272 | 1273 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 1274 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 1275 | 1276 | Exceptions: 1277 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 1278 | """ 1279 | if pieces["closest-tag"]: 1280 | rendered = pieces["closest-tag"] 1281 | if pieces["distance"] or pieces["dirty"]: 1282 | rendered += plus_or_dot(pieces) 1283 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 1284 | if pieces["dirty"]: 1285 | rendered += ".dirty" 1286 | else: 1287 | # exception #1 1288 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) 1289 | if pieces["dirty"]: 1290 | rendered += ".dirty" 1291 | return rendered 1292 | 1293 | 1294 | def render_pep440_pre(pieces): 1295 | """TAG[.post.devDISTANCE] -- No -dirty. 1296 | 1297 | Exceptions: 1298 | 1: no tags. 0.post.devDISTANCE 1299 | """ 1300 | if pieces["closest-tag"]: 1301 | rendered = pieces["closest-tag"] 1302 | if pieces["distance"]: 1303 | rendered += ".post.dev%d" % pieces["distance"] 1304 | else: 1305 | # exception #1 1306 | rendered = "0.post.dev%d" % pieces["distance"] 1307 | return rendered 1308 | 1309 | 1310 | def render_pep440_post(pieces): 1311 | """TAG[.postDISTANCE[.dev0]+gHEX] . 1312 | 1313 | The ".dev0" means dirty. Note that .dev0 sorts backwards 1314 | (a dirty tree will appear "older" than the corresponding clean one), 1315 | but you shouldn't be releasing software with -dirty anyways. 1316 | 1317 | Exceptions: 1318 | 1: no tags. 
0.postDISTANCE[.dev0] 1319 | """ 1320 | if pieces["closest-tag"]: 1321 | rendered = pieces["closest-tag"] 1322 | if pieces["distance"] or pieces["dirty"]: 1323 | rendered += ".post%d" % pieces["distance"] 1324 | if pieces["dirty"]: 1325 | rendered += ".dev0" 1326 | rendered += plus_or_dot(pieces) 1327 | rendered += "g%s" % pieces["short"] 1328 | else: 1329 | # exception #1 1330 | rendered = "0.post%d" % pieces["distance"] 1331 | if pieces["dirty"]: 1332 | rendered += ".dev0" 1333 | rendered += "+g%s" % pieces["short"] 1334 | return rendered 1335 | 1336 | 1337 | def render_pep440_old(pieces): 1338 | """TAG[.postDISTANCE[.dev0]] . 1339 | 1340 | The ".dev0" means dirty. 1341 | 1342 | Exceptions: 1343 | 1: no tags. 0.postDISTANCE[.dev0] 1344 | """ 1345 | if pieces["closest-tag"]: 1346 | rendered = pieces["closest-tag"] 1347 | if pieces["distance"] or pieces["dirty"]: 1348 | rendered += ".post%d" % pieces["distance"] 1349 | if pieces["dirty"]: 1350 | rendered += ".dev0" 1351 | else: 1352 | # exception #1 1353 | rendered = "0.post%d" % pieces["distance"] 1354 | if pieces["dirty"]: 1355 | rendered += ".dev0" 1356 | return rendered 1357 | 1358 | 1359 | def render_git_describe(pieces): 1360 | """TAG[-DISTANCE-gHEX][-dirty]. 1361 | 1362 | Like 'git describe --tags --dirty --always'. 1363 | 1364 | Exceptions: 1365 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 1366 | """ 1367 | if pieces["closest-tag"]: 1368 | rendered = pieces["closest-tag"] 1369 | if pieces["distance"]: 1370 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 1371 | else: 1372 | # exception #1 1373 | rendered = pieces["short"] 1374 | if pieces["dirty"]: 1375 | rendered += "-dirty" 1376 | return rendered 1377 | 1378 | 1379 | def render_git_describe_long(pieces): 1380 | """TAG-DISTANCE-gHEX[-dirty]. 1381 | 1382 | Like 'git describe --tags --dirty --always --long'. 1383 | The distance/hash is unconditional. 1384 | 1385 | Exceptions: 1386 | 1: no tags.
HEX[-dirty] (note: no 'g' prefix) 1387 | """ 1388 | if pieces["closest-tag"]: 1389 | rendered = pieces["closest-tag"] 1390 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 1391 | else: 1392 | # exception #1 1393 | rendered = pieces["short"] 1394 | if pieces["dirty"]: 1395 | rendered += "-dirty" 1396 | return rendered 1397 | 1398 | 1399 | def render(pieces, style): 1400 | """Render the given version pieces into the requested style.""" 1401 | if pieces["error"]: 1402 | return { 1403 | "version": "unknown", 1404 | "full-revisionid": pieces.get("long"), 1405 | "dirty": None, 1406 | "error": pieces["error"], 1407 | "date": None, 1408 | } 1409 | 1410 | if not style or style == "default": 1411 | style = "pep440" # the default 1412 | 1413 | if style == "pep440": 1414 | rendered = render_pep440(pieces) 1415 | elif style == "pep440-pre": 1416 | rendered = render_pep440_pre(pieces) 1417 | elif style == "pep440-post": 1418 | rendered = render_pep440_post(pieces) 1419 | elif style == "pep440-old": 1420 | rendered = render_pep440_old(pieces) 1421 | elif style == "git-describe": 1422 | rendered = render_git_describe(pieces) 1423 | elif style == "git-describe-long": 1424 | rendered = render_git_describe_long(pieces) 1425 | else: 1426 | raise ValueError("unknown style '%s'" % style) 1427 | 1428 | return { 1429 | "version": rendered, 1430 | "full-revisionid": pieces["long"], 1431 | "dirty": pieces["dirty"], 1432 | "error": None, 1433 | "date": pieces.get("date"), 1434 | } 1435 | 1436 | 1437 | class VersioneerBadRootError(Exception): 1438 | """The project root directory is unknown or missing key files.""" 1439 | 1440 | 1441 | def get_versions(verbose=False): 1442 | """Get the project version from whatever source is available. 1443 | 1444 | Returns dict with two keys: 'version' and 'full'. 1445 | """ 1446 | if "versioneer" in sys.modules: 1447 | # see the discussion in cmdclass.py:get_cmdclass() 1448 | del sys.modules["versioneer"] 1449 | 1450 | root = get_root() 1451 | cfg = get_config_from_root(root) 1452 | 1453 | assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" 1454 | handlers = HANDLERS.get(cfg.VCS) 1455 | assert handlers, "unrecognized VCS '%s'" % cfg.VCS 1456 | verbose = verbose or cfg.verbose 1457 | assert ( 1458 | cfg.versionfile_source is not None 1459 | ), "please set versioneer.versionfile_source" 1460 | assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" 1461 | 1462 | versionfile_abs = os.path.join(root, cfg.versionfile_source) 1463 | 1464 | # extract version from first of: _version.py, VCS command (e.g. 'git 1465 | # describe'), parentdir. This is meant to work for developers using a 1466 | # source checkout, for users of a tarball created by 'setup.py sdist', 1467 | # and for users of a tarball/zipball created by 'git archive' or github's 1468 | # download-from-tag feature or the equivalent in other VCSes. 
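# Concretely, the handlers below are tried in order, falling through on NotThisMethod: expanded git-archive keywords, then a previously written _version.py (versions_from_file), then a live 'git describe' (pieces_from_vcs + render), then the parent directory name, with "0+unknown" as the final fallback.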
1469 | 1470 | get_keywords_f = handlers.get("get_keywords") 1471 | from_keywords_f = handlers.get("keywords") 1472 | if get_keywords_f and from_keywords_f: 1473 | try: 1474 | keywords = get_keywords_f(versionfile_abs) 1475 | ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) 1476 | if verbose: 1477 | print("got version from expanded keyword %s" % ver) 1478 | return ver 1479 | except NotThisMethod: 1480 | pass 1481 | 1482 | try: 1483 | ver = versions_from_file(versionfile_abs) 1484 | if verbose: 1485 | print("got version from file %s %s" % (versionfile_abs, ver)) 1486 | return ver 1487 | except NotThisMethod: 1488 | pass 1489 | 1490 | from_vcs_f = handlers.get("pieces_from_vcs") 1491 | if from_vcs_f: 1492 | try: 1493 | pieces = from_vcs_f(cfg.tag_prefix, root, verbose) 1494 | ver = render(pieces, cfg.style) 1495 | if verbose: 1496 | print("got version from VCS %s" % ver) 1497 | return ver 1498 | except NotThisMethod: 1499 | pass 1500 | 1501 | try: 1502 | if cfg.parentdir_prefix: 1503 | ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 1504 | if verbose: 1505 | print("got version from parentdir %s" % ver) 1506 | return ver 1507 | except NotThisMethod: 1508 | pass 1509 | 1510 | if verbose: 1511 | print("unable to compute version") 1512 | 1513 | return { 1514 | "version": "0+unknown", 1515 | "full-revisionid": None, 1516 | "dirty": None, 1517 | "error": "unable to compute version", 1518 | "date": None, 1519 | } 1520 | 1521 | 1522 | def get_version(): 1523 | """Get the short version string for this project.""" 1524 | return get_versions()["version"] 1525 | 1526 | 1527 | def get_cmdclass(): 1528 | """Get the custom setuptools/distutils subclasses used by Versioneer.""" 1529 | if "versioneer" in sys.modules: 1530 | del sys.modules["versioneer"] 1531 | # this fixes the "python setup.py develop" case (also 'install' and 1532 | # 'easy_install .'), in which subdependencies of the main project are 1533 | # built (using setup.py bdist_egg) in the same python process. Assume 1534 | # a main project A and a dependency B, which use different versions 1535 | # of Versioneer. A's setup.py imports A's Versioneer, leaving it in 1536 | # sys.modules by the time B's setup.py is executed, causing B to run 1537 | # with the wrong versioneer. Setuptools wraps the sub-dep builds in a 1538 | # sandbox that restores sys.modules to its pre-build state, so the 1539 | # parent is protected against the child's "import versioneer". By 1540 | # removing ourselves from sys.modules here, before the child build 1541 | # happens, we protect the child from the parent's versioneer too.
1542 | # Also see https://github.com/warner/python-versioneer/issues/52 1543 | 1544 | cmds = {} 1545 | 1546 | # we add "version" to both distutils and setuptools 1547 | from distutils.core import Command 1548 | 1549 | class cmd_version(Command): 1550 | description = "report generated version string" 1551 | user_options = [] 1552 | boolean_options = [] 1553 | 1554 | def initialize_options(self): 1555 | pass 1556 | 1557 | def finalize_options(self): 1558 | pass 1559 | 1560 | def run(self): 1561 | vers = get_versions(verbose=True) 1562 | print("Version: %s" % vers["version"]) 1563 | print(" full-revisionid: %s" % vers.get("full-revisionid")) 1564 | print(" dirty: %s" % vers.get("dirty")) 1565 | print(" date: %s" % vers.get("date")) 1566 | if vers["error"]: 1567 | print(" error: %s" % vers["error"]) 1568 | 1569 | cmds["version"] = cmd_version 1570 | 1571 | # we override "build_py" in both distutils and setuptools 1572 | # 1573 | # most invocation pathways end up running build_py: 1574 | # distutils/build -> build_py 1575 | # distutils/install -> distutils/build ->.. 1576 | # setuptools/bdist_wheel -> distutils/install ->.. 1577 | # setuptools/bdist_egg -> distutils/install_lib -> build_py 1578 | # setuptools/install -> bdist_egg ->.. 1579 | # setuptools/develop -> ? 1580 | # pip install: 1581 | # copies source tree to a tempdir before running egg_info/etc 1582 | # if .git isn't copied too, 'git describe' will fail 1583 | # then does setup.py bdist_wheel, or sometimes setup.py install 1584 | # setup.py egg_info -> ? 1585 | 1586 | # we override different "build_py" commands for both environments 1587 | if "setuptools" in sys.modules: 1588 | from setuptools.command.build_py import build_py as _build_py 1589 | else: 1590 | from distutils.command.build_py import build_py as _build_py 1591 | 1592 | class cmd_build_py(_build_py): 1593 | def run(self): 1594 | root = get_root() 1595 | cfg = get_config_from_root(root) 1596 | versions = get_versions() 1597 | _build_py.run(self) 1598 | # now locate _version.py in the new build/ directory and replace 1599 | # it with an updated value 1600 | if cfg.versionfile_build: 1601 | target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) 1602 | print("UPDATING %s" % target_versionfile) 1603 | write_to_version_file(target_versionfile, versions) 1604 | 1605 | cmds["build_py"] = cmd_build_py 1606 | 1607 | if "cx_Freeze" in sys.modules: # cx_freeze enabled? 1608 | from cx_Freeze.dist import build_exe as _build_exe 1609 | 1610 | # nczeczulin reports that py2exe won't like the pep440-style string 1611 | # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. 1612 | # setup(console=[{ 1613 | # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION 1614 | # "product_version": versioneer.get_version(), 1615 | # ... 
1616 | 1617 | class cmd_build_exe(_build_exe): 1618 | def run(self): 1619 | root = get_root() 1620 | cfg = get_config_from_root(root) 1621 | versions = get_versions() 1622 | target_versionfile = cfg.versionfile_source 1623 | print("UPDATING %s" % target_versionfile) 1624 | write_to_version_file(target_versionfile, versions) 1625 | 1626 | _build_exe.run(self) 1627 | os.unlink(target_versionfile) 1628 | with open(cfg.versionfile_source, "w") as f: 1629 | LONG = LONG_VERSION_PY[cfg.VCS] 1630 | f.write( 1631 | LONG 1632 | % { 1633 | "DOLLAR": "$", 1634 | "STYLE": cfg.style, 1635 | "TAG_PREFIX": cfg.tag_prefix, 1636 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1637 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1638 | } 1639 | ) 1640 | 1641 | cmds["build_exe"] = cmd_build_exe 1642 | del cmds["build_py"] 1643 | 1644 | if "py2exe" in sys.modules: # py2exe enabled? 1645 | try: 1646 | from py2exe.distutils_buildexe import py2exe as _py2exe # py3 1647 | except ImportError: 1648 | from py2exe.build_exe import py2exe as _py2exe # py2 1649 | 1650 | class cmd_py2exe(_py2exe): 1651 | def run(self): 1652 | root = get_root() 1653 | cfg = get_config_from_root(root) 1654 | versions = get_versions() 1655 | target_versionfile = cfg.versionfile_source 1656 | print("UPDATING %s" % target_versionfile) 1657 | write_to_version_file(target_versionfile, versions) 1658 | 1659 | _py2exe.run(self) 1660 | os.unlink(target_versionfile) 1661 | with open(cfg.versionfile_source, "w") as f: 1662 | LONG = LONG_VERSION_PY[cfg.VCS] 1663 | f.write( 1664 | LONG 1665 | % { 1666 | "DOLLAR": "$", 1667 | "STYLE": cfg.style, 1668 | "TAG_PREFIX": cfg.tag_prefix, 1669 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1670 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1671 | } 1672 | ) 1673 | 1674 | cmds["py2exe"] = cmd_py2exe 1675 | 1676 | # we override different "sdist" commands for both environments 1677 | if "setuptools" in sys.modules: 1678 | from setuptools.command.sdist import sdist as _sdist 1679 | else: 1680 | from distutils.command.sdist import sdist as _sdist 1681 | 1682 | class cmd_sdist(_sdist): 1683 | def run(self): 1684 | versions = get_versions() 1685 | self._versioneer_generated_versions = versions 1686 | # unless we update this, the command will keep using the old 1687 | # version 1688 | self.distribution.metadata.version = versions["version"] 1689 | return _sdist.run(self) 1690 | 1691 | def make_release_tree(self, base_dir, files): 1692 | root = get_root() 1693 | cfg = get_config_from_root(root) 1694 | _sdist.make_release_tree(self, base_dir, files) 1695 | # now locate _version.py in the new base_dir directory 1696 | # (remembering that it may be a hardlink) and replace it with an 1697 | # updated value 1698 | target_versionfile = os.path.join(base_dir, cfg.versionfile_source) 1699 | print("UPDATING %s" % target_versionfile) 1700 | write_to_version_file( 1701 | target_versionfile, self._versioneer_generated_versions 1702 | ) 1703 | 1704 | cmds["sdist"] = cmd_sdist 1705 | 1706 | return cmds 1707 | 1708 | 1709 | CONFIG_ERROR = """ 1710 | setup.cfg is missing the necessary Versioneer configuration. 
You need 1711 | a section like: 1712 | 1713 | [versioneer] 1714 | VCS = git 1715 | style = pep440 1716 | versionfile_source = src/myproject/_version.py 1717 | versionfile_build = myproject/_version.py 1718 | tag_prefix = 1719 | parentdir_prefix = myproject- 1720 | 1721 | You will also need to edit your setup.py to use the results: 1722 | 1723 | import versioneer 1724 | setup(version=versioneer.get_version(), 1725 | cmdclass=versioneer.get_cmdclass(), ...) 1726 | 1727 | Please read the docstring in ./versioneer.py for configuration instructions, 1728 | edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. 1729 | """ 1730 | 1731 | SAMPLE_CONFIG = """ 1732 | # See the docstring in versioneer.py for instructions. Note that you must 1733 | # re-run 'versioneer.py setup' after changing this section, and commit the 1734 | # resulting files. 1735 | 1736 | [versioneer] 1737 | #VCS = git 1738 | #style = pep440 1739 | #versionfile_source = 1740 | #versionfile_build = 1741 | #tag_prefix = 1742 | #parentdir_prefix = 1743 | 1744 | """ 1745 | 1746 | INIT_PY_SNIPPET = """ 1747 | from ._version import get_versions 1748 | __version__ = get_versions()['version'] 1749 | del get_versions 1750 | """ 1751 | 1752 | 1753 | def do_setup(): 1754 | """Main VCS-independent setup function for installing Versioneer.""" 1755 | root = get_root() 1756 | try: 1757 | cfg = get_config_from_root(root) 1758 | except ( 1759 | EnvironmentError, 1760 | configparser.NoSectionError, 1761 | configparser.NoOptionError, 1762 | ) as e: 1763 | if isinstance(e, (EnvironmentError, configparser.NoSectionError)): 1764 | print("Adding sample versioneer config to setup.cfg", file=sys.stderr) 1765 | with open(os.path.join(root, "setup.cfg"), "a") as f: 1766 | f.write(SAMPLE_CONFIG) 1767 | print(CONFIG_ERROR, file=sys.stderr) 1768 | return 1 1769 | 1770 | print(" creating %s" % cfg.versionfile_source) 1771 | with open(cfg.versionfile_source, "w") as f: 1772 | LONG = LONG_VERSION_PY[cfg.VCS] 1773 | f.write( 1774 | LONG 1775 | % { 1776 | "DOLLAR": "$", 1777 | "STYLE": cfg.style, 1778 | "TAG_PREFIX": cfg.tag_prefix, 1779 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1780 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1781 | } 1782 | ) 1783 | 1784 | ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") 1785 | if os.path.exists(ipy): 1786 | try: 1787 | with open(ipy, "r") as f: 1788 | old = f.read() 1789 | except EnvironmentError: 1790 | old = "" 1791 | if INIT_PY_SNIPPET not in old: 1792 | print(" appending to %s" % ipy) 1793 | with open(ipy, "a") as f: 1794 | f.write(INIT_PY_SNIPPET) 1795 | else: 1796 | print(" %s unmodified" % ipy) 1797 | else: 1798 | print(" %s doesn't exist, ok" % ipy) 1799 | ipy = None 1800 | 1801 | # Make sure both the top-level "versioneer.py" and versionfile_source 1802 | # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so 1803 | # they'll be copied into source distributions. Pip won't be able to 1804 | # install the package without this. 1805 | manifest_in = os.path.join(root, "MANIFEST.in") 1806 | simple_includes = set() 1807 | try: 1808 | with open(manifest_in, "r") as f: 1809 | for line in f: 1810 | if line.startswith("include "): 1811 | for include in line.split()[1:]: 1812 | simple_includes.add(include) 1813 | except EnvironmentError: 1814 | pass 1815 | # That doesn't cover everything MANIFEST.in can do 1816 | # (http://docs.python.org/2/distutils/sourcedist.html#commands), so 1817 | # it might give some false negatives. 
Appending redundant 'include' 1818 | # lines is safe, though. 1819 | if "versioneer.py" not in simple_includes: 1820 | print(" appending 'versioneer.py' to MANIFEST.in") 1821 | with open(manifest_in, "a") as f: 1822 | f.write("include versioneer.py\n") 1823 | else: 1824 | print(" 'versioneer.py' already in MANIFEST.in") 1825 | if cfg.versionfile_source not in simple_includes: 1826 | print( 1827 | " appending versionfile_source ('%s') to MANIFEST.in" 1828 | % cfg.versionfile_source 1829 | ) 1830 | with open(manifest_in, "a") as f: 1831 | f.write("include %s\n" % cfg.versionfile_source) 1832 | else: 1833 | print(" versionfile_source already in MANIFEST.in") 1834 | 1835 | # Make VCS-specific changes. For git, this means creating/changing 1836 | # .gitattributes to mark _version.py for export-subst keyword 1837 | # substitution. 1838 | do_vcs_install(manifest_in, cfg.versionfile_source, ipy) 1839 | return 0 1840 | 1841 | 1842 | def scan_setup_py(): 1843 | """Validate the contents of setup.py against Versioneer's expectations.""" 1844 | found = set() 1845 | setters = False 1846 | errors = 0 1847 | with open("setup.py", "r") as f: 1848 | for line in f.readlines(): 1849 | if "import versioneer" in line: 1850 | found.add("import") 1851 | if "versioneer.get_cmdclass()" in line: 1852 | found.add("cmdclass") 1853 | if "versioneer.get_version()" in line: 1854 | found.add("get_version") 1855 | if "versioneer.VCS" in line: 1856 | setters = True 1857 | if "versioneer.versionfile_source" in line: 1858 | setters = True 1859 | if len(found) != 3: 1860 | print("") 1861 | print("Your setup.py appears to be missing some important items") 1862 | print("(but I might be wrong). Please make sure it has something") 1863 | print("roughly like the following:") 1864 | print("") 1865 | print(" import versioneer") 1866 | print(" setup( version=versioneer.get_version(),") 1867 | print(" cmdclass=versioneer.get_cmdclass(), ...)") 1868 | print("") 1869 | errors += 1 1870 | if setters: 1871 | print("You should remove lines like 'versioneer.VCS = ' and") 1872 | print("'versioneer.versionfile_source = ' . This configuration") 1873 | print("now lives in setup.cfg, and should be removed from setup.py") 1874 | print("") 1875 | errors += 1 1876 | return errors 1877 | 1878 | 1879 | if __name__ == "__main__": 1880 | cmd = sys.argv[1] 1881 | if cmd == "setup": 1882 | errors = do_setup() 1883 | errors += scan_setup_py() 1884 | if errors: 1885 | sys.exit(1) 1886 | -------------------------------------------------------------------------------- /xhistogram/__init__.py: -------------------------------------------------------------------------------- 1 | from ._version import get_versions 2 | 3 | __version__ = get_versions()["version"] 4 | del get_versions 5 | 6 | __all__ = ["core", "xarray"] 7 | -------------------------------------------------------------------------------- /xhistogram/_version.py: -------------------------------------------------------------------------------- 1 | # This file helps to compute a version number in source trees obtained from 2 | # git-archive tarball (such as those provided by github's download-from-tag 3 | # feature). Distribution tarballs (built by setup.py sdist) and build 4 | # directories (produced by setup.py build) will contain a much shorter file 5 | # that just contains the computed version number. 6 | 7 | # This file is released into the public domain.
Generated by 8 | # versioneer-0.18 (https://github.com/warner/python-versioneer) 9 | 10 | """Git implementation of _version.py.""" 11 | 12 | import errno 13 | import os 14 | import re 15 | import subprocess 16 | import sys 17 | 18 | 19 | def get_keywords(): 20 | """Get the keywords needed to look up the version information.""" 21 | # these strings will be replaced by git during git-archive. 22 | # setup.py/versioneer.py will grep for the variable names, so they must 23 | # each be defined on a line of their own. _version.py will just call 24 | # get_keywords(). 25 | git_refnames = "$Format:%d$" 26 | git_full = "$Format:%H$" 27 | git_date = "$Format:%ci$" 28 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} 29 | return keywords 30 | 31 | 32 | class VersioneerConfig: 33 | """Container for Versioneer configuration parameters.""" 34 | 35 | 36 | def get_config(): 37 | """Create, populate and return the VersioneerConfig() object.""" 38 | # these strings are filled in when 'setup.py versioneer' creates 39 | # _version.py 40 | cfg = VersioneerConfig() 41 | cfg.VCS = "git" 42 | cfg.style = "pep440" 43 | cfg.tag_prefix = "v" 44 | cfg.parentdir_prefix = "xgcm-" 45 | cfg.versionfile_source = "xgcm/_version.py" 46 | cfg.verbose = False 47 | return cfg 48 | 49 | 50 | class NotThisMethod(Exception): 51 | """Exception raised if a method is not valid for the current scenario.""" 52 | 53 | 54 | LONG_VERSION_PY = {} 55 | HANDLERS = {} 56 | 57 | 58 | def register_vcs_handler(vcs, method): # decorator 59 | """Decorator to mark a method as the handler for a particular VCS.""" 60 | 61 | def decorate(f): 62 | """Store f in HANDLERS[vcs][method].""" 63 | if vcs not in HANDLERS: 64 | HANDLERS[vcs] = {} 65 | HANDLERS[vcs][method] = f 66 | return f 67 | 68 | return decorate 69 | 70 | 71 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): 72 | """Call the given command(s).""" 73 | assert isinstance(commands, list) 74 | p = None 75 | for c in commands: 76 | try: 77 | dispcmd = str([c] + args) 78 | # remember shell=False, so use git.cmd on windows, not just git 79 | p = subprocess.Popen( 80 | [c] + args, 81 | cwd=cwd, 82 | env=env, 83 | stdout=subprocess.PIPE, 84 | stderr=(subprocess.PIPE if hide_stderr else None), 85 | ) 86 | break 87 | except EnvironmentError: 88 | e = sys.exc_info()[1] 89 | if e.errno == errno.ENOENT: 90 | continue 91 | if verbose: 92 | print("unable to run %s" % dispcmd) 93 | print(e) 94 | return None, None 95 | else: 96 | if verbose: 97 | print("unable to find command, tried %s" % (commands,)) 98 | return None, None 99 | stdout = p.communicate()[0].strip() 100 | if sys.version_info[0] >= 3: 101 | stdout = stdout.decode() 102 | if p.returncode != 0: 103 | if verbose: 104 | print("unable to run %s (error)" % dispcmd) 105 | print("stdout was %s" % stdout) 106 | return None, p.returncode 107 | return stdout, p.returncode 108 | 109 | 110 | def versions_from_parentdir(parentdir_prefix, root, verbose): 111 | """Try to determine the version from the parent directory name. 112 | 113 | Source tarballs conventionally unpack into a directory that includes both 114 | the project name and a version string. 
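For example (hypothetical directory name): with parentdir_prefix 'xgcm-', an unpacked tree rooted at 'xgcm-0.2.0/' yields version '0.2.0'.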
We will also support searching up 115 | two directory levels for an appropriately named parent directory 116 | """ 117 | rootdirs = [] 118 | 119 | for i in range(3): 120 | dirname = os.path.basename(root) 121 | if dirname.startswith(parentdir_prefix): 122 | return { 123 | "version": dirname[len(parentdir_prefix) :], 124 | "full-revisionid": None, 125 | "dirty": False, 126 | "error": None, 127 | "date": None, 128 | } 129 | else: 130 | rootdirs.append(root) 131 | root = os.path.dirname(root) # up a level 132 | 133 | if verbose: 134 | print( 135 | "Tried directories %s but none started with prefix %s" 136 | % (str(rootdirs), parentdir_prefix) 137 | ) 138 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 139 | 140 | 141 | @register_vcs_handler("git", "get_keywords") 142 | def git_get_keywords(versionfile_abs): 143 | """Extract version information from the given file.""" 144 | # the code embedded in _version.py can just fetch the value of these 145 | # keywords. When used from setup.py, we don't want to import _version.py, 146 | # so we do it with a regexp instead. This function is not used from 147 | # _version.py. 148 | keywords = {} 149 | try: 150 | f = open(versionfile_abs, "r") 151 | for line in f.readlines(): 152 | if line.strip().startswith("git_refnames ="): 153 | mo = re.search(r'=\s*"(.*)"', line) 154 | if mo: 155 | keywords["refnames"] = mo.group(1) 156 | if line.strip().startswith("git_full ="): 157 | mo = re.search(r'=\s*"(.*)"', line) 158 | if mo: 159 | keywords["full"] = mo.group(1) 160 | if line.strip().startswith("git_date ="): 161 | mo = re.search(r'=\s*"(.*)"', line) 162 | if mo: 163 | keywords["date"] = mo.group(1) 164 | f.close() 165 | except EnvironmentError: 166 | pass 167 | return keywords 168 | 169 | 170 | @register_vcs_handler("git", "keywords") 171 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 172 | """Get version information from git keywords.""" 173 | if not keywords: 174 | raise NotThisMethod("no keywords at all, weird") 175 | date = keywords.get("date") 176 | if date is not None: 177 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant 178 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 179 | # -like" string, which we must then edit to make compliant), because 180 | # it's been around since git-1.5.3, and it's too difficult to 181 | # discover which version we're using, or to work around using an 182 | # older one. 183 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 184 | refnames = keywords["refnames"].strip() 185 | if refnames.startswith("$Format"): 186 | if verbose: 187 | print("keywords are unexpanded, not using") 188 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 189 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 190 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 191 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 192 | TAG = "tag: " 193 | tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) 194 | if not tags: 195 | # Either we're using git < 1.8.3, or there really are no tags. We use 196 | # a heuristic: assume all version tags have a digit. The old git %d 197 | # expansion behaves like git log --decorate=short and strips out the 198 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 199 | # between branches and tags. 
By ignoring refnames without digits, we 200 | # filter out many common branch names like "release" and 201 | # "stabilization", as well as "HEAD" and "master". 202 | tags = set([r for r in refs if re.search(r"\d", r)]) 203 | if verbose: 204 | print("discarding '%s', no digits" % ",".join(refs - tags)) 205 | if verbose: 206 | print("likely tags: %s" % ",".join(sorted(tags))) 207 | for ref in sorted(tags): 208 | # sorting will prefer e.g. "2.0" over "2.0rc1" 209 | if ref.startswith(tag_prefix): 210 | r = ref[len(tag_prefix) :] 211 | if verbose: 212 | print("picking %s" % r) 213 | return { 214 | "version": r, 215 | "full-revisionid": keywords["full"].strip(), 216 | "dirty": False, 217 | "error": None, 218 | "date": date, 219 | } 220 | # no suitable tags, so version is "0+unknown", but full hex is still there 221 | if verbose: 222 | print("no suitable tags, using unknown + full revision id") 223 | return { 224 | "version": "0+unknown", 225 | "full-revisionid": keywords["full"].strip(), 226 | "dirty": False, 227 | "error": "no suitable tags", 228 | "date": None, 229 | } 230 | 231 | 232 | @register_vcs_handler("git", "pieces_from_vcs") 233 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 234 | """Get version from 'git describe' in the root of the source tree. 235 | 236 | This only gets called if the git-archive 'subst' keywords were *not* 237 | expanded, and _version.py hasn't already been rewritten with a short 238 | version string, meaning we're inside a checked out source tree. 239 | """ 240 | GITS = ["git"] 241 | if sys.platform == "win32": 242 | GITS = ["git.cmd", "git.exe"] 243 | 244 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) 245 | if rc != 0: 246 | if verbose: 247 | print("Directory %s not under git control" % root) 248 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 249 | 250 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 251 | # if there isn't one, this yields HEX[-dirty] (no NUM) 252 | describe_out, rc = run_command( 253 | GITS, 254 | [ 255 | "describe", 256 | "--tags", 257 | "--dirty", 258 | "--always", 259 | "--long", 260 | "--match", 261 | "%s*" % tag_prefix, 262 | ], 263 | cwd=root, 264 | ) 265 | # --long was added in git-1.5.5 266 | if describe_out is None: 267 | raise NotThisMethod("'git describe' failed") 268 | describe_out = describe_out.strip() 269 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 270 | if full_out is None: 271 | raise NotThisMethod("'git rev-parse' failed") 272 | full_out = full_out.strip() 273 | 274 | pieces = {} 275 | pieces["long"] = full_out 276 | pieces["short"] = full_out[:7] # maybe improved later 277 | pieces["error"] = None 278 | 279 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 280 | # TAG might have hyphens. 281 | git_describe = describe_out 282 | 283 | # look for -dirty suffix 284 | dirty = git_describe.endswith("-dirty") 285 | pieces["dirty"] = dirty 286 | if dirty: 287 | git_describe = git_describe[: git_describe.rindex("-dirty")] 288 | 289 | # now we have TAG-NUM-gHEX or HEX 290 | 291 | if "-" in git_describe: 292 | # TAG-NUM-gHEX 293 | mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) 294 | if not mo: 295 | # unparseable. Maybe git-describe is misbehaving? 
296 | pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out 297 | return pieces 298 | 299 | # tag 300 | full_tag = mo.group(1) 301 | if not full_tag.startswith(tag_prefix): 302 | if verbose: 303 | fmt = "tag '%s' doesn't start with prefix '%s'" 304 | print(fmt % (full_tag, tag_prefix)) 305 | pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( 306 | full_tag, 307 | tag_prefix, 308 | ) 309 | return pieces 310 | pieces["closest-tag"] = full_tag[len(tag_prefix) :] 311 | 312 | # distance: number of commits since tag 313 | pieces["distance"] = int(mo.group(2)) 314 | 315 | # commit: short hex revision ID 316 | pieces["short"] = mo.group(3) 317 | 318 | else: 319 | # HEX: no tags 320 | pieces["closest-tag"] = None 321 | count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) 322 | pieces["distance"] = int(count_out) # total number of commits 323 | 324 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 325 | date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ 326 | 0 327 | ].strip() 328 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 329 | 330 | return pieces 331 | 332 | 333 | def plus_or_dot(pieces): 334 | """Return a + if we don't already have one, else return a .""" 335 | if "+" in pieces.get("closest-tag", ""): 336 | return "." 337 | return "+" 338 | 339 | 340 | def render_pep440(pieces): 341 | """Build up version string, with post-release "local version identifier". 342 | 343 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 344 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 345 | 346 | Exceptions: 347 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 348 | """ 349 | if pieces["closest-tag"]: 350 | rendered = pieces["closest-tag"] 351 | if pieces["distance"] or pieces["dirty"]: 352 | rendered += plus_or_dot(pieces) 353 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 354 | if pieces["dirty"]: 355 | rendered += ".dirty" 356 | else: 357 | # exception #1 358 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) 359 | if pieces["dirty"]: 360 | rendered += ".dirty" 361 | return rendered 362 | 363 | 364 | def render_pep440_pre(pieces): 365 | """TAG[.post.devDISTANCE] -- No -dirty. 366 | 367 | Exceptions: 368 | 1: no tags. 0.post.devDISTANCE 369 | """ 370 | if pieces["closest-tag"]: 371 | rendered = pieces["closest-tag"] 372 | if pieces["distance"]: 373 | rendered += ".post.dev%d" % pieces["distance"] 374 | else: 375 | # exception #1 376 | rendered = "0.post.dev%d" % pieces["distance"] 377 | return rendered 378 | 379 | 380 | def render_pep440_post(pieces): 381 | """TAG[.postDISTANCE[.dev0]+gHEX] . 382 | 383 | The ".dev0" means dirty. Note that .dev0 sorts backwards 384 | (a dirty tree will appear "older" than the corresponding clean one), 385 | but you shouldn't be releasing software with -dirty anyways. 386 | 387 | Exceptions: 388 | 1: no tags. 
0.postDISTANCE[.dev0] 389 | """ 390 | if pieces["closest-tag"]: 391 | rendered = pieces["closest-tag"] 392 | if pieces["distance"] or pieces["dirty"]: 393 | rendered += ".post%d" % pieces["distance"] 394 | if pieces["dirty"]: 395 | rendered += ".dev0" 396 | rendered += plus_or_dot(pieces) 397 | rendered += "g%s" % pieces["short"] 398 | else: 399 | # exception #1 400 | rendered = "0.post%d" % pieces["distance"] 401 | if pieces["dirty"]: 402 | rendered += ".dev0" 403 | rendered += "+g%s" % pieces["short"] 404 | return rendered 405 | 406 | 407 | def render_pep440_old(pieces): 408 | """TAG[.postDISTANCE[.dev0]] . 409 | 410 | The ".dev0" means dirty. 411 | 412 | Exceptions: 413 | 1: no tags. 0.postDISTANCE[.dev0] 414 | """ 415 | if pieces["closest-tag"]: 416 | rendered = pieces["closest-tag"] 417 | if pieces["distance"] or pieces["dirty"]: 418 | rendered += ".post%d" % pieces["distance"] 419 | if pieces["dirty"]: 420 | rendered += ".dev0" 421 | else: 422 | # exception #1 423 | rendered = "0.post%d" % pieces["distance"] 424 | if pieces["dirty"]: 425 | rendered += ".dev0" 426 | return rendered 427 | 428 | 429 | def render_git_describe(pieces): 430 | """TAG[-DISTANCE-gHEX][-dirty]. 431 | 432 | Like 'git describe --tags --dirty --always'. 433 | 434 | Exceptions: 435 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 436 | """ 437 | if pieces["closest-tag"]: 438 | rendered = pieces["closest-tag"] 439 | if pieces["distance"]: 440 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 441 | else: 442 | # exception #1 443 | rendered = pieces["short"] 444 | if pieces["dirty"]: 445 | rendered += "-dirty" 446 | return rendered 447 | 448 | 449 | def render_git_describe_long(pieces): 450 | """TAG-DISTANCE-gHEX[-dirty]. 451 | 452 | Like 'git describe --tags --dirty --always --long'. 453 | The distance/hash is unconditional. 454 | 455 | Exceptions: 456 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 457 | """ 458 | if pieces["closest-tag"]: 459 | rendered = pieces["closest-tag"] 460 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 461 | else: 462 | # exception #1 463 | rendered = pieces["short"] 464 | if pieces["dirty"]: 465 | rendered += "-dirty" 466 | return rendered 467 | 468 | 469 | def render(pieces, style): 470 | """Render the given version pieces into the requested style.""" 471 | if pieces["error"]: 472 | return { 473 | "version": "unknown", 474 | "full-revisionid": pieces.get("long"), 475 | "dirty": None, 476 | "error": pieces["error"], 477 | "date": None, 478 | } 479 | 480 | if not style or style == "default": 481 | style = "pep440" # the default 482 | 483 | if style == "pep440": 484 | rendered = render_pep440(pieces) 485 | elif style == "pep440-pre": 486 | rendered = render_pep440_pre(pieces) 487 | elif style == "pep440-post": 488 | rendered = render_pep440_post(pieces) 489 | elif style == "pep440-old": 490 | rendered = render_pep440_old(pieces) 491 | elif style == "git-describe": 492 | rendered = render_git_describe(pieces) 493 | elif style == "git-describe-long": 494 | rendered = render_git_describe_long(pieces) 495 | else: 496 | raise ValueError("unknown style '%s'" % style) 497 | 498 | return { 499 | "version": rendered, 500 | "full-revisionid": pieces["long"], 501 | "dirty": pieces["dirty"], 502 | "error": None, 503 | "date": pieces.get("date"), 504 | } 505 | 506 | 507 | def get_versions(): 508 | """Get version information or return default if unable to do so.""" 509 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE.
If we have 510 | # __file__, we can work backwards from there to the root. Some 511 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 512 | # case we can only use expanded keywords. 513 | 514 | cfg = get_config() 515 | verbose = cfg.verbose 516 | 517 | try: 518 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) 519 | except NotThisMethod: 520 | pass 521 | 522 | try: 523 | root = os.path.realpath(__file__) 524 | # versionfile_source is the relative path from the top of the source 525 | # tree (where the .git directory might live) to this file. Invert 526 | # this to find the root from __file__. 527 | for i in cfg.versionfile_source.split("/"): 528 | root = os.path.dirname(root) 529 | except NameError: 530 | return { 531 | "version": "0+unknown", 532 | "full-revisionid": None, 533 | "dirty": None, 534 | "error": "unable to find root of source tree", 535 | "date": None, 536 | } 537 | 538 | try: 539 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 540 | return render(pieces, cfg.style) 541 | except NotThisMethod: 542 | pass 543 | 544 | try: 545 | if cfg.parentdir_prefix: 546 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 547 | except NotThisMethod: 548 | pass 549 | 550 | return { 551 | "version": "0+unknown", 552 | "full-revisionid": None, 553 | "dirty": None, 554 | "error": "unable to compute version", 555 | "date": None, 556 | } 557 | -------------------------------------------------------------------------------- /xhistogram/core.py: -------------------------------------------------------------------------------- 1 | """ 2 | Numpy API for xhistogram. 3 | """ 4 | 5 | 6 | import dask 7 | import numpy as np 8 | from functools import reduce 9 | from collections.abc import Iterable 10 | from numpy import ( 11 | searchsorted, 12 | bincount, 13 | reshape, 14 | ravel_multi_index, 15 | concatenate, 16 | broadcast_arrays, 17 | ) 18 | 19 | # range is a keyword so save the builtin so they can use it. 20 | _range = range 21 | 22 | try: 23 | import dask.array as dsa 24 | 25 | has_dask = True 26 | except ImportError: 27 | has_dask = False 28 | 29 | 30 | def _any_dask_array(*args): 31 | if not has_dask: 32 | return False 33 | else: 34 | return any(isinstance(a, dsa.core.Array) for a in args) 35 | 36 | 37 | def _ensure_correctly_formatted_bins(bins, N_expected): 38 | # TODO: This could be done better / more robustly 39 | if bins is None: 40 | raise ValueError("bins must be provided") 41 | if isinstance(bins, (int, str, np.ndarray)): 42 | bins = N_expected * [bins] 43 | if len(bins) == N_expected: 44 | return bins 45 | else: 46 | raise ValueError( 47 | "The number of bin definitions doesn't match the number of args" 48 | ) 49 | 50 | 51 | def _ensure_correctly_formatted_range(range_, N_expected): 52 | # TODO: This could be done better / more robustly 53 | def _iterable_nested(x): 54 | return all(isinstance(i, Iterable) for i in x) 55 | 56 | if range_ is not None: 57 | if (len(range_) == 2) & (not _iterable_nested(range_)): 58 | return N_expected * [range_] 59 | elif N_expected == len(range_): 60 | if all(len(x) == 2 for x in range_): 61 | return range_ 62 | else: 63 | raise ValueError( 64 | "range should be provided as (lower_range, upper_range). 
In the " 65 | + "case of multiple args, range should be a list of such tuples" 66 | ) 67 | else: 68 | raise ValueError("The number of ranges doesn't match the number of args") 69 | else: 70 | return N_expected * [range_] 71 | 72 | 73 | def _bincount_2d(bin_indices, weights, N, hist_shapes): 74 | # a trick to apply bincount on an axis-by-axis basis 75 | # https://stackoverflow.com/questions/40591754/vectorizing-numpy-bincount 76 | # https://stackoverflow.com/questions/40588403/vectorized-searchsorted-numpy 77 | M = bin_indices.shape[0] 78 | if weights is not None: 79 | weights = weights.ravel() 80 | bin_indices_offset = (bin_indices + (N * np.arange(M)[:, None])).ravel() 81 | bc_offset = bincount(bin_indices_offset, weights=weights, minlength=N * M) 82 | final_shape = (M,) + tuple(hist_shapes) 83 | return bc_offset.reshape(final_shape) 84 | 85 | 86 | def _bincount_loop(bin_indices, weights, N, hist_shapes, block_chunks): 87 | M = bin_indices.shape[0] 88 | assert sum(block_chunks) == M 89 | block_counts = [] 90 | # iterate over chunks 91 | bounds = np.cumsum((0,) + block_chunks) 92 | for m_start, m_end in zip(bounds[:-1], bounds[1:]): 93 | bin_indices_block = bin_indices[m_start:m_end] 94 | weights_block = weights[m_start:m_end] if weights is not None else None 95 | bc_block = _bincount_2d(bin_indices_block, weights_block, N, hist_shapes) 96 | block_counts.append(bc_block) 97 | all_counts = concatenate(block_counts) 98 | final_shape = (bin_indices.shape[0],) + tuple(hist_shapes) 99 | return all_counts.reshape(final_shape) 100 | 101 | 102 | def _determine_block_chunks(bin_indices, block_size): 103 | M, N = bin_indices.shape 104 | if block_size is None: 105 | return (M,) 106 | if block_size == "auto": 107 | try: 108 | # dask arrays - use the pre-existing chunks 109 | chunks = bin_indices.chunks 110 | return chunks[0] 111 | except AttributeError: 112 | # automatically pick a chunk size 113 | # this a a heueristic without much basis 114 | _MAX_CHUNK_SIZE = 10_000_000 115 | block_size = min(_MAX_CHUNK_SIZE // N, M) 116 | assert isinstance(block_size, int) 117 | num_chunks = M // block_size 118 | block_chunks = num_chunks * (block_size,) 119 | residual = M % block_size 120 | if residual: 121 | block_chunks += (residual,) 122 | assert sum(block_chunks) == M 123 | return block_chunks 124 | 125 | 126 | def _dispatch_bincount(bin_indices, weights, N, hist_shapes, block_size=None): 127 | # block_chunks is like a dask chunk, a tuple that divides up the first 128 | # axis of bin_indices 129 | block_chunks = _determine_block_chunks(bin_indices, block_size) 130 | if len(block_chunks) == 1: 131 | # single global chunk, don't need a loop over chunks 132 | return _bincount_2d(bin_indices, weights, N, hist_shapes) 133 | else: 134 | return _bincount_loop(bin_indices, weights, N, hist_shapes, block_chunks) 135 | 136 | 137 | def _bincount_2d_vectorized( 138 | *args, bins=None, weights=None, density=False, right=False, block_size=None 139 | ): 140 | """Calculate the histogram independently on each row of a 2D array""" 141 | 142 | N_inputs = len(args) 143 | a0 = args[0] 144 | 145 | # consistency checks for inputa 146 | for a, b in zip(args, bins): 147 | assert a.ndim == 2 148 | assert b.ndim == 1 149 | assert a.shape == a0.shape 150 | if weights is not None: 151 | assert weights.shape == a0.shape 152 | 153 | nrows, ncols = a0.shape 154 | nbins = [len(b) for b in bins] 155 | hist_shapes = [nb + 1 for nb in nbins] 156 | 157 | # The maximum possible value of searchsorted is nbins 158 | # For _searchsorted_inclusive: 
159 | # - 0 corresponds to a < b[0] 160 | # - i corresponds to b[i-1] <= a < b[i] 161 | # - nbins-1 corresponds to b[-2] <= a <= b[-1] 162 | # - nbins corresponds to a >= b[-1] 163 | def _searchsorted_inclusive(a, b): 164 | """ 165 | Like `searchsorted`, but where the last bin is also right-edge inclusive. 166 | """ 167 | # Similar to implementation in np.histogramdd 168 | # see https://github.com/numpy/numpy/blob/9c98662ee2f7daca3f9fae9d5144a9a8d3cabe8c/numpy/lib/histograms.py#L1056 169 | # This assumes the bins (b) are sorted 170 | bin_indices = searchsorted(b, a, side="right") 171 | on_edge = a == b[-1] 172 | # Shift these points one bin to the left. 173 | bin_indices[on_edge] -= 1 174 | return bin_indices 175 | 176 | each_bin_indices = [_searchsorted_inclusive(a, b) for a, b in zip(args, bins)] 177 | # product of the bins gives the joint distribution 178 | if N_inputs > 1: 179 | bin_indices = ravel_multi_index(each_bin_indices, hist_shapes) 180 | else: 181 | bin_indices = each_bin_indices[0] 182 | # total number of unique bin indices 183 | N = reduce(lambda x, y: x * y, hist_shapes) 184 | 185 | bin_counts = _dispatch_bincount( 186 | bin_indices, weights, N, hist_shapes, block_size=block_size 187 | ) 188 | 189 | # just throw out everything outside of the bins, as np.histogram does 190 | # TODO: make this optional? 191 | slices = (slice(None),) + (N_inputs * (slice(1, -1),)) 192 | bin_counts = bin_counts[slices] 193 | 194 | return bin_counts 195 | 196 | 197 | def _bincount( 198 | *all_arrays, weights=False, axis=None, bins=None, density=None, block_size=None 199 | ): 200 | a0 = all_arrays[0] 201 | 202 | do_full_array = (axis is None) or (set(axis) == set(_range(a0.ndim))) 203 | 204 | if do_full_array: 205 | kept_axes_shape = (1,) * a0.ndim 206 | else: 207 | kept_axes_shape = tuple( 208 | [a0.shape[i] if i not in axis else 1 for i in _range(a0.ndim)] 209 | ) 210 | 211 | def reshape_input(a): 212 | if do_full_array: 213 | d = a.ravel()[None, :] 214 | else: 215 | # reshape the array to 2D 216 | # axis 0: preserved axis after histogram 217 | # axis 1: calculate histogram along this axis 218 | new_pos = tuple(_range(-len(axis), 0)) 219 | c = np.moveaxis(a, axis, new_pos) 220 | split_idx = c.ndim - len(axis) 221 | dims_0 = c.shape[:split_idx] 222 | # assert dims_0 == kept_axes_shape 223 | dims_1 = c.shape[split_idx:] 224 | new_dim_0 = np.prod(dims_0) 225 | new_dim_1 = np.prod(dims_1) 226 | d = reshape(c, (new_dim_0, new_dim_1)) 227 | return d 228 | 229 | all_arrays_reshaped = [reshape_input(a) for a in all_arrays] 230 | 231 | if weights: 232 | weights_array = all_arrays_reshaped.pop() 233 | else: 234 | weights_array = None 235 | 236 | bin_counts = _bincount_2d_vectorized( 237 | *all_arrays_reshaped, 238 | bins=bins, 239 | weights=weights_array, 240 | density=density, 241 | block_size=block_size, 242 | ) 243 | 244 | final_shape = kept_axes_shape + bin_counts.shape[1:] 245 | bin_counts = reshape(bin_counts, final_shape) 246 | 247 | return bin_counts 248 | 249 | 250 | def histogram( 251 | *args, 252 | bins=None, 253 | range=None, 254 | axis=None, 255 | weights=None, 256 | density=False, 257 | block_size="auto", 258 | ): 259 | """Histogram applied along specified axis / axes. 260 | 261 | Parameters 262 | ---------- 263 | args : array_like 264 | Input data. The number of input arguments determines the dimensionality 265 | of the histogram. For example, two arguments produce a 2D histogram. 266 | All args must have the same size. 
267 | bins : int, str or numpy array or a list of ints, strs and/or arrays, optional 268 | If a list, there should be one entry for each item in ``args``. 269 | The bin specifications are as follows: 270 | 271 | * If int; the number of bins for all arguments in ``args``. 272 | * If str; the method used to automatically calculate the optimal bin width 273 | for all arguments in ``args``, as defined by numpy `histogram_bin_edges`. 274 | * If numpy array; the bin edges for all arguments in ``args``. 275 | * If a list of ints, strs and/or arrays; the bin specification as 276 | above for every argument in ``args``. 277 | 278 | When bin edges are specified, all but the last (righthand-most) bin include 279 | the left edge and exclude the right edge. The last bin includes both edges. 280 | 281 | A TypeError will be raised if args or weights contains dask arrays and bins 282 | are not specified explicitly as an array or list of arrays. This is because 283 | other bin specifications trigger computation. 284 | range : (float, float) or a list of (float, float), optional 285 | If a list, there should be one entry for each item in ``args``. 286 | The range specifications are as follows: 287 | 288 | * If (float, float); the lower and upper range(s) of the bins for all 289 | arguments in ``args``. Values outside the range are ignored. The first 290 | element of the range must be less than or equal to the second. `range` 291 | affects the automatic bin computation as well. In this case, while bin 292 | width is computed to be optimal based on the actual data within `range`, 293 | the bin count will fill the entire range including portions containing 294 | no data. 295 | * If a list of (float, float); the ranges as above for every argument in 296 | ``args``. 297 | * If not provided, range is simply ``(arg.min(), arg.max())`` for each 298 | arg. 299 | axis : None or int or tuple of ints, optional 300 | Axis or axes along which the histogram is computed. The default is to 301 | compute the histogram of the flattened array. 302 | weights : array_like, optional 303 | An array of weights, of the same shape as the input ``args``. Each value 304 | only contributes its associated weight towards the bin count 305 | (instead of 1). If `density` is True, the weights are 306 | normalized, so that the integral of the density over the range 307 | remains 1. 308 | density : bool, optional 309 | If ``False``, the result will contain the number of samples in 310 | each bin. If ``True``, the result is the value of the 311 | probability *density* function at the bin, normalized such that 312 | the *integral* over the range is 1. Note that the sum of the 313 | histogram values will not be equal to 1 unless bins of unity 314 | width are chosen; it is not a probability *mass* function. 315 | block_size : int or 'auto', optional 316 | A parameter which governs the algorithm used to compute the histogram. 317 | Using a nonzero value splits the histogram calculation over the 318 | non-histogram axes into blocks of size ``block_size``, iterating over 319 | them with a loop (numpy inputs) or in parallel (dask inputs). If 320 | ``'auto'``, blocks will be determined either by the underlying dask 321 | chunks (dask inputs) or an experimental built-in heuristic (numpy inputs). 322 | 323 | Returns 324 | ------- 325 | hist : array 326 | The values of the histogram. 327 | bin_edges : list of arrays 328 | Return the bin edges for each input array.
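Examples
--------
A minimal sketch with hand-picked values (illustrative only, not from the test suite). With edges [0, 1, 2, 3] the last bin is closed on the right, so 2.5 and 2.9 both fall in the final bin:

>>> import numpy as np
>>> h, bin_edges = histogram(np.array([0.5, 1.5, 2.5, 2.9]), bins=[np.array([0.0, 1.0, 2.0, 3.0])])
>>> h
array([1, 1, 2])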
329 | 330 | See Also 331 | -------- 332 | numpy.histogram, numpy.bincount, numpy.searchsorted 333 | """ 334 | 335 | a0 = args[0] 336 | ndim = a0.ndim 337 | n_inputs = len(args) 338 | 339 | is_dask_array = any([dask.is_dask_collection(a) for a in list(args) + [weights]]) 340 | 341 | if axis is not None: 342 | axis = np.atleast_1d(axis) 343 | assert axis.ndim == 1 344 | axis_normed = [] 345 | for ax in axis: 346 | if ax >= 0: 347 | ax_positive = ax 348 | else: 349 | ax_positive = ndim + ax 350 | assert ax_positive < ndim, "axis must be less than ndim" 351 | axis_normed.append(ax_positive) 352 | axis = [int(i) for i in axis_normed] 353 | 354 | all_arrays = list(args) 355 | n_inputs = len(all_arrays) 356 | 357 | if weights is not None: 358 | all_arrays.append(weights) 359 | has_weights = True 360 | else: 361 | has_weights = False 362 | 363 | dtype = "i8" if not has_weights else weights.dtype 364 | 365 | # Broadcast input arrays. Note that this dispatches to `dsa.broadcast_arrays` as necessary. 366 | all_arrays = broadcast_arrays(*all_arrays) 367 | # Since all arrays now have the same shape, just get the axes of the first. 368 | input_axes = tuple(_range(all_arrays[0].ndim)) 369 | 370 | # Some sanity checks and format bins and range correctly 371 | bins = _ensure_correctly_formatted_bins(bins, n_inputs) 372 | range = _ensure_correctly_formatted_range(range, n_inputs) 373 | 374 | # histogram_bin_edges triggers computation on dask arrays. It would be possible 375 | # to write a version of this that doesn't trigger when `range` is provided, but 376 | # for now let's just use np.histogram_bin_edges 377 | if is_dask_array: 378 | if not all(isinstance(b, np.ndarray) for b in bins): 379 | raise TypeError( 380 | "When using dask arrays, bins must be provided as numpy array(s) of edges" 381 | ) 382 | else: 383 | bins = [ 384 | np.histogram_bin_edges( 385 | a, bins=b, range=r, weights=all_arrays[-1] if has_weights else None 386 | ) 387 | for a, b, r in zip(all_arrays, bins, range) 388 | ] 389 | bincount_kwargs = dict( 390 | weights=has_weights, 391 | axis=axis, 392 | bins=bins, 393 | density=density, 394 | block_size=block_size, 395 | ) 396 | 397 | # remove these axes from the inputs 398 | if axis is not None: 399 | drop_axes = tuple(axis) 400 | else: 401 | drop_axes = input_axes 402 | 403 | if _any_dask_array(weights, *all_arrays): 404 | # We should be able to just apply the bin_count function to every 405 | # block and then sum over all blocks to get the total bin count. 406 | # The main challenge is to figure out the chunk shape that will come 407 | # out of _bincount. We might also need to add dummy dimensions to sum 408 | # over in the _bincount function 409 | import dask.array as dsa 410 | 411 | # Important note from blockwise docs 412 | # > Any index, like i missing from the output index is interpreted as a contraction... 413 | # > In the case of a contraction the passed function should expect an iterable of blocks 414 | # > on any array that holds that index. 
# This means that we need to have all the input indexes present in the output index 416 | # However, they will be reduced to singleton (len 1) dimensions 417 | 418 | adjust_chunks = {i: (lambda x: 1) for i in drop_axes} 419 | 420 | new_axes_start = max(input_axes) + 1 421 | new_axes = {new_axes_start + i: len(bin) - 1 for i, bin in enumerate(bins)} 422 | out_index = input_axes + tuple(new_axes) 423 | 424 | blockwise_args = [] 425 | for arg in all_arrays: 426 | blockwise_args.append(arg) 427 | blockwise_args.append(input_axes) 428 | 429 | bin_counts = dsa.blockwise( 430 | _bincount, 431 | out_index, 432 | *blockwise_args, 433 | new_axes=new_axes, 434 | adjust_chunks=adjust_chunks, 435 | meta=np.array((), dtype), 436 | **bincount_kwargs, 437 | ) 438 | # sum over the block dims 439 | bin_counts = bin_counts.sum(drop_axes) 440 | else: 441 | # drop the extra axis used for summing over blocks 442 | bin_counts = _bincount(*all_arrays, **bincount_kwargs).squeeze(drop_axes) 443 | 444 | if density: 445 | # Normalize by dividing by bin counts and areas such that all the 446 | # histogram data integrated over all dimensions = 1 447 | bin_widths = [np.diff(b) for b in bins] 448 | if n_inputs == 1: 449 | bin_areas = bin_widths[0] 450 | elif n_inputs == 2: 451 | bin_areas = np.outer(*bin_widths) 452 | else: 453 | # Slower, but N-dimensional logic 454 | bin_areas = np.prod(np.ix_(*bin_widths)) 455 | 456 | # Sum over the last n_inputs axes, which correspond to the bins. All other axes 457 | # are "bystander" axes. Sums must be done independently for each bystander axis 458 | # so that nans are dealt with correctly (#51) 459 | bin_axes = tuple(_range(-n_inputs, 0)) 460 | bin_count_sums = bin_counts.sum(axis=bin_axes) 461 | bin_count_sums_shape = bin_count_sums.shape + len(bin_axes) * (1,) 462 | h = bin_counts / bin_areas / reshape(bin_count_sums, bin_count_sums_shape) 463 | else: 464 | h = bin_counts 465 | 466 | return h, bins 467 | -------------------------------------------------------------------------------- /xhistogram/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xgcm/xhistogram/a73be2fd7bbed5c34d95f7ca876ddcab1dd2ecc1/xhistogram/test/__init__.py -------------------------------------------------------------------------------- /xhistogram/test/fixtures.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | import dask 3 | import dask.array as dsa 4 | import numpy as np 5 | import xarray as xr 6 | 7 | 8 | def empty_dask_array(shape, dtype=float, chunks=None): 9 | # a dask array that errors if you try to compute it 10 | def raise_if_computed(): 11 | raise ValueError("Triggered forbidden computation") 12 | 13 | a = dsa.from_delayed(dask.delayed(raise_if_computed)(), shape, dtype) 14 | if chunks is not None: 15 | a = a.rechunk(chunks) 16 | 17 | return a 18 | 19 | 20 | def example_dataarray(shape=(5, 20)): 21 | data = np.random.randn(*shape) 22 | dims = [f"dim_{i}" for i in range(len(shape))] 23 | da = xr.DataArray(data, dims=dims, name="T") 24 | return da 25 | 26 | 27 | def example_dataset(n_dim=2, n_vars=2): 28 | """Random dataset with every variable having the same shape""" 29 | 30 | shape = tuple(range(8, 8 + n_dim)) 31 | dims = [f"dim_{i}" for i in range(len(shape))] 32 | var_names = [uuid.uuid4().hex for _ in range(n_vars)] 33 | ds = xr.Dataset() 34 | for i in range(n_vars): 35 | name = var_names[i] 36 | data = np.random.randn(*shape) 37 | da =
xr.DataArray(data, dims=dims, name=name) 38 | ds[name] = da 39 | return ds 40 | -------------------------------------------------------------------------------- /xhistogram/test/test_chunking.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from .fixtures import example_dataarray 5 | from ..xarray import histogram 6 | 7 | 8 | @pytest.mark.parametrize("weights", [False, True]) 9 | @pytest.mark.parametrize("chunksize", [1, 2, 3, 10]) 10 | @pytest.mark.parametrize("shape", [(10,), (10, 4)]) 11 | def test_chunked_weights(chunksize, shape, weights): 12 | 13 | data_a = example_dataarray(shape).chunk((chunksize,)) 14 | 15 | if weights: 16 | weights = example_dataarray(shape).chunk((chunksize,)) 17 | weights_arr = weights.values 18 | else: 19 | weights = weights_arr = None 20 | 21 | nbins_a = 6 22 | bins_a = np.linspace(-4, 4, nbins_a + 1) 23 | 24 | h = histogram(data_a, bins=[bins_a], weights=weights) 25 | 26 | assert h.shape == (nbins_a,) 27 | 28 | hist, _ = np.histogram(data_a.values, bins=bins_a, weights=weights_arr) 29 | 30 | np.testing.assert_allclose(hist, h.values) 31 | 32 | 33 | @pytest.mark.parametrize("xchunksize", [1, 2, 3, 10]) 34 | @pytest.mark.parametrize("ychunksize", [1, 2, 3, 12]) 35 | class TestFixedSize2DChunks: 36 | def test_2d_chunks(self, xchunksize, ychunksize): 37 | 38 | data_a = example_dataarray(shape=(10, 12)).chunk((xchunksize, ychunksize)) 39 | 40 | nbins_a = 8 41 | bins_a = np.linspace(-4, 4, nbins_a + 1) 42 | 43 | h = histogram(data_a, bins=[bins_a]) 44 | 45 | assert h.shape == (nbins_a,) 46 | 47 | hist, _ = np.histogram(data_a.values, bins=bins_a) 48 | 49 | np.testing.assert_allclose(hist, h.values) 50 | 51 | @pytest.mark.parametrize("reduce_dim", ["dim_0", "dim_1"]) 52 | def test_2d_chunks_broadcast_dim( 53 | self, 54 | xchunksize, 55 | ychunksize, 56 | reduce_dim, 57 | ): 58 | data_a = example_dataarray(shape=(10, 12)).chunk((xchunksize, ychunksize)) 59 | dims = list(data_a.dims) 60 | broadcast_dim = [d for d in dims if d != reduce_dim][0] 61 | 62 | nbins_a = 8 63 | bins_a = np.linspace(-4, 4, nbins_a + 1) 64 | 65 | h = histogram(data_a, bins=[bins_a], dim=(reduce_dim,)) 66 | 67 | assert h.shape == (data_a.sizes[broadcast_dim], nbins_a) 68 | 69 | def _np_hist(*args, **kwargs): 70 | h, _ = np.histogram(*args, **kwargs) 71 | return h 72 | 73 | hist = np.apply_along_axis( 74 | _np_hist, axis=dims.index(reduce_dim), arr=data_a.values, bins=bins_a 75 | ) 76 | 77 | if reduce_dim == "dim_0": 78 | h = h.transpose() 79 | np.testing.assert_allclose(hist, h.values) 80 | 81 | def test_2d_chunks_2d_hist(self, xchunksize, ychunksize): 82 | 83 | data_a = example_dataarray(shape=(10, 12)).chunk((xchunksize, ychunksize)) 84 | data_b = example_dataarray(shape=(10, 12)).chunk((xchunksize, ychunksize)) 85 | 86 | nbins_a = 8 87 | nbins_b = 9 88 | bins_a = np.linspace(-4, 4, nbins_a + 1) 89 | bins_b = np.linspace(-4, 4, nbins_b + 1) 90 | 91 | h = histogram(data_a, data_b, bins=[bins_a, bins_b]) 92 | 93 | assert h.shape == (nbins_a, nbins_b) 94 | 95 | hist, _, _ = np.histogram2d( 96 | data_a.values.ravel(), 97 | data_b.values.ravel(), 98 | bins=[bins_a, bins_b], 99 | ) 100 | 101 | np.testing.assert_allclose(hist, h.values) 102 | 103 | 104 | @pytest.mark.parametrize("xchunksize", [1, 2, 3, 10]) 105 | @pytest.mark.parametrize("ychunksize", [1, 2, 3, 12]) 106 | class TestUnalignedChunks: 107 | def test_unaligned_data_chunks(self, xchunksize, ychunksize): 108 | data_a = example_dataarray(shape=(10, 
12)).chunk((xchunksize, ychunksize)) 109 | data_b = example_dataarray(shape=(10, 12)).chunk( 110 | (xchunksize + 1, ychunksize + 1) 111 | ) 112 | 113 | nbins_a = 8 114 | nbins_b = 9 115 | bins_a = np.linspace(-4, 4, nbins_a + 1) 116 | bins_b = np.linspace(-4, 4, nbins_b + 1) 117 | 118 | h = histogram(data_a, data_b, bins=[bins_a, bins_b]) 119 | 120 | assert h.shape == (nbins_a, nbins_b) 121 | 122 | hist, _, _ = np.histogram2d( 123 | data_a.values.ravel(), 124 | data_b.values.ravel(), 125 | bins=[bins_a, bins_b], 126 | ) 127 | 128 | np.testing.assert_allclose(hist, h.values) 129 | 130 | def test_unaligned_weights_chunks(self, xchunksize, ychunksize): 131 | 132 | data_a = example_dataarray(shape=(10, 12)).chunk((xchunksize, ychunksize)) 133 | weights = example_dataarray(shape=(10, 12)).chunk( 134 | (xchunksize + 1, ychunksize + 1) 135 | ) 136 | 137 | nbins_a = 8 138 | bins_a = np.linspace(-4, 4, nbins_a + 1) 139 | 140 | h = histogram(data_a, bins=[bins_a], weights=weights) 141 | 142 | assert h.shape == (nbins_a,) 143 | 144 | hist, _ = np.histogram(data_a.values, bins=bins_a, weights=weights.values) 145 | 146 | np.testing.assert_allclose(hist, h.values) 147 | -------------------------------------------------------------------------------- /xhistogram/test/test_chunking_hypotheses.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from .fixtures import example_dataarray, example_dataset 5 | from ..xarray import histogram 6 | 7 | pytest.importorskip("hypothesis") 8 | 9 | import hypothesis.strategies as st # noqa 10 | from hypothesis import given, settings # noqa 11 | 12 | 13 | @st.composite 14 | def chunk_shapes(draw, n_dim=3, max_arr_len=10): 15 | """Generate different chunking patterns for an N-D array of data.""" 16 | chunks = [] 17 | for n in range(n_dim): 18 | shape = draw(st.integers(min_value=1, max_value=max_arr_len)) 19 | chunks.append(shape) 20 | return tuple(chunks) 21 | 22 | 23 | class TestChunkingHypotheses: 24 | @given(chunk_shapes(n_dim=1, max_arr_len=20)) 25 | def test_all_chunking_patterns_1d(self, chunks): 26 | 27 | data = example_dataarray(shape=(20,)).chunk(chunks) 28 | 29 | nbins_a = 8 30 | bins = np.linspace(-4, 4, nbins_a + 1) 31 | 32 | h = histogram(data, bins=[bins]) 33 | 34 | assert h.shape == (nbins_a,) 35 | 36 | hist, _ = np.histogram( 37 | data.values, 38 | bins=bins, 39 | ) 40 | 41 | np.testing.assert_allclose(hist, h) 42 | 43 | # TODO mark as slow? 44 | @settings(deadline=None) 45 | @given(chunk_shapes(n_dim=2, max_arr_len=8)) 46 | def test_all_chunking_patterns_2d(self, chunks): 47 | 48 | data_a = example_dataarray(shape=(5, 20)).chunk(chunks) 49 | data_b = example_dataarray(shape=(5, 20)).chunk(chunks) 50 | 51 | nbins_a = 8 52 | nbins_b = 9 53 | bins_a = np.linspace(-4, 4, nbins_a + 1) 54 | bins_b = np.linspace(-4, 4, nbins_b + 1) 55 | 56 | h = histogram(data_a, data_b, bins=[bins_a, bins_b]) 57 | 58 | assert h.shape == (nbins_a, nbins_b) 59 | 60 | hist, _, _ = np.histogram2d( 61 | data_a.values.ravel(), 62 | data_b.values.ravel(), 63 | bins=[bins_a, bins_b], 64 | ) 65 | 66 | np.testing.assert_allclose(hist, h.values) 67 | 68 | # TODO mark as slow? 
69 | @settings(deadline=None) 70 | @pytest.mark.parametrize("n_vars", [1, 2, 3, 4]) 71 | @given(chunk_shapes(n_dim=2, max_arr_len=7)) 72 | def test_all_chunking_patterns_dd_hist(self, n_vars, chunk_shapes): 73 | ds = example_dataset(n_dim=2, n_vars=n_vars) 74 | ds = ds.chunk({d: c for d, c in zip(ds.dims.keys(), chunk_shapes)}) 75 | 76 | n_bins = (7, 8, 9, 10)[:n_vars] 77 | bins = [np.linspace(-4, 4, n + 1) for n in n_bins] 78 | 79 | h = histogram(*[da for name, da in ds.data_vars.items()], bins=bins) 80 | 81 | assert h.shape == n_bins 82 | 83 | input_data = np.stack( 84 | [da.values.ravel() for name, da in ds.data_vars.items()], axis=-1 85 | ) 86 | hist, _ = np.histogramdd(input_data, bins=bins) 87 | 88 | np.testing.assert_allclose(hist, h.values) 89 | -------------------------------------------------------------------------------- /xhistogram/test/test_core.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from itertools import combinations 5 | import dask.array as dsa 6 | 7 | from ..core import ( 8 | histogram, 9 | _ensure_correctly_formatted_bins, 10 | _ensure_correctly_formatted_range, 11 | ) 12 | from .fixtures import empty_dask_array, example_dataarray 13 | 14 | import pytest 15 | 16 | import contextlib 17 | 18 | 19 | bins_int = 10 20 | bins_str = "auto" 21 | bins_arr = np.linspace(-4, 4, 10) 22 | range_ = (0, 1) 23 | 24 | 25 | @pytest.mark.parametrize("density", [False, True]) 26 | @pytest.mark.parametrize("block_size", [None, 1, 2]) 27 | @pytest.mark.parametrize("axis", [1, None]) 28 | @pytest.mark.parametrize("bins", [10, np.linspace(-4, 4, 10), "auto"]) 29 | @pytest.mark.parametrize("range_", [None, (-4, 4)]) 30 | @pytest.mark.parametrize("add_nans", [False, True]) 31 | def test_histogram_results_1d(block_size, density, axis, bins, range_, add_nans): 32 | nrows, ncols = 5, 20 33 | # Setting the random seed here prevents np.testing.assert_allclose 34 | # from failing below. We should investigate this further.
35 | np.random.seed(2) 36 | data = np.random.randn(nrows, ncols) 37 | if add_nans: 38 | N_nans = 20 39 | data.ravel()[np.random.choice(data.size, N_nans, replace=False)] = np.nan 40 | bins = np.linspace(-4, 4, 10) 41 | 42 | h, bin_edges = histogram( 43 | data, bins=bins, range=range_, axis=axis, block_size=block_size, density=density 44 | ) 45 | 46 | expected_shape = ( 47 | (nrows, len(bin_edges[0]) - 1) if axis == 1 else (len(bin_edges[0]) - 1,) 48 | ) 49 | assert h.shape == expected_shape 50 | 51 | # make sure we get the same thing as numpy.histogram 52 | if axis: 53 | bins_np = np.histogram_bin_edges( 54 | data, bins=bins, range=range_ 55 | ) # Use same bins for all slices below 56 | expected = np.stack( 57 | [ 58 | np.histogram(data[i], bins=bins_np, range=range_, density=density)[0] 59 | for i in range(nrows) 60 | ] 61 | ) 62 | else: 63 | expected = np.histogram(data, bins=bins, range=range_, density=density)[0] 64 | np.testing.assert_allclose(h, expected) 65 | 66 | if density: 67 | widths = np.diff(bins) 68 | integral = np.sum(h * widths, axis) 69 | np.testing.assert_allclose(integral, 1.0) 70 | 71 | 72 | @pytest.mark.parametrize("block_size", [None, 1, 2]) 73 | def test_histogram_results_1d_weighted(block_size): 74 | nrows, ncols = 5, 20 75 | data = np.random.randn(nrows, ncols) 76 | bins = np.linspace(-4, 4, 10) 77 | h, _ = histogram(data, bins=bins, axis=1, block_size=block_size) 78 | weights = 2 * np.ones_like(data) 79 | h_w, _ = histogram(data, bins=bins, axis=1, weights=weights, block_size=block_size) 80 | np.testing.assert_array_equal(2 * h, h_w) 81 | 82 | 83 | # @pytest.mark.skip(reason="Weight broadcasting on numpy arrays is not yet implemented") 84 | @pytest.mark.parametrize("block_size", [None, 1, 2, "auto"]) 85 | def test_histogram_results_1d_weighted_broadcasting(block_size): 86 | nrows, ncols = 5, 20 87 | data = np.random.randn(nrows, ncols) 88 | bins = np.linspace(-4, 4, 10) 89 | h, _ = histogram(data, bins=bins, axis=1, block_size=block_size) 90 | weights = 2 * np.ones((1, ncols)) 91 | h_w, _ = histogram(data, bins=bins, axis=1, weights=weights, block_size=block_size) 92 | np.testing.assert_array_equal(2 * h, h_w) 93 | 94 | 95 | @pytest.mark.parametrize("block_size", [None, 1, 2]) 96 | def test_histogram_right_edge(block_size): 97 | """Test that last bin is both left- and right-edge inclusive as it 98 | is for numpy.histogram 99 | """ 100 | nrows, ncols = 5, 20 101 | data = np.ones((nrows, ncols)) 102 | bins = np.array([0, 0.5, 1]) # All data at rightmost edge 103 | 104 | h, _ = histogram(data, bins=bins, axis=1, block_size=block_size) 105 | assert h.shape == (nrows, len(bins) - 1) 106 | 107 | # make sure we get the same thing as histogram (all data in the last bin) 108 | hist, _ = np.histogram(data, bins=bins) 109 | np.testing.assert_array_equal(hist, h.sum(axis=0)) 110 | 111 | # now try with no axis 112 | h_na, _ = histogram(data, bins=bins, block_size=block_size) 113 | np.testing.assert_array_equal(hist, h_na) 114 | 115 | 116 | def test_histogram_results_2d(): 117 | nrows, ncols = 5, 20 118 | data_a = np.random.randn(nrows, ncols) 119 | data_b = np.random.randn(nrows, ncols) 120 | nbins_a = 9 121 | bins_a = np.linspace(-4, 4, nbins_a + 1) 122 | nbins_b = 10 123 | bins_b = np.linspace(-4, 4, nbins_b + 1) 124 | 125 | h, _ = histogram(data_a, data_b, bins=[bins_a, bins_b]) 126 | assert h.shape == (nbins_a, nbins_b) 127 | 128 | hist, _, _ = np.histogram2d(data_a.ravel(), data_b.ravel(), bins=[bins_a, bins_b]) 129 | np.testing.assert_array_equal(hist, h) 130 | 131 | 132 
| @pytest.mark.parametrize("dask", [False, True]) 133 | def test_histogram_results_2d_broadcasting(dask): 134 | nrows, ncols = 5, 20 135 | data_a = np.random.randn(ncols) 136 | data_b = np.random.randn(nrows, ncols) 137 | nbins_a = 9 138 | bins_a = np.linspace(-4, 4, nbins_a + 1) 139 | nbins_b = 10 140 | bins_b = np.linspace(-4, 4, nbins_b + 1) 141 | 142 | if dask: 143 | test_data_a = dsa.from_array(data_a, chunks=3) 144 | test_data_b = dsa.from_array(data_b, chunks=(2, 7)) 145 | else: 146 | test_data_a = data_a 147 | test_data_b = data_b 148 | 149 | h, _ = histogram(test_data_a, test_data_b, bins=[bins_a, bins_b]) 150 | assert h.shape == (nbins_a, nbins_b) 151 | 152 | hist, _, _ = np.histogram2d( 153 | np.broadcast_to(data_a, data_b.shape).ravel(), 154 | data_b.ravel(), 155 | bins=[bins_a, bins_b], 156 | ) 157 | np.testing.assert_array_equal(hist, h) 158 | 159 | 160 | @pytest.mark.parametrize("add_nans", [False, True]) 161 | def test_histogram_results_2d_density(add_nans): 162 | nrows, ncols = 5, 20 163 | data_a = np.random.randn(nrows, ncols) 164 | data_b = np.random.randn(nrows, ncols) 165 | if add_nans: 166 | N_nans = 20 167 | data_a.ravel()[np.random.choice(data_a.size, N_nans, replace=False)] = np.nan 168 | data_b.ravel()[np.random.choice(data_b.size, N_nans, replace=False)] = np.nan 169 | nbins_a = 9 170 | bins_a = np.linspace(-4, 4, nbins_a + 1) 171 | nbins_b = 10 172 | bins_b = np.linspace(-4, 4, nbins_b + 1) 173 | 174 | h, _ = histogram(data_a, data_b, bins=[bins_a, bins_b], density=True) 175 | assert h.shape == (nbins_a, nbins_b) 176 | 177 | hist, _, _ = np.histogram2d( 178 | data_a.ravel(), data_b.ravel(), bins=[bins_a, bins_b], density=True 179 | ) 180 | np.testing.assert_allclose(hist, h) 181 | 182 | # check integral is 1 183 | widths_a = np.diff(bins_a) 184 | widths_b = np.diff(bins_b) 185 | areas = np.outer(widths_a, widths_b) 186 | integral = np.sum(hist * areas) 187 | np.testing.assert_allclose(integral, 1.0) 188 | 189 | 190 | @pytest.mark.parametrize("add_nans", [False, True]) 191 | def test_histogram_results_3d_density(add_nans): 192 | nrows, ncols = 5, 20 193 | data_a = np.random.randn(nrows, ncols) 194 | data_b = np.random.randn(nrows, ncols) 195 | data_c = np.random.randn(nrows, ncols) 196 | if add_nans: 197 | N_nans = 20 198 | data_a.ravel()[np.random.choice(data_a.size, N_nans, replace=False)] = np.nan 199 | data_b.ravel()[np.random.choice(data_b.size, N_nans, replace=False)] = np.nan 200 | data_c.ravel()[np.random.choice(data_c.size, N_nans, replace=False)] = np.nan 201 | nbins_a = 9 202 | bins_a = np.linspace(-4, 4, nbins_a + 1) 203 | nbins_b = 10 204 | bins_b = np.linspace(-4, 4, nbins_b + 1) 205 | nbins_c = 9 206 | bins_c = np.linspace(-4, 4, nbins_c + 1) 207 | 208 | h, _ = histogram( 209 | data_a, data_b, data_c, bins=[bins_a, bins_b, bins_c], density=True 210 | ) 211 | 212 | assert h.shape == (nbins_a, nbins_b, nbins_c) 213 | 214 | hist, _ = np.histogramdd( 215 | (data_a.ravel(), data_b.ravel(), data_c.ravel()), 216 | bins=[bins_a, bins_b, bins_c], 217 | density=True, 218 | ) 219 | 220 | np.testing.assert_allclose(hist, h) 221 | 222 | # check integral is 1 223 | widths_a = np.diff(bins_a) 224 | widths_b = np.diff(bins_b) 225 | widths_c = np.diff(bins_c) 226 | areas = np.einsum("i,j,k", widths_a, widths_b, widths_c) 227 | integral = np.sum(hist * areas) 228 | np.testing.assert_allclose(integral, 1.0) 229 | 230 | 231 | @pytest.mark.parametrize("block_size", [None, 5, "auto"]) 232 | @pytest.mark.parametrize("use_dask", [False, True]) 233 | def 
test_histogram_shape(use_dask, block_size): 234 | """These tests just verify that arrays with the right shape come out. 235 | They don't verify correctness.""" 236 | 237 | shape = 10, 15, 12, 20 238 | if use_dask: 239 | b = empty_dask_array(shape, chunks=(1,) + shape[1:]) 240 | else: 241 | b = np.random.randn(*shape) 242 | bins = np.linspace(-4, 4, 27) 243 | 244 | # no axis 245 | c, _ = histogram(b, bins=bins, block_size=block_size) 246 | assert c.shape == (len(bins) - 1,) 247 | # same thing 248 | for axis in [(0, 1, 2, 3), (0, 1, 3, 2), (3, 2, 1, 0), (3, 2, 0, 1)]: 249 | c, _ = histogram(b, bins=bins, axis=axis) 250 | assert c.shape == (len(bins) - 1,) 251 | if use_dask: 252 | assert isinstance(c, dsa.Array) 253 | 254 | # scalar axis (check positive and negative) 255 | for axis in list(range(4)) + list(range(-1, -5, -1)): 256 | c, _ = histogram(b, bins=bins, axis=axis, block_size=block_size) 257 | shape = list(b.shape) 258 | del shape[axis] 259 | expected_shape = tuple(shape) + (len(bins) - 1,) 260 | assert c.shape == expected_shape 261 | if use_dask: 262 | assert isinstance(c, dsa.Array) 263 | 264 | # two axes 265 | for i, j in combinations(range(4), 2): 266 | axis = (i, j) 267 | c, _ = histogram(b, bins=bins, axis=axis, block_size=block_size) 268 | shape = list(b.shape) 269 | partial_shape = [shape[k] for k in range(b.ndim) if k not in axis] 270 | expected_shape = tuple(partial_shape) + (len(bins) - 1,) 271 | assert c.shape == expected_shape 272 | if use_dask: 273 | assert isinstance(c, dsa.Array) 274 | 275 | 276 | @pytest.mark.parametrize("arg_type", ["dask", "numpy"]) 277 | @pytest.mark.parametrize("weights_type", ["dask", "numpy", None]) 278 | @pytest.mark.parametrize("bins_type", ["int", "str", "numpy"]) 279 | def test_histogram_dask(arg_type, weights_type, bins_type): 280 | """Test that a TypeError is raised with dask arrays and inappropriate bins""" 281 | shape = 10, 15, 12, 20 282 | 283 | if arg_type == "dask": 284 | arg = empty_dask_array(shape) 285 | else: 286 | arg = example_dataarray(shape) 287 | 288 | if weights_type == "dask": 289 | weights = empty_dask_array(shape) 290 | elif weights_type == "numpy": 291 | weights = example_dataarray(shape) 292 | else: 293 | weights = None 294 | 295 | if bins_type == "int": 296 | bins = bins_int 297 | elif bins_type == "str": 298 | bins = bins_str 299 | else: 300 | bins = bins_arr 301 | 302 | # TypeError should be raised when 303 | # 1. args or weights is a dask array and bins is not a numpy array, or 304 | # 2.
bins is a string and weights is a numpy array 305 | cond_1 = ((arg_type == "dask") | (weights_type == "dask")) & (bins_type != "numpy") 306 | cond_2 = (weights_type == "numpy") & (bins_type == "str") 307 | should_TypeError = cond_1 | cond_2 308 | 309 | with contextlib.ExitStack() as stack: 310 | if should_TypeError: 311 | stack.enter_context(pytest.raises(TypeError)) 312 | histogram(arg, bins=bins, weights=weights) 313 | histogram(arg, arg, bins=[bins, bins], weights=weights) 314 | 315 | 316 | @pytest.mark.parametrize( 317 | "in_out", 318 | [ 319 | (bins_int, 1, [bins_int]), # ( bins_in, n_args, bins_out ) 320 | (bins_str, 1, [bins_str]), 321 | (bins_arr, 1, [bins_arr]), 322 | ([bins_int], 1, [bins_int]), 323 | (bins_int, 2, 2 * [bins_int]), 324 | (bins_str, 2, 2 * [bins_str]), 325 | (bins_arr, 2, 2 * [bins_arr]), 326 | ([bins_int, bins_str, bins_arr], 3, [bins_int, bins_str, bins_arr]), 327 | ([bins_arr], 2, None), 328 | (None, 1, None), 329 | ([bins_arr, bins_arr], 1, None), 330 | ], 331 | ) 332 | def test_ensure_correctly_formatted_bins(in_out): 333 | """Test the helper function _ensure_correctly_formatted_bins""" 334 | bins_in, n, bins_expected = in_out 335 | if bins_expected is not None: 336 | bins = _ensure_correctly_formatted_bins(bins_in, n) 337 | assert bins == bins_expected 338 | else: 339 | with pytest.raises((ValueError, TypeError)): 340 | _ensure_correctly_formatted_bins(bins_in, n) 341 | 342 | 343 | @pytest.mark.parametrize( 344 | "in_out", 345 | [ 346 | (range_, 1, [range_]), # ( range_in, n_args, range_out ) 347 | (range_, 2, [range_, range_]), 348 | ([range_, range_], 2, [range_, range_]), 349 | ([(range_[0],)], 1, None), 350 | ([range_], 2, None), 351 | ([range_, range_], 1, None), 352 | ], 353 | ) 354 | def test_ensure_correctly_formatted_range(in_out): 355 | """Test the helper function _ensure_correctly_formatted_range""" 356 | range_in, n, range_expected = in_out 357 | if range_expected is not None: 358 | range_ = _ensure_correctly_formatted_range(range_in, n) 359 | assert range_ == range_expected 360 | else: 361 | with pytest.raises(ValueError): 362 | _ensure_correctly_formatted_range(range_in, n) 363 | 364 | 365 | @pytest.mark.parametrize("block_size", [None, 1, 2]) 366 | @pytest.mark.parametrize("use_dask", [False, True]) 367 | def test_histogram_results_datetime(use_dask, block_size): 368 | """Test computing histogram of datetime objects""" 369 | data = pd.date_range(start="2000-06-01", periods=5) 370 | if use_dask: 371 | data = dsa.asarray(data, chunks=(5,)) 372 | # everything should be in the second bin (index 1) 373 | bins = np.array( 374 | [ 375 | np.datetime64("1999-01-01"), 376 | np.datetime64("2000-01-01"), 377 | np.datetime64("2001-01-01"), 378 | ] 379 | ) 380 | h = histogram(data, bins=bins, block_size=block_size)[0] 381 | expected = np.histogram(data, bins=bins)[0] 382 | np.testing.assert_allclose(h, expected) 383 | -------------------------------------------------------------------------------- /xhistogram/test/test_xarray.py: -------------------------------------------------------------------------------- 1 | import xarray as xr 2 | import numpy as np 3 | import pytest 4 | import pandas as pd 5 | from itertools import combinations 6 | 7 | from ..xarray import histogram 8 | 9 | 10 | # example dimensions 11 | DIMS = {"time": 5, "depth": 10, "lat": 45, "lon": 90} 12 | COORDS = { 13 | "time": ("time", pd.date_range(start="2000-01-01", periods=DIMS["time"])), 14 | "depth": ("depth", np.arange(DIMS["depth"]) * 100.0 + 50), 15 | "lat": ("lat", 
np.arange(DIMS["lat"]) * 180 / DIMS["lat"] - 90 + 90 / DIMS["lat"]), 16 | "lon": ("lon", np.arange(DIMS["lon"]) * 360 / DIMS["lon"] + 180 / DIMS["lon"]), 17 | } 18 | 19 | 20 | @pytest.fixture( 21 | params=[ 22 | ("lon",), 23 | ("lat", "lon"), 24 | ("depth", "lat", "lon"), 25 | ("time", "depth", "lat", "lon"), 26 | ], 27 | ids=["1D", "2D", "3D", "4D"], 28 | ) 29 | def ones(request): 30 | dims = request.param 31 | shape = [DIMS[d] for d in dims] 32 | coords = {k: v for k, v in COORDS.items() if k in dims} 33 | data = np.ones(shape, dtype="f8") 34 | da = xr.DataArray(data, dims=dims, coords=coords, name="ones") 35 | return da 36 | 37 | 38 | @pytest.mark.parametrize("ndims", [1, 2, 3, 4]) 39 | def test_histogram_ones(ones, ndims): 40 | dims = ones.dims 41 | if ones.ndim < ndims: 42 | pytest.skip( 43 | "Don't need to test when number of dimension combinations " 44 | "exceeds the number of array dimensions" 45 | ) 46 | 47 | # everything should be in the middle bin (index 1) 48 | bins = np.array([0, 0.9, 1.1, 2]) 49 | bins_c = 0.5 * (bins[1:] + bins[:-1]) 50 | 51 | def _check_result(h, d): 52 | other_dims = [dim for dim in ones.dims if dim not in d] 53 | if len(other_dims) > 0: 54 | assert set(other_dims) <= set(h.dims) 55 | # check that all values are in the central bin 56 | h_sum = h.sum(other_dims) 57 | h_sum_expected = xr.DataArray( 58 | [0, ones.size, 0], 59 | dims=["ones_bin"], 60 | coords={"ones_bin": ("ones_bin", bins_c)}, 61 | name="histogram_ones", 62 | ) 63 | xr.testing.assert_identical(h_sum, h_sum_expected) 64 | 65 | for d in combinations(dims, ndims): 66 | h = histogram(ones, bins=[bins], dim=d) 67 | _check_result(h, d) 68 | 69 | 70 | @pytest.mark.parametrize("ndims", [1, 2, 3, 4]) 71 | def test_histogram_ones_density(ones, ndims): 72 | dims = ones.dims 73 | if ones.ndim < ndims: 74 | pytest.skip( 75 | "Don't need to test when number of dimension combinations " 76 | "exceeds the number of array dimensions" 77 | ) 78 | 79 | # everything should be in the middle bin (index 1) 80 | bins = np.array([0, 0.9, 1.1, 2]) 81 | bin_area = 0.2 82 | 83 | def _check_result(h_density, d): 84 | other_dims = [dim for dim in ones.dims if dim not in d] 85 | if len(other_dims) > 0: 86 | assert set(other_dims) <= set(h_density.dims) 87 | 88 | # check that all integrals over pdfs at different locations are = 1 89 | h_integrals = (h_density * bin_area).sum(dim="ones_bin") 90 | np.testing.assert_allclose(h_integrals.values, 1.0) 91 | 92 | for d in combinations(dims, ndims): 93 | h_density = histogram(ones, bins=[bins], dim=d, density=True) 94 | _check_result(h_density, d) 95 | 96 | 97 | # TODO: refactor this test to use better fixtures 98 | # (it currently has a ton of loops) 99 | @pytest.mark.parametrize("ndims", [1, 2, 3, 4]) 100 | def test_weights(ones, ndims): 101 | dims = ones.dims 102 | if ones.ndim < ndims: 103 | pytest.skip( 104 | "Don't need to test when number of dimension combinations " 105 | "exceeds the number of array dimensions" 106 | ) 107 | 108 | bins = np.array([0, 0.9, 1.1, 2]) 109 | bins_c = 0.5 * (bins[1:] + bins[:-1]) 110 | 111 | weight_value = 0.5 112 | 113 | def _check_result(h, d): 114 | other_dims = [dim for dim in ones.dims if dim not in d] 115 | if len(other_dims) > 0: 116 | assert set(other_dims) <= set(h.dims) 117 | # check that all values are in the central bin 118 | h_sum = h.sum(other_dims) 119 | h_sum_expected = xr.DataArray( 120 | [0, weight_value * ones.size, 0], 121 | dims=["ones_bin"], 122 | coords={"ones_bin": ("ones_bin", bins_c)}, 123 | name="histogram_ones", 124 | ) 
125 | xr.testing.assert_identical(h_sum, h_sum_expected) 126 | 127 | # get every possible combination of sub-dimensions 128 | for n_combinations in range(ones.ndim): 129 | for weight_dims in combinations(dims, n_combinations): 130 | i_selector = {dim: 0 for dim in weight_dims} 131 | weights = xr.full_like(ones.isel(**i_selector), weight_value) 132 | for nc in range(ndims): 133 | for d in combinations(dims, nc + 1): 134 | h = histogram(ones, weights=weights, bins=[bins], dim=d) 135 | _check_result(h, d) 136 | 137 | 138 | # test for issue #5 139 | def test_dims_and_coords(): 140 | time_axis = np.arange(4) 141 | depth_axis = np.arange(10) 142 | X_axis = np.arange(30) 143 | Y_axis = np.arange(30) 144 | 145 | dat1 = np.random.randint( 146 | low=0, 147 | high=100, 148 | size=(len(time_axis), len(depth_axis), len(X_axis), len(Y_axis)), 149 | ) 150 | array1 = xr.DataArray( 151 | dat1, 152 | coords=[time_axis, depth_axis, X_axis, Y_axis], 153 | dims=["time", "depth", "X", "Y"], 154 | name="one", 155 | ) 156 | 157 | dat2 = np.random.randint( 158 | low=0, high=50, size=(len(time_axis), len(depth_axis), len(X_axis), len(Y_axis)) 159 | ) 160 | array2 = xr.DataArray( 161 | dat2, 162 | coords=[time_axis, depth_axis, X_axis, Y_axis], 163 | dims=["time", "depth", "X", "Y"], 164 | name="two", 165 | ) 166 | 167 | bins1 = np.linspace(0, 100, 50) 168 | bins2 = np.linspace(0, 50, 25) 169 | 170 | result = histogram(array1, array2, dim=["X", "Y"], bins=[bins1, bins2]) 171 | assert result.dims == ("time", "depth", "one_bin", "two_bin") 172 | assert result.time.identical(array1.time) 173 | assert result.depth.identical(array2.depth) 174 | 175 | 176 | @pytest.mark.parametrize("number_of_inputs", [1, 2]) 177 | @pytest.mark.parametrize("keep_coords", [True, False]) 178 | @pytest.mark.parametrize("include_weights", [True, False]) 179 | def test_carry_coords(keep_coords, number_of_inputs, include_weights): 180 | time_axis = np.arange(40) 181 | X_axis = np.arange(10) 182 | Y_axis = np.arange(10) 183 | weight_value = 0.5 184 | 185 | data = np.random.randint( 186 | low=0, high=100, size=(len(time_axis), len(X_axis), len(Y_axis)) 187 | ) 188 | da = xr.DataArray( 189 | data, coords=[time_axis, X_axis, Y_axis], dims=["time", "X", "Y"], name="one" 190 | ) 191 | 192 | if include_weights: 193 | weights = xr.full_like(da, weight_value) 194 | else: 195 | weights = None 196 | 197 | # faking coordinates 198 | da["lon"] = da.X**2 + da.Y**2 199 | assert "lon" in da.coords 200 | bins = np.linspace(0, 100, 10) 201 | result = histogram( 202 | *[da] * number_of_inputs, 203 | bins=[bins] * number_of_inputs, 204 | dim=["time"], 205 | weights=weights, 206 | keep_coords=keep_coords 207 | ) 208 | if keep_coords: 209 | assert "lon" in result.coords 210 | else: 211 | assert "lon" not in result.coords 212 | 213 | 214 | # test for issue #14 215 | def test_input_type_check(): 216 | np_array = np.arange(100) 217 | with pytest.raises(TypeError): 218 | histogram(np_array) 219 | -------------------------------------------------------------------------------- /xhistogram/xarray.py: -------------------------------------------------------------------------------- 1 | """ 2 | Xarray API for xhistogram. 3 | """ 4 | 5 | import xarray as xr 6 | from collections import OrderedDict 7 | from .core import histogram as _histogram 8 | 9 | # range is a keyword so save the builtin so they can use it. 
10 | _range = range 11 | 12 | 13 | def histogram( 14 | *args, 15 | bins=None, 16 | range=None, 17 | dim=None, 18 | weights=None, 19 | density=False, 20 | block_size="auto", 21 | keep_coords=False, 22 | bin_dim_suffix="_bin", 23 | ): 24 | """Histogram applied along specified dimensions. 25 | 26 | Parameters 27 | ---------- 28 | args : xarray.DataArray objects 29 | Input data. The number of input arguments determines the dimensionality of 30 | the histogram. For example, two arguments produce a 2D histogram. All 31 | args must be aligned and have the same dimensions. 32 | bins : int, str or numpy array or a list of ints, strs and/or arrays, optional 33 | If a list, there should be one entry for each item in ``args``. 34 | The bin specifications are as follows: 35 | 36 | * If int; the number of bins for all arguments in ``args``. 37 | * If str; the method used to automatically calculate the optimal bin width 38 | for all arguments in ``args``, as defined by numpy `histogram_bin_edges`. 39 | * If numpy array; the bin edges for all arguments in ``args``. 40 | * If a list of ints, strs and/or arrays; the bin specification as 41 | above for every argument in ``args``. 42 | 43 | When bin edges are specified, all but the last (righthand-most) bin include 44 | the left edge and exclude the right edge. The last bin includes both edges. 45 | 46 | A TypeError will be raised if args or weights contains dask arrays and bins 47 | are not specified explicitly as an array or list of arrays. This is because 48 | other bin specifications trigger computation. 49 | range : (float, float) or a list of (float, float), optional 50 | If a list, there should be one entry for each item in ``args``. 51 | The range specifications are as follows: 52 | 53 | * If (float, float); the lower and upper range(s) of the bins for all 54 | arguments in ``args``. Values outside the range are ignored. The first 55 | element of the range must be less than or equal to the second. `range` 56 | affects the automatic bin computation as well. In this case, while bin 57 | width is computed to be optimal based on the actual data within `range`, 58 | the bin count will fill the entire range including portions containing 59 | no data. 60 | * If a list of (float, float); the ranges as above for every argument in 61 | ``args``. 62 | * If not provided, range is simply ``(arg.min(), arg.max())`` for each 63 | arg. 64 | dim : tuple of strings, optional 65 | Dimensions over which the histogram is computed. The default is to 66 | compute the histogram of the flattened array. 67 | weights : array_like, optional 68 | An array of weights, of the same shape as the arrays in ``args``. 69 | Each value only contributes its associated weight towards the bin 70 | count (instead of 1). If `density` is True, the weights are 71 | normalized, so that the integral of the density over the range 72 | remains 1. NaNs in the weights input will fill the entire bin with 73 | NaNs. If there are NaNs in the weights input, call ``.fillna(0.)`` 74 | before running ``histogram()``. 75 | density : bool, optional 76 | If ``False``, the result will contain the number of samples in 77 | each bin. If ``True``, the result is the value of the 78 | probability *density* function at the bin, normalized such that 79 | the *integral* over the range is 1. Note that the sum of the 80 | histogram values will not be equal to 1 unless bins of unity 81 | width are chosen; it is not a probability *mass* function.
82 | block_size : int or 'auto', optional 83 | A parameter which governs the algorithm used to compute the histogram. 84 | Using a nonzero value splits the histogram calculation over the 85 | non-histogram axes into blocks of size ``block_size``, iterating over 86 | them with a loop (numpy inputs) or in parallel (dask inputs). If 87 | ``'auto'``, blocks will be determined either by the underlying dask 88 | chunks (dask inputs) or an experimental built-in heuristic (numpy inputs). 89 | keep_coords : bool, optional 90 | If ``True``, keep all coordinates. Default: ``False`` 91 | bin_dim_suffix : str, optional 92 | Suffix to append to input arg names to define names of output bin 93 | dimensions 94 | 95 | Returns 96 | ------- 97 | hist : xarray.DataArray 98 | The values of the histogram. For each bin, the midpoint of the bin edges 99 | is given along the bin coordinates. 100 | 101 | """ 102 | 103 | args = list(args) 104 | N_args = len(args) 105 | 106 | # TODO: allow list of weights as well 107 | N_weights = 1 if weights is not None else 0 108 | 109 | for a in args: 110 | if not isinstance(a, xr.DataArray): 111 | raise TypeError( 112 | "xhistogram.xarray.histogram accepts only xarray.DataArray " 113 | + f"objects but a {type(a).__name__} was provided" 114 | ) 115 | 116 | for a in args: 117 | assert a.name is not None, "all arrays must have a name" 118 | 119 | # we drop coords to simplify alignment 120 | if not keep_coords: 121 | args = [da.reset_coords(drop=True) for da in args] 122 | if N_weights: 123 | args += [weights.reset_coords(drop=True)] 124 | # explicitly broadcast so we understand what is going into apply_ufunc 125 | # (apply_ufunc might be doing this by itself again) 126 | args = list(xr.align(*args, join="exact")) 127 | 128 | # what happens if we skip this? 
129 | # args = list(xr.broadcast(*args)) 130 | a0 = args[0] 131 | a_coords = a0.coords 132 | 133 | # roll our own broadcasting 134 | # now manually expand the arrays 135 | all_dims = [d for a in args for d in a.dims] 136 | all_dims_ordered = list(OrderedDict.fromkeys(all_dims)) 137 | args_expanded = [] 138 | for a in args: 139 | expand_keys = [d for d in all_dims_ordered if d not in a.dims] 140 | a_expanded = a.expand_dims({k: 1 for k in expand_keys}) 141 | args_expanded.append(a_expanded) 142 | 143 | # only transpose if necessary, to avoid creating unnecessary dask tasks 144 | args_transposed = [] 145 | for a in args_expanded: 146 | if a.dims != tuple(all_dims_ordered): 147 | args_transposed.append(a.transpose(*all_dims_ordered)) 148 | else: 149 | args_transposed.append(a) 150 | args_data = [a.data for a in args_transposed] 151 | 152 | if N_weights: 153 | weights_data = args_data.pop() 154 | else: 155 | weights_data = None 156 | 157 | if dim is not None: 158 | dims_to_keep = [d for d in all_dims_ordered if d not in dim] 159 | axis = [args_transposed[0].get_axis_num(d) for d in dim] 160 | else: 161 | dims_to_keep = [] 162 | axis = None 163 | 164 | h_data, bins = _histogram( 165 | *args_data, 166 | weights=weights_data, 167 | bins=bins, 168 | range=range, 169 | axis=axis, 170 | density=density, 171 | block_size=block_size, 172 | ) 173 | 174 | # create output dims 175 | new_dims = [a.name + bin_dim_suffix for a in args[:N_args]] 176 | output_dims = dims_to_keep + new_dims 177 | 178 | # create new coords 179 | bin_centers = [0.5 * (bin[:-1] + bin[1:]) for bin in bins] 180 | new_coords = { 181 | name: ((name,), bin_center, a.attrs) 182 | for name, bin_center, a in zip(new_dims, bin_centers, args) 183 | } 184 | 185 | # old coords associated with dims 186 | old_dim_coords = {name: a0[name] for name in dims_to_keep if name in a_coords} 187 | 188 | all_coords = {} 189 | all_coords.update(old_dim_coords) 190 | all_coords.update(new_coords) 191 | # add compatible coords 192 | if keep_coords: 193 | for c in a_coords: 194 | if c not in all_coords and set(a0[c].dims).issubset(output_dims): 195 | all_coords[c] = a0[c] 196 | 197 | output_name = "_".join(["histogram"] + [a.name for a in args[:N_args]]) 198 | 199 | da_out = xr.DataArray(h_data, dims=output_dims, coords=all_coords, name=output_name) 200 | 201 | return da_out 202 | 203 | # we need weights to be passed through apply_func's alignment algorithm, 204 | # so we include it as an arg, so we create a wrapper function to do so 205 | # this feels like a hack 206 | # def _histogram_wrapped(*args, **kwargs): 207 | # alist = list(args) 208 | # weights = [alist.pop() for n in _range(N_weights)] 209 | # if N_weights == 0: 210 | # weights = None 211 | # elif N_weights == 1: 212 | # weights = weights[0] # squeeze 213 | # return _histogram(*alist, weights=weights, **kwargs) 214 | --------------------------------------------------------------------------------
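A minimal usage sketch of the xarray-level API documented above (illustrative only; the DataArray, its dimension names, and its values are invented, not taken from this repository):

    # Hypothetical example, not part of xhistogram's source.
    import numpy as np
    import xarray as xr
    from xhistogram.xarray import histogram

    # Inputs must be named DataArrays; the name sets the output bin
    # dimension ("T" + bin_dim_suffix -> "T_bin") and the output name.
    da = xr.DataArray(np.random.randn(5, 20), dims=["time", "x"], name="T")
    edges = np.linspace(-4, 4, 9)  # 8 bins

    # Histogram over "x" only: "time" is kept as a bystander dimension and
    # a new "T_bin" dimension holding the bin midpoints is appended.
    h = histogram(da, bins=[edges], dim=["x"])
    assert h.dims == ("time", "T_bin")
    assert h.name == "histogram_T"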