├── .coveragerc ├── .gitattributes ├── .github └── workflows │ ├── cancel.yml │ ├── linting.yml │ ├── pythonpublish.yaml │ ├── tests.yml │ └── upstream.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CITATION.cff ├── CODE_OF_CONDUCT.md ├── LICENSE ├── MANIFEST.in ├── README.rst ├── ci ├── environment-3.7.yml ├── environment-3.8.yml └── environment-3.9.yml ├── doc ├── Makefile ├── api.rst ├── conf.py ├── contributing.rst ├── environment.yml ├── index.rst ├── installation.rst ├── make.bat └── tutorial.ipynb ├── readthedocs.yml ├── setup.cfg ├── setup.py ├── versioneer.py └── xhistogram ├── __init__.py ├── _version.py ├── core.py ├── test ├── __init__.py ├── fixtures.py ├── test_chunking.py ├── test_chunking_hypotheses.py ├── test_core.py └── test_xarray.py └── xarray.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | 4 | [report] 5 | exclude_lines = 6 | if self.debug: 7 | pragma: no cover 8 | raise NotImplementedError 9 | if __name__ == .__main__.: 10 | ignore_errors = True 11 | omit = xhistogram/test/* 12 | xhistogram/__init__.py 13 | xhistogram/_version.py 14 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | xhistogram/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.github/workflows/cancel.yml: -------------------------------------------------------------------------------- 1 | name: Cancel 2 | 3 | on: 4 | workflow_run: 5 | workflows: ["Tests", "Upstream", "Linting"] 6 | types: 7 | - requested 8 | 9 | jobs: 10 | cancel: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: styfle/cancel-workflow-action@0.8.0 14 | with: 15 | workflow_id: ${{ github.event.workflow.id }} -------------------------------------------------------------------------------- /.github/workflows/linting.yml: -------------------------------------------------------------------------------- 1 | name: Linting 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | checks: 7 | name: pre-commit hooks 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v2 11 | - uses: actions/setup-python@v2 12 | - uses: pre-commit/action@v2.0.0 13 | -------------------------------------------------------------------------------- /.github/workflows/pythonpublish.yaml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | deploy: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v1 12 | - name: Set up Python 13 | uses: actions/setup-python@v1 14 | with: 15 | python-version: '3.x' 16 | - name: Install dependencies 17 | run: | 18 | python -m pip install --upgrade pip 19 | pip install setuptools setuptools-scm wheel twine 20 | - name: Build and publish 21 | env: 22 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 23 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 24 | run: | 25 | python setup.py sdist bdist_wheel 26 | python setup.py --version 27 | twine check dist/* 28 | twine upload dist/* 29 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: [push, pull_request, workflow_dispatch] 4 | 5 | jobs: 6 | test: 7 | runs-on: ${{ matrix.os }} 8 | strategy: 9 |
fail-fast: false 10 | matrix: 11 | os: [windows-latest, ubuntu-latest, macos-latest] 12 | python-version: [3.7, 3.8, 3.9] 13 | 14 | steps: 15 | - name: Checkout source 16 | uses: actions/checkout@v2 17 | 18 | - name: Setup Conda Environment 19 | uses: conda-incubator/setup-miniconda@v2 20 | with: 21 | miniconda-version: "latest" 22 | python-version: ${{ matrix.python-version }} 23 | environment-file: ci/environment-${{ matrix.python-version }}.yml 24 | activate-environment: xhistogram_test_env 25 | auto-activate-base: false 26 | 27 | - name: Install 28 | shell: bash -l {0} 29 | run: python -m pip install -e . 30 | 31 | - name: List installed packages 32 | shell: bash -l {0} 33 | run: conda list 34 | 35 | - name: Run tests 36 | shell: bash -l {0} 37 | run: python -m pytest --cov=xhistogram --cov-report=xml xhistogram 38 | 39 | - name: Coverage 40 | uses: codecov/codecov-action@v1 41 | -------------------------------------------------------------------------------- /.github/workflows/upstream.yml: -------------------------------------------------------------------------------- 1 | name: Upstream 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | test: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: [3.9] 11 | 12 | steps: 13 | - name: Checkout source 14 | uses: actions/checkout@v2 15 | 16 | - name: Setup Conda Environment 17 | uses: conda-incubator/setup-miniconda@v2 18 | with: 19 | miniconda-version: "latest" 20 | python-version: ${{ matrix.python-version }} 21 | environment-file: ci/environment-${{ matrix.python-version }}.yml 22 | activate-environment: xhistogram_test_env 23 | auto-activate-base: false 24 | 25 | - name: Install upstream packages 26 | shell: bash -l {0} 27 | run: | 28 | python -m pip install git+https://github.com/dask/dask.git 29 | python -m pip install git+https://github.com/pydata/xarray.git 30 | 31 | - name: Install 32 | shell: bash -l {0} 33 | run: python -m pip install -e . 34 | 35 | - name: List installed packages 36 | shell: bash -l {0} 37 | run: conda list 38 | 39 | - name: Run tests 40 | shell: bash -l {0} 41 | run: python -m pytest --cov=xhistogram --cov-report=xml xhistogram 42 | 43 | - name: Coverage 44 | uses: codecov/codecov-action@v1 45 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | doc/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | # notebook 60 | */.ipynb_checkpoints/* 61 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: 22.3.0 4 | hooks: 5 | - id: black 6 | language_version: python3 7 | exclude: versioneer.py 8 | - repo: https://gitlab.com/pycqa/flake8 9 | rev: 3.9.0 10 | hooks: 11 | - id: flake8 12 | language_version: python3 13 | - repo: https://github.com/pycqa/doc8 14 | rev: 0.8.1 15 | hooks: 16 | - id: doc8 17 | files: "^doc/.*.rst" 18 | - repo: https://github.com/pre-commit/mirrors-mypy 19 | rev: v0.812 20 | hooks: 21 | - id: mypy 22 | files: "^xhistogram/" 23 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # This CITATION.cff file was generated with cffinit. 2 | # Visit https://bit.ly/cffinit to generate yours today! 3 | 4 | cff-version: 1.2.0 5 | title: xhistogram 6 | message: >- 7 | "If you use this software, please cite it as 8 | below." 9 | type: software 10 | authors: 11 | - family-names: "Abernathey" 12 | given-names: "Ryan" 13 | orcid: "https://orcid.org/0000-0001-5999-4917" 14 | - family-names: "Squire" 15 | given-names: "Dougie" 16 | orcid: "0000-0003-3271-6874" 17 | - family-names: "Nicholas" 18 | given-names: "Thomas" 19 | orcid: "https://orcid.org/0000-0002-2176-0530" 20 | - family-names: "Bourbeau" 21 | given-names: "James" 22 | orcid: "0000-0003-2164-7789" 23 | - family-names: "Joseph" 24 | given-names: "Gabe" 25 | - family-names: "Spring" 26 | given-names: "Aaron" 27 | orcid: "0000-0003-0216-2241" 28 | - family-names: "Bell" 29 | given-names: "Ray" 30 | orcid: "https://orcid.org/0000-0003-2623-0587" 31 | - family-names: "Bailey" 32 | given-names: "Shanice" 33 | orcid: "0000-0002-8176-9465" 34 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at ryan.abernathey@gmail.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Ryan Abernathey 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include versioneer.py 3 | include xhistogram/_version.py 4 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | xhistogram: Fast, flexible, label-aware histograms for numpy and xarray 2 | ======================================================================= 3 | 4 | |pypi| |conda forge| |tests| |linting| |codecov| |docs| |license| |DOI| 5 | 6 | For more information, including installation instructions, read the full 7 | `xhistogram documentation`_. 8 | 9 | .. _Pangeo: http://pangeo-data.github.io 10 | .. _dask: http://dask.pydata.org 11 | .. _xarray: http://xarray.pydata.org 12 | .. _Arakawa Grids: https://en.wikipedia.org/wiki/Arakawa_grid 13 | .. _xhistogram documentation: https://xhistogram.readthedocs.io/ 14 | 15 | .. |conda forge| image:: https://anaconda.org/conda-forge/xhistogram/badges/version.svg 16 | :target: https://anaconda.org/conda-forge/xhistogram 17 | .. |DOI| image:: https://zenodo.org/badge/178940893.svg 18 | :target: https://zenodo.org/badge/latestdoi/178940893 19 | .. |tests| image:: https://github.com/xgcm/xhistogram/actions/workflows/tests.yml/badge.svg 20 | :target: https://github.com/xgcm/xhistogram/actions/workflows/tests.yml 21 | :alt: tests status 22 | .. |codecov| image:: https://codecov.io/github/xgcm/xhistogram/coverage.svg?branch=master 23 | :target: https://codecov.io/github/xgcm/xhistogram?branch=master 24 | :alt: code coverage 25 | ..
|pypi| image:: https://badge.fury.io/py/xhistogram.svg 26 | :target: https://badge.fury.io/py/xhistogram 27 | :alt: pypi package 28 | .. |docs| image:: http://readthedocs.org/projects/xhistogram/badge/?version=latest 29 | :target: http://xhistogram.readthedocs.org/en/stable/?badge=latest 30 | :alt: documentation status 31 | .. |linting| image:: https://github.com/xgcm/xhistogram/actions/workflows/linting.yml/badge.svg 32 | :target: https://github.com/xgcm/xhistogram/actions/workflows/linting.yml 33 | :alt: linting status 34 | .. |license| image:: https://img.shields.io/github/license/mashape/apistatus.svg 35 | :target: https://github.com/xgcm/xhistogram 36 | :alt: license 37 | -------------------------------------------------------------------------------- /ci/environment-3.7.yml: -------------------------------------------------------------------------------- 1 | name: xhistogram_test_env 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.7 6 | - xarray 7 | - dask-core 8 | - numpy=1.17 9 | - pytest 10 | - hypothesis 11 | - pip 12 | - pip: 13 | - codecov 14 | - pytest-cov 15 | -------------------------------------------------------------------------------- /ci/environment-3.8.yml: -------------------------------------------------------------------------------- 1 | name: xhistogram_test_env 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.8 6 | - xarray 7 | - dask-core 8 | - numpy=1.18 9 | - pytest 10 | - hypothesis 11 | - pip 12 | - pip: 13 | - codecov 14 | - pytest-cov 15 | -------------------------------------------------------------------------------- /ci/environment-3.9.yml: -------------------------------------------------------------------------------- 1 | name: xhistogram_test_env 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.9 6 | - xarray 7 | - dask-core 8 | - numpy 9 | - pytest 10 | - hypothesis 11 | - pip 12 | - pip: 13 | - codecov 14 | - pytest-cov 15 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
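# Note: the variables above can be overridden on the command line. For example, a hypothetical invocation that turns Sphinx warnings into errors (sphinx-build's -W flag) would be: make html SPHINXOPTS="-W"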
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make <target>' where <target> is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/xgcm.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/xgcm.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/xgcm" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/xgcm" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /doc/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | ### 3 | 4 | Core Module 5 | =========== 6 | 7 | .. automodule:: xhistogram.core 8 | :members: 9 | 10 | Xarray Module 11 | ============= 12 | 13 | .. 
automodule:: xhistogram.xarray 14 | :members: 15 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # xhistogram documentation build configuration file, created by 4 | # sphinx-quickstart on Sat Aug 29 00:18:20 2015. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | import xhistogram 18 | 19 | # If extensions (or modules to document with autodoc) are in another directory, 20 | # add these directories to sys.path here. If the directory is relative to the 21 | # documentation root, use os.path.abspath to make it absolute, like shown here. 22 | # sys.path.insert(0, os.path.abspath('.')) 23 | # sys.path.insert(os.path.abspath('..')) 24 | 25 | print(f"python exec: {sys.executable}") 26 | print(f"sys.path: {sys.path}") 27 | 28 | 29 | # -- General configuration ------------------------------------------------ 30 | 31 | # If your documentation needs a minimal Sphinx version, state it here. 32 | # needs_sphinx = '1.0' 33 | 34 | # Add any Sphinx extension module names here, as strings. They can be 35 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 36 | # ones. 37 | extensions = [ 38 | "sphinx.ext.mathjax", 39 | "sphinx.ext.autodoc", 40 | "sphinx.ext.autosummary", 41 | "sphinx.ext.extlinks", 42 | "sphinx.ext.viewcode", 43 | "sphinx.ext.intersphinx", 44 | "numpydoc", 45 | "nbsphinx", 46 | "IPython.sphinxext.ipython_directive", 47 | "IPython.sphinxext.ipython_console_highlighting", 48 | ] 49 | 50 | # never execute notebooks: avoids lots of expensive imports on rtd 51 | # https://nbsphinx.readthedocs.io/en/0.2.14/never-execute.html 52 | # nbsphinx_execute = 'never' 53 | 54 | # give cells 10 minutes to run before timeout 55 | nbsphinx_timeout = 600 56 | 57 | 58 | # http://stackoverflow.com/questions/5599254/how-to-use-sphinxs-autodoc-to-document-a-classs-init-self-method 59 | def skip(app, what, name, obj, skip, options): 60 | if name == "__init__": 61 | return False 62 | return skip 63 | 64 | 65 | def setup(app): 66 | app.connect("autodoc-skip-member", skip) 67 | 68 | 69 | # link to github issues 70 | extlinks = {"issue": ("https://github.com/xgcm/xhistogram/issues/%s", "GH")} 71 | 72 | autosummary_generate = True 73 | numpydoc_class_members_toctree = True 74 | numpydoc_show_class_members = False 75 | 76 | # Add any paths that contain templates here, relative to this directory. 77 | templates_path = ["_templates"] 78 | 79 | # The suffix of source filenames. 80 | source_suffix = ".rst" 81 | 82 | # The encoding of source files. 83 | # source_encoding = 'utf-8-sig' 84 | 85 | # The master toctree document. 86 | master_doc = "index" 87 | 88 | # General information about the project. 89 | project = "xhistogram" 90 | copyright = "2016-2019, xhistogram developers" 91 | 92 | # The version info for the project you're documenting, acts as replacement for 93 | # |version| and |release|, also used in various other places throughout the 94 | # built documents. 95 | # 96 | # The full version, including alpha/beta/rc tags. 97 | release = xhistogram.__version__ 98 | # The short X.Y version. 
99 | version = ".".join(release.split(".")[:2]) 100 | 101 | # The language for content autogenerated by Sphinx. Refer to documentation 102 | # for a list of supported languages. 103 | # language = None 104 | 105 | # There are two options for replacing |today|: either, you set today to some 106 | # non-false value, then it is used: 107 | # today = '' 108 | # Else, today_fmt is used as the format for a strftime call. 109 | # today_fmt = '%B %d, %Y' 110 | 111 | # List of patterns, relative to source directory, that match files and 112 | # directories to ignore when looking for source files. 113 | exclude_patterns = ["_build"] 114 | 115 | # The reST default role (used for this markup: `text`) to use for all 116 | # documents. 117 | # default_role = None 118 | 119 | # If true, '()' will be appended to :func: etc. cross-reference text. 120 | # add_function_parentheses = True 121 | 122 | # If true, the current module name will be prepended to all description 123 | # unit titles (such as .. function::). 124 | # add_module_names = True 125 | 126 | # If true, sectionauthor and moduleauthor directives will be shown in the 127 | # output. They are ignored by default. 128 | # show_authors = False 129 | 130 | # The name of the Pygments (syntax highlighting) style to use. 131 | pygments_style = "sphinx" 132 | 133 | # A list of ignored prefixes for module index sorting. 134 | # modindex_common_prefix = [] 135 | 136 | # If true, keep warnings as "system message" paragraphs in the built documents. 137 | # keep_warnings = False 138 | 139 | 140 | # -- Options for HTML output ---------------------------------------------- 141 | 142 | on_rtd = os.environ.get("READTHEDOCS", None) == "True" 143 | 144 | if not on_rtd: # only import and set the theme if we're building docs locally 145 | import sphinx_rtd_theme 146 | 147 | html_theme = "sphinx_rtd_theme" 148 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 149 | 150 | # The theme to use for HTML and HTML Help pages. See the documentation for 151 | # a list of builtin themes. 152 | # html_theme = 'default' 153 | html_theme = "sphinx_rtd_theme" 154 | 155 | # Theme options are theme-specific and customize the look and feel of a theme 156 | # further. For a list of options available for each theme, see the 157 | # documentation. 158 | # html_theme_options = {} 159 | 160 | # Add any paths that contain custom themes here, relative to this directory. 161 | # html_theme_path = [] 162 | 163 | # The name for this set of Sphinx documents. If None, it defaults to 164 | # "<project> v<release> documentation". 165 | # html_title = None 166 | 167 | # A shorter title for the navigation bar. Default is the same as html_title. 168 | # html_short_title = None 169 | 170 | # The name of an image file (relative to this directory) to place at the top 171 | # of the sidebar. 172 | # html_logo = None 173 | 174 | # The name of an image file (within the static path) to use as favicon of the 175 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 176 | # pixels large. 177 | # html_favicon = None 178 | 179 | # Add any paths that contain custom static files (such as style sheets) here, 180 | # relative to this directory. They are copied after the builtin static files, 181 | # so a file named "default.css" will overwrite the builtin "default.css". 182 | html_static_path = ["_static"] 183 | 184 | # Add any extra paths that contain custom files (such as robots.txt or 185 | # .htaccess) here, relative to this directory.
These files are copied 186 | # directly to the root of the documentation. 187 | # html_extra_path = [] 188 | 189 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 190 | # using the given strftime format. 191 | # html_last_updated_fmt = '%b %d, %Y' 192 | 193 | # If true, SmartyPants will be used to convert quotes and dashes to 194 | # typographically correct entities. 195 | # html_use_smartypants = True 196 | 197 | # Custom sidebar templates, maps document names to template names. 198 | # html_sidebars = {} 199 | 200 | # Additional templates that should be rendered to pages, maps page names to 201 | # template names. 202 | # html_additional_pages = {} 203 | 204 | # If false, no module index is generated. 205 | # html_domain_indices = True 206 | 207 | # If false, no index is generated. 208 | # html_use_index = True 209 | 210 | # If true, the index is split into individual pages for each letter. 211 | # html_split_index = False 212 | 213 | # If true, links to the reST sources are added to the pages. 214 | # html_show_sourcelink = True 215 | 216 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 217 | # html_show_sphinx = True 218 | 219 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 220 | # html_show_copyright = True 221 | 222 | # If true, an OpenSearch description file will be output, and all pages will 223 | # contain a tag referring to it. The value of this option must be the 224 | # base URL from which the finished HTML is served. 225 | # html_use_opensearch = '' 226 | 227 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 228 | # html_file_suffix = None 229 | 230 | # Output file base name for HTML help builder. 231 | htmlhelp_basename = "xhistogramdoc" 232 | 233 | 234 | # -- Options for LaTeX output --------------------------------------------- 235 | 236 | # latex_elements = { 237 | # The paper size ('letterpaper' or 'a4paper'). 238 | # 'papersize': 'letterpaper', 239 | # The font size ('10pt', '11pt' or '12pt'). 240 | # 'pointsize': '10pt', 241 | # Additional stuff for the LaTeX preamble. 242 | # 'preamble': '', 243 | # } 244 | 245 | # Grouping the document tree into LaTeX files. List of tuples 246 | # (source start file, target name, title, 247 | # author, documentclass [howto, manual, or own class]). 248 | latex_documents = [ 249 | ( 250 | "index", 251 | "xhistogram.tex", 252 | "xhistogram Documentation", 253 | "xhistogram developers", 254 | "manual", 255 | ), 256 | ] 257 | 258 | # The name of an image file (relative to this directory) to place at the top of 259 | # the title page. 260 | # latex_logo = None 261 | 262 | # For "manual" documents, if this is true, then toplevel headings are parts, 263 | # not chapters. 264 | # latex_use_parts = False 265 | 266 | # If true, show page references after internal links. 267 | # latex_show_pagerefs = False 268 | 269 | # If true, show URL addresses after external links. 270 | # latex_show_urls = False 271 | 272 | # Documents to append as an appendix to all manuals. 273 | # latex_appendices = [] 274 | 275 | # If false, no module index is generated. 276 | # latex_domain_indices = True 277 | 278 | 279 | # -- Options for manual page output --------------------------------------- 280 | 281 | # One entry per manual page. List of tuples 282 | # (source start file, name, description, authors, manual section). 
283 | man_pages = [ 284 | ("index", "xhistogram", "xhistogram Documentation", ["xhistogram developers"], 1) 285 | ] 286 | 287 | # If true, show URL addresses after external links. 288 | # man_show_urls = False 289 | 290 | 291 | # -- Options for Texinfo output ------------------------------------------- 292 | 293 | # Grouping the document tree into Texinfo files. List of tuples 294 | # (source start file, target name, title, author, 295 | # dir menu entry, description, category) 296 | texinfo_documents = [ 297 | ( 298 | "index", 299 | "xhistogram", 300 | "xhistogram Documentation", 301 | "xhistogram developers", 302 | "xhistogram", 303 | "Fast, flexible, label-aware histograms for numpy and xarray.", 304 | "Miscellaneous", 305 | ), 306 | ] 307 | 308 | # Documents to append as an appendix to all manuals. 309 | # texinfo_appendices = [] 310 | 311 | # If false, no module index is generated. 312 | # texinfo_domain_indices = True 313 | 314 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 315 | # texinfo_show_urls = 'footnote' 316 | 317 | # If true, do not generate a @detailmenu in the "Top" node's menu. 318 | # texinfo_no_detailmenu = False 319 | 320 | 321 | # Example configuration for intersphinx: refer to the Python standard library. 322 | intersphinx_mapping = { 323 | "python": ("https://docs.python.org/3/", None), 324 | "xarray": ("http://xarray.pydata.org/en/stable/", None), 325 | } 326 | -------------------------------------------------------------------------------- /doc/contributing.rst: -------------------------------------------------------------------------------- 1 | Contributor Guide 2 | ================= 3 | 4 | This package is in very early stages. Lots of work is needed. 5 | 6 | You can help out just by using ``xhistogram`` and reporting 7 | `issues <https://github.com/xgcm/xhistogram/issues>`__. 8 | 9 | The following sections cover some general guidelines for maintainers and 10 | contributors wanting to help develop ``xhistogram``. 11 | 12 | 13 | Feature requests, suggestions and bug reports 14 | --------------------------------------------- 15 | 16 | We are eager to hear about any bugs you have found, new features you 17 | would like to see and any other suggestions you may have. Please feel 18 | free to submit these as `issues <https://github.com/xgcm/xhistogram/issues>`__. 19 | 20 | When suggesting features, please make sure to explain in detail how 21 | the proposed feature should work and to keep the scope as narrow as 22 | possible. This makes features easier to implement in small PRs. 23 | 24 | When reporting bugs, please include: 25 | 26 | * Any details about your local setup that might be helpful in 27 | troubleshooting, specifically the Python interpreter version, installed 28 | libraries, and ``xhistogram`` version. 29 | * Detailed steps to reproduce the bug, ideally a `Minimal, Complete and 30 | Verifiable Example <https://stackoverflow.com/help/mcve>`__. 31 | * If possible, a demonstration test that currently fails but should pass 32 | when the bug is fixed. 33 | 34 | 35 | Write documentation 36 | ------------------- 37 | Adding documentation is always helpful. This may include: 38 | 39 | * More complementary documentation. Have you perhaps found something unclear? 40 | * Docstrings. 41 | * Example notebooks of ``xhistogram`` being used in real analyses. 42 | 43 | The ``xhistogram`` documentation is written in reStructuredText. You 44 | can follow the conventions in already written documents. Some helpful guides 45 | can be found 46 | `here `__ and 47 | `here `__.
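For example, a minimal new documentation page might be laid out as follows (a sketch only; the page title and contents here are hypothetical)::

    My New Page
    ===========

    A short description of the topic, followed by a code example::

        from xhistogram.xarray import histogram

New pages need to be listed in the ``toctree`` in ``doc/index.rst`` to appear in the built documentation.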
48 | 49 | When writing and editing documentation, it can be useful to see the resulting 50 | build without having to push to Github. You can build the documentation locally 51 | by running:: 52 | 53 | $ # Install the packages required to build the docs in a conda environment 54 | $ conda env create -f doc/environment.yml 55 | $ conda activate xhistogram_doc_env 56 | $ # Install the latest xhistogram 57 | $ pip install --no-deps -e . 58 | $ cd doc/ 59 | $ make html 60 | 61 | This will build the documentation locally in ``doc/_build/``. You can then open 62 | ``_build/html/index.html`` in your web browser to view the documentation. For 63 | example, if you have ``xdg-open`` installed:: 64 | 65 | $ xdg-open _build/html/index.html 66 | 67 | To lint the reStructuredText documentation files run:: 68 | 69 | $ doc8 doc/*.rst 70 | 71 | 72 | Preparing Pull Requests 73 | ----------------------- 74 | #. Fork the 75 | `xhistogram GitHub repository `__. It's 76 | fine to use ``xhistogram`` as your fork repository name because it will live 77 | under your username. 78 | 79 | #. Clone your fork locally, connect your repository to the upstream (main 80 | project), and create a branch to work on:: 81 | 82 | $ git clone git@github.com:YOUR_GITHUB_USERNAME/xhistogram.git 83 | $ cd xhistogram 84 | $ git remote add upstream git@github.com:xgcm/xhistogram.git 85 | $ git checkout -b your-bugfix-feature-branch-name master 86 | 87 | If you need some help with Git, follow 88 | `this quick start guide `__ 89 | 90 | #. Install dependencies into a new conda environment:: 91 | 92 | $ conda env create -f ci/environment-3.9.yml 93 | $ conda activate xhistogram_test_env 94 | 95 | #. Install xhistogram using the editable flag (meaning any changes you make to 96 | the package will be reflected directly in your environment):: 97 | 98 | $ pip install --no-deps -e . 99 | 100 | #. Start making your edits. Please try to type annotate your additions as 101 | much as possible. Adding type annotations to existing unannotated code is 102 | also very welcome. You can read about Python typing 103 | `here `__. 104 | 105 | #. Break your edits up into reasonably sized commits:: 106 | 107 | $ git commit -a -m "" 108 | $ git push -u 109 | 110 | It can be useful to manually run `pre-commit `_ as you 111 | make your edits. ``pre-commit`` will run checks on the format and typing of 112 | your code and will show you where you need to make changes. This will mean 113 | your code is more likely to pass the CI checks when you push it:: 114 | 115 | $ pip install pre_commit # you only need to do this once 116 | $ pre-commit run --all-files 117 | 118 | #. Run the tests (including those you add to test your edits!):: 119 | 120 | $ pytest xhistogram 121 | 122 | You can also test that your contribution and tests increased the test coverage:: 123 | 124 | $ coverage run --source xhistogram -m py.test 125 | $ coverage report 126 | 127 | #. Add a new entry describing your contribution to the :ref:`release-history` 128 | in ``doc/contributing.rst``. Please try to follow the format of the existing 129 | entries. 130 | 131 | #. Submit a pull request through the GitHub `website `__. 132 | 133 | Note that you can create the Pull Request while you're working on your PR. 134 | The PR will update as you add more commits. ``xhistogram`` developers and 135 | contributors can then review your code and offer suggestions. 136 | 137 | 138 | .. 
_release-history: 139 | 140 | Release History 141 | --------------- 142 | 143 | v0.3.2 144 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 145 | 146 | - Fix bug producing TypeError when `weights` is provided with 147 | `keep_coords=True` :issue:`78`. By 148 | `Dougie Squire `_. 149 | - Raise TypeError when `weights` is a dask array and bin edges are 150 | not explicitly provided :issue:`12`. By 151 | `Dougie Squire `_. 152 | 153 | v0.3.1 154 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 155 | 156 | - Add DOI badge and CITATION.cff. By 157 | `Julius Busecke `_. 158 | 159 | v0.3.0 160 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 161 | 162 | - Add support for histograms over non-float dtypes (e.g. datetime 163 | objects) :issue:`25`. By 164 | `Dougie Squire `_. 165 | - Refactor histogram calculation to use dask.array.blockwise 166 | when input arguments are dask arrays, resulting in significant 167 | performance improvements :issue:`49`. By 168 | `Ryan Abernathey `_, 169 | `Tom Nicholas `_ and 170 | `Gabe Joseph `_. 171 | - Fixed bug with density calculation when NaNs are present :issue:`51`. 172 | By `Dougie Squire `_. 173 | - Implemented various options for users for providing bins to 174 | xhistogram that mimic the numpy histogram API. This included 175 | adding a range argument to the xhistogram API :issue:`13`. 176 | By `Dougie Squire `_. 177 | - Added a function to check if the object passed to xhistogram is an 178 | xarray.DataArray and, if not, throw an error :issue:`14`. 179 | By `Yang Yunyi `_. 180 | 181 | v0.2.0 182 | ~~~~~~ 183 | 184 | - Added FutureWarning for upcoming changes to core API :issue:`13`. 185 | By `Dougie Squire `_. 186 | - Added documentation on how to deal with NaNs in weights :issue:`26`. 187 | By `Shanice Bailey `_. 188 | - Move CI to GitHub Actions :issue:`32`. 189 | By `James Bourbeau `_. 190 | - Add documentation for contributors. 191 | By `Dougie Squire `_. 192 | - Add type checking with mypy :issue:`32`. 193 | By `Dougie Squire `_. 194 | 195 | v0.1.3 196 | ~~~~~~ 197 | 198 | - Update dependencies to exclude incompatible dask version :issue:`27`. 199 | By `Ryan Abernathey `_. 200 | 201 | v0.1.2 202 | ~~~~~~ 203 | 204 | - Aligned definition of ``bins`` with ``numpy.histogram`` :issue:`18`. 205 | By `Dougie Squire `_. 206 | 207 | v0.1.1 208 | ~~~~~~ 209 | 210 | Minor bugfix release 211 | 212 | - Improved documentation examples. 213 | By `Dhruv Balwada `_. 214 | - Fixed issue :issue:`5` related to incorrect dimension order 215 | and dropping of dimension coordinates. 216 | By `Ryan Abernathey `_.
217 | 218 | v0.1 219 | ~~~~ 220 | 221 | First release 222 | -------------------------------------------------------------------------------- /doc/environment.yml: -------------------------------------------------------------------------------- 1 | name: xhistogram_doc_env 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.9 6 | - xarray 7 | - netcdf4 8 | - numpy>=1.16,!=1.20.0,!=1.20.1,!=1.20.2,!=1.20.3 9 | - pytest 10 | - numpydoc 11 | - sphinx 12 | - ipython 13 | - matplotlib 14 | - jupyter_client 15 | - ipykernel 16 | - pandoc 17 | - pip 18 | - doc8 19 | - pip: 20 | - nbsphinx 21 | - sphinx_rtd_theme 22 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | xhistogram: Fast, flexible, label-aware histograms for numpy and xarray 2 | ======================================================================= 3 | 4 | Histograms (a.k.a "binning") are much more than just a visualization tool. 5 | They are the foundation of a wide range of scientific analyses including 6 | [joint] probability distributions and coordinate transformations. 7 | Xhistogram makes it easier to calculate flexible, complex histograms with 8 | multi-dimensional data. It integrates (optionally) with Dask, in order to 9 | scale up to very large datasets, and with Xarray, in order to consume and 10 | produce labelled, annotated data structures. It is useful for a wide range of 11 | scientific tasks. 12 | 13 | 14 | Why a new histogram package? 15 | ---------------------------- 16 | 17 | The main problem with the standard ``histogram`` functions in numpy and 18 | dask is that they automatically act over the entire input array (i.e. they 19 | "flatten" the data). Xhistogram allows you to choose which axes / dimensions 20 | you want to preserve and which you want to flatten. It also allows you to 21 | combine N arbitrary inputs to produce N-dimensional histograms. 22 | A good place to start is the :doc:`tutorial`. 23 | 24 | Contents 25 | -------- 26 | 27 | .. toctree:: 28 | :maxdepth: 1 29 | 30 | installation 31 | tutorial 32 | api 33 | contributing 34 | -------------------------------------------------------------------------------- /doc/installation.rst: -------------------------------------------------------------------------------- 1 | 2 | Installation 3 | ------------ 4 | 5 | Requirements 6 | ^^^^^^^^^^^^ 7 | 8 | xhistogram is compatible with python 3. It requires numpy and, optionally, 9 | xarray. 10 | 11 | Installation from Conda Forge 12 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 13 | 14 | The easiest way to install xhistogram along with its dependencies is via conda 15 | forge:: 16 | 17 | conda install -c conda-forge xhistogram 18 | 19 | 20 | Installation from Pip 21 | ^^^^^^^^^^^^^^^^^^^^^ 22 | 23 | An alternative is to use pip:: 24 | 25 | pip install xhistogram 26 | 27 | This will install the latest release from 28 | `pypi <https://pypi.org/project/xhistogram/>`_. 29 | 30 | Installation from GitHub 31 | ^^^^^^^^^^^^^^^^^^^^^^^^ 32 | 33 | xhistogram is under active development. To obtain the latest development version, 34 | you may clone the `source repository <https://github.com/xgcm/xhistogram>`_ 35 | and install it:: 36 | 37 | git clone https://github.com/xgcm/xhistogram.git 38 | cd xhistogram 39 | python setup.py install 40 | 41 | or simply:: 42 | 43 | pip install git+https://github.com/xgcm/xhistogram.git 44 | 45 | Users are encouraged to `fork `_ 46 | xhistogram and submit issues_ and `pull requests`_. 47 |
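To quickly check that an installation works, you can compute a small histogram with the xarray interface (a minimal sketch mirroring the toy example in the tutorial; the data here are random toy values)::

    import numpy as np
    import xarray as xr
    from xhistogram.xarray import histogram

    # all inputs need a name
    da = xr.DataArray(np.random.randn(100), dims=["x"], name="foo")

    # histogram of da over 19 bins spanning [-4, 4]
    h = histogram(da, bins=[np.linspace(-4, 4, 20)])
    print(h)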
48 | .. _dask: http://dask.pydata.org 49 | .. _xarray: http://xarray.pydata.org 50 | .. _issues: https://github.com/xgcm/xhistogram/issues 51 | .. _`pull requests`: https://github.com/xgcm/xhistogram/pulls 52 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^<target^>` where ^<target^> is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files.
92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\xgcm.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\xgcm.ghc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 
212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /doc/tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Xhistogram Tutorial\n", 8 | "\n", 9 | "Histograms are the foundation of many forms of data analysis.\n", 10 | "The goal of xhistogram is to make it easy to calculate weighted histograms in multiple dimensions over n-dimensional arrays, with control over the axes.\n", 11 | "Xhistogram builds on top of xarray, for automatic coordinates and labels, and dask, for parallel scalability." 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "## Toy Data\n", 19 | "\n", 20 | "We start by showing an example with toy data. First we use xarray to create some random, normally distributed data.\n", 21 | "\n", 22 | "### 1D Histogram" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "collapsed": false, 30 | "jupyter": { 31 | "outputs_hidden": false 32 | } 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "import xarray as xr\n", 37 | "import numpy as np\n", 38 | "%matplotlib inline\n", 39 | "\n", 40 | "nt, nx = 100, 30\n", 41 | "da = xr.DataArray(np.random.randn(nt, nx), dims=['time', 'x'],\n", 42 | " name='foo') # all inputs need a name\n", 43 | "display(da)\n", 44 | "da.plot()" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "By default xhistogram operates on all dimensions of an array, just like numpy. However, it operates on xarray DataArrays, taking labels into account." 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "collapsed": false, 59 | "jupyter": { 60 | "outputs_hidden": false 61 | } 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "from xhistogram.xarray import histogram\n", 66 | "\n", 67 | "bins = np.linspace(-4, 4, 20)\n", 68 | "h = histogram(da, bins=[bins])\n", 69 | "display(h)\n", 70 | "h.plot()" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "**TODO:** \n", 78 | "- Bins needs to be a list; this is annoying, would be good to accept single items\n", 79 | "- The `foo_bin` coordinate is the estimated bin center, not the bounds. 
80 |     "\n",
81 |     "Both of the above need GitHub issues.\n",
82 |     "\n",
83 |     "### Histogram over a single axis"
84 |    ]
85 |   },
86 |   {
87 |    "cell_type": "code",
88 |    "execution_count": null,
89 |    "metadata": {
90 |     "collapsed": false,
91 |     "jupyter": {
92 |      "outputs_hidden": false
93 |     }
94 |    },
95 |    "outputs": [],
96 |    "source": [
97 |     "h_x = histogram(da, bins=[bins], dim=['time'])\n",
98 |     "h_x.plot()"
99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "markdown",
103 |    "metadata": {},
104 |    "source": [
105 |     "**TODO:**\n",
106 |     " - Relax / explain the requirement that `dim` is always a list"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": null,
112 |    "metadata": {
113 |     "collapsed": false,
114 |     "jupyter": {
115 |      "outputs_hidden": false
116 |     }
117 |    },
118 |    "outputs": [],
119 |    "source": [
120 |     "h_x.mean(dim='x').plot()"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "markdown",
125 |    "metadata": {},
126 |    "source": [
127 |     "### Weighted Histogram\n",
128 |     "\n",
129 |     "Weights can be the same shape as the input:"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": null,
135 |    "metadata": {
136 |     "collapsed": false,
137 |     "jupyter": {
138 |      "outputs_hidden": false
139 |     }
140 |    },
141 |    "outputs": [],
142 |    "source": [
143 |     "weights = 0.4 * xr.ones_like(da)\n",
144 |     "histogram(da, bins=[bins], weights=weights)"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "markdown",
149 |    "metadata": {},
150 |    "source": [
151 |     "Or they can rely on xarray broadcasting:"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": null,
157 |    "metadata": {
158 |     "collapsed": false,
159 |     "jupyter": {
160 |      "outputs_hidden": false
161 |     }
162 |    },
163 |    "outputs": [],
164 |    "source": [
165 |     "weights = 0.2 * xr.ones_like(da.x)\n",
166 |     "histogram(da, bins=[bins], weights=weights)"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "markdown",
171 |    "metadata": {},
172 |    "source": [
173 |     "## 2D Histogram\n",
174 |     "\n",
175 |     "Now let's say we have multiple input arrays. We can calculate their joint distribution:"
176 |    ]
177 |   },
178 |   {
179 |    "cell_type": "code",
180 |    "execution_count": null,
181 |    "metadata": {
182 |     "collapsed": false,
183 |     "jupyter": {
184 |      "outputs_hidden": false
185 |     }
186 |    },
187 |    "outputs": [],
188 |    "source": [
189 |     "db = xr.DataArray(np.random.randn(nt, nx), dims=['time', 'x'],\n",
190 |     "                  name='bar') - 2\n",
191 |     "\n",
192 |     "histogram(da, db, bins=[bins, bins]).plot()"
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "markdown",
197 |    "metadata": {},
198 |    "source": [
199 |     "## Real Data Example\n",
200 |     "\n",
201 |     "### Ocean Volume Census in TS Space\n",
202 |     "\n",
203 |     "Here we show how to use xhistogram to do a volume census of the ocean in Temperature-Salinity space."
204 |    ]
205 |   },
206 |   {
207 |    "cell_type": "markdown",
208 |    "metadata": {},
209 |    "source": [
210 |     "First we open the World Ocean Atlas dataset from the OPeNDAP server (http://apdrc.soest.hawaii.edu/dods/public_data/WOA/WOA13/5_deg/annual).\n",
211 |     "\n",
212 |     "Here we read the annual mean Temperature, Salinity, and Oxygen on a 5 degree grid."
213 |    ]
214 |   },
215 |   {
216 |    "cell_type": "code",
217 |    "execution_count": null,
218 |    "metadata": {
219 |     "collapsed": false,
220 |     "jupyter": {
221 |      "outputs_hidden": false
222 |     }
223 |    },
224 |    "outputs": [],
225 |    "source": [
226 |     "# Read WOA using OPeNDAP\n",
227 |     "Temp_url = 'http://apdrc.soest.hawaii.edu:80/dods/public_data/WOA/WOA13/5_deg/annual/temp'\n",
228 |     "Salt_url = 'http://apdrc.soest.hawaii.edu:80/dods/public_data/WOA/WOA13/5_deg/annual/salt'\n",
229 |     "Oxy_url = 'http://apdrc.soest.hawaii.edu:80/dods/public_data/WOA/WOA13/5_deg/annual/doxy'\n",
230 |     "\n",
231 |     "ds = xr.merge([\n",
232 |     "    xr.open_dataset(Temp_url).tmn.load(),\n",
233 |     "    xr.open_dataset(Salt_url).smn.load(),\n",
234 |     "    xr.open_dataset(Oxy_url).omn.load()])\n",
235 |     "ds"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "markdown",
240 |    "metadata": {},
241 |    "source": [
242 |     "Use histogram to bin the data points, using canonical ocean T/S ranges and bin sizes of $0.1^\circ$C and $0.025$ psu. These are similar to the ranges and bin sizes in this review paper on Mode Waters: https://doi.org/10.1016/B978-0-12-391851-2.00009-X ."
243 |    ]
244 |   },
245 |   {
246 |    "cell_type": "code",
247 |    "execution_count": null,
248 |    "metadata": {
249 |     "collapsed": false,
250 |     "jupyter": {
251 |      "outputs_hidden": false
252 |     }
253 |    },
254 |    "outputs": [],
255 |    "source": [
256 |     "sbins = np.arange(31, 38, 0.025)\n",
257 |     "tbins = np.arange(-2, 32, 0.1)"
258 |    ]
259 |   },
260 |   {
261 |    "cell_type": "code",
262 |    "execution_count": null,
263 |    "metadata": {
264 |     "collapsed": false,
265 |     "jupyter": {
266 |      "outputs_hidden": false
267 |     }
268 |    },
269 |    "outputs": [],
270 |    "source": [
271 |     "# histogram of number of data points\n",
273 |     "hTS = histogram(ds.smn, ds.tmn, bins=[sbins, tbins])\n",
274 |     "np.log10(hTS.T).plot(levels=31)"
275 |    ]
276 |   },
277 |   {
278 |    "cell_type": "markdown",
279 |    "metadata": {},
280 |    "source": [
281 |     "However, we would like to do a volume census, which requires the data points to be weighted by the volume of the grid box.\n",
282 |     "\n",
283 |     "\begin{equation}\n",
284 |     "dV = dz\,dx\,dy\n",
285 |     "\end{equation}"
286 |    ]
287 |   },
288 |   {
289 |    "cell_type": "code",
290 |    "execution_count": null,
291 |    "metadata": {
292 |     "collapsed": false,
293 |     "jupyter": {
294 |      "outputs_hidden": false
295 |     }
296 |    },
297 |    "outputs": [],
298 |    "source": [
299 |     "# histogram of number of data points weighted by volume resolution\n",
300 |     "# Note that depth is a non-uniform axis\n",
301 |     "\n",
302 |     "# Create a dz variable\n",
303 |     "dz = np.diff(ds.lev)\n",
304 |     "dz = np.insert(dz, 0, dz[0])\n",
305 |     "dz = xr.DataArray(dz, coords={'lev': ds.lev}, dims='lev')\n",
306 |     "\n",
307 |     "# weight by volume of grid cell (resolution = 5 degrees, 1 degree = 110 km)\n",
308 |     "dVol = dz * (5*110e3) * (5*110e3*np.cos(ds.lat*np.pi/180))\n",
309 |     "\n",
310 |     "# Note: The weights are automatically broadcast to the right size\n",
311 |     "hTSw = histogram(ds.smn, ds.tmn, bins=[sbins, tbins], weights=dVol)\n",
312 |     "np.log10(hTSw.T).plot(levels=31, vmin=11.5, vmax=16, cmap='brg')"
313 |    ]
314 |   },
315 |   {
316 |    "cell_type": "markdown",
317 |    "metadata": {},
318 |    "source": [
319 |     "The ridges of the above plot indicate T/S classes with a lot of volume; some of these correspond to Mode Waters (for example, Eighteen Degree Water with T$\sim18^\circ$C and S$\sim36.5$ psu)."
" 320 | ] 321 | }, 322 | { 323 | "cell_type": "markdown", 324 | "metadata": {}, 325 | "source": [ 326 | "#### Averaging a variable \n", 327 | "\n", 328 | "Next we calculate the mean oxygen value in each TS bin. \n", 329 | "\n", 330 | "\\begin{equation}\n", 331 | "\\overline{A} (m,n) = \\frac{\\sum_{T(x,y,z)=m, S(x,y,z)=n} (A(x,y,z) dV)}{\\sum_{T(x,y,z)=m, S(x,y,z)=n}dV}.\n", 332 | "\\end{equation}" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": null, 338 | "metadata": { 339 | "collapsed": false, 340 | "jupyter": { 341 | "outputs_hidden": false 342 | } 343 | }, 344 | "outputs": [], 345 | "source": [ 346 | "hTSO2 = (histogram(ds.smn.where(~np.isnan(ds.omn)), \n", 347 | " ds.tmn.where(~np.isnan(ds.omn)), \n", 348 | " bins=[sbins, tbins], \n", 349 | " weights=ds.omn.where(~np.isnan(ds.omn))*dVol)/\n", 350 | " histogram(ds.smn.where(~np.isnan(ds.omn)), \n", 351 | " ds.tmn.where(~np.isnan(ds.omn)), \n", 352 | " bins=[sbins, tbins], \n", 353 | " weights=dVol))\n", 354 | "\n", 355 | "(hTSO2.T).plot(vmin=1, vmax=8)" 356 | ] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": {}, 361 | "source": [ 362 | "Some interesting patterns in average oxygen emerge. Convectively ventilated cold water have the highest oxygen and mode waters have relatively high oxygen. Oxygen minimum zones are interspersed in the middle of volumetic ridges (high volume waters). " 363 | ] 364 | }, 365 | { 366 | "cell_type": "markdown", 367 | "metadata": {}, 368 | "source": [ 369 | "**NOTE**: NaNs in weights will make the weighted sum as nan. To avoid this, call `.fillna(0.)` on your weights input data before calling `histogram()`." 370 | ] 371 | }, 372 | { 373 | "cell_type": "markdown", 374 | "metadata": {}, 375 | "source": [ 376 | "## Dask Integration\n", 377 | "\n", 378 | "Should just work, but need examples." 
379 |    ]
380 |   }
381 |  ],
382 |  "metadata": {
383 |   "kernelspec": {
384 |    "display_name": "Python 3",
385 |    "language": "python",
386 |    "name": "python3"
387 |   },
388 |   "language_info": {
389 |    "codemirror_mode": {
390 |     "name": "ipython",
391 |     "version": 3
392 |    },
393 |    "file_extension": ".py",
394 |    "mimetype": "text/x-python",
395 |    "name": "python",
396 |    "nbconvert_exporter": "python",
397 |    "pygments_lexer": "ipython3",
398 |    "version": "3.8.6"
399 |   }
400 |  },
401 |  "nbformat": 4,
402 |  "nbformat_minor": 4
403 | }
404 | 
--------------------------------------------------------------------------------
/readthedocs.yml:
--------------------------------------------------------------------------------
1 | conda:
2 |   file: doc/environment.yml
3 | python:
4 |   version: 3
5 |   setup_py_install: true
6 | 
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [aliases]
2 | test=pytest
3 | 
4 | [versioneer]
5 | VCS = git
6 | style = pep440
7 | versionfile_source = xhistogram/_version.py
8 | versionfile_build = xhistogram/_version.py
9 | tag_prefix = v
10 | parentdir_prefix = xhistogram-
11 | 
12 | [flake8]
13 | exclude = __init__.py,versioneer.py,_version.py
14 | max-line-length = 120
15 | 
16 | [tool:pytest]
17 | addopts = -v --durations=10
18 | 
19 | [mypy]
20 | 
21 | [mypy-dask.*]
22 | ignore_missing_imports = True
23 | [mypy-numpy.*]
24 | ignore_missing_imports = True
25 | [mypy-pandas.*]
26 | ignore_missing_imports = True
27 | 
28 | # Ignore versioneer files
29 | [mypy-xhistogram._version]
30 | ignore_errors = True
31 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import versioneer
3 | from setuptools import setup, find_packages
4 | 
5 | DISTNAME = "xhistogram"
6 | LICENSE = "MIT"
7 | AUTHOR = "xhistogram Developers"
8 | AUTHOR_EMAIL = "rpa@ldeo.columbia.edu"
9 | URL = "https://github.com/xgcm/xhistogram"
10 | CLASSIFIERS = [
11 |     "Development Status :: 4 - Beta",
12 |     "License :: OSI Approved :: MIT License",
13 |     "Operating System :: OS Independent",
14 |     "Intended Audience :: Science/Research",
15 |     "Programming Language :: Python",
16 |     "Programming Language :: Python :: 3",
17 |     "Programming Language :: Python :: 3.7",
18 |     "Programming Language :: Python :: 3.8",
19 |     "Programming Language :: Python :: 3.9",
20 |     "Topic :: Scientific/Engineering",
21 | ]
22 | 
23 | INSTALL_REQUIRES = ["xarray>=0.12.0", "dask[array]>=2.3.0", "numpy>=1.17"]
24 | PYTHON_REQUIRES = ">=3.7"
25 | 
26 | DESCRIPTION = "Fast, flexible, label-aware histograms for numpy and xarray"
27 | 
28 | 
29 | def readme():
30 |     with open("README.rst") as f:
31 |         return f.read()
32 | 
33 | 
34 | setup(
35 |     name=DISTNAME,
36 |     version=versioneer.get_version(),
37 |     cmdclass=versioneer.get_cmdclass(),
38 |     license=LICENSE,
39 |     author=AUTHOR,
40 |     author_email=AUTHOR_EMAIL,
41 |     classifiers=CLASSIFIERS,
42 |     description=DESCRIPTION,
43 |     long_description=readme(),
44 |     install_requires=INSTALL_REQUIRES,
45 |     python_requires=PYTHON_REQUIRES,
46 |     url=URL,
47 |     packages=find_packages(),
48 | )
49 | 
--------------------------------------------------------------------------------
/versioneer.py:
--------------------------------------------------------------------------------
1 | # Version: 0.18
2 | 
3 | """The Versioneer - like a rocketeer, but for versions.
4 | 
5 | The Versioneer
6 | ==============
7 | 
8 | * like a rocketeer, but for versions!
9 | * https://github.com/warner/python-versioneer
10 | * Brian Warner
11 | * License: Public Domain
12 | * Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy
13 | * [![Latest Version]
14 | (https://pypip.in/version/versioneer/badge.svg?style=flat)
15 | ](https://pypi.python.org/pypi/versioneer/)
16 | * [![Build Status]
17 | (https://travis-ci.org/warner/python-versioneer.png?branch=master)
18 | ](https://travis-ci.org/warner/python-versioneer)
19 | 
20 | This is a tool for managing a recorded version number in distutils-based
21 | python projects. The goal is to remove the tedious and error-prone "update
22 | the embedded version string" step from your release process. Making a new
23 | release should be as easy as recording a new tag in your version-control
24 | system, and maybe making new tarballs.
25 | 
26 | 
27 | ## Quick Install
28 | 
29 | * `pip install versioneer` to somewhere in your $PATH
30 | * add a `[versioneer]` section to your setup.cfg (see below)
31 | * run `versioneer install` in your source tree, commit the results
32 | 
33 | ## Version Identifiers
34 | 
35 | Source trees come from a variety of places:
36 | 
37 | * a version-control system checkout (mostly used by developers)
38 | * a nightly tarball, produced by build automation
39 | * a snapshot tarball, produced by a web-based VCS browser, like github's
40 | "tarball from tag" feature
41 | * a release tarball, produced by "setup.py sdist", distributed through PyPI
42 | 
43 | Within each source tree, the version identifier (either a string or a number,
44 | this tool is format-agnostic) can come from a variety of places:
45 | 
46 | * ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows
47 | about recent "tags" and an absolute revision-id
48 | * the name of the directory into which the tarball was unpacked
49 | * an expanded VCS keyword ($Id$, etc)
50 | * a `_version.py` created by some earlier build step
51 | 
52 | For released software, the version identifier is closely related to a VCS
53 | tag. Some projects use tag names that include more than just the version
54 | string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool
55 | needs to strip the tag prefix to extract the version identifier. For
56 | unreleased software (between tags), the version identifier should provide
57 | enough information to help developers recreate the same tree, while also
58 | giving them an idea of roughly how old the tree is (after version 1.2, before
59 | version 1.3). Many VCS systems can report a description that captures this,
60 | for example `git describe --tags --dirty --always` reports things like
61 | "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the
62 | 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has
63 | uncommitted changes).
64 | 
65 | The version identifier is used for multiple purposes:
66 | 
67 | * to allow the module to self-identify its version: `myproject.__version__`
68 | * to choose a name and prefix for a 'setup.py sdist' tarball
69 | 
70 | ## Theory of Operation
71 | 
72 | Versioneer works by adding a special `_version.py` file into your source
73 | tree, where your `__init__.py` can import it. This `_version.py` knows how to
74 | dynamically ask the VCS tool for version information at import time.
75 | 76 | `_version.py` also contains `$Revision$` markers, and the installation 77 | process marks `_version.py` to have this marker rewritten with a tag name 78 | during the `git archive` command. As a result, generated tarballs will 79 | contain enough information to get the proper version. 80 | 81 | To allow `setup.py` to compute a version too, a `versioneer.py` is added to 82 | the top level of your source tree, next to `setup.py` and the `setup.cfg` 83 | that configures it. This overrides several distutils/setuptools commands to 84 | compute the version when invoked, and changes `setup.py build` and `setup.py 85 | sdist` to replace `_version.py` with a small static file that contains just 86 | the generated version data. 87 | 88 | ## Installation 89 | 90 | See [INSTALL.md](./INSTALL.md) for detailed installation instructions. 91 | 92 | ## Version-String Flavors 93 | 94 | Code which uses Versioneer can learn about its version string at runtime by 95 | importing `_version` from your main `__init__.py` file and running the 96 | `get_versions()` function. From the "outside" (e.g. in `setup.py`), you can 97 | import the top-level `versioneer.py` and run `get_versions()`. 98 | 99 | Both functions return a dictionary with different flavors of version 100 | information: 101 | 102 | * `['version']`: A condensed version string, rendered using the selected 103 | style. This is the most commonly used value for the project's version 104 | string. The default "pep440" style yields strings like `0.11`, 105 | `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section 106 | below for alternative styles. 107 | 108 | * `['full-revisionid']`: detailed revision identifier. For Git, this is the 109 | full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". 110 | 111 | * `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the 112 | commit date in ISO 8601 format. This will be None if the date is not 113 | available. 114 | 115 | * `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that 116 | this is only accurate if run in a VCS checkout, otherwise it is likely to 117 | be False or None 118 | 119 | * `['error']`: if the version string could not be computed, this will be set 120 | to a string describing the problem, otherwise it will be None. It may be 121 | useful to throw an exception in setup.py if this is set, to avoid e.g. 122 | creating tarballs with a version string of "unknown". 123 | 124 | Some variants are more useful than others. Including `full-revisionid` in a 125 | bug report should allow developers to reconstruct the exact code being tested 126 | (or indicate the presence of local changes that should be shared with the 127 | developers). `version` is suitable for display in an "about" box or a CLI 128 | `--version` output: it can be easily compared against release notes and lists 129 | of bugs fixed in various releases. 130 | 131 | The installer adds the following text to your `__init__.py` to place a basic 132 | version in `YOURPROJECT.__version__`: 133 | 134 | from ._version import get_versions 135 | __version__ = get_versions()['version'] 136 | del get_versions 137 | 138 | ## Styles 139 | 140 | The setup.cfg `style=` configuration controls how the VCS information is 141 | rendered into a version string. 
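
For example, a minimal `[versioneer]` section in setup.cfg selecting a style
(mirroring the configuration used elsewhere in this repository) looks like:

    [versioneer]
    VCS = git
    style = pep440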
142 | 
143 | The default style, "pep440", produces a PEP440-compliant string, equal to the
144 | un-prefixed tag name for actual releases, and containing an additional "local
145 | version" section with more detail for in-between builds. For Git, this is
146 | TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags
147 | --dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the
148 | tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and
149 | that this commit is two revisions ("+2") beyond the "0.11" tag. For released
150 | software (exactly equal to a known tag), the identifier will only contain the
151 | stripped tag, e.g. "0.11".
152 | 
153 | Other styles are available. See [details.md](details.md) in the Versioneer
154 | source tree for descriptions.
155 | 
156 | ## Debugging
157 | 
158 | Versioneer tries to avoid fatal errors: if something goes wrong, it will tend
159 | to return a version of "0+unknown". To investigate the problem, run `setup.py
160 | version`, which will run the version-lookup code in a verbose mode, and will
161 | display the full contents of `get_versions()` (including the `error` string,
162 | which may help identify what went wrong).
163 | 
164 | ## Known Limitations
165 | 
166 | Some situations are known to cause problems for Versioneer. This details the
167 | most significant ones. More can be found on the Github
168 | [issues page](https://github.com/warner/python-versioneer/issues).
169 | 
170 | ### Subprojects
171 | 
172 | Versioneer has limited support for source trees in which `setup.py` is not in
173 | the root directory (e.g. `setup.py` and `.git/` are *not* siblings). There are
174 | two common reasons why `setup.py` might not be in the root:
175 | 
176 | * Source trees which contain multiple subprojects, such as
177 | [Buildbot](https://github.com/buildbot/buildbot), which contains both
178 | "master" and "slave" subprojects, each with their own `setup.py`,
179 | `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI
180 | distributions (and upload multiple independently-installable tarballs).
181 | * Source trees whose main purpose is to contain a C library, but which also
182 | provide bindings to Python (and perhaps other languages) in subdirectories.
183 | 
184 | Versioneer will look for `.git` in parent directories, and most operations
185 | should get the right version string. However `pip` and `setuptools` have bugs
186 | and implementation details which frequently cause `pip install .` from a
187 | subproject directory to fail to find a correct version string (so it usually
188 | defaults to `0+unknown`).
189 | 
190 | `pip install --editable .` should work correctly. `setup.py install` might
191 | work too.
192 | 
193 | Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in
194 | some later version.
195 | 
196 | [Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking
197 | this issue. The discussion in
198 | [PR #61](https://github.com/warner/python-versioneer/pull/61) describes the
199 | issue from the Versioneer side in more detail.
200 | [pip PR#3176](https://github.com/pypa/pip/pull/3176) and
201 | [pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve
202 | pip to let Versioneer work correctly.
203 | 
204 | Versioneer-0.16 and earlier only looked for a `.git` directory next to the
205 | `setup.cfg`, so subprojects were completely unsupported with those releases.
206 | 207 | ### Editable installs with setuptools <= 18.5 208 | 209 | `setup.py develop` and `pip install --editable .` allow you to install a 210 | project into a virtualenv once, then continue editing the source code (and 211 | test) without re-installing after every change. 212 | 213 | "Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a 214 | convenient way to specify executable scripts that should be installed along 215 | with the python package. 216 | 217 | These both work as expected when using modern setuptools. When using 218 | setuptools-18.5 or earlier, however, certain operations will cause 219 | `pkg_resources.DistributionNotFound` errors when running the entrypoint 220 | script, which must be resolved by re-installing the package. This happens 221 | when the install happens with one version, then the egg_info data is 222 | regenerated while a different version is checked out. Many setup.py commands 223 | cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into 224 | a different virtualenv), so this can be surprising. 225 | 226 | [Bug #83](https://github.com/warner/python-versioneer/issues/83) describes 227 | this one, but upgrading to a newer version of setuptools should probably 228 | resolve it. 229 | 230 | ### Unicode version strings 231 | 232 | While Versioneer works (and is continually tested) with both Python 2 and 233 | Python 3, it is not entirely consistent with bytes-vs-unicode distinctions. 234 | Newer releases probably generate unicode version strings on py2. It's not 235 | clear that this is wrong, but it may be surprising for applications when then 236 | write these strings to a network connection or include them in bytes-oriented 237 | APIs like cryptographic checksums. 238 | 239 | [Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates 240 | this question. 241 | 242 | 243 | ## Updating Versioneer 244 | 245 | To upgrade your project to a new release of Versioneer, do the following: 246 | 247 | * install the new Versioneer (`pip install -U versioneer` or equivalent) 248 | * edit `setup.cfg`, if necessary, to include any new configuration settings 249 | indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. 250 | * re-run `versioneer install` in your source tree, to replace 251 | `SRC/_version.py` 252 | * commit any changed files 253 | 254 | ## Future Directions 255 | 256 | This tool is designed to make it easily extended to other version-control 257 | systems: all VCS-specific components are in separate directories like 258 | src/git/ . The top-level `versioneer.py` script is assembled from these 259 | components by running make-versioneer.py . In the future, make-versioneer.py 260 | will take a VCS name as an argument, and will construct a version of 261 | `versioneer.py` that is specific to the given VCS. It might also take the 262 | configuration arguments that are currently provided manually during 263 | installation by editing setup.py . Alternatively, it might go the other 264 | direction and include code from all supported VCS systems, reducing the 265 | number of intermediate scripts. 266 | 267 | 268 | ## License 269 | 270 | To make Versioneer easier to embed, all its code is dedicated to the public 271 | domain. The `_version.py` that it creates is also in the public domain. 272 | Specifically, both are released under the Creative Commons "Public Domain 273 | Dedication" license (CC0-1.0), as described in 274 | https://creativecommons.org/publicdomain/zero/1.0/ . 
275 | 276 | """ 277 | 278 | from __future__ import print_function 279 | 280 | try: 281 | import configparser 282 | except ImportError: 283 | import ConfigParser as configparser 284 | import errno 285 | import json 286 | import os 287 | import re 288 | import subprocess 289 | import sys 290 | 291 | 292 | class VersioneerConfig: 293 | """Container for Versioneer configuration parameters.""" 294 | 295 | 296 | def get_root(): 297 | """Get the project root directory. 298 | 299 | We require that all commands are run from the project root, i.e. the 300 | directory that contains setup.py, setup.cfg, and versioneer.py . 301 | """ 302 | root = os.path.realpath(os.path.abspath(os.getcwd())) 303 | setup_py = os.path.join(root, "setup.py") 304 | versioneer_py = os.path.join(root, "versioneer.py") 305 | if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): 306 | # allow 'python path/to/setup.py COMMAND' 307 | root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) 308 | setup_py = os.path.join(root, "setup.py") 309 | versioneer_py = os.path.join(root, "versioneer.py") 310 | if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): 311 | err = ( 312 | "Versioneer was unable to run the project root directory. " 313 | "Versioneer requires setup.py to be executed from " 314 | "its immediate directory (like 'python setup.py COMMAND'), " 315 | "or in a way that lets it use sys.argv[0] to find the root " 316 | "(like 'python path/to/setup.py COMMAND')." 317 | ) 318 | raise VersioneerBadRootError(err) 319 | try: 320 | # Certain runtime workflows (setup.py install/develop in a setuptools 321 | # tree) execute all dependencies in a single python process, so 322 | # "versioneer" may be imported multiple times, and python's shared 323 | # module-import table will cache the first one. So we can't use 324 | # os.path.dirname(__file__), as that will find whichever 325 | # versioneer.py was first imported, even in later projects. 326 | me = os.path.realpath(os.path.abspath(__file__)) 327 | me_dir = os.path.normcase(os.path.splitext(me)[0]) 328 | vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) 329 | if me_dir != vsr_dir: 330 | print( 331 | "Warning: build in %s is using versioneer.py from %s" 332 | % (os.path.dirname(me), versioneer_py) 333 | ) 334 | except NameError: 335 | pass 336 | return root 337 | 338 | 339 | def get_config_from_root(root): 340 | """Read the project setup.cfg file to determine Versioneer config.""" 341 | # This might raise EnvironmentError (if setup.cfg is missing), or 342 | # configparser.NoSectionError (if it lacks a [versioneer] section), or 343 | # configparser.NoOptionError (if it lacks "VCS="). See the docstring at 344 | # the top of versioneer.py for instructions on writing your setup.cfg . 
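    # NOTE: SafeConfigParser and readfp() below are deprecated aliases on
    # Python 3 (ConfigParser and read_file() are the modern spellings); they
    # are kept here as shipped with versioneer 0.18.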
345 | setup_cfg = os.path.join(root, "setup.cfg") 346 | parser = configparser.SafeConfigParser() 347 | with open(setup_cfg, "r") as f: 348 | parser.readfp(f) 349 | VCS = parser.get("versioneer", "VCS") # mandatory 350 | 351 | def get(parser, name): 352 | if parser.has_option("versioneer", name): 353 | return parser.get("versioneer", name) 354 | return None 355 | 356 | cfg = VersioneerConfig() 357 | cfg.VCS = VCS 358 | cfg.style = get(parser, "style") or "" 359 | cfg.versionfile_source = get(parser, "versionfile_source") 360 | cfg.versionfile_build = get(parser, "versionfile_build") 361 | cfg.tag_prefix = get(parser, "tag_prefix") 362 | if cfg.tag_prefix in ("''", '""'): 363 | cfg.tag_prefix = "" 364 | cfg.parentdir_prefix = get(parser, "parentdir_prefix") 365 | cfg.verbose = get(parser, "verbose") 366 | return cfg 367 | 368 | 369 | class NotThisMethod(Exception): 370 | """Exception raised if a method is not valid for the current scenario.""" 371 | 372 | 373 | # these dictionaries contain VCS-specific tools 374 | LONG_VERSION_PY = {} 375 | HANDLERS = {} 376 | 377 | 378 | def register_vcs_handler(vcs, method): # decorator 379 | """Decorator to mark a method as the handler for a particular VCS.""" 380 | 381 | def decorate(f): 382 | """Store f in HANDLERS[vcs][method].""" 383 | if vcs not in HANDLERS: 384 | HANDLERS[vcs] = {} 385 | HANDLERS[vcs][method] = f 386 | return f 387 | 388 | return decorate 389 | 390 | 391 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): 392 | """Call the given command(s).""" 393 | assert isinstance(commands, list) 394 | p = None 395 | for c in commands: 396 | try: 397 | dispcmd = str([c] + args) 398 | # remember shell=False, so use git.cmd on windows, not just git 399 | p = subprocess.Popen( 400 | [c] + args, 401 | cwd=cwd, 402 | env=env, 403 | stdout=subprocess.PIPE, 404 | stderr=(subprocess.PIPE if hide_stderr else None), 405 | ) 406 | break 407 | except EnvironmentError: 408 | e = sys.exc_info()[1] 409 | if e.errno == errno.ENOENT: 410 | continue 411 | if verbose: 412 | print("unable to run %s" % dispcmd) 413 | print(e) 414 | return None, None 415 | else: 416 | if verbose: 417 | print("unable to find command, tried %s" % (commands,)) 418 | return None, None 419 | stdout = p.communicate()[0].strip() 420 | if sys.version_info[0] >= 3: 421 | stdout = stdout.decode() 422 | if p.returncode != 0: 423 | if verbose: 424 | print("unable to run %s (error)" % dispcmd) 425 | print("stdout was %s" % stdout) 426 | return None, p.returncode 427 | return stdout, p.returncode 428 | 429 | 430 | LONG_VERSION_PY[ 431 | "git" 432 | ] = ''' 433 | # This file helps to compute a version number in source trees obtained from 434 | # git-archive tarball (such as those provided by githubs download-from-tag 435 | # feature). Distribution tarballs (built by setup.py sdist) and build 436 | # directories (produced by setup.py build) will contain a much shorter file 437 | # that just contains the computed version number. 438 | 439 | # This file is released into the public domain. Generated by 440 | # versioneer-0.18 (https://github.com/warner/python-versioneer) 441 | 442 | """Git implementation of _version.py.""" 443 | 444 | import errno 445 | import os 446 | import re 447 | import subprocess 448 | import sys 449 | 450 | 451 | def get_keywords(): 452 | """Get the keywords needed to look up the version information.""" 453 | # these strings will be replaced by git during git-archive. 
454 | # setup.py/versioneer.py will grep for the variable names, so they must 455 | # each be defined on a line of their own. _version.py will just call 456 | # get_keywords(). 457 | git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" 458 | git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" 459 | git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" 460 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} 461 | return keywords 462 | 463 | 464 | class VersioneerConfig: 465 | """Container for Versioneer configuration parameters.""" 466 | 467 | 468 | def get_config(): 469 | """Create, populate and return the VersioneerConfig() object.""" 470 | # these strings are filled in when 'setup.py versioneer' creates 471 | # _version.py 472 | cfg = VersioneerConfig() 473 | cfg.VCS = "git" 474 | cfg.style = "%(STYLE)s" 475 | cfg.tag_prefix = "%(TAG_PREFIX)s" 476 | cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" 477 | cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" 478 | cfg.verbose = False 479 | return cfg 480 | 481 | 482 | class NotThisMethod(Exception): 483 | """Exception raised if a method is not valid for the current scenario.""" 484 | 485 | 486 | LONG_VERSION_PY = {} 487 | HANDLERS = {} 488 | 489 | 490 | def register_vcs_handler(vcs, method): # decorator 491 | """Decorator to mark a method as the handler for a particular VCS.""" 492 | def decorate(f): 493 | """Store f in HANDLERS[vcs][method].""" 494 | if vcs not in HANDLERS: 495 | HANDLERS[vcs] = {} 496 | HANDLERS[vcs][method] = f 497 | return f 498 | return decorate 499 | 500 | 501 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, 502 | env=None): 503 | """Call the given command(s).""" 504 | assert isinstance(commands, list) 505 | p = None 506 | for c in commands: 507 | try: 508 | dispcmd = str([c] + args) 509 | # remember shell=False, so use git.cmd on windows, not just git 510 | p = subprocess.Popen([c] + args, cwd=cwd, env=env, 511 | stdout=subprocess.PIPE, 512 | stderr=(subprocess.PIPE if hide_stderr 513 | else None)) 514 | break 515 | except EnvironmentError: 516 | e = sys.exc_info()[1] 517 | if e.errno == errno.ENOENT: 518 | continue 519 | if verbose: 520 | print("unable to run %%s" %% dispcmd) 521 | print(e) 522 | return None, None 523 | else: 524 | if verbose: 525 | print("unable to find command, tried %%s" %% (commands,)) 526 | return None, None 527 | stdout = p.communicate()[0].strip() 528 | if sys.version_info[0] >= 3: 529 | stdout = stdout.decode() 530 | if p.returncode != 0: 531 | if verbose: 532 | print("unable to run %%s (error)" %% dispcmd) 533 | print("stdout was %%s" %% stdout) 534 | return None, p.returncode 535 | return stdout, p.returncode 536 | 537 | 538 | def versions_from_parentdir(parentdir_prefix, root, verbose): 539 | """Try to determine the version from the parent directory name. 540 | 541 | Source tarballs conventionally unpack into a directory that includes both 542 | the project name and a version string. 
We will also support searching up 543 | two directory levels for an appropriately named parent directory 544 | """ 545 | rootdirs = [] 546 | 547 | for i in range(3): 548 | dirname = os.path.basename(root) 549 | if dirname.startswith(parentdir_prefix): 550 | return {"version": dirname[len(parentdir_prefix):], 551 | "full-revisionid": None, 552 | "dirty": False, "error": None, "date": None} 553 | else: 554 | rootdirs.append(root) 555 | root = os.path.dirname(root) # up a level 556 | 557 | if verbose: 558 | print("Tried directories %%s but none started with prefix %%s" %% 559 | (str(rootdirs), parentdir_prefix)) 560 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 561 | 562 | 563 | @register_vcs_handler("git", "get_keywords") 564 | def git_get_keywords(versionfile_abs): 565 | """Extract version information from the given file.""" 566 | # the code embedded in _version.py can just fetch the value of these 567 | # keywords. When used from setup.py, we don't want to import _version.py, 568 | # so we do it with a regexp instead. This function is not used from 569 | # _version.py. 570 | keywords = {} 571 | try: 572 | f = open(versionfile_abs, "r") 573 | for line in f.readlines(): 574 | if line.strip().startswith("git_refnames ="): 575 | mo = re.search(r'=\s*"(.*)"', line) 576 | if mo: 577 | keywords["refnames"] = mo.group(1) 578 | if line.strip().startswith("git_full ="): 579 | mo = re.search(r'=\s*"(.*)"', line) 580 | if mo: 581 | keywords["full"] = mo.group(1) 582 | if line.strip().startswith("git_date ="): 583 | mo = re.search(r'=\s*"(.*)"', line) 584 | if mo: 585 | keywords["date"] = mo.group(1) 586 | f.close() 587 | except EnvironmentError: 588 | pass 589 | return keywords 590 | 591 | 592 | @register_vcs_handler("git", "keywords") 593 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 594 | """Get version information from git keywords.""" 595 | if not keywords: 596 | raise NotThisMethod("no keywords at all, weird") 597 | date = keywords.get("date") 598 | if date is not None: 599 | # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant 600 | # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 601 | # -like" string, which we must then edit to make compliant), because 602 | # it's been around since git-1.5.3, and it's too difficult to 603 | # discover which version we're using, or to work around using an 604 | # older one. 605 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 606 | refnames = keywords["refnames"].strip() 607 | if refnames.startswith("$Format"): 608 | if verbose: 609 | print("keywords are unexpanded, not using") 610 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 611 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 612 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 613 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 614 | TAG = "tag: " 615 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 616 | if not tags: 617 | # Either we're using git < 1.8.3, or there really are no tags. We use 618 | # a heuristic: assume all version tags have a digit. The old git %%d 619 | # expansion behaves like git log --decorate=short and strips out the 620 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 621 | # between branches and tags. By ignoring refnames without digits, we 622 | # filter out many common branch names like "release" and 623 | # "stabilization", as well as "HEAD" and "master". 
624 | tags = set([r for r in refs if re.search(r'\d', r)]) 625 | if verbose: 626 | print("discarding '%%s', no digits" %% ",".join(refs - tags)) 627 | if verbose: 628 | print("likely tags: %%s" %% ",".join(sorted(tags))) 629 | for ref in sorted(tags): 630 | # sorting will prefer e.g. "2.0" over "2.0rc1" 631 | if ref.startswith(tag_prefix): 632 | r = ref[len(tag_prefix):] 633 | if verbose: 634 | print("picking %%s" %% r) 635 | return {"version": r, 636 | "full-revisionid": keywords["full"].strip(), 637 | "dirty": False, "error": None, 638 | "date": date} 639 | # no suitable tags, so version is "0+unknown", but full hex is still there 640 | if verbose: 641 | print("no suitable tags, using unknown + full revision id") 642 | return {"version": "0+unknown", 643 | "full-revisionid": keywords["full"].strip(), 644 | "dirty": False, "error": "no suitable tags", "date": None} 645 | 646 | 647 | @register_vcs_handler("git", "pieces_from_vcs") 648 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 649 | """Get version from 'git describe' in the root of the source tree. 650 | 651 | This only gets called if the git-archive 'subst' keywords were *not* 652 | expanded, and _version.py hasn't already been rewritten with a short 653 | version string, meaning we're inside a checked out source tree. 654 | """ 655 | GITS = ["git"] 656 | if sys.platform == "win32": 657 | GITS = ["git.cmd", "git.exe"] 658 | 659 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, 660 | hide_stderr=True) 661 | if rc != 0: 662 | if verbose: 663 | print("Directory %%s not under git control" %% root) 664 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 665 | 666 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 667 | # if there isn't one, this yields HEX[-dirty] (no NUM) 668 | describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", 669 | "--always", "--long", 670 | "--match", "%%s*" %% tag_prefix], 671 | cwd=root) 672 | # --long was added in git-1.5.5 673 | if describe_out is None: 674 | raise NotThisMethod("'git describe' failed") 675 | describe_out = describe_out.strip() 676 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 677 | if full_out is None: 678 | raise NotThisMethod("'git rev-parse' failed") 679 | full_out = full_out.strip() 680 | 681 | pieces = {} 682 | pieces["long"] = full_out 683 | pieces["short"] = full_out[:7] # maybe improved later 684 | pieces["error"] = None 685 | 686 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 687 | # TAG might have hyphens. 688 | git_describe = describe_out 689 | 690 | # look for -dirty suffix 691 | dirty = git_describe.endswith("-dirty") 692 | pieces["dirty"] = dirty 693 | if dirty: 694 | git_describe = git_describe[:git_describe.rindex("-dirty")] 695 | 696 | # now we have TAG-NUM-gHEX or HEX 697 | 698 | if "-" in git_describe: 699 | # TAG-NUM-gHEX 700 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 701 | if not mo: 702 | # unparseable. Maybe git-describe is misbehaving? 
703 | pieces["error"] = ("unable to parse git-describe output: '%%s'" 704 | %% describe_out) 705 | return pieces 706 | 707 | # tag 708 | full_tag = mo.group(1) 709 | if not full_tag.startswith(tag_prefix): 710 | if verbose: 711 | fmt = "tag '%%s' doesn't start with prefix '%%s'" 712 | print(fmt %% (full_tag, tag_prefix)) 713 | pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" 714 | %% (full_tag, tag_prefix)) 715 | return pieces 716 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 717 | 718 | # distance: number of commits since tag 719 | pieces["distance"] = int(mo.group(2)) 720 | 721 | # commit: short hex revision ID 722 | pieces["short"] = mo.group(3) 723 | 724 | else: 725 | # HEX: no tags 726 | pieces["closest-tag"] = None 727 | count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], 728 | cwd=root) 729 | pieces["distance"] = int(count_out) # total number of commits 730 | 731 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 732 | date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], 733 | cwd=root)[0].strip() 734 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 735 | 736 | return pieces 737 | 738 | 739 | def plus_or_dot(pieces): 740 | """Return a + if we don't already have one, else return a .""" 741 | if "+" in pieces.get("closest-tag", ""): 742 | return "." 743 | return "+" 744 | 745 | 746 | def render_pep440(pieces): 747 | """Build up version string, with post-release "local version identifier". 748 | 749 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 750 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 751 | 752 | Exceptions: 753 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 754 | """ 755 | if pieces["closest-tag"]: 756 | rendered = pieces["closest-tag"] 757 | if pieces["distance"] or pieces["dirty"]: 758 | rendered += plus_or_dot(pieces) 759 | rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) 760 | if pieces["dirty"]: 761 | rendered += ".dirty" 762 | else: 763 | # exception #1 764 | rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], 765 | pieces["short"]) 766 | if pieces["dirty"]: 767 | rendered += ".dirty" 768 | return rendered 769 | 770 | 771 | def render_pep440_pre(pieces): 772 | """TAG[.post.devDISTANCE] -- No -dirty. 773 | 774 | Exceptions: 775 | 1: no tags. 0.post.devDISTANCE 776 | """ 777 | if pieces["closest-tag"]: 778 | rendered = pieces["closest-tag"] 779 | if pieces["distance"]: 780 | rendered += ".post.dev%%d" %% pieces["distance"] 781 | else: 782 | # exception #1 783 | rendered = "0.post.dev%%d" %% pieces["distance"] 784 | return rendered 785 | 786 | 787 | def render_pep440_post(pieces): 788 | """TAG[.postDISTANCE[.dev0]+gHEX] . 789 | 790 | The ".dev0" means dirty. Note that .dev0 sorts backwards 791 | (a dirty tree will appear "older" than the corresponding clean one), 792 | but you shouldn't be releasing software with -dirty anyways. 793 | 794 | Exceptions: 795 | 1: no tags. 
0.postDISTANCE[.dev0] 796 | """ 797 | if pieces["closest-tag"]: 798 | rendered = pieces["closest-tag"] 799 | if pieces["distance"] or pieces["dirty"]: 800 | rendered += ".post%%d" %% pieces["distance"] 801 | if pieces["dirty"]: 802 | rendered += ".dev0" 803 | rendered += plus_or_dot(pieces) 804 | rendered += "g%%s" %% pieces["short"] 805 | else: 806 | # exception #1 807 | rendered = "0.post%%d" %% pieces["distance"] 808 | if pieces["dirty"]: 809 | rendered += ".dev0" 810 | rendered += "+g%%s" %% pieces["short"] 811 | return rendered 812 | 813 | 814 | def render_pep440_old(pieces): 815 | """TAG[.postDISTANCE[.dev0]] . 816 | 817 | The ".dev0" means dirty. 818 | 819 | Eexceptions: 820 | 1: no tags. 0.postDISTANCE[.dev0] 821 | """ 822 | if pieces["closest-tag"]: 823 | rendered = pieces["closest-tag"] 824 | if pieces["distance"] or pieces["dirty"]: 825 | rendered += ".post%%d" %% pieces["distance"] 826 | if pieces["dirty"]: 827 | rendered += ".dev0" 828 | else: 829 | # exception #1 830 | rendered = "0.post%%d" %% pieces["distance"] 831 | if pieces["dirty"]: 832 | rendered += ".dev0" 833 | return rendered 834 | 835 | 836 | def render_git_describe(pieces): 837 | """TAG[-DISTANCE-gHEX][-dirty]. 838 | 839 | Like 'git describe --tags --dirty --always'. 840 | 841 | Exceptions: 842 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 843 | """ 844 | if pieces["closest-tag"]: 845 | rendered = pieces["closest-tag"] 846 | if pieces["distance"]: 847 | rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) 848 | else: 849 | # exception #1 850 | rendered = pieces["short"] 851 | if pieces["dirty"]: 852 | rendered += "-dirty" 853 | return rendered 854 | 855 | 856 | def render_git_describe_long(pieces): 857 | """TAG-DISTANCE-gHEX[-dirty]. 858 | 859 | Like 'git describe --tags --dirty --always -long'. 860 | The distance/hash is unconditional. 861 | 862 | Exceptions: 863 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 864 | """ 865 | if pieces["closest-tag"]: 866 | rendered = pieces["closest-tag"] 867 | rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) 868 | else: 869 | # exception #1 870 | rendered = pieces["short"] 871 | if pieces["dirty"]: 872 | rendered += "-dirty" 873 | return rendered 874 | 875 | 876 | def render(pieces, style): 877 | """Render the given version pieces into the requested style.""" 878 | if pieces["error"]: 879 | return {"version": "unknown", 880 | "full-revisionid": pieces.get("long"), 881 | "dirty": None, 882 | "error": pieces["error"], 883 | "date": None} 884 | 885 | if not style or style == "default": 886 | style = "pep440" # the default 887 | 888 | if style == "pep440": 889 | rendered = render_pep440(pieces) 890 | elif style == "pep440-pre": 891 | rendered = render_pep440_pre(pieces) 892 | elif style == "pep440-post": 893 | rendered = render_pep440_post(pieces) 894 | elif style == "pep440-old": 895 | rendered = render_pep440_old(pieces) 896 | elif style == "git-describe": 897 | rendered = render_git_describe(pieces) 898 | elif style == "git-describe-long": 899 | rendered = render_git_describe_long(pieces) 900 | else: 901 | raise ValueError("unknown style '%%s'" %% style) 902 | 903 | return {"version": rendered, "full-revisionid": pieces["long"], 904 | "dirty": pieces["dirty"], "error": None, 905 | "date": pieces.get("date")} 906 | 907 | 908 | def get_versions(): 909 | """Get version information or return default if unable to do so.""" 910 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. 
If we have 911 | # __file__, we can work backwards from there to the root. Some 912 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 913 | # case we can only use expanded keywords. 914 | 915 | cfg = get_config() 916 | verbose = cfg.verbose 917 | 918 | try: 919 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, 920 | verbose) 921 | except NotThisMethod: 922 | pass 923 | 924 | try: 925 | root = os.path.realpath(__file__) 926 | # versionfile_source is the relative path from the top of the source 927 | # tree (where the .git directory might live) to this file. Invert 928 | # this to find the root from __file__. 929 | for i in cfg.versionfile_source.split('/'): 930 | root = os.path.dirname(root) 931 | except NameError: 932 | return {"version": "0+unknown", "full-revisionid": None, 933 | "dirty": None, 934 | "error": "unable to find root of source tree", 935 | "date": None} 936 | 937 | try: 938 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 939 | return render(pieces, cfg.style) 940 | except NotThisMethod: 941 | pass 942 | 943 | try: 944 | if cfg.parentdir_prefix: 945 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 946 | except NotThisMethod: 947 | pass 948 | 949 | return {"version": "0+unknown", "full-revisionid": None, 950 | "dirty": None, 951 | "error": "unable to compute version", "date": None} 952 | ''' 953 | 954 | 955 | @register_vcs_handler("git", "get_keywords") 956 | def git_get_keywords(versionfile_abs): 957 | """Extract version information from the given file.""" 958 | # the code embedded in _version.py can just fetch the value of these 959 | # keywords. When used from setup.py, we don't want to import _version.py, 960 | # so we do it with a regexp instead. This function is not used from 961 | # _version.py. 962 | keywords = {} 963 | try: 964 | f = open(versionfile_abs, "r") 965 | for line in f.readlines(): 966 | if line.strip().startswith("git_refnames ="): 967 | mo = re.search(r'=\s*"(.*)"', line) 968 | if mo: 969 | keywords["refnames"] = mo.group(1) 970 | if line.strip().startswith("git_full ="): 971 | mo = re.search(r'=\s*"(.*)"', line) 972 | if mo: 973 | keywords["full"] = mo.group(1) 974 | if line.strip().startswith("git_date ="): 975 | mo = re.search(r'=\s*"(.*)"', line) 976 | if mo: 977 | keywords["date"] = mo.group(1) 978 | f.close() 979 | except EnvironmentError: 980 | pass 981 | return keywords 982 | 983 | 984 | @register_vcs_handler("git", "keywords") 985 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 986 | """Get version information from git keywords.""" 987 | if not keywords: 988 | raise NotThisMethod("no keywords at all, weird") 989 | date = keywords.get("date") 990 | if date is not None: 991 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant 992 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 993 | # -like" string, which we must then edit to make compliant), because 994 | # it's been around since git-1.5.3, and it's too difficult to 995 | # discover which version we're using, or to work around using an 996 | # older one. 
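        # e.g. "2021-01-01 12:00:00 +0000" -> "2021-01-01T12:00:00+0000"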
997 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 998 | refnames = keywords["refnames"].strip() 999 | if refnames.startswith("$Format"): 1000 | if verbose: 1001 | print("keywords are unexpanded, not using") 1002 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 1003 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 1004 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 1005 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 1006 | TAG = "tag: " 1007 | tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) 1008 | if not tags: 1009 | # Either we're using git < 1.8.3, or there really are no tags. We use 1010 | # a heuristic: assume all version tags have a digit. The old git %d 1011 | # expansion behaves like git log --decorate=short and strips out the 1012 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 1013 | # between branches and tags. By ignoring refnames without digits, we 1014 | # filter out many common branch names like "release" and 1015 | # "stabilization", as well as "HEAD" and "master". 1016 | tags = set([r for r in refs if re.search(r"\d", r)]) 1017 | if verbose: 1018 | print("discarding '%s', no digits" % ",".join(refs - tags)) 1019 | if verbose: 1020 | print("likely tags: %s" % ",".join(sorted(tags))) 1021 | for ref in sorted(tags): 1022 | # sorting will prefer e.g. "2.0" over "2.0rc1" 1023 | if ref.startswith(tag_prefix): 1024 | r = ref[len(tag_prefix) :] 1025 | if verbose: 1026 | print("picking %s" % r) 1027 | return { 1028 | "version": r, 1029 | "full-revisionid": keywords["full"].strip(), 1030 | "dirty": False, 1031 | "error": None, 1032 | "date": date, 1033 | } 1034 | # no suitable tags, so version is "0+unknown", but full hex is still there 1035 | if verbose: 1036 | print("no suitable tags, using unknown + full revision id") 1037 | return { 1038 | "version": "0+unknown", 1039 | "full-revisionid": keywords["full"].strip(), 1040 | "dirty": False, 1041 | "error": "no suitable tags", 1042 | "date": None, 1043 | } 1044 | 1045 | 1046 | @register_vcs_handler("git", "pieces_from_vcs") 1047 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 1048 | """Get version from 'git describe' in the root of the source tree. 1049 | 1050 | This only gets called if the git-archive 'subst' keywords were *not* 1051 | expanded, and _version.py hasn't already been rewritten with a short 1052 | version string, meaning we're inside a checked out source tree. 
1053 | """ 1054 | GITS = ["git"] 1055 | if sys.platform == "win32": 1056 | GITS = ["git.cmd", "git.exe"] 1057 | 1058 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) 1059 | if rc != 0: 1060 | if verbose: 1061 | print("Directory %s not under git control" % root) 1062 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 1063 | 1064 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 1065 | # if there isn't one, this yields HEX[-dirty] (no NUM) 1066 | describe_out, rc = run_command( 1067 | GITS, 1068 | [ 1069 | "describe", 1070 | "--tags", 1071 | "--dirty", 1072 | "--always", 1073 | "--long", 1074 | "--match", 1075 | "%s*" % tag_prefix, 1076 | ], 1077 | cwd=root, 1078 | ) 1079 | # --long was added in git-1.5.5 1080 | if describe_out is None: 1081 | raise NotThisMethod("'git describe' failed") 1082 | describe_out = describe_out.strip() 1083 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 1084 | if full_out is None: 1085 | raise NotThisMethod("'git rev-parse' failed") 1086 | full_out = full_out.strip() 1087 | 1088 | pieces = {} 1089 | pieces["long"] = full_out 1090 | pieces["short"] = full_out[:7] # maybe improved later 1091 | pieces["error"] = None 1092 | 1093 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 1094 | # TAG might have hyphens. 1095 | git_describe = describe_out 1096 | 1097 | # look for -dirty suffix 1098 | dirty = git_describe.endswith("-dirty") 1099 | pieces["dirty"] = dirty 1100 | if dirty: 1101 | git_describe = git_describe[: git_describe.rindex("-dirty")] 1102 | 1103 | # now we have TAG-NUM-gHEX or HEX 1104 | 1105 | if "-" in git_describe: 1106 | # TAG-NUM-gHEX 1107 | mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) 1108 | if not mo: 1109 | # unparseable. Maybe git-describe is misbehaving? 1110 | pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out 1111 | return pieces 1112 | 1113 | # tag 1114 | full_tag = mo.group(1) 1115 | if not full_tag.startswith(tag_prefix): 1116 | if verbose: 1117 | fmt = "tag '%s' doesn't start with prefix '%s'" 1118 | print(fmt % (full_tag, tag_prefix)) 1119 | pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( 1120 | full_tag, 1121 | tag_prefix, 1122 | ) 1123 | return pieces 1124 | pieces["closest-tag"] = full_tag[len(tag_prefix) :] 1125 | 1126 | # distance: number of commits since tag 1127 | pieces["distance"] = int(mo.group(2)) 1128 | 1129 | # commit: short hex revision ID 1130 | pieces["short"] = mo.group(3) 1131 | 1132 | else: 1133 | # HEX: no tags 1134 | pieces["closest-tag"] = None 1135 | count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) 1136 | pieces["distance"] = int(count_out) # total number of commits 1137 | 1138 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 1139 | date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ 1140 | 0 1141 | ].strip() 1142 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 1143 | 1144 | return pieces 1145 | 1146 | 1147 | def do_vcs_install(manifest_in, versionfile_source, ipy): 1148 | """Git-specific installation logic for Versioneer. 1149 | 1150 | For Git, this means creating/changing .gitattributes to mark _version.py 1151 | for export-subst keyword substitution. 
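
    For example, with ``versionfile_source = xhistogram/_version.py`` (as in
    this project's setup.cfg) the line added to .gitattributes is
    ``xhistogram/_version.py export-subst``.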
1152 | """ 1153 | GITS = ["git"] 1154 | if sys.platform == "win32": 1155 | GITS = ["git.cmd", "git.exe"] 1156 | files = [manifest_in, versionfile_source] 1157 | if ipy: 1158 | files.append(ipy) 1159 | try: 1160 | me = __file__ 1161 | if me.endswith(".pyc") or me.endswith(".pyo"): 1162 | me = os.path.splitext(me)[0] + ".py" 1163 | versioneer_file = os.path.relpath(me) 1164 | except NameError: 1165 | versioneer_file = "versioneer.py" 1166 | files.append(versioneer_file) 1167 | present = False 1168 | try: 1169 | f = open(".gitattributes", "r") 1170 | for line in f.readlines(): 1171 | if line.strip().startswith(versionfile_source): 1172 | if "export-subst" in line.strip().split()[1:]: 1173 | present = True 1174 | f.close() 1175 | except EnvironmentError: 1176 | pass 1177 | if not present: 1178 | f = open(".gitattributes", "a+") 1179 | f.write("%s export-subst\n" % versionfile_source) 1180 | f.close() 1181 | files.append(".gitattributes") 1182 | run_command(GITS, ["add", "--"] + files) 1183 | 1184 | 1185 | def versions_from_parentdir(parentdir_prefix, root, verbose): 1186 | """Try to determine the version from the parent directory name. 1187 | 1188 | Source tarballs conventionally unpack into a directory that includes both 1189 | the project name and a version string. We will also support searching up 1190 | two directory levels for an appropriately named parent directory 1191 | """ 1192 | rootdirs = [] 1193 | 1194 | for i in range(3): 1195 | dirname = os.path.basename(root) 1196 | if dirname.startswith(parentdir_prefix): 1197 | return { 1198 | "version": dirname[len(parentdir_prefix) :], 1199 | "full-revisionid": None, 1200 | "dirty": False, 1201 | "error": None, 1202 | "date": None, 1203 | } 1204 | else: 1205 | rootdirs.append(root) 1206 | root = os.path.dirname(root) # up a level 1207 | 1208 | if verbose: 1209 | print( 1210 | "Tried directories %s but none started with prefix %s" 1211 | % (str(rootdirs), parentdir_prefix) 1212 | ) 1213 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 1214 | 1215 | 1216 | SHORT_VERSION_PY = """ 1217 | # This file was generated by 'versioneer.py' (0.18) from 1218 | # revision-control system data, or from the parent directory name of an 1219 | # unpacked source archive. Distribution tarballs contain a pre-generated copy 1220 | # of this file. 
1221 | 1222 | import json 1223 | 1224 | version_json = ''' 1225 | %s 1226 | ''' # END VERSION_JSON 1227 | 1228 | 1229 | def get_versions(): 1230 | return json.loads(version_json) 1231 | """ 1232 | 1233 | 1234 | def versions_from_file(filename): 1235 | """Try to determine the version from _version.py if present.""" 1236 | try: 1237 | with open(filename) as f: 1238 | contents = f.read() 1239 | except EnvironmentError: 1240 | raise NotThisMethod("unable to read _version.py") 1241 | mo = re.search( 1242 | r"version_json = '''\n(.*)''' # END VERSION_JSON", contents, re.M | re.S 1243 | ) 1244 | if not mo: 1245 | mo = re.search( 1246 | r"version_json = '''\r\n(.*)''' # END VERSION_JSON", contents, re.M | re.S 1247 | ) 1248 | if not mo: 1249 | raise NotThisMethod("no version_json in _version.py") 1250 | return json.loads(mo.group(1)) 1251 | 1252 | 1253 | def write_to_version_file(filename, versions): 1254 | """Write the given version number to the given _version.py file.""" 1255 | os.unlink(filename) 1256 | contents = json.dumps(versions, sort_keys=True, indent=1, separators=(",", ": ")) 1257 | with open(filename, "w") as f: 1258 | f.write(SHORT_VERSION_PY % contents) 1259 | 1260 | print("set %s to '%s'" % (filename, versions["version"])) 1261 | 1262 | 1263 | def plus_or_dot(pieces): 1264 | """Return a + if we don't already have one, else return a .""" 1265 | if "+" in pieces.get("closest-tag", ""): 1266 | return "." 1267 | return "+" 1268 | 1269 | 1270 | def render_pep440(pieces): 1271 | """Build up version string, with post-release "local version identifier". 1272 | 1273 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 1274 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 1275 | 1276 | Exceptions: 1277 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 1278 | """ 1279 | if pieces["closest-tag"]: 1280 | rendered = pieces["closest-tag"] 1281 | if pieces["distance"] or pieces["dirty"]: 1282 | rendered += plus_or_dot(pieces) 1283 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 1284 | if pieces["dirty"]: 1285 | rendered += ".dirty" 1286 | else: 1287 | # exception #1 1288 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) 1289 | if pieces["dirty"]: 1290 | rendered += ".dirty" 1291 | return rendered 1292 | 1293 | 1294 | def render_pep440_pre(pieces): 1295 | """TAG[.post.devDISTANCE] -- No -dirty. 1296 | 1297 | Exceptions: 1298 | 1: no tags. 0.post.devDISTANCE 1299 | """ 1300 | if pieces["closest-tag"]: 1301 | rendered = pieces["closest-tag"] 1302 | if pieces["distance"]: 1303 | rendered += ".post.dev%d" % pieces["distance"] 1304 | else: 1305 | # exception #1 1306 | rendered = "0.post.dev%d" % pieces["distance"] 1307 | return rendered 1308 | 1309 | 1310 | def render_pep440_post(pieces): 1311 | """TAG[.postDISTANCE[.dev0]+gHEX] . 1312 | 1313 | The ".dev0" means dirty. Note that .dev0 sorts backwards 1314 | (a dirty tree will appear "older" than the corresponding clean one), 1315 | but you shouldn't be releasing software with -dirty anyways. 1316 | 1317 | Exceptions: 1318 | 1: no tags. 
0.postDISTANCE[.dev0] 1319 | """ 1320 | if pieces["closest-tag"]: 1321 | rendered = pieces["closest-tag"] 1322 | if pieces["distance"] or pieces["dirty"]: 1323 | rendered += ".post%d" % pieces["distance"] 1324 | if pieces["dirty"]: 1325 | rendered += ".dev0" 1326 | rendered += plus_or_dot(pieces) 1327 | rendered += "g%s" % pieces["short"] 1328 | else: 1329 | # exception #1 1330 | rendered = "0.post%d" % pieces["distance"] 1331 | if pieces["dirty"]: 1332 | rendered += ".dev0" 1333 | rendered += "+g%s" % pieces["short"] 1334 | return rendered 1335 | 1336 | 1337 | def render_pep440_old(pieces): 1338 | """TAG[.postDISTANCE[.dev0]] . 1339 | 1340 | The ".dev0" means dirty. 1341 | 1342 | Exceptions: 1343 | 1: no tags. 0.postDISTANCE[.dev0] 1344 | """ 1345 | if pieces["closest-tag"]: 1346 | rendered = pieces["closest-tag"] 1347 | if pieces["distance"] or pieces["dirty"]: 1348 | rendered += ".post%d" % pieces["distance"] 1349 | if pieces["dirty"]: 1350 | rendered += ".dev0" 1351 | else: 1352 | # exception #1 1353 | rendered = "0.post%d" % pieces["distance"] 1354 | if pieces["dirty"]: 1355 | rendered += ".dev0" 1356 | return rendered 1357 | 1358 | 1359 | def render_git_describe(pieces): 1360 | """TAG[-DISTANCE-gHEX][-dirty]. 1361 | 1362 | Like 'git describe --tags --dirty --always'. 1363 | 1364 | Exceptions: 1365 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 1366 | """ 1367 | if pieces["closest-tag"]: 1368 | rendered = pieces["closest-tag"] 1369 | if pieces["distance"]: 1370 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 1371 | else: 1372 | # exception #1 1373 | rendered = pieces["short"] 1374 | if pieces["dirty"]: 1375 | rendered += "-dirty" 1376 | return rendered 1377 | 1378 | 1379 | def render_git_describe_long(pieces): 1380 | """TAG-DISTANCE-gHEX[-dirty]. 1381 | 1382 | Like 'git describe --tags --dirty --always --long'. 1383 | The distance/hash is unconditional. 1384 | 1385 | Exceptions: 1386 | 1: no tags.
HEX[-dirty] (note: no 'g' prefix) 1387 | """ 1388 | if pieces["closest-tag"]: 1389 | rendered = pieces["closest-tag"] 1390 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 1391 | else: 1392 | # exception #1 1393 | rendered = pieces["short"] 1394 | if pieces["dirty"]: 1395 | rendered += "-dirty" 1396 | return rendered 1397 | 1398 | 1399 | def render(pieces, style): 1400 | """Render the given version pieces into the requested style.""" 1401 | if pieces["error"]: 1402 | return { 1403 | "version": "unknown", 1404 | "full-revisionid": pieces.get("long"), 1405 | "dirty": None, 1406 | "error": pieces["error"], 1407 | "date": None, 1408 | } 1409 | 1410 | if not style or style == "default": 1411 | style = "pep440" # the default 1412 | 1413 | if style == "pep440": 1414 | rendered = render_pep440(pieces) 1415 | elif style == "pep440-pre": 1416 | rendered = render_pep440_pre(pieces) 1417 | elif style == "pep440-post": 1418 | rendered = render_pep440_post(pieces) 1419 | elif style == "pep440-old": 1420 | rendered = render_pep440_old(pieces) 1421 | elif style == "git-describe": 1422 | rendered = render_git_describe(pieces) 1423 | elif style == "git-describe-long": 1424 | rendered = render_git_describe_long(pieces) 1425 | else: 1426 | raise ValueError("unknown style '%s'" % style) 1427 | 1428 | return { 1429 | "version": rendered, 1430 | "full-revisionid": pieces["long"], 1431 | "dirty": pieces["dirty"], 1432 | "error": None, 1433 | "date": pieces.get("date"), 1434 | } 1435 | 1436 | 1437 | class VersioneerBadRootError(Exception): 1438 | """The project root directory is unknown or missing key files.""" 1439 | 1440 | 1441 | def get_versions(verbose=False): 1442 | """Get the project version from whatever source is available. 1443 | 1444 | Returns dict with two keys: 'version' and 'full'. 1445 | """ 1446 | if "versioneer" in sys.modules: 1447 | # see the discussion in cmdclass.py:get_cmdclass() 1448 | del sys.modules["versioneer"] 1449 | 1450 | root = get_root() 1451 | cfg = get_config_from_root(root) 1452 | 1453 | assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" 1454 | handlers = HANDLERS.get(cfg.VCS) 1455 | assert handlers, "unrecognized VCS '%s'" % cfg.VCS 1456 | verbose = verbose or cfg.verbose 1457 | assert ( 1458 | cfg.versionfile_source is not None 1459 | ), "please set versioneer.versionfile_source" 1460 | assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" 1461 | 1462 | versionfile_abs = os.path.join(root, cfg.versionfile_source) 1463 | 1464 | # extract version from first of: _version.py, VCS command (e.g. 'git 1465 | # describe'), parentdir. This is meant to work for developers using a 1466 | # source checkout, for users of a tarball created by 'setup.py sdist', 1467 | # and for users of a tarball/zipball created by 'git archive' or github's 1468 | # download-from-tag feature or the equivalent in other VCSes. 
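# Concretely, the handlers below are tried in order, falling through on NotThisMethod: expanded git-archive keywords, then a previously written _version.py (versions_from_file), then a live 'git describe' (pieces_from_vcs + render), then the parent directory name, with "0+unknown" as the final fallback.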
1469 | 1470 | get_keywords_f = handlers.get("get_keywords") 1471 | from_keywords_f = handlers.get("keywords") 1472 | if get_keywords_f and from_keywords_f: 1473 | try: 1474 | keywords = get_keywords_f(versionfile_abs) 1475 | ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) 1476 | if verbose: 1477 | print("got version from expanded keyword %s" % ver) 1478 | return ver 1479 | except NotThisMethod: 1480 | pass 1481 | 1482 | try: 1483 | ver = versions_from_file(versionfile_abs) 1484 | if verbose: 1485 | print("got version from file %s %s" % (versionfile_abs, ver)) 1486 | return ver 1487 | except NotThisMethod: 1488 | pass 1489 | 1490 | from_vcs_f = handlers.get("pieces_from_vcs") 1491 | if from_vcs_f: 1492 | try: 1493 | pieces = from_vcs_f(cfg.tag_prefix, root, verbose) 1494 | ver = render(pieces, cfg.style) 1495 | if verbose: 1496 | print("got version from VCS %s" % ver) 1497 | return ver 1498 | except NotThisMethod: 1499 | pass 1500 | 1501 | try: 1502 | if cfg.parentdir_prefix: 1503 | ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 1504 | if verbose: 1505 | print("got version from parentdir %s" % ver) 1506 | return ver 1507 | except NotThisMethod: 1508 | pass 1509 | 1510 | if verbose: 1511 | print("unable to compute version") 1512 | 1513 | return { 1514 | "version": "0+unknown", 1515 | "full-revisionid": None, 1516 | "dirty": None, 1517 | "error": "unable to compute version", 1518 | "date": None, 1519 | } 1520 | 1521 | 1522 | def get_version(): 1523 | """Get the short version string for this project.""" 1524 | return get_versions()["version"] 1525 | 1526 | 1527 | def get_cmdclass(): 1528 | """Get the custom setuptools/distutils subclasses used by Versioneer.""" 1529 | if "versioneer" in sys.modules: 1530 | del sys.modules["versioneer"] 1531 | # this fixes the "python setup.py develop" case (also 'install' and 1532 | # 'easy_install .'), in which subdependencies of the main project are 1533 | # built (using setup.py bdist_egg) in the same python process. Assume 1534 | # a main project A and a dependency B, which use different versions 1535 | # of Versioneer. A's setup.py imports A's Versioneer, leaving it in 1536 | # sys.modules by the time B's setup.py is executed, causing B to run 1537 | # with the wrong versioneer. Setuptools wraps the sub-dep builds in a 1538 | # sandbox that restores sys.modules to its pre-build state, so the 1539 | # parent is protected against the child's "import versioneer". By 1540 | # removing ourselves from sys.modules here, before the child build 1541 | # happens, we protect the child from the parent's versioneer too.
1542 | # Also see https://github.com/warner/python-versioneer/issues/52 1543 | 1544 | cmds = {} 1545 | 1546 | # we add "version" to both distutils and setuptools 1547 | from distutils.core import Command 1548 | 1549 | class cmd_version(Command): 1550 | description = "report generated version string" 1551 | user_options = [] 1552 | boolean_options = [] 1553 | 1554 | def initialize_options(self): 1555 | pass 1556 | 1557 | def finalize_options(self): 1558 | pass 1559 | 1560 | def run(self): 1561 | vers = get_versions(verbose=True) 1562 | print("Version: %s" % vers["version"]) 1563 | print(" full-revisionid: %s" % vers.get("full-revisionid")) 1564 | print(" dirty: %s" % vers.get("dirty")) 1565 | print(" date: %s" % vers.get("date")) 1566 | if vers["error"]: 1567 | print(" error: %s" % vers["error"]) 1568 | 1569 | cmds["version"] = cmd_version 1570 | 1571 | # we override "build_py" in both distutils and setuptools 1572 | # 1573 | # most invocation pathways end up running build_py: 1574 | # distutils/build -> build_py 1575 | # distutils/install -> distutils/build ->.. 1576 | # setuptools/bdist_wheel -> distutils/install ->.. 1577 | # setuptools/bdist_egg -> distutils/install_lib -> build_py 1578 | # setuptools/install -> bdist_egg ->.. 1579 | # setuptools/develop -> ? 1580 | # pip install: 1581 | # copies source tree to a tempdir before running egg_info/etc 1582 | # if .git isn't copied too, 'git describe' will fail 1583 | # then does setup.py bdist_wheel, or sometimes setup.py install 1584 | # setup.py egg_info -> ? 1585 | 1586 | # we override different "build_py" commands for both environments 1587 | if "setuptools" in sys.modules: 1588 | from setuptools.command.build_py import build_py as _build_py 1589 | else: 1590 | from distutils.command.build_py import build_py as _build_py 1591 | 1592 | class cmd_build_py(_build_py): 1593 | def run(self): 1594 | root = get_root() 1595 | cfg = get_config_from_root(root) 1596 | versions = get_versions() 1597 | _build_py.run(self) 1598 | # now locate _version.py in the new build/ directory and replace 1599 | # it with an updated value 1600 | if cfg.versionfile_build: 1601 | target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) 1602 | print("UPDATING %s" % target_versionfile) 1603 | write_to_version_file(target_versionfile, versions) 1604 | 1605 | cmds["build_py"] = cmd_build_py 1606 | 1607 | if "cx_Freeze" in sys.modules: # cx_freeze enabled? 1608 | from cx_Freeze.dist import build_exe as _build_exe 1609 | 1610 | # nczeczulin reports that py2exe won't like the pep440-style string 1611 | # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. 1612 | # setup(console=[{ 1613 | # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION 1614 | # "product_version": versioneer.get_version(), 1615 | # ... 
1616 | 1617 | class cmd_build_exe(_build_exe): 1618 | def run(self): 1619 | root = get_root() 1620 | cfg = get_config_from_root(root) 1621 | versions = get_versions() 1622 | target_versionfile = cfg.versionfile_source 1623 | print("UPDATING %s" % target_versionfile) 1624 | write_to_version_file(target_versionfile, versions) 1625 | 1626 | _build_exe.run(self) 1627 | os.unlink(target_versionfile) 1628 | with open(cfg.versionfile_source, "w") as f: 1629 | LONG = LONG_VERSION_PY[cfg.VCS] 1630 | f.write( 1631 | LONG 1632 | % { 1633 | "DOLLAR": "$", 1634 | "STYLE": cfg.style, 1635 | "TAG_PREFIX": cfg.tag_prefix, 1636 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1637 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1638 | } 1639 | ) 1640 | 1641 | cmds["build_exe"] = cmd_build_exe 1642 | del cmds["build_py"] 1643 | 1644 | if "py2exe" in sys.modules: # py2exe enabled? 1645 | try: 1646 | from py2exe.distutils_buildexe import py2exe as _py2exe # py3 1647 | except ImportError: 1648 | from py2exe.build_exe import py2exe as _py2exe # py2 1649 | 1650 | class cmd_py2exe(_py2exe): 1651 | def run(self): 1652 | root = get_root() 1653 | cfg = get_config_from_root(root) 1654 | versions = get_versions() 1655 | target_versionfile = cfg.versionfile_source 1656 | print("UPDATING %s" % target_versionfile) 1657 | write_to_version_file(target_versionfile, versions) 1658 | 1659 | _py2exe.run(self) 1660 | os.unlink(target_versionfile) 1661 | with open(cfg.versionfile_source, "w") as f: 1662 | LONG = LONG_VERSION_PY[cfg.VCS] 1663 | f.write( 1664 | LONG 1665 | % { 1666 | "DOLLAR": "$", 1667 | "STYLE": cfg.style, 1668 | "TAG_PREFIX": cfg.tag_prefix, 1669 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1670 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1671 | } 1672 | ) 1673 | 1674 | cmds["py2exe"] = cmd_py2exe 1675 | 1676 | # we override different "sdist" commands for both environments 1677 | if "setuptools" in sys.modules: 1678 | from setuptools.command.sdist import sdist as _sdist 1679 | else: 1680 | from distutils.command.sdist import sdist as _sdist 1681 | 1682 | class cmd_sdist(_sdist): 1683 | def run(self): 1684 | versions = get_versions() 1685 | self._versioneer_generated_versions = versions 1686 | # unless we update this, the command will keep using the old 1687 | # version 1688 | self.distribution.metadata.version = versions["version"] 1689 | return _sdist.run(self) 1690 | 1691 | def make_release_tree(self, base_dir, files): 1692 | root = get_root() 1693 | cfg = get_config_from_root(root) 1694 | _sdist.make_release_tree(self, base_dir, files) 1695 | # now locate _version.py in the new base_dir directory 1696 | # (remembering that it may be a hardlink) and replace it with an 1697 | # updated value 1698 | target_versionfile = os.path.join(base_dir, cfg.versionfile_source) 1699 | print("UPDATING %s" % target_versionfile) 1700 | write_to_version_file( 1701 | target_versionfile, self._versioneer_generated_versions 1702 | ) 1703 | 1704 | cmds["sdist"] = cmd_sdist 1705 | 1706 | return cmds 1707 | 1708 | 1709 | CONFIG_ERROR = """ 1710 | setup.cfg is missing the necessary Versioneer configuration. 
You need 1711 | a section like: 1712 | 1713 | [versioneer] 1714 | VCS = git 1715 | style = pep440 1716 | versionfile_source = src/myproject/_version.py 1717 | versionfile_build = myproject/_version.py 1718 | tag_prefix = 1719 | parentdir_prefix = myproject- 1720 | 1721 | You will also need to edit your setup.py to use the results: 1722 | 1723 | import versioneer 1724 | setup(version=versioneer.get_version(), 1725 | cmdclass=versioneer.get_cmdclass(), ...) 1726 | 1727 | Please read the docstring in ./versioneer.py for configuration instructions, 1728 | edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. 1729 | """ 1730 | 1731 | SAMPLE_CONFIG = """ 1732 | # See the docstring in versioneer.py for instructions. Note that you must 1733 | # re-run 'versioneer.py setup' after changing this section, and commit the 1734 | # resulting files. 1735 | 1736 | [versioneer] 1737 | #VCS = git 1738 | #style = pep440 1739 | #versionfile_source = 1740 | #versionfile_build = 1741 | #tag_prefix = 1742 | #parentdir_prefix = 1743 | 1744 | """ 1745 | 1746 | INIT_PY_SNIPPET = """ 1747 | from ._version import get_versions 1748 | __version__ = get_versions()['version'] 1749 | del get_versions 1750 | """ 1751 | 1752 | 1753 | def do_setup(): 1754 | """Main VCS-independent setup function for installing Versioneer.""" 1755 | root = get_root() 1756 | try: 1757 | cfg = get_config_from_root(root) 1758 | except ( 1759 | EnvironmentError, 1760 | configparser.NoSectionError, 1761 | configparser.NoOptionError, 1762 | ) as e: 1763 | if isinstance(e, (EnvironmentError, configparser.NoSectionError)): 1764 | print("Adding sample versioneer config to setup.cfg", file=sys.stderr) 1765 | with open(os.path.join(root, "setup.cfg"), "a") as f: 1766 | f.write(SAMPLE_CONFIG) 1767 | print(CONFIG_ERROR, file=sys.stderr) 1768 | return 1 1769 | 1770 | print(" creating %s" % cfg.versionfile_source) 1771 | with open(cfg.versionfile_source, "w") as f: 1772 | LONG = LONG_VERSION_PY[cfg.VCS] 1773 | f.write( 1774 | LONG 1775 | % { 1776 | "DOLLAR": "$", 1777 | "STYLE": cfg.style, 1778 | "TAG_PREFIX": cfg.tag_prefix, 1779 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1780 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1781 | } 1782 | ) 1783 | 1784 | ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") 1785 | if os.path.exists(ipy): 1786 | try: 1787 | with open(ipy, "r") as f: 1788 | old = f.read() 1789 | except EnvironmentError: 1790 | old = "" 1791 | if INIT_PY_SNIPPET not in old: 1792 | print(" appending to %s" % ipy) 1793 | with open(ipy, "a") as f: 1794 | f.write(INIT_PY_SNIPPET) 1795 | else: 1796 | print(" %s unmodified" % ipy) 1797 | else: 1798 | print(" %s doesn't exist, ok" % ipy) 1799 | ipy = None 1800 | 1801 | # Make sure both the top-level "versioneer.py" and versionfile_source 1802 | # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so 1803 | # they'll be copied into source distributions. Pip won't be able to 1804 | # install the package without this. 1805 | manifest_in = os.path.join(root, "MANIFEST.in") 1806 | simple_includes = set() 1807 | try: 1808 | with open(manifest_in, "r") as f: 1809 | for line in f: 1810 | if line.startswith("include "): 1811 | for include in line.split()[1:]: 1812 | simple_includes.add(include) 1813 | except EnvironmentError: 1814 | pass 1815 | # That doesn't cover everything MANIFEST.in can do 1816 | # (http://docs.python.org/2/distutils/sourcedist.html#commands), so 1817 | # it might give some false negatives. 
Appending redundant 'include' 1818 | # lines is safe, though. 1819 | if "versioneer.py" not in simple_includes: 1820 | print(" appending 'versioneer.py' to MANIFEST.in") 1821 | with open(manifest_in, "a") as f: 1822 | f.write("include versioneer.py\n") 1823 | else: 1824 | print(" 'versioneer.py' already in MANIFEST.in") 1825 | if cfg.versionfile_source not in simple_includes: 1826 | print( 1827 | " appending versionfile_source ('%s') to MANIFEST.in" 1828 | % cfg.versionfile_source 1829 | ) 1830 | with open(manifest_in, "a") as f: 1831 | f.write("include %s\n" % cfg.versionfile_source) 1832 | else: 1833 | print(" versionfile_source already in MANIFEST.in") 1834 | 1835 | # Make VCS-specific changes. For git, this means creating/changing 1836 | # .gitattributes to mark _version.py for export-subst keyword 1837 | # substitution. 1838 | do_vcs_install(manifest_in, cfg.versionfile_source, ipy) 1839 | return 0 1840 | 1841 | 1842 | def scan_setup_py(): 1843 | """Validate the contents of setup.py against Versioneer's expectations.""" 1844 | found = set() 1845 | setters = False 1846 | errors = 0 1847 | with open("setup.py", "r") as f: 1848 | for line in f.readlines(): 1849 | if "import versioneer" in line: 1850 | found.add("import") 1851 | if "versioneer.get_cmdclass()" in line: 1852 | found.add("cmdclass") 1853 | if "versioneer.get_version()" in line: 1854 | found.add("get_version") 1855 | if "versioneer.VCS" in line: 1856 | setters = True 1857 | if "versioneer.versionfile_source" in line: 1858 | setters = True 1859 | if len(found) != 3: 1860 | print("") 1861 | print("Your setup.py appears to be missing some important items") 1862 | print("(but I might be wrong). Please make sure it has something") 1863 | print("roughly like the following:") 1864 | print("") 1865 | print(" import versioneer") 1866 | print(" setup( version=versioneer.get_version(),") 1867 | print(" cmdclass=versioneer.get_cmdclass(), ...)") 1868 | print("") 1869 | errors += 1 1870 | if setters: 1871 | print("You should remove lines like 'versioneer.VCS = ' and") 1872 | print("'versioneer.versionfile_source = ' . This configuration") 1873 | print("now lives in setup.cfg, and should be removed from setup.py") 1874 | print("") 1875 | errors += 1 1876 | return errors 1877 | 1878 | 1879 | if __name__ == "__main__": 1880 | cmd = sys.argv[1] 1881 | if cmd == "setup": 1882 | errors = do_setup() 1883 | errors += scan_setup_py() 1884 | if errors: 1885 | sys.exit(1) 1886 | -------------------------------------------------------------------------------- /xhistogram/__init__.py: -------------------------------------------------------------------------------- 1 | from ._version import get_versions 2 | 3 | __version__ = get_versions()["version"] 4 | del get_versions 5 | 6 | __all__ = ["core", "xarray"] 7 | -------------------------------------------------------------------------------- /xhistogram/_version.py: -------------------------------------------------------------------------------- 1 | # This file helps to compute a version number in source trees obtained from 2 | # git-archive tarball (such as those provided by github's download-from-tag 3 | # feature). Distribution tarballs (built by setup.py sdist) and build 4 | # directories (produced by setup.py build) will contain a much shorter file 5 | # that just contains the computed version number. 6 | 7 | # This file is released into the public domain.
Generated by 8 | # versioneer-0.18 (https://github.com/warner/python-versioneer) 9 | 10 | """Git implementation of _version.py.""" 11 | 12 | import errno 13 | import os 14 | import re 15 | import subprocess 16 | import sys 17 | 18 | 19 | def get_keywords(): 20 | """Get the keywords needed to look up the version information.""" 21 | # these strings will be replaced by git during git-archive. 22 | # setup.py/versioneer.py will grep for the variable names, so they must 23 | # each be defined on a line of their own. _version.py will just call 24 | # get_keywords(). 25 | git_refnames = "$Format:%d$" 26 | git_full = "$Format:%H$" 27 | git_date = "$Format:%ci$" 28 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} 29 | return keywords 30 | 31 | 32 | class VersioneerConfig: 33 | """Container for Versioneer configuration parameters.""" 34 | 35 | 36 | def get_config(): 37 | """Create, populate and return the VersioneerConfig() object.""" 38 | # these strings are filled in when 'setup.py versioneer' creates 39 | # _version.py 40 | cfg = VersioneerConfig() 41 | cfg.VCS = "git" 42 | cfg.style = "pep440" 43 | cfg.tag_prefix = "v" 44 | cfg.parentdir_prefix = "xgcm-" 45 | cfg.versionfile_source = "xgcm/_version.py" 46 | cfg.verbose = False 47 | return cfg 48 | 49 | 50 | class NotThisMethod(Exception): 51 | """Exception raised if a method is not valid for the current scenario.""" 52 | 53 | 54 | LONG_VERSION_PY = {} 55 | HANDLERS = {} 56 | 57 | 58 | def register_vcs_handler(vcs, method): # decorator 59 | """Decorator to mark a method as the handler for a particular VCS.""" 60 | 61 | def decorate(f): 62 | """Store f in HANDLERS[vcs][method].""" 63 | if vcs not in HANDLERS: 64 | HANDLERS[vcs] = {} 65 | HANDLERS[vcs][method] = f 66 | return f 67 | 68 | return decorate 69 | 70 | 71 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): 72 | """Call the given command(s).""" 73 | assert isinstance(commands, list) 74 | p = None 75 | for c in commands: 76 | try: 77 | dispcmd = str([c] + args) 78 | # remember shell=False, so use git.cmd on windows, not just git 79 | p = subprocess.Popen( 80 | [c] + args, 81 | cwd=cwd, 82 | env=env, 83 | stdout=subprocess.PIPE, 84 | stderr=(subprocess.PIPE if hide_stderr else None), 85 | ) 86 | break 87 | except EnvironmentError: 88 | e = sys.exc_info()[1] 89 | if e.errno == errno.ENOENT: 90 | continue 91 | if verbose: 92 | print("unable to run %s" % dispcmd) 93 | print(e) 94 | return None, None 95 | else: 96 | if verbose: 97 | print("unable to find command, tried %s" % (commands,)) 98 | return None, None 99 | stdout = p.communicate()[0].strip() 100 | if sys.version_info[0] >= 3: 101 | stdout = stdout.decode() 102 | if p.returncode != 0: 103 | if verbose: 104 | print("unable to run %s (error)" % dispcmd) 105 | print("stdout was %s" % stdout) 106 | return None, p.returncode 107 | return stdout, p.returncode 108 | 109 | 110 | def versions_from_parentdir(parentdir_prefix, root, verbose): 111 | """Try to determine the version from the parent directory name. 112 | 113 | Source tarballs conventionally unpack into a directory that includes both 114 | the project name and a version string. 
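For example (hypothetical directory name): with parentdir_prefix 'xgcm-', an unpacked tree rooted at 'xgcm-0.2.0/' yields version '0.2.0'.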
We will also support searching up 115 | two directory levels for an appropriately named parent directory 116 | """ 117 | rootdirs = [] 118 | 119 | for i in range(3): 120 | dirname = os.path.basename(root) 121 | if dirname.startswith(parentdir_prefix): 122 | return { 123 | "version": dirname[len(parentdir_prefix) :], 124 | "full-revisionid": None, 125 | "dirty": False, 126 | "error": None, 127 | "date": None, 128 | } 129 | else: 130 | rootdirs.append(root) 131 | root = os.path.dirname(root) # up a level 132 | 133 | if verbose: 134 | print( 135 | "Tried directories %s but none started with prefix %s" 136 | % (str(rootdirs), parentdir_prefix) 137 | ) 138 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 139 | 140 | 141 | @register_vcs_handler("git", "get_keywords") 142 | def git_get_keywords(versionfile_abs): 143 | """Extract version information from the given file.""" 144 | # the code embedded in _version.py can just fetch the value of these 145 | # keywords. When used from setup.py, we don't want to import _version.py, 146 | # so we do it with a regexp instead. This function is not used from 147 | # _version.py. 148 | keywords = {} 149 | try: 150 | f = open(versionfile_abs, "r") 151 | for line in f.readlines(): 152 | if line.strip().startswith("git_refnames ="): 153 | mo = re.search(r'=\s*"(.*)"', line) 154 | if mo: 155 | keywords["refnames"] = mo.group(1) 156 | if line.strip().startswith("git_full ="): 157 | mo = re.search(r'=\s*"(.*)"', line) 158 | if mo: 159 | keywords["full"] = mo.group(1) 160 | if line.strip().startswith("git_date ="): 161 | mo = re.search(r'=\s*"(.*)"', line) 162 | if mo: 163 | keywords["date"] = mo.group(1) 164 | f.close() 165 | except EnvironmentError: 166 | pass 167 | return keywords 168 | 169 | 170 | @register_vcs_handler("git", "keywords") 171 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 172 | """Get version information from git keywords.""" 173 | if not keywords: 174 | raise NotThisMethod("no keywords at all, weird") 175 | date = keywords.get("date") 176 | if date is not None: 177 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant 178 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 179 | # -like" string, which we must then edit to make compliant), because 180 | # it's been around since git-1.5.3, and it's too difficult to 181 | # discover which version we're using, or to work around using an 182 | # older one. 183 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 184 | refnames = keywords["refnames"].strip() 185 | if refnames.startswith("$Format"): 186 | if verbose: 187 | print("keywords are unexpanded, not using") 188 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 189 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 190 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 191 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 192 | TAG = "tag: " 193 | tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) 194 | if not tags: 195 | # Either we're using git < 1.8.3, or there really are no tags. We use 196 | # a heuristic: assume all version tags have a digit. The old git %d 197 | # expansion behaves like git log --decorate=short and strips out the 198 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 199 | # between branches and tags. 
By ignoring refnames without digits, we 200 | # filter out many common branch names like "release" and 201 | # "stabilization", as well as "HEAD" and "master". 202 | tags = set([r for r in refs if re.search(r"\d", r)]) 203 | if verbose: 204 | print("discarding '%s', no digits" % ",".join(refs - tags)) 205 | if verbose: 206 | print("likely tags: %s" % ",".join(sorted(tags))) 207 | for ref in sorted(tags): 208 | # sorting will prefer e.g. "2.0" over "2.0rc1" 209 | if ref.startswith(tag_prefix): 210 | r = ref[len(tag_prefix) :] 211 | if verbose: 212 | print("picking %s" % r) 213 | return { 214 | "version": r, 215 | "full-revisionid": keywords["full"].strip(), 216 | "dirty": False, 217 | "error": None, 218 | "date": date, 219 | } 220 | # no suitable tags, so version is "0+unknown", but full hex is still there 221 | if verbose: 222 | print("no suitable tags, using unknown + full revision id") 223 | return { 224 | "version": "0+unknown", 225 | "full-revisionid": keywords["full"].strip(), 226 | "dirty": False, 227 | "error": "no suitable tags", 228 | "date": None, 229 | } 230 | 231 | 232 | @register_vcs_handler("git", "pieces_from_vcs") 233 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 234 | """Get version from 'git describe' in the root of the source tree. 235 | 236 | This only gets called if the git-archive 'subst' keywords were *not* 237 | expanded, and _version.py hasn't already been rewritten with a short 238 | version string, meaning we're inside a checked out source tree. 239 | """ 240 | GITS = ["git"] 241 | if sys.platform == "win32": 242 | GITS = ["git.cmd", "git.exe"] 243 | 244 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) 245 | if rc != 0: 246 | if verbose: 247 | print("Directory %s not under git control" % root) 248 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 249 | 250 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 251 | # if there isn't one, this yields HEX[-dirty] (no NUM) 252 | describe_out, rc = run_command( 253 | GITS, 254 | [ 255 | "describe", 256 | "--tags", 257 | "--dirty", 258 | "--always", 259 | "--long", 260 | "--match", 261 | "%s*" % tag_prefix, 262 | ], 263 | cwd=root, 264 | ) 265 | # --long was added in git-1.5.5 266 | if describe_out is None: 267 | raise NotThisMethod("'git describe' failed") 268 | describe_out = describe_out.strip() 269 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 270 | if full_out is None: 271 | raise NotThisMethod("'git rev-parse' failed") 272 | full_out = full_out.strip() 273 | 274 | pieces = {} 275 | pieces["long"] = full_out 276 | pieces["short"] = full_out[:7] # maybe improved later 277 | pieces["error"] = None 278 | 279 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 280 | # TAG might have hyphens. 281 | git_describe = describe_out 282 | 283 | # look for -dirty suffix 284 | dirty = git_describe.endswith("-dirty") 285 | pieces["dirty"] = dirty 286 | if dirty: 287 | git_describe = git_describe[: git_describe.rindex("-dirty")] 288 | 289 | # now we have TAG-NUM-gHEX or HEX 290 | 291 | if "-" in git_describe: 292 | # TAG-NUM-gHEX 293 | mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) 294 | if not mo: 295 | # unparseable. Maybe git-describe is misbehaving? 
296 | pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out 297 | return pieces 298 | 299 | # tag 300 | full_tag = mo.group(1) 301 | if not full_tag.startswith(tag_prefix): 302 | if verbose: 303 | fmt = "tag '%s' doesn't start with prefix '%s'" 304 | print(fmt % (full_tag, tag_prefix)) 305 | pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( 306 | full_tag, 307 | tag_prefix, 308 | ) 309 | return pieces 310 | pieces["closest-tag"] = full_tag[len(tag_prefix) :] 311 | 312 | # distance: number of commits since tag 313 | pieces["distance"] = int(mo.group(2)) 314 | 315 | # commit: short hex revision ID 316 | pieces["short"] = mo.group(3) 317 | 318 | else: 319 | # HEX: no tags 320 | pieces["closest-tag"] = None 321 | count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) 322 | pieces["distance"] = int(count_out) # total number of commits 323 | 324 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 325 | date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ 326 | 0 327 | ].strip() 328 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 329 | 330 | return pieces 331 | 332 | 333 | def plus_or_dot(pieces): 334 | """Return a + if we don't already have one, else return a .""" 335 | if "+" in pieces.get("closest-tag", ""): 336 | return "." 337 | return "+" 338 | 339 | 340 | def render_pep440(pieces): 341 | """Build up version string, with post-release "local version identifier". 342 | 343 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 344 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 345 | 346 | Exceptions: 347 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 348 | """ 349 | if pieces["closest-tag"]: 350 | rendered = pieces["closest-tag"] 351 | if pieces["distance"] or pieces["dirty"]: 352 | rendered += plus_or_dot(pieces) 353 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 354 | if pieces["dirty"]: 355 | rendered += ".dirty" 356 | else: 357 | # exception #1 358 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) 359 | if pieces["dirty"]: 360 | rendered += ".dirty" 361 | return rendered 362 | 363 | 364 | def render_pep440_pre(pieces): 365 | """TAG[.post.devDISTANCE] -- No -dirty. 366 | 367 | Exceptions: 368 | 1: no tags. 0.post.devDISTANCE 369 | """ 370 | if pieces["closest-tag"]: 371 | rendered = pieces["closest-tag"] 372 | if pieces["distance"]: 373 | rendered += ".post.dev%d" % pieces["distance"] 374 | else: 375 | # exception #1 376 | rendered = "0.post.dev%d" % pieces["distance"] 377 | return rendered 378 | 379 | 380 | def render_pep440_post(pieces): 381 | """TAG[.postDISTANCE[.dev0]+gHEX] . 382 | 383 | The ".dev0" means dirty. Note that .dev0 sorts backwards 384 | (a dirty tree will appear "older" than the corresponding clean one), 385 | but you shouldn't be releasing software with -dirty anyways. 386 | 387 | Exceptions: 388 | 1: no tags. 
0.postDISTANCE[.dev0] 389 | """ 390 | if pieces["closest-tag"]: 391 | rendered = pieces["closest-tag"] 392 | if pieces["distance"] or pieces["dirty"]: 393 | rendered += ".post%d" % pieces["distance"] 394 | if pieces["dirty"]: 395 | rendered += ".dev0" 396 | rendered += plus_or_dot(pieces) 397 | rendered += "g%s" % pieces["short"] 398 | else: 399 | # exception #1 400 | rendered = "0.post%d" % pieces["distance"] 401 | if pieces["dirty"]: 402 | rendered += ".dev0" 403 | rendered += "+g%s" % pieces["short"] 404 | return rendered 405 | 406 | 407 | def render_pep440_old(pieces): 408 | """TAG[.postDISTANCE[.dev0]] . 409 | 410 | The ".dev0" means dirty. 411 | 412 | Exceptions: 413 | 1: no tags. 0.postDISTANCE[.dev0] 414 | """ 415 | if pieces["closest-tag"]: 416 | rendered = pieces["closest-tag"] 417 | if pieces["distance"] or pieces["dirty"]: 418 | rendered += ".post%d" % pieces["distance"] 419 | if pieces["dirty"]: 420 | rendered += ".dev0" 421 | else: 422 | # exception #1 423 | rendered = "0.post%d" % pieces["distance"] 424 | if pieces["dirty"]: 425 | rendered += ".dev0" 426 | return rendered 427 | 428 | 429 | def render_git_describe(pieces): 430 | """TAG[-DISTANCE-gHEX][-dirty]. 431 | 432 | Like 'git describe --tags --dirty --always'. 433 | 434 | Exceptions: 435 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 436 | """ 437 | if pieces["closest-tag"]: 438 | rendered = pieces["closest-tag"] 439 | if pieces["distance"]: 440 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 441 | else: 442 | # exception #1 443 | rendered = pieces["short"] 444 | if pieces["dirty"]: 445 | rendered += "-dirty" 446 | return rendered 447 | 448 | 449 | def render_git_describe_long(pieces): 450 | """TAG-DISTANCE-gHEX[-dirty]. 451 | 452 | Like 'git describe --tags --dirty --always --long'. 453 | The distance/hash is unconditional. 454 | 455 | Exceptions: 456 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 457 | """ 458 | if pieces["closest-tag"]: 459 | rendered = pieces["closest-tag"] 460 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 461 | else: 462 | # exception #1 463 | rendered = pieces["short"] 464 | if pieces["dirty"]: 465 | rendered += "-dirty" 466 | return rendered 467 | 468 | 469 | def render(pieces, style): 470 | """Render the given version pieces into the requested style.""" 471 | if pieces["error"]: 472 | return { 473 | "version": "unknown", 474 | "full-revisionid": pieces.get("long"), 475 | "dirty": None, 476 | "error": pieces["error"], 477 | "date": None, 478 | } 479 | 480 | if not style or style == "default": 481 | style = "pep440" # the default 482 | 483 | if style == "pep440": 484 | rendered = render_pep440(pieces) 485 | elif style == "pep440-pre": 486 | rendered = render_pep440_pre(pieces) 487 | elif style == "pep440-post": 488 | rendered = render_pep440_post(pieces) 489 | elif style == "pep440-old": 490 | rendered = render_pep440_old(pieces) 491 | elif style == "git-describe": 492 | rendered = render_git_describe(pieces) 493 | elif style == "git-describe-long": 494 | rendered = render_git_describe_long(pieces) 495 | else: 496 | raise ValueError("unknown style '%s'" % style) 497 | 498 | return { 499 | "version": rendered, 500 | "full-revisionid": pieces["long"], 501 | "dirty": pieces["dirty"], 502 | "error": None, 503 | "date": pieces.get("date"), 504 | } 505 | 506 | 507 | def get_versions(): 508 | """Get version information or return default if unable to do so.""" 509 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE.
If we have 510 | # __file__, we can work backwards from there to the root. Some 511 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 512 | # case we can only use expanded keywords. 513 | 514 | cfg = get_config() 515 | verbose = cfg.verbose 516 | 517 | try: 518 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) 519 | except NotThisMethod: 520 | pass 521 | 522 | try: 523 | root = os.path.realpath(__file__) 524 | # versionfile_source is the relative path from the top of the source 525 | # tree (where the .git directory might live) to this file. Invert 526 | # this to find the root from __file__. 527 | for i in cfg.versionfile_source.split("/"): 528 | root = os.path.dirname(root) 529 | except NameError: 530 | return { 531 | "version": "0+unknown", 532 | "full-revisionid": None, 533 | "dirty": None, 534 | "error": "unable to find root of source tree", 535 | "date": None, 536 | } 537 | 538 | try: 539 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 540 | return render(pieces, cfg.style) 541 | except NotThisMethod: 542 | pass 543 | 544 | try: 545 | if cfg.parentdir_prefix: 546 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 547 | except NotThisMethod: 548 | pass 549 | 550 | return { 551 | "version": "0+unknown", 552 | "full-revisionid": None, 553 | "dirty": None, 554 | "error": "unable to compute version", 555 | "date": None, 556 | } 557 | -------------------------------------------------------------------------------- /xhistogram/core.py: -------------------------------------------------------------------------------- 1 | """ 2 | Numpy API for xhistogram. 3 | """ 4 | 5 | 6 | import dask 7 | import numpy as np 8 | from functools import reduce 9 | from collections.abc import Iterable 10 | from numpy import ( 11 | searchsorted, 12 | bincount, 13 | reshape, 14 | ravel_multi_index, 15 | concatenate, 16 | broadcast_arrays, 17 | ) 18 | 19 | # range is a keyword so save the builtin so they can use it. 20 | _range = range 21 | 22 | try: 23 | import dask.array as dsa 24 | 25 | has_dask = True 26 | except ImportError: 27 | has_dask = False 28 | 29 | 30 | def _any_dask_array(*args): 31 | if not has_dask: 32 | return False 33 | else: 34 | return any(isinstance(a, dsa.core.Array) for a in args) 35 | 36 | 37 | def _ensure_correctly_formatted_bins(bins, N_expected): 38 | # TODO: This could be done better / more robustly 39 | if bins is None: 40 | raise ValueError("bins must be provided") 41 | if isinstance(bins, (int, str, np.ndarray)): 42 | bins = N_expected * [bins] 43 | if len(bins) == N_expected: 44 | return bins 45 | else: 46 | raise ValueError( 47 | "The number of bin definitions doesn't match the number of args" 48 | ) 49 | 50 | 51 | def _ensure_correctly_formatted_range(range_, N_expected): 52 | # TODO: This could be done better / more robustly 53 | def _iterable_nested(x): 54 | return all(isinstance(i, Iterable) for i in x) 55 | 56 | if range_ is not None: 57 | if (len(range_) == 2) & (not _iterable_nested(range_)): 58 | return N_expected * [range_] 59 | elif N_expected == len(range_): 60 | if all(len(x) == 2 for x in range_): 61 | return range_ 62 | else: 63 | raise ValueError( 64 | "range should be provided as (lower_range, upper_range). 
In the " 65 | + "case of multiple args, range should be a list of such tuples" 66 | ) 67 | else: 68 | raise ValueError("The number of ranges doesn't match the number of args") 69 | else: 70 | return N_expected * [range_] 71 | 72 | 73 | def _bincount_2d(bin_indices, weights, N, hist_shapes): 74 | # a trick to apply bincount on an axis-by-axis basis 75 | # https://stackoverflow.com/questions/40591754/vectorizing-numpy-bincount 76 | # https://stackoverflow.com/questions/40588403/vectorized-searchsorted-numpy 77 | M = bin_indices.shape[0] 78 | if weights is not None: 79 | weights = weights.ravel() 80 | bin_indices_offset = (bin_indices + (N * np.arange(M)[:, None])).ravel() 81 | bc_offset = bincount(bin_indices_offset, weights=weights, minlength=N * M) 82 | final_shape = (M,) + tuple(hist_shapes) 83 | return bc_offset.reshape(final_shape) 84 | 85 | 86 | def _bincount_loop(bin_indices, weights, N, hist_shapes, block_chunks): 87 | M = bin_indices.shape[0] 88 | assert sum(block_chunks) == M 89 | block_counts = [] 90 | # iterate over chunks 91 | bounds = np.cumsum((0,) + block_chunks) 92 | for m_start, m_end in zip(bounds[:-1], bounds[1:]): 93 | bin_indices_block = bin_indices[m_start:m_end] 94 | weights_block = weights[m_start:m_end] if weights is not None else None 95 | bc_block = _bincount_2d(bin_indices_block, weights_block, N, hist_shapes) 96 | block_counts.append(bc_block) 97 | all_counts = concatenate(block_counts) 98 | final_shape = (bin_indices.shape[0],) + tuple(hist_shapes) 99 | return all_counts.reshape(final_shape) 100 | 101 | 102 | def _determine_block_chunks(bin_indices, block_size): 103 | M, N = bin_indices.shape 104 | if block_size is None: 105 | return (M,) 106 | if block_size == "auto": 107 | try: 108 | # dask arrays - use the pre-existing chunks 109 | chunks = bin_indices.chunks 110 | return chunks[0] 111 | except AttributeError: 112 | # automatically pick a chunk size 113 | # this a a heueristic without much basis 114 | _MAX_CHUNK_SIZE = 10_000_000 115 | block_size = min(_MAX_CHUNK_SIZE // N, M) 116 | assert isinstance(block_size, int) 117 | num_chunks = M // block_size 118 | block_chunks = num_chunks * (block_size,) 119 | residual = M % block_size 120 | if residual: 121 | block_chunks += (residual,) 122 | assert sum(block_chunks) == M 123 | return block_chunks 124 | 125 | 126 | def _dispatch_bincount(bin_indices, weights, N, hist_shapes, block_size=None): 127 | # block_chunks is like a dask chunk, a tuple that divides up the first 128 | # axis of bin_indices 129 | block_chunks = _determine_block_chunks(bin_indices, block_size) 130 | if len(block_chunks) == 1: 131 | # single global chunk, don't need a loop over chunks 132 | return _bincount_2d(bin_indices, weights, N, hist_shapes) 133 | else: 134 | return _bincount_loop(bin_indices, weights, N, hist_shapes, block_chunks) 135 | 136 | 137 | def _bincount_2d_vectorized( 138 | *args, bins=None, weights=None, density=False, right=False, block_size=None 139 | ): 140 | """Calculate the histogram independently on each row of a 2D array""" 141 | 142 | N_inputs = len(args) 143 | a0 = args[0] 144 | 145 | # consistency checks for inputa 146 | for a, b in zip(args, bins): 147 | assert a.ndim == 2 148 | assert b.ndim == 1 149 | assert a.shape == a0.shape 150 | if weights is not None: 151 | assert weights.shape == a0.shape 152 | 153 | nrows, ncols = a0.shape 154 | nbins = [len(b) for b in bins] 155 | hist_shapes = [nb + 1 for nb in nbins] 156 | 157 | # The maximum possible value of searchsorted is nbins 158 | # For _searchsorted_inclusive: 
159 | # - 0 corresponds to a < b[0] 160 | # - i corresponds to b[i-1] <= a < b[i] 161 | # - nbins-1 corresponds to b[-2] <= a <= b[-1] 162 | # - nbins corresponds to a >= b[-1] 163 | def _searchsorted_inclusive(a, b): 164 | """ 165 | Like `searchsorted`, but where the last bin is also right-edge inclusive. 166 | """ 167 | # Similar to implementation in np.histogramdd 168 | # see https://github.com/numpy/numpy/blob/9c98662ee2f7daca3f9fae9d5144a9a8d3cabe8c/numpy/lib/histograms.py#L1056 169 | # This assumes the bins (b) are sorted 170 | bin_indices = searchsorted(b, a, side="right") 171 | on_edge = a == b[-1] 172 | # Shift these points one bin to the left. 173 | bin_indices[on_edge] -= 1 174 | return bin_indices 175 | 176 | each_bin_indices = [_searchsorted_inclusive(a, b) for a, b in zip(args, bins)] 177 | # product of the bins gives the joint distribution 178 | if N_inputs > 1: 179 | bin_indices = ravel_multi_index(each_bin_indices, hist_shapes) 180 | else: 181 | bin_indices = each_bin_indices[0] 182 | # total number of unique bin indices 183 | N = reduce(lambda x, y: x * y, hist_shapes) 184 | 185 | bin_counts = _dispatch_bincount( 186 | bin_indices, weights, N, hist_shapes, block_size=block_size 187 | ) 188 | 189 | # just throw out everything outside of the bins, as np.histogram does 190 | # TODO: make this optional? 191 | slices = (slice(None),) + (N_inputs * (slice(1, -1),)) 192 | bin_counts = bin_counts[slices] 193 | 194 | return bin_counts 195 | 196 | 197 | def _bincount( 198 | *all_arrays, weights=False, axis=None, bins=None, density=None, block_size=None 199 | ): 200 | a0 = all_arrays[0] 201 | 202 | do_full_array = (axis is None) or (set(axis) == set(_range(a0.ndim))) 203 | 204 | if do_full_array: 205 | kept_axes_shape = (1,) * a0.ndim 206 | else: 207 | kept_axes_shape = tuple( 208 | [a0.shape[i] if i not in axis else 1 for i in _range(a0.ndim)] 209 | ) 210 | 211 | def reshape_input(a): 212 | if do_full_array: 213 | d = a.ravel()[None, :] 214 | else: 215 | # reshape the array to 2D 216 | # axis 0: preserved axis after histogram 217 | # axis 1: calculate histogram along this axis 218 | new_pos = tuple(_range(-len(axis), 0)) 219 | c = np.moveaxis(a, axis, new_pos) 220 | split_idx = c.ndim - len(axis) 221 | dims_0 = c.shape[:split_idx] 222 | # assert dims_0 == kept_axes_shape 223 | dims_1 = c.shape[split_idx:] 224 | new_dim_0 = np.prod(dims_0) 225 | new_dim_1 = np.prod(dims_1) 226 | d = reshape(c, (new_dim_0, new_dim_1)) 227 | return d 228 | 229 | all_arrays_reshaped = [reshape_input(a) for a in all_arrays] 230 | 231 | if weights: 232 | weights_array = all_arrays_reshaped.pop() 233 | else: 234 | weights_array = None 235 | 236 | bin_counts = _bincount_2d_vectorized( 237 | *all_arrays_reshaped, 238 | bins=bins, 239 | weights=weights_array, 240 | density=density, 241 | block_size=block_size, 242 | ) 243 | 244 | final_shape = kept_axes_shape + bin_counts.shape[1:] 245 | bin_counts = reshape(bin_counts, final_shape) 246 | 247 | return bin_counts 248 | 249 | 250 | def histogram( 251 | *args, 252 | bins=None, 253 | range=None, 254 | axis=None, 255 | weights=None, 256 | density=False, 257 | block_size="auto", 258 | ): 259 | """Histogram applied along specified axis / axes. 260 | 261 | Parameters 262 | ---------- 263 | args : array_like 264 | Input data. The number of input arguments determines the dimensionality 265 | of the histogram. For example, two arguments produce a 2D histogram. 266 | All args must have the same size. 
267 | bins : int, str or numpy array or a list of ints, strs and/or arrays, optional 268 | If a list, there should be one entry for each item in ``args``. 269 | The bin specifications are as follows: 270 | 271 | * If int; the number of bins for all arguments in ``args``. 272 | * If str; the method used to automatically calculate the optimal bin width 273 | for all arguments in ``args``, as defined by numpy `histogram_bin_edges`. 274 | * If numpy array; the bin edges for all arguments in ``args``. 275 | * If a list of ints, strs and/or arrays; the bin specification as 276 | above for every argument in ``args``. 277 | 278 | When bin edges are specified, all but the last (righthand-most) bin include 279 | the left edge and exclude the right edge. The last bin includes both edges. 280 | 281 | A TypeError will be raised if args or weights contains dask arrays and bins 282 | are not specified explicitly as an array or list of arrays. This is because 283 | other bin specifications trigger computation. 284 | range : (float, float) or a list of (float, float), optional 285 | If a list, there should be one entry for each item in ``args``. 286 | The range specifications are as follows: 287 | 288 | * If (float, float); the lower and upper range(s) of the bins for all 289 | arguments in ``args``. Values outside the range are ignored. The first 290 | element of the range must be less than or equal to the second. `range` 291 | affects the automatic bin computation as well. In this case, while bin 292 | width is computed to be optimal based on the actual data within `range`, 293 | the bin count will fill the entire range including portions containing 294 | no data. 295 | * If a list of (float, float); the ranges as above for every argument in 296 | ``args``. 297 | * If not provided, range is simply ``(arg.min(), arg.max())`` for each 298 | arg. 299 | axis : None or int or tuple of ints, optional 300 | Axis or axes along which the histogram is computed. The default is to 301 | compute the histogram of the flattened array. 302 | weights : array_like, optional 303 | An array of weights, of the same shape as the input ``args``. Each value 304 | only contributes its associated weight towards the bin count 305 | (instead of 1). If `density` is True, the weights are 306 | normalized, so that the integral of the density over the range 307 | remains 1. 308 | density : bool, optional 309 | If ``False``, the result will contain the number of samples in 310 | each bin. If ``True``, the result is the value of the 311 | probability *density* function at the bin, normalized such that 312 | the *integral* over the range is 1. Note that the sum of the 313 | histogram values will not be equal to 1 unless bins of unity 314 | width are chosen; it is not a probability *mass* function. 315 | block_size : int or 'auto', optional 316 | A parameter which governs the algorithm used to compute the histogram. 317 | Using a nonzero value splits the histogram calculation over the 318 | non-histogram axes into blocks of size ``block_size``, iterating over 319 | them with a loop (numpy inputs) or in parallel (dask inputs). If 320 | ``'auto'``, blocks will be determined either by the underlying dask 321 | chunks (dask inputs) or an experimental built-in heuristic (numpy inputs). 322 | 323 | Returns 324 | ------- 325 | hist : array 326 | The values of the histogram. 327 | bin_edges : list of arrays 328 | Return the bin edges for each input array.
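Examples
--------
A minimal sketch with hand-picked values (illustrative only, not from the test suite). With edges [0, 1, 2, 3] the last bin is closed on the right, so 2.5 and 2.9 both fall in the final bin:

>>> import numpy as np
>>> h, bin_edges = histogram(np.array([0.5, 1.5, 2.5, 2.9]), bins=[np.array([0.0, 1.0, 2.0, 3.0])])
>>> h
array([1, 1, 2])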
329 | 330 | See Also 331 | -------- 332 | numpy.histogram, numpy.bincount, numpy.searchsorted 333 | """ 334 | 335 | a0 = args[0] 336 | ndim = a0.ndim 337 | n_inputs = len(args) 338 | 339 | is_dask_array = any([dask.is_dask_collection(a) for a in list(args) + [weights]]) 340 | 341 | if axis is not None: 342 | axis = np.atleast_1d(axis) 343 | assert axis.ndim == 1 344 | axis_normed = [] 345 | for ax in axis: 346 | if ax >= 0: 347 | ax_positive = ax 348 | else: 349 | ax_positive = ndim + ax 350 | assert ax_positive < ndim, "axis must be less than ndim" 351 | axis_normed.append(ax_positive) 352 | axis = [int(i) for i in axis_normed] 353 | 354 | all_arrays = list(args) 355 | n_inputs = len(all_arrays) 356 | 357 | if weights is not None: 358 | all_arrays.append(weights) 359 | has_weights = True 360 | else: 361 | has_weights = False 362 | 363 | dtype = "i8" if not has_weights else weights.dtype 364 | 365 | # Broadcast input arrays. Note that this dispatches to `dsa.broadcast_arrays` as necessary. 366 | all_arrays = broadcast_arrays(*all_arrays) 367 | # Since all arrays now have the same shape, just get the axes of the first. 368 | input_axes = tuple(_range(all_arrays[0].ndim)) 369 | 370 | # Some sanity checks and format bins and range correctly 371 | bins = _ensure_correctly_formatted_bins(bins, n_inputs) 372 | range = _ensure_correctly_formatted_range(range, n_inputs) 373 | 374 | # histogram_bin_edges triggers computation on dask arrays. It would be possible 375 | # to write a version of this that doesn't trigger when `range` is provided, but 376 | # for now let's just use np.histogram_bin_edges 377 | if is_dask_array: 378 | if not all(isinstance(b, np.ndarray) for b in bins): 379 | raise TypeError( 380 | "When using dask arrays, bins must be provided as numpy array(s) of edges" 381 | ) 382 | else: 383 | bins = [ 384 | np.histogram_bin_edges( 385 | a, bins=b, range=r, weights=all_arrays[-1] if has_weights else None 386 | ) 387 | for a, b, r in zip(all_arrays, bins, range) 388 | ] 389 | bincount_kwargs = dict( 390 | weights=has_weights, 391 | axis=axis, 392 | bins=bins, 393 | density=density, 394 | block_size=block_size, 395 | ) 396 | 397 | # remove these axes from the inputs 398 | if axis is not None: 399 | drop_axes = tuple(axis) 400 | else: 401 | drop_axes = input_axes 402 | 403 | if _any_dask_array(weights, *all_arrays): 404 | # We should be able to just apply the bin_count function to every 405 | # block and then sum over all blocks to get the total bin count. 406 | # The main challenge is to figure out the chunk shape that will come 407 | # out of _bincount. We might also need to add dummy dimensions to sum 408 | # over in the _bincount function 409 | import dask.array as dsa 410 | 411 | # Important note from blockwise docs 412 | # > Any index, like i missing from the output index is interpreted as a contraction... 413 | # > In the case of a contraction the passed function should expect an iterable of blocks 414 | # > on any array that holds that index. 
# This means that we need to have all the input indexes present in the output index 416 | # However, they will be reduced to singleton (len 1) dimensions 417 | 418 | adjust_chunks = {i: (lambda x: 1) for i in drop_axes} 419 | 420 | new_axes_start = max(input_axes) + 1 421 | new_axes = {new_axes_start + i: len(bin) - 1 for i, bin in enumerate(bins)} 422 | out_index = input_axes + tuple(new_axes) 423 | 424 | blockwise_args = [] 425 | for arg in all_arrays: 426 | blockwise_args.append(arg) 427 | blockwise_args.append(input_axes) 428 | 429 | bin_counts = dsa.blockwise( 430 | _bincount, 431 | out_index, 432 | *blockwise_args, 433 | new_axes=new_axes, 434 | adjust_chunks=adjust_chunks, 435 | meta=np.array((), dtype), 436 | **bincount_kwargs, 437 | ) 438 | # sum over the block dims 439 | bin_counts = bin_counts.sum(drop_axes) 440 | else: 441 | # drop the extra axis used for summing over blocks 442 | bin_counts = _bincount(*all_arrays, **bincount_kwargs).squeeze(drop_axes) 443 | 444 | if density: 445 | # Normalize by dividing by bin counts and areas such that all the 446 | # histogram data integrated over all dimensions = 1 447 | bin_widths = [np.diff(b) for b in bins] 448 | if n_inputs == 1: 449 | bin_areas = bin_widths[0] 450 | elif n_inputs == 2: 451 | bin_areas = np.outer(*bin_widths) 452 | else: 453 | # Slower, but N-dimensional logic 454 | bin_areas = np.prod(np.ix_(*bin_widths)) 455 | 456 | # Sum over the last n_inputs axes, which correspond to the bins. All other axes 457 | # are "bystander" axes. Sums must be done independently for each bystander axis 458 | # so that nans are dealt with correctly (#51) 459 | bin_axes = tuple(_range(-n_inputs, 0)) 460 | bin_count_sums = bin_counts.sum(axis=bin_axes) 461 | bin_count_sums_shape = bin_count_sums.shape + len(bin_axes) * (1,) 462 | h = bin_counts / bin_areas / reshape(bin_count_sums, bin_count_sums_shape) 463 | else: 464 | h = bin_counts 465 | 466 | return h, bins 467 | -------------------------------------------------------------------------------- /xhistogram/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xgcm/xhistogram/a73be2fd7bbed5c34d95f7ca876ddcab1dd2ecc1/xhistogram/test/__init__.py -------------------------------------------------------------------------------- /xhistogram/test/fixtures.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | import dask 3 | import dask.array as dsa 4 | import numpy as np 5 | import xarray as xr 6 | 7 | 8 | def empty_dask_array(shape, dtype=float, chunks=None): 9 | # a dask array that errors if you try to compute it 10 | def raise_if_computed(): 11 | raise ValueError("Triggered forbidden computation") 12 | 13 | a = dsa.from_delayed(dask.delayed(raise_if_computed)(), shape, dtype) 14 | if chunks is not None: 15 | a = a.rechunk(chunks) 16 | 17 | return a 18 | 19 | 20 | def example_dataarray(shape=(5, 20)): 21 | data = np.random.randn(*shape) 22 | dims = [f"dim_{i}" for i in range(len(shape))] 23 | da = xr.DataArray(data, dims=dims, name="T") 24 | return da 25 | 26 | 27 | def example_dataset(n_dim=2, n_vars=2): 28 | """Random dataset with every variable having the same shape""" 29 | 30 | shape = tuple(range(8, 8 + n_dim)) 31 | dims = [f"dim_{i}" for i in range(len(shape))] 32 | var_names = [uuid.uuid4().hex for _ in range(n_vars)] 33 | ds = xr.Dataset() 34 | for i in range(n_vars): 35 | name = var_names[i] 36 | data = np.random.randn(*shape) 37 | da =
xr.DataArray(data, dims=dims, name=name) 38 | ds[name] = da 39 | return ds 40 | -------------------------------------------------------------------------------- /xhistogram/test/test_chunking.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from .fixtures import example_dataarray 5 | from ..xarray import histogram 6 | 7 | 8 | @pytest.mark.parametrize("weights", [False, True]) 9 | @pytest.mark.parametrize("chunksize", [1, 2, 3, 10]) 10 | @pytest.mark.parametrize("shape", [(10,), (10, 4)]) 11 | def test_chunked_weights(chunksize, shape, weights): 12 | 13 | data_a = example_dataarray(shape).chunk((chunksize,)) 14 | 15 | if weights: 16 | weights = example_dataarray(shape).chunk((chunksize,)) 17 | weights_arr = weights.values 18 | else: 19 | weights = weights_arr = None 20 | 21 | nbins_a = 6 22 | bins_a = np.linspace(-4, 4, nbins_a + 1) 23 | 24 | h = histogram(data_a, bins=[bins_a], weights=weights) 25 | 26 | assert h.shape == (nbins_a,) 27 | 28 | hist, _ = np.histogram(data_a.values, bins=bins_a, weights=weights_arr) 29 | 30 | np.testing.assert_allclose(hist, h.values) 31 | 32 | 33 | @pytest.mark.parametrize("xchunksize", [1, 2, 3, 10]) 34 | @pytest.mark.parametrize("ychunksize", [1, 2, 3, 12]) 35 | class TestFixedSize2DChunks: 36 | def test_2d_chunks(self, xchunksize, ychunksize): 37 | 38 | data_a = example_dataarray(shape=(10, 12)).chunk((xchunksize, ychunksize)) 39 | 40 | nbins_a = 8 41 | bins_a = np.linspace(-4, 4, nbins_a + 1) 42 | 43 | h = histogram(data_a, bins=[bins_a]) 44 | 45 | assert h.shape == (nbins_a,) 46 | 47 | hist, _ = np.histogram(data_a.values, bins=bins_a) 48 | 49 | np.testing.assert_allclose(hist, h.values) 50 | 51 | @pytest.mark.parametrize("reduce_dim", ["dim_0", "dim_1"]) 52 | def test_2d_chunks_broadcast_dim( 53 | self, 54 | xchunksize, 55 | ychunksize, 56 | reduce_dim, 57 | ): 58 | data_a = example_dataarray(shape=(10, 12)).chunk((xchunksize, ychunksize)) 59 | dims = list(data_a.dims) 60 | broadcast_dim = [d for d in dims if d != reduce_dim][0] 61 | 62 | nbins_a = 8 63 | bins_a = np.linspace(-4, 4, nbins_a + 1) 64 | 65 | h = histogram(data_a, bins=[bins_a], dim=(reduce_dim,)) 66 | 67 | assert h.shape == (data_a.sizes[broadcast_dim], nbins_a) 68 | 69 | def _np_hist(*args, **kwargs): 70 | h, _ = np.histogram(*args, **kwargs) 71 | return h 72 | 73 | hist = np.apply_along_axis( 74 | _np_hist, axis=dims.index(reduce_dim), arr=data_a.values, bins=bins_a 75 | ) 76 | 77 | if reduce_dim == "dim_0": 78 | h = h.transpose() 79 | np.testing.assert_allclose(hist, h.values) 80 | 81 | def test_2d_chunks_2d_hist(self, xchunksize, ychunksize): 82 | 83 | data_a = example_dataarray(shape=(10, 12)).chunk((xchunksize, ychunksize)) 84 | data_b = example_dataarray(shape=(10, 12)).chunk((xchunksize, ychunksize)) 85 | 86 | nbins_a = 8 87 | nbins_b = 9 88 | bins_a = np.linspace(-4, 4, nbins_a + 1) 89 | bins_b = np.linspace(-4, 4, nbins_b + 1) 90 | 91 | h = histogram(data_a, data_b, bins=[bins_a, bins_b]) 92 | 93 | assert h.shape == (nbins_a, nbins_b) 94 | 95 | hist, _, _ = np.histogram2d( 96 | data_a.values.ravel(), 97 | data_b.values.ravel(), 98 | bins=[bins_a, bins_b], 99 | ) 100 | 101 | np.testing.assert_allclose(hist, h.values) 102 | 103 | 104 | @pytest.mark.parametrize("xchunksize", [1, 2, 3, 10]) 105 | @pytest.mark.parametrize("ychunksize", [1, 2, 3, 12]) 106 | class TestUnalignedChunks: 107 | def test_unaligned_data_chunks(self, xchunksize, ychunksize): 108 | data_a = example_dataarray(shape=(10, 
12)).chunk((xchunksize, ychunksize)) 109 | data_b = example_dataarray(shape=(10, 12)).chunk( 110 | (xchunksize + 1, ychunksize + 1) 111 | ) 112 | 113 | nbins_a = 8 114 | nbins_b = 9 115 | bins_a = np.linspace(-4, 4, nbins_a + 1) 116 | bins_b = np.linspace(-4, 4, nbins_b + 1) 117 | 118 | h = histogram(data_a, data_b, bins=[bins_a, bins_b]) 119 | 120 | assert h.shape == (nbins_a, nbins_b) 121 | 122 | hist, _, _ = np.histogram2d( 123 | data_a.values.ravel(), 124 | data_b.values.ravel(), 125 | bins=[bins_a, bins_b], 126 | ) 127 | 128 | np.testing.assert_allclose(hist, h.values) 129 | 130 | def test_unaligned_weights_chunks(self, xchunksize, ychunksize): 131 | 132 | data_a = example_dataarray(shape=(10, 12)).chunk((xchunksize, ychunksize)) 133 | weights = example_dataarray(shape=(10, 12)).chunk( 134 | (xchunksize + 1, ychunksize + 1) 135 | ) 136 | 137 | nbins_a = 8 138 | bins_a = np.linspace(-4, 4, nbins_a + 1) 139 | 140 | h = histogram(data_a, bins=[bins_a], weights=weights) 141 | 142 | assert h.shape == (nbins_a,) 143 | 144 | hist, _ = np.histogram(data_a.values, bins=bins_a, weights=weights.values) 145 | 146 | np.testing.assert_allclose(hist, h.values) 147 | -------------------------------------------------------------------------------- /xhistogram/test/test_chunking_hypotheses.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from .fixtures import example_dataarray, example_dataset 5 | from ..xarray import histogram 6 | 7 | pytest.importorskip("hypothesis") 8 | 9 | import hypothesis.strategies as st # noqa 10 | from hypothesis import given, settings # noqa 11 | 12 | 13 | @st.composite 14 | def chunk_shapes(draw, n_dim=3, max_arr_len=10): 15 | """Generate different chunking patterns for an N-D array of data.""" 16 | chunks = [] 17 | for n in range(n_dim): 18 | shape = draw(st.integers(min_value=1, max_value=max_arr_len)) 19 | chunks.append(shape) 20 | return tuple(chunks) 21 | 22 | 23 | class TestChunkingHypotheses: 24 | @given(chunk_shapes(n_dim=1, max_arr_len=20)) 25 | def test_all_chunking_patterns_1d(self, chunks): 26 | 27 | data = example_dataarray(shape=(20,)).chunk(chunks) 28 | 29 | nbins_a = 8 30 | bins = np.linspace(-4, 4, nbins_a + 1) 31 | 32 | h = histogram(data, bins=[bins]) 33 | 34 | assert h.shape == (nbins_a,) 35 | 36 | hist, _ = np.histogram( 37 | data.values, 38 | bins=bins, 39 | ) 40 | 41 | np.testing.assert_allclose(hist, h) 42 | 43 | # TODO mark as slow? 44 | @settings(deadline=None) 45 | @given(chunk_shapes(n_dim=2, max_arr_len=8)) 46 | def test_all_chunking_patterns_2d(self, chunks): 47 | 48 | data_a = example_dataarray(shape=(5, 20)).chunk(chunks) 49 | data_b = example_dataarray(shape=(5, 20)).chunk(chunks) 50 | 51 | nbins_a = 8 52 | nbins_b = 9 53 | bins_a = np.linspace(-4, 4, nbins_a + 1) 54 | bins_b = np.linspace(-4, 4, nbins_b + 1) 55 | 56 | h = histogram(data_a, data_b, bins=[bins_a, bins_b]) 57 | 58 | assert h.shape == (nbins_a, nbins_b) 59 | 60 | hist, _, _ = np.histogram2d( 61 | data_a.values.ravel(), 62 | data_b.values.ravel(), 63 | bins=[bins_a, bins_b], 64 | ) 65 | 66 | np.testing.assert_allclose(hist, h.values) 67 | 68 | # TODO mark as slow? 
69 | @settings(deadline=None) 70 | @pytest.mark.parametrize("n_vars", [1, 2, 3, 4]) 71 | @given(chunk_shapes(n_dim=2, max_arr_len=7)) 72 | def test_all_chunking_patterns_dd_hist(self, n_vars, chunk_shapes): 73 | ds = example_dataset(n_dim=2, n_vars=n_vars) 74 | ds = ds.chunk({d: c for d, c in zip(ds.dims.keys(), chunk_shapes)}) 75 | 76 | n_bins = (7, 8, 9, 10)[:n_vars] 77 | bins = [np.linspace(-4, 4, n + 1) for n in n_bins] 78 | 79 | h = histogram(*[da for name, da in ds.data_vars.items()], bins=bins) 80 | 81 | assert h.shape == n_bins 82 | 83 | input_data = np.stack( 84 | [da.values.ravel() for name, da in ds.data_vars.items()], axis=-1 85 | ) 86 | hist, _ = np.histogramdd(input_data, bins=bins) 87 | 88 | np.testing.assert_allclose(hist, h.values) 89 | -------------------------------------------------------------------------------- /xhistogram/test/test_core.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from itertools import combinations 5 | import dask.array as dsa 6 | 7 | from ..core import ( 8 | histogram, 9 | _ensure_correctly_formatted_bins, 10 | _ensure_correctly_formatted_range, 11 | ) 12 | from .fixtures import empty_dask_array, example_dataarray 13 | 14 | import pytest 15 | 16 | import contextlib 17 | 18 | 19 | bins_int = 10 20 | bins_str = "auto" 21 | bins_arr = np.linspace(-4, 4, 10) 22 | range_ = (0, 1) 23 | 24 | 25 | @pytest.mark.parametrize("density", [False, True]) 26 | @pytest.mark.parametrize("block_size", [None, 1, 2]) 27 | @pytest.mark.parametrize("axis", [1, None]) 28 | @pytest.mark.parametrize("bins", [10, np.linspace(-4, 4, 10), "auto"]) 29 | @pytest.mark.parametrize("range_", [None, (-4, 4)]) 30 | @pytest.mark.parametrize("add_nans", [False, True]) 31 | def test_histogram_results_1d(block_size, density, axis, bins, range_, add_nans): 32 | nrows, ncols = 5, 20 33 | # Setting the random seed here prevents np.testing.assert_allclose 34 | # from failing below. We should investigate this further.
35 | np.random.seed(2) 36 | data = np.random.randn(nrows, ncols) 37 | if add_nans: 38 | N_nans = 20 39 | data.ravel()[np.random.choice(data.size, N_nans, replace=False)] = np.nan 40 | bins = np.linspace(-4, 4, 10) 41 | 42 | h, bin_edges = histogram( 43 | data, bins=bins, range=range_, axis=axis, block_size=block_size, density=density 44 | ) 45 | 46 | expected_shape = ( 47 | (nrows, len(bin_edges[0]) - 1) if axis == 1 else (len(bin_edges[0]) - 1,) 48 | ) 49 | assert h.shape == expected_shape 50 | 51 | # make sure we get the same thing as numpy.histogram 52 | if axis: 53 | bins_np = np.histogram_bin_edges( 54 | data, bins=bins, range=range_ 55 | ) # Use same bins for all slices below 56 | expected = np.stack( 57 | [ 58 | np.histogram(data[i], bins=bins_np, range=range_, density=density)[0] 59 | for i in range(nrows) 60 | ] 61 | ) 62 | else: 63 | expected = np.histogram(data, bins=bins, range=range_, density=density)[0] 64 | np.testing.assert_allclose(h, expected) 65 | 66 | if density: 67 | widths = np.diff(bins) 68 | integral = np.sum(h * widths, axis) 69 | np.testing.assert_allclose(integral, 1.0) 70 | 71 | 72 | @pytest.mark.parametrize("block_size", [None, 1, 2]) 73 | def test_histogram_results_1d_weighted(block_size): 74 | nrows, ncols = 5, 20 75 | data = np.random.randn(nrows, ncols) 76 | bins = np.linspace(-4, 4, 10) 77 | h, _ = histogram(data, bins=bins, axis=1, block_size=block_size) 78 | weights = 2 * np.ones_like(data) 79 | h_w, _ = histogram(data, bins=bins, axis=1, weights=weights, block_size=block_size) 80 | np.testing.assert_array_equal(2 * h, h_w) 81 | 82 | 83 | # @pytest.mark.skip(reason="Weight broadcasting on numpy arrays is not yet implemented") 84 | @pytest.mark.parametrize("block_size", [None, 1, 2, "auto"]) 85 | def test_histogram_results_1d_weighted_broadcasting(block_size): 86 | nrows, ncols = 5, 20 87 | data = np.random.randn(nrows, ncols) 88 | bins = np.linspace(-4, 4, 10) 89 | h, _ = histogram(data, bins=bins, axis=1, block_size=block_size) 90 | weights = 2 * np.ones((1, ncols)) 91 | h_w, _ = histogram(data, bins=bins, axis=1, weights=weights, block_size=block_size) 92 | np.testing.assert_array_equal(2 * h, h_w) 93 | 94 | 95 | @pytest.mark.parametrize("block_size", [None, 1, 2]) 96 | def test_histogram_right_edge(block_size): 97 | """Test that last bin is both left- and right-edge inclusive as it 98 | is for numpy.histogram 99 | """ 100 | nrows, ncols = 5, 20 101 | data = np.ones((nrows, ncols)) 102 | bins = np.array([0, 0.5, 1]) # All data at rightmost edge 103 | 104 | h, _ = histogram(data, bins=bins, axis=1, block_size=block_size) 105 | assert h.shape == (nrows, len(bins) - 1) 106 | 107 | # make sure we get the same thing as histogram (all data in the last bin) 108 | hist, _ = np.histogram(data, bins=bins) 109 | np.testing.assert_array_equal(hist, h.sum(axis=0)) 110 | 111 | # now try with no axis 112 | h_na, _ = histogram(data, bins=bins, block_size=block_size) 113 | np.testing.assert_array_equal(hist, h_na) 114 | 115 | 116 | def test_histogram_results_2d(): 117 | nrows, ncols = 5, 20 118 | data_a = np.random.randn(nrows, ncols) 119 | data_b = np.random.randn(nrows, ncols) 120 | nbins_a = 9 121 | bins_a = np.linspace(-4, 4, nbins_a + 1) 122 | nbins_b = 10 123 | bins_b = np.linspace(-4, 4, nbins_b + 1) 124 | 125 | h, _ = histogram(data_a, data_b, bins=[bins_a, bins_b]) 126 | assert h.shape == (nbins_a, nbins_b) 127 | 128 | hist, _, _ = np.histogram2d(data_a.ravel(), data_b.ravel(), bins=[bins_a, bins_b]) 129 | np.testing.assert_array_equal(hist, h) 130 | 131 | 132 
| @pytest.mark.parametrize("dask", [False, True]) 133 | def test_histogram_results_2d_broadcasting(dask): 134 | nrows, ncols = 5, 20 135 | data_a = np.random.randn(ncols) 136 | data_b = np.random.randn(nrows, ncols) 137 | nbins_a = 9 138 | bins_a = np.linspace(-4, 4, nbins_a + 1) 139 | nbins_b = 10 140 | bins_b = np.linspace(-4, 4, nbins_b + 1) 141 | 142 | if dask: 143 | test_data_a = dsa.from_array(data_a, chunks=3) 144 | test_data_b = dsa.from_array(data_b, chunks=(2, 7)) 145 | else: 146 | test_data_a = data_a 147 | test_data_b = data_b 148 | 149 | h, _ = histogram(test_data_a, test_data_b, bins=[bins_a, bins_b]) 150 | assert h.shape == (nbins_a, nbins_b) 151 | 152 | hist, _, _ = np.histogram2d( 153 | np.broadcast_to(data_a, data_b.shape).ravel(), 154 | data_b.ravel(), 155 | bins=[bins_a, bins_b], 156 | ) 157 | np.testing.assert_array_equal(hist, h) 158 | 159 | 160 | @pytest.mark.parametrize("add_nans", [False, True]) 161 | def test_histogram_results_2d_density(add_nans): 162 | nrows, ncols = 5, 20 163 | data_a = np.random.randn(nrows, ncols) 164 | data_b = np.random.randn(nrows, ncols) 165 | if add_nans: 166 | N_nans = 20 167 | data_a.ravel()[np.random.choice(data_a.size, N_nans, replace=False)] = np.nan 168 | data_b.ravel()[np.random.choice(data_b.size, N_nans, replace=False)] = np.nan 169 | nbins_a = 9 170 | bins_a = np.linspace(-4, 4, nbins_a + 1) 171 | nbins_b = 10 172 | bins_b = np.linspace(-4, 4, nbins_b + 1) 173 | 174 | h, _ = histogram(data_a, data_b, bins=[bins_a, bins_b], density=True) 175 | assert h.shape == (nbins_a, nbins_b) 176 | 177 | hist, _, _ = np.histogram2d( 178 | data_a.ravel(), data_b.ravel(), bins=[bins_a, bins_b], density=True 179 | ) 180 | np.testing.assert_allclose(hist, h) 181 | 182 | # check integral is 1 183 | widths_a = np.diff(bins_a) 184 | widths_b = np.diff(bins_b) 185 | areas = np.outer(widths_a, widths_b) 186 | integral = np.sum(hist * areas) 187 | np.testing.assert_allclose(integral, 1.0) 188 | 189 | 190 | @pytest.mark.parametrize("add_nans", [False, True]) 191 | def test_histogram_results_3d_density(add_nans): 192 | nrows, ncols = 5, 20 193 | data_a = np.random.randn(nrows, ncols) 194 | data_b = np.random.randn(nrows, ncols) 195 | data_c = np.random.randn(nrows, ncols) 196 | if add_nans: 197 | N_nans = 20 198 | data_a.ravel()[np.random.choice(data_a.size, N_nans, replace=False)] = np.nan 199 | data_b.ravel()[np.random.choice(data_b.size, N_nans, replace=False)] = np.nan 200 | data_c.ravel()[np.random.choice(data_c.size, N_nans, replace=False)] = np.nan 201 | nbins_a = 9 202 | bins_a = np.linspace(-4, 4, nbins_a + 1) 203 | nbins_b = 10 204 | bins_b = np.linspace(-4, 4, nbins_b + 1) 205 | nbins_c = 9 206 | bins_c = np.linspace(-4, 4, nbins_c + 1) 207 | 208 | h, _ = histogram( 209 | data_a, data_b, data_c, bins=[bins_a, bins_b, bins_c], density=True 210 | ) 211 | 212 | assert h.shape == (nbins_a, nbins_b, nbins_c) 213 | 214 | hist, _ = np.histogramdd( 215 | (data_a.ravel(), data_b.ravel(), data_c.ravel()), 216 | bins=[bins_a, bins_b, bins_c], 217 | density=True, 218 | ) 219 | 220 | np.testing.assert_allclose(hist, h) 221 | 222 | # check integral is 1 223 | widths_a = np.diff(bins_a) 224 | widths_b = np.diff(bins_b) 225 | widths_c = np.diff(bins_c) 226 | areas = np.einsum("i,j,k", widths_a, widths_b, widths_c) 227 | integral = np.sum(hist * areas) 228 | np.testing.assert_allclose(integral, 1.0) 229 | 230 | 231 | @pytest.mark.parametrize("block_size", [None, 5, "auto"]) 232 | @pytest.mark.parametrize("use_dask", [False, True]) 233 | def 
test_histogram_shape(use_dask, block_size): 234 | """These tests just verify that arrays with the right shape come out. 235 | They don't verify correctness.""" 236 | 237 | shape = 10, 15, 12, 20 238 | if use_dask: 239 | b = empty_dask_array(shape, chunks=(1,) + shape[1:]) 240 | else: 241 | b = np.random.randn(*shape) 242 | bins = np.linspace(-4, 4, 27) 243 | 244 | # no axis 245 | c, _ = histogram(b, bins=bins, block_size=block_size) 246 | assert c.shape == (len(bins) - 1,) 247 | # same thing 248 | for axis in [(0, 1, 2, 3), (0, 1, 3, 2), (3, 2, 1, 0), (3, 2, 0, 1)]: 249 | c, _ = histogram(b, bins=bins, axis=axis) 250 | assert c.shape == (len(bins) - 1,) 251 | if use_dask: 252 | assert isinstance(c, dsa.Array) 253 | 254 | # scalar axis (check positive and negative) 255 | for axis in list(range(4)) + list(range(-1, -5, -1)): 256 | c, _ = histogram(b, bins=bins, axis=axis, block_size=block_size) 257 | shape = list(b.shape) 258 | del shape[axis] 259 | expected_shape = tuple(shape) + (len(bins) - 1,) 260 | assert c.shape == expected_shape 261 | if use_dask: 262 | assert isinstance(c, dsa.Array) 263 | 264 | # two axes 265 | for i, j in combinations(range(4), 2): 266 | axis = (i, j) 267 | c, _ = histogram(b, bins=bins, axis=axis, block_size=block_size) 268 | shape = list(b.shape) 269 | partial_shape = [shape[k] for k in range(b.ndim) if k not in axis] 270 | expected_shape = tuple(partial_shape) + (len(bins) - 1,) 271 | assert c.shape == expected_shape 272 | if use_dask: 273 | assert isinstance(c, dsa.Array) 274 | 275 | 276 | @pytest.mark.parametrize("arg_type", ["dask", "numpy"]) 277 | @pytest.mark.parametrize("weights_type", ["dask", "numpy", None]) 278 | @pytest.mark.parametrize("bins_type", ["int", "str", "numpy"]) 279 | def test_histogram_dask(arg_type, weights_type, bins_type): 280 | """Test that a TypeError is raised with dask arrays and inappropriate bins""" 281 | shape = 10, 15, 12, 20 282 | 283 | if arg_type == "dask": 284 | arg = empty_dask_array(shape) 285 | else: 286 | arg = example_dataarray(shape) 287 | 288 | if weights_type == "dask": 289 | weights = empty_dask_array(shape) 290 | elif weights_type == "numpy": 291 | weights = example_dataarray(shape) 292 | else: 293 | weights = None 294 | 295 | if bins_type == "int": 296 | bins = bins_int 297 | elif bins_type == "str": 298 | bins = bins_str 299 | else: 300 | bins = bins_arr 301 | 302 | # TypeError should be raised when 303 | # 1. args or weights is a dask array and bins is not a numpy array, or 304 | # 2.
bins is a string and weights is a numpy array 305 | cond_1 = ((arg_type == "dask") | (weights_type == "dask")) & (bins_type != "numpy") 306 | cond_2 = (weights_type == "numpy") & (bins_type == "str") 307 | should_TypeError = cond_1 | cond_2 308 | 309 | with contextlib.ExitStack() as stack: 310 | if should_TypeError: 311 | stack.enter_context(pytest.raises(TypeError)) 312 | histogram(arg, bins=bins, weights=weights) 313 | histogram(arg, arg, bins=[bins, bins], weights=weights) 314 | 315 | 316 | @pytest.mark.parametrize( 317 | "in_out", 318 | [ 319 | (bins_int, 1, [bins_int]), # ( bins_in, n_args, bins_out ) 320 | (bins_str, 1, [bins_str]), 321 | (bins_arr, 1, [bins_arr]), 322 | ([bins_int], 1, [bins_int]), 323 | (bins_int, 2, 2 * [bins_int]), 324 | (bins_str, 2, 2 * [bins_str]), 325 | (bins_arr, 2, 2 * [bins_arr]), 326 | ([bins_int, bins_str, bins_arr], 3, [bins_int, bins_str, bins_arr]), 327 | ([bins_arr], 2, None), 328 | (None, 1, None), 329 | ([bins_arr, bins_arr], 1, None), 330 | ], 331 | ) 332 | def test_ensure_correctly_formatted_bins(in_out): 333 | """Test the helper function _ensure_correctly_formatted_bins""" 334 | bins_in, n, bins_expected = in_out 335 | if bins_expected is not None: 336 | bins = _ensure_correctly_formatted_bins(bins_in, n) 337 | assert bins == bins_expected 338 | else: 339 | with pytest.raises((ValueError, TypeError)): 340 | _ensure_correctly_formatted_bins(bins_in, n) 341 | 342 | 343 | @pytest.mark.parametrize( 344 | "in_out", 345 | [ 346 | (range_, 1, [range_]), # ( range_in, n_args, range_out ) 347 | (range_, 2, [range_, range_]), 348 | ([range_, range_], 2, [range_, range_]), 349 | ([(range_[0],)], 1, None), 350 | ([range_], 2, None), 351 | ([range_, range_], 1, None), 352 | ], 353 | ) 354 | def test_ensure_correctly_formatted_range(in_out): 355 | """Test the helper function _ensure_correctly_formatted_range""" 356 | range_in, n, range_expected = in_out 357 | if range_expected is not None: 358 | range_ = _ensure_correctly_formatted_range(range_in, n) 359 | assert range_ == range_expected 360 | else: 361 | with pytest.raises(ValueError): 362 | _ensure_correctly_formatted_range(range_in, n) 363 | 364 | 365 | @pytest.mark.parametrize("block_size", [None, 1, 2]) 366 | @pytest.mark.parametrize("use_dask", [False, True]) 367 | def test_histogram_results_datetime(use_dask, block_size): 368 | """Test computing histogram of datetime objects""" 369 | data = pd.date_range(start="2000-06-01", periods=5) 370 | if use_dask: 371 | data = dsa.asarray(data, chunks=(5,)) 372 | # everything should be in the second bin (index 1) 373 | bins = np.array( 374 | [ 375 | np.datetime64("1999-01-01"), 376 | np.datetime64("2000-01-01"), 377 | np.datetime64("2001-01-01"), 378 | ] 379 | ) 380 | h = histogram(data, bins=bins, block_size=block_size)[0] 381 | expected = np.histogram(data, bins=bins)[0] 382 | np.testing.assert_allclose(h, expected) 383 | -------------------------------------------------------------------------------- /xhistogram/test/test_xarray.py: -------------------------------------------------------------------------------- 1 | import xarray as xr 2 | import numpy as np 3 | import pytest 4 | import pandas as pd 5 | from itertools import combinations 6 | 7 | from ..xarray import histogram 8 | 9 | 10 | # example dimensions 11 | DIMS = {"time": 5, "depth": 10, "lat": 45, "lon": 90} 12 | COORDS = { 13 | "time": ("time", pd.date_range(start="2000-01-01", periods=DIMS["time"])), 14 | "depth": ("depth", np.arange(DIMS["depth"]) * 100.0 + 50), 15 | "lat": ("lat", 
np.arange(DIMS["lat"]) * 180 / DIMS["lat"] - 90 + 90 / DIMS["lat"]), 16 | "lon": ("lon", np.arange(DIMS["lon"]) * 360 / DIMS["lon"] + 180 / DIMS["lon"]), 17 | } 18 | 19 | 20 | @pytest.fixture( 21 | params=[ 22 | ("lon",), 23 | ("lat", "lon"), 24 | ("depth", "lat", "lon"), 25 | ("time", "depth", "lat", "lon"), 26 | ], 27 | ids=["1D", "2D", "3D", "4D"], 28 | ) 29 | def ones(request): 30 | dims = request.param 31 | shape = [DIMS[d] for d in dims] 32 | coords = {k: v for k, v in COORDS.items() if k in dims} 33 | data = np.ones(shape, dtype="f8") 34 | da = xr.DataArray(data, dims=dims, coords=coords, name="ones") 35 | return da 36 | 37 | 38 | @pytest.mark.parametrize("ndims", [1, 2, 3, 4]) 39 | def test_histogram_ones(ones, ndims): 40 | dims = ones.dims 41 | if ones.ndim < ndims: 42 | pytest.skip( 43 | "Don't need to test when number of dimension combinations " 44 | "exceeds the number of array dimensions" 45 | ) 46 | 47 | # everything should be in the middle bin (index 1) 48 | bins = np.array([0, 0.9, 1.1, 2]) 49 | bins_c = 0.5 * (bins[1:] + bins[:-1]) 50 | 51 | def _check_result(h, d): 52 | other_dims = [dim for dim in ones.dims if dim not in d] 53 | if len(other_dims) > 0: 54 | assert set(other_dims) <= set(h.dims) 55 | # check that all values are in the central bin 56 | h_sum = h.sum(other_dims) 57 | h_sum_expected = xr.DataArray( 58 | [0, ones.size, 0], 59 | dims=["ones_bin"], 60 | coords={"ones_bin": ("ones_bin", bins_c)}, 61 | name="histogram_ones", 62 | ) 63 | xr.testing.assert_identical(h_sum, h_sum_expected) 64 | 65 | for d in combinations(dims, ndims): 66 | h = histogram(ones, bins=[bins], dim=d) 67 | _check_result(h, d) 68 | 69 | 70 | @pytest.mark.parametrize("ndims", [1, 2, 3, 4]) 71 | def test_histogram_ones_density(ones, ndims): 72 | dims = ones.dims 73 | if ones.ndim < ndims: 74 | pytest.skip( 75 | "Don't need to test when number of dimension combinations " 76 | "exceeds the number of array dimensions" 77 | ) 78 | 79 | # everything should be in the middle bin (index 1) 80 | bins = np.array([0, 0.9, 1.1, 2]) 81 | bin_area = 0.2 82 | 83 | def _check_result(h_density, d): 84 | other_dims = [dim for dim in ones.dims if dim not in d] 85 | if len(other_dims) > 0: 86 | assert set(other_dims) <= set(h_density.dims) 87 | 88 | # check that all integrals over pdfs at different locations are = 1 89 | h_integrals = (h_density * bin_area).sum(dim="ones_bin") 90 | np.testing.assert_allclose(h_integrals.values, 1.0) 91 | 92 | for d in combinations(dims, ndims): 93 | h_density = histogram(ones, bins=[bins], dim=d, density=True) 94 | _check_result(h_density, d) 95 | 96 | 97 | # TODO: refactor this test to use better fixtures 98 | # (it currently has a ton of loops) 99 | @pytest.mark.parametrize("ndims", [1, 2, 3, 4]) 100 | def test_weights(ones, ndims): 101 | dims = ones.dims 102 | if ones.ndim < ndims: 103 | pytest.skip( 104 | "Don't need to test when number of dimension combinations " 105 | "exceeds the number of array dimensions" 106 | ) 107 | 108 | bins = np.array([0, 0.9, 1.1, 2]) 109 | bins_c = 0.5 * (bins[1:] + bins[:-1]) 110 | 111 | weight_value = 0.5 112 | 113 | def _check_result(h, d): 114 | other_dims = [dim for dim in ones.dims if dim not in d] 115 | if len(other_dims) > 0: 116 | assert set(other_dims) <= set(h.dims) 117 | # check that all values are in the central bin 118 | h_sum = h.sum(other_dims) 119 | h_sum_expected = xr.DataArray( 120 | [0, weight_value * ones.size, 0], 121 | dims=["ones_bin"], 122 | coords={"ones_bin": ("ones_bin", bins_c)}, 123 | name="histogram_ones", 124 | ) 
125 | xr.testing.assert_identical(h_sum, h_sum_expected) 126 | 127 | # get every possible combination of sub-dimensions 128 | for n_combinations in range(ones.ndim): 129 | for weight_dims in combinations(dims, n_combinations): 130 | i_selector = {dim: 0 for dim in weight_dims} 131 | weights = xr.full_like(ones.isel(**i_selector), weight_value) 132 | for nc in range(ndims): 133 | for d in combinations(dims, nc + 1): 134 | h = histogram(ones, weights=weights, bins=[bins], dim=d) 135 | _check_result(h, d) 136 | 137 | 138 | # test for issue #5 139 | def test_dims_and_coords(): 140 | time_axis = np.arange(4) 141 | depth_axis = np.arange(10) 142 | X_axis = np.arange(30) 143 | Y_axis = np.arange(30) 144 | 145 | dat1 = np.random.randint( 146 | low=0, 147 | high=100, 148 | size=(len(time_axis), len(depth_axis), len(X_axis), len(Y_axis)), 149 | ) 150 | array1 = xr.DataArray( 151 | dat1, 152 | coords=[time_axis, depth_axis, X_axis, Y_axis], 153 | dims=["time", "depth", "X", "Y"], 154 | name="one", 155 | ) 156 | 157 | dat2 = np.random.randint( 158 | low=0, high=50, size=(len(time_axis), len(depth_axis), len(X_axis), len(Y_axis)) 159 | ) 160 | array2 = xr.DataArray( 161 | dat2, 162 | coords=[time_axis, depth_axis, X_axis, Y_axis], 163 | dims=["time", "depth", "X", "Y"], 164 | name="two", 165 | ) 166 | 167 | bins1 = np.linspace(0, 100, 50) 168 | bins2 = np.linspace(0, 50, 25) 169 | 170 | result = histogram(array1, array2, dim=["X", "Y"], bins=[bins1, bins2]) 171 | assert result.dims == ("time", "depth", "one_bin", "two_bin") 172 | assert result.time.identical(array1.time) 173 | assert result.depth.identical(array2.depth) 174 | 175 | 176 | @pytest.mark.parametrize("number_of_inputs", [1, 2]) 177 | @pytest.mark.parametrize("keep_coords", [True, False]) 178 | @pytest.mark.parametrize("include_weights", [True, False]) 179 | def test_carry_coords(keep_coords, number_of_inputs, include_weights): 180 | time_axis = np.arange(40) 181 | X_axis = np.arange(10) 182 | Y_axis = np.arange(10) 183 | weight_value = 0.5 184 | 185 | data = np.random.randint( 186 | low=0, high=100, size=(len(time_axis), len(X_axis), len(Y_axis)) 187 | ) 188 | da = xr.DataArray( 189 | data, coords=[time_axis, X_axis, Y_axis], dims=["time", "X", "Y"], name="one" 190 | ) 191 | 192 | if include_weights: 193 | weights = xr.full_like(da, weight_value) 194 | else: 195 | weights = None 196 | 197 | # faking coordinates 198 | da["lon"] = da.X**2 + da.Y**2 199 | assert "lon" in da.coords 200 | bins = np.linspace(0, 100, 10) 201 | result = histogram( 202 | *[da] * number_of_inputs, 203 | bins=[bins] * number_of_inputs, 204 | dim=["time"], 205 | weights=weights, 206 | keep_coords=keep_coords 207 | ) 208 | if keep_coords: 209 | assert "lon" in result.coords 210 | else: 211 | assert "lon" not in result.coords 212 | 213 | 214 | # test for issue #14 215 | def test_input_type_check(): 216 | np_array = np.arange(100) 217 | with pytest.raises(TypeError): 218 | histogram(np_array) 219 | -------------------------------------------------------------------------------- /xhistogram/xarray.py: -------------------------------------------------------------------------------- 1 | """ 2 | Xarray API for xhistogram. 3 | """ 4 | 5 | import xarray as xr 6 | from collections import OrderedDict 7 | from .core import histogram as _histogram 8 | 9 | # range is a keyword so save the builtin so they can use it. 
10 | _range = range 11 | 12 | 13 | def histogram( 14 | *args, 15 | bins=None, 16 | range=None, 17 | dim=None, 18 | weights=None, 19 | density=False, 20 | block_size="auto", 21 | keep_coords=False, 22 | bin_dim_suffix="_bin", 23 | ): 24 | """Histogram applied along specified dimensions. 25 | 26 | Parameters 27 | ---------- 28 | args : xarray.DataArray objects 29 | Input data. The number of input arguments determines the dimensionality of 30 | the histogram. For example, two arguments produce a 2D histogram. All 31 | args must be aligned and have the same dimensions. 32 | bins : int, str or numpy array or a list of ints, strs and/or arrays, optional 33 | If a list, there should be one entry for each item in ``args``. 34 | The bin specifications are as follows: 35 | 36 | * If int; the number of bins for all arguments in ``args``. 37 | * If str; the method used to automatically calculate the optimal bin width 38 | for all arguments in ``args``, as defined by numpy `histogram_bin_edges`. 39 | * If numpy array; the bin edges for all arguments in ``args``. 40 | * If a list of ints, strs and/or arrays; the bin specification as 41 | above for every argument in ``args``. 42 | 43 | When bin edges are specified, all but the last (righthand-most) bin include 44 | the left edge and exclude the right edge. The last bin includes both edges. 45 | 46 | A TypeError will be raised if args or weights contains dask arrays and bins 47 | are not specified explicitly as an array or list of arrays. This is because 48 | other bin specifications trigger computation. 49 | range : (float, float) or a list of (float, float), optional 50 | If a list, there should be one entry for each item in ``args``. 51 | The range specifications are as follows: 52 | 53 | * If (float, float); the lower and upper range(s) of the bins for all 54 | arguments in ``args``. Values outside the range are ignored. The first 55 | element of the range must be less than or equal to the second. `range` 56 | affects the automatic bin computation as well. In this case, while bin 57 | width is computed to be optimal based on the actual data within `range`, 58 | the bin count will fill the entire range including portions containing 59 | no data. 60 | * If a list of (float, float); the ranges as above for every argument in 61 | ``args``. 62 | * If not provided, range is simply ``(arg.min(), arg.max())`` for each 63 | arg. 64 | dim : tuple of strings, optional 65 | Dimensions over which the histogram is computed. The default is to 66 | compute the histogram of the flattened array. 67 | weights : array_like, optional 68 | An array of weights, of the same shape as the arrays in ``args``. 69 | Each value only contributes its associated weight towards the bin 70 | count (instead of 1). If `density` is True, the weights are 71 | normalized, so that the integral of the density over the range 72 | remains 1. NaNs in the weights input will fill the entire bin with 73 | NaNs. If there are NaNs in the weights input, call ``.fillna(0.)`` 74 | before running ``histogram()``. 75 | density : bool, optional 76 | If ``False``, the result will contain the number of samples in 77 | each bin. If ``True``, the result is the value of the 78 | probability *density* function at the bin, normalized such that 79 | the *integral* over the range is 1. Note that the sum of the 80 | histogram values will not be equal to 1 unless bins of unity 81 | width are chosen; it is not a probability *mass* function.
82 | block_size : int or 'auto', optional 83 | A parameter which governs the algorithm used to compute the histogram. 84 | Using a nonzero value splits the histogram calculation over the 85 | non-histogram axes into blocks of size ``block_size``, iterating over 86 | them with a loop (numpy inputs) or in parallel (dask inputs). If 87 | ``'auto'``, blocks will be determined either by the underlying dask 88 | chunks (dask inputs) or an experimental built-in heuristic (numpy inputs). 89 | keep_coords : bool, optional 90 | If ``True``, keep all coordinates. Default: ``False`` 91 | bin_dim_suffix : str, optional 92 | Suffix to append to input arg names to define names of output bin 93 | dimensions 94 | 95 | Returns 96 | ------- 97 | hist : xarray.DataArray 98 | The values of the histogram. For each bin, the midpoint of the bin edges 99 | is given along the bin coordinates. 100 | 101 | """ 102 | 103 | args = list(args) 104 | N_args = len(args) 105 | 106 | # TODO: allow list of weights as well 107 | N_weights = 1 if weights is not None else 0 108 | 109 | for a in args: 110 | if not isinstance(a, xr.DataArray): 111 | raise TypeError( 112 | "xhistogram.xarray.histogram accepts only xarray.DataArray " 113 | + f"objects but a {type(a).__name__} was provided" 114 | ) 115 | 116 | for a in args: 117 | assert a.name is not None, "all arrays must have a name" 118 | 119 | # we drop coords to simplify alignment 120 | if not keep_coords: 121 | args = [da.reset_coords(drop=True) for da in args] 122 | if N_weights: 123 | args += [weights.reset_coords(drop=True)] 124 | # explicitly broadcast so we understand what is going into apply_ufunc 125 | # (apply_ufunc might be doing this by itself again) 126 | args = list(xr.align(*args, join="exact")) 127 | 128 | # what happens if we skip this? 
129 | # args = list(xr.broadcast(*args)) 130 | a0 = args[0] 131 | a_coords = a0.coords 132 | 133 | # roll our own broadcasting 134 | # now manually expand the arrays 135 | all_dims = [d for a in args for d in a.dims] 136 | all_dims_ordered = list(OrderedDict.fromkeys(all_dims)) 137 | args_expanded = [] 138 | for a in args: 139 | expand_keys = [d for d in all_dims_ordered if d not in a.dims] 140 | a_expanded = a.expand_dims({k: 1 for k in expand_keys}) 141 | args_expanded.append(a_expanded) 142 | 143 | # only transpose if necessary, to avoid creating unnecessary dask tasks 144 | args_transposed = [] 145 | for a in args_expanded: 146 | if a.dims != tuple(all_dims_ordered): 147 | args_transposed.append(a.transpose(*all_dims_ordered)) 148 | else: 149 | args_transposed.append(a) 150 | args_data = [a.data for a in args_transposed] 151 | 152 | if N_weights: 153 | weights_data = args_data.pop() 154 | else: 155 | weights_data = None 156 | 157 | if dim is not None: 158 | dims_to_keep = [d for d in all_dims_ordered if d not in dim] 159 | axis = [args_transposed[0].get_axis_num(d) for d in dim] 160 | else: 161 | dims_to_keep = [] 162 | axis = None 163 | 164 | h_data, bins = _histogram( 165 | *args_data, 166 | weights=weights_data, 167 | bins=bins, 168 | range=range, 169 | axis=axis, 170 | density=density, 171 | block_size=block_size, 172 | ) 173 | 174 | # create output dims 175 | new_dims = [a.name + bin_dim_suffix for a in args[:N_args]] 176 | output_dims = dims_to_keep + new_dims 177 | 178 | # create new coords 179 | bin_centers = [0.5 * (bin[:-1] + bin[1:]) for bin in bins] 180 | new_coords = { 181 | name: ((name,), bin_center, a.attrs) 182 | for name, bin_center, a in zip(new_dims, bin_centers, args) 183 | } 184 | 185 | # old coords associated with dims 186 | old_dim_coords = {name: a0[name] for name in dims_to_keep if name in a_coords} 187 | 188 | all_coords = {} 189 | all_coords.update(old_dim_coords) 190 | all_coords.update(new_coords) 191 | # add compatible coords 192 | if keep_coords: 193 | for c in a_coords: 194 | if c not in all_coords and set(a0[c].dims).issubset(output_dims): 195 | all_coords[c] = a0[c] 196 | 197 | output_name = "_".join(["histogram"] + [a.name for a in args[:N_args]]) 198 | 199 | da_out = xr.DataArray(h_data, dims=output_dims, coords=all_coords, name=output_name) 200 | 201 | return da_out 202 | 203 | # we need weights to be passed through apply_func's alignment algorithm, 204 | # so we include it as an arg, so we create a wrapper function to do so 205 | # this feels like a hack 206 | # def _histogram_wrapped(*args, **kwargs): 207 | # alist = list(args) 208 | # weights = [alist.pop() for n in _range(N_weights)] 209 | # if N_weights == 0: 210 | # weights = None 211 | # elif N_weights == 1: 212 | # weights = weights[0] # squeeze 213 | # return _histogram(*alist, weights=weights, **kwargs) 214 | --------------------------------------------------------------------------------
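A minimal usage sketch of the xarray-level API documented above (illustrative only; the DataArray, its dimension names, and its values are invented, not taken from this repository):

    # Hypothetical example, not part of xhistogram's source.
    import numpy as np
    import xarray as xr
    from xhistogram.xarray import histogram

    # Inputs must be named DataArrays; the name sets the output bin
    # dimension ("T" + bin_dim_suffix -> "T_bin") and the output name.
    da = xr.DataArray(np.random.randn(5, 20), dims=["time", "x"], name="T")
    edges = np.linspace(-4, 4, 9)  # 8 bins

    # Histogram over "x" only: "time" is kept as a bystander dimension and
    # a new "T_bin" dimension holding the bin midpoints is appended.
    h = histogram(da, bins=[edges], dim=["x"])
    assert h.dims == ("time", "T_bin")
    assert h.name == "histogram_T"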