├── .ciocheck ├── .gitattributes ├── .github └── workflows │ ├── main.yaml │ └── pypipublish.yaml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── appveyor.yml ├── ci ├── appveyor │ ├── build.ps1 │ ├── install.ps1 │ └── test.ps1 ├── environment-py310.yml ├── environment-py311.yml ├── environment-py312.yml ├── environment-py39.yml ├── environment-upstream.yml └── travis │ ├── install.sh │ └── test.sh ├── conda └── meta.yaml ├── docs ├── Makefile ├── README.md ├── environment.yml ├── make.bat ├── requirements.txt └── source │ ├── api.rst │ ├── conf.py │ ├── contributing.rst │ ├── index.rst │ └── quickstart.rst ├── examples ├── OPeNDAP.ipynb ├── README.md ├── catalog.yml └── intake_xarray.ipynb ├── intake_xarray ├── __init__.py ├── _version.py ├── base.py ├── image.py ├── netcdf.py ├── opendap.py ├── raster.py ├── tests │ ├── __init__.py │ ├── conftest.py │ ├── data │ │ ├── RGB.byte.tif │ │ ├── bears.nc │ │ ├── blank.zarr │ │ │ ├── .zattrs │ │ │ ├── .zgroup │ │ │ ├── lat │ │ │ │ ├── 0 │ │ │ │ ├── .zarray │ │ │ │ └── .zattrs │ │ │ ├── level │ │ │ │ ├── 0 │ │ │ │ ├── .zarray │ │ │ │ └── .zattrs │ │ │ ├── lon │ │ │ │ ├── 0 │ │ │ │ ├── .zarray │ │ │ │ └── .zattrs │ │ │ ├── rh │ │ │ │ ├── .zarray │ │ │ │ ├── .zattrs │ │ │ │ ├── 0.0.0 │ │ │ │ ├── 0.0.1 │ │ │ │ ├── 0.0.2 │ │ │ │ ├── 0.0.3 │ │ │ │ └── 0.0.4 │ │ │ ├── temp │ │ │ │ ├── .zarray │ │ │ │ ├── .zattrs │ │ │ │ ├── 0.0.0.0 │ │ │ │ ├── 0.0.0.1 │ │ │ │ ├── 0.0.0.2 │ │ │ │ ├── 0.0.0.3 │ │ │ │ └── 0.0.0.4 │ │ │ └── time │ │ │ │ ├── 0 │ │ │ │ ├── .zarray │ │ │ │ └── .zattrs │ │ ├── catalog.yaml │ │ ├── color_with_special_2.tif │ │ ├── color_with_special{}.tif │ │ ├── dog.jpg │ │ ├── example_1.nc │ │ ├── example_2.nc │ │ ├── images │ │ │ ├── beach01.tif │ │ │ ├── beach57.tif │ │ │ └── buildings96.tif │ │ ├── little_green.tif │ │ ├── little_red.tif │ │ ├── next_example_1.nc │ │ ├── wafsgfs_L_t06z_intdsk60.grib2 │ │ └── wafsgfs_L_t06z_intdsk61.grib2 │ ├── test_catalog.py │ ├── test_image.py │ ├── 
test_intake_xarray.py │ └── test_network.py └── xzarr.py ├── readthedocs.yml ├── setup.cfg ├── setup.py └── versioneer.py /.ciocheck: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # ciocheck 3 | # https://github.com/ContinuumIO/ciocheck 4 | # ----------------------------------------------------------------------------- 5 | [ciocheck] 6 | branch = origin/master 7 | diff_mode = commited 8 | file_mode = all 9 | check = pyformat,yapf,flake8,pytest,coverage 10 | enforce = pytest 11 | 12 | # Python (pyformat) 13 | add_copyright = false 14 | add_header = true 15 | add_init = true 16 | 17 | # ----------------------------------------------------------------------------- 18 | # pep8 19 | # http://pep8.readthedocs.io/en/latest/intro.html#configuration 20 | # ----------------------------------------------------------------------------- 21 | [pep8] 22 | max-line-length = 80 23 | 24 | # ----------------------------------------------------------------------------- 25 | # pydocstyle 26 | # http://www.pydocstyle.org/en/latest/usage.html#example 27 | # ----------------------------------------------------------------------------- 28 | [pydocstyle] 29 | add-ignore = D203 30 | inherit = false 31 | 32 | # ----------------------------------------------------------------------------- 33 | # Flake 8 34 | # http://flake8.readthedocs.io/en/latest/config.html 35 | # ----------------------------------------------------------------------------- 36 | [flake8] 37 | # D10x = missing docstrings, E266 = leading hash in block comment created by alembic 38 | ignore = D100,D101,D102,D103,D104,D105,E266 39 | max-line-length = 99 40 | max-complexity = 64 41 | 42 | # ----------------------------------------------------------------------------- 43 | # pylint 44 | # https://pylint.readthedocs.io/en/latest/ 45 | # 
----------------------------------------------------------------------------- 46 | #[pylint:messages] 47 | 48 | # ----------------------------------------------------------------------------- 49 | # isort 50 | # https://github.com/timothycrosley/isort/wiki/isort-Settings 51 | # ----------------------------------------------------------------------------- 52 | [isort] 53 | from_first = true 54 | import_heading_stdlib = Standard library imports 55 | import_heading_firstparty = Local imports 56 | import_heading_thirdparty = Third party imports 57 | indent = ' ' 58 | known_first_party = anaconda_navigator 59 | known_third_party = six,_license,pytestqt 60 | line_length = 99 61 | sections = FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER 62 | 63 | # ----------------------------------------------------------------------------- 64 | # yapf 65 | # https://github.com/google/yapf#formatting-style 66 | # ----------------------------------------------------------------------------- 67 | [yapf:style] 68 | based_on_style = pep8 69 | column_limit = 80 70 | spaces_before_comment = 2 71 | 72 | # ----------------------------------------------------------------------------- 73 | # autopep8 74 | # http://pep8.readthedocs.io/en/latest/intro.html#configuration 75 | # ----------------------------------------------------------------------------- 76 | [autopep8] 77 | exclude = */tests/* 78 | ignore = E126, 79 | max-line-length = 80 80 | 81 | # ----------------------------------------------------------------------------- 82 | # Coverage 83 | # http://coverage.readthedocs.io/en/latest/config.html 84 | # ----------------------------------------------------------------------------- 85 | [coverage:run] 86 | omit = 87 | */tests/* 88 | */integration_tests/* 89 | */build/* 90 | */envs/* 91 | */node_modules/* 92 | /apb.py 93 | 94 | [coverage:report] 95 | show_missing = true 96 | skip_covered = true 97 | exclude_lines = 98 | # Have to re-enable the standard pragma 99 | pragma: no cover 100 | # Ignore 
local file testing 101 | def test(): 102 | if __name__ == .__main__.: 103 | if TYPE_CHECKING: 104 | 105 | # ----------------------------------------------------------------------------- 106 | # pytest 107 | # http://doc.pytest.org/en/latest/usage.html 108 | # ----------------------------------------------------------------------------- 109 | [pytest] 110 | addopts = -rfew --durations=10 111 | python_functions = test_* 112 | # overridden in specific config files 113 | testpaths=NOT_A_PATH 114 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | intake_xarray/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.github/workflows/main.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: "*" 6 | pull_request: 7 | branches: master 8 | 9 | jobs: 10 | test: 11 | name: ${{ matrix.CONDA_ENV }}-pytest 12 | runs-on: ubuntu-latest 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | CONDA_ENV: [py312, py311, py310, py39, upstream] 17 | steps: 18 | - name: Checkout 19 | uses: actions/checkout@v4 20 | 21 | - name: Setup conda 22 | uses: mamba-org/setup-micromamba@v1 23 | with: 24 | environment-file: ci/environment-${{ matrix.CONDA_ENV }}.yml 25 | 26 | - name: Development Install Intake-Xarray 27 | shell: bash -l {0} 28 | run: | 29 | python -m pip install --no-deps -e . 
30 | conda list 31 | 32 | - name: Run Tests 33 | shell: bash -l {0} 34 | run: | 35 | pytest --verbose -s --ignore=intake_xarray/tests/test_network.py 36 | -------------------------------------------------------------------------------- /.github/workflows/pypipublish.yaml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | deploy: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v4 12 | - name: Set up Python 13 | uses: actions/setup-python@v4 14 | with: 15 | python-version: "3.x" 16 | - name: Install dependencies 17 | run: | 18 | python -m pip install --upgrade pip 19 | pip install setuptools setuptools-scm wheel twine 20 | - name: Build and publish 21 | env: 22 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 23 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 24 | run: | 25 | python setup.py sdist bdist_wheel 26 | twine upload dist/* 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *_cache 2 | *.pyc 3 | .coverage* 4 | .idea/ 5 | __pycache__/ 6 | .cache/ 7 | *egg-info/ 8 | .ipynb_checkpoints/ 9 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) 6 | and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 
7 | 8 | ## [v0.4.1] - 2020-12-03 9 | 10 | - Bump setup-miniconda from v1 to v2 and remove Travis CI #92 11 | - Allow netcdf4 engine in opendap driver #91 12 | - Add logic for remote or local files in NetCDFSource #93 13 | 14 | ## [v0.4.0] - 2020-10-14 15 | 16 | - Logic for remote versus local filepaths for RasterIOSource + GitHub Actions CI #82 17 | - add github action for pytest on linux #83 18 | 19 | *changelog started with v0.4.0...* 20 | 21 | ## Compare adjacent versions: 22 | 23 | [v0.4.1]: https://github.com/intake/intake-xarray/compare/0.4.1...0.4.0 24 | [v0.4.0]: https://github.com/intake/intake-xarray/compare/0.4.0...0.3.2 25 | [v0.3.2]: https://github.com/intake/intake-xarray/compare/0.3.2...0.3.1 26 | [v0.3.1]: https://github.com/intake/intake-xarray/compare/0.3.1...0.3.0 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2017, Continuum Analytics, Inc. 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include . *.html 2 | recursive-include . *.csv 3 | recursive-include . *.yml 4 | 5 | include versioneer.py 6 | include intake_xarray/_version.py 7 | include LICENSE 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # intake-xarray 2 | 3 | ![CI](https://github.com/intake/intake-xarray/workflows/CI/badge.svg) 4 | 5 | Intake-xarray: xarray Plugin for [Intake](https://github.com/intake/intake) 6 | 7 | See [Intake docs](https://intake.readthedocs.io/en/latest/overview.html) for a general introduction and usage 8 | of Intake and the [intake-xarray docs](https://intake-xarray.readthedocs.io/) for details specific to the 9 | data drivers included in this package. 10 | 11 | In `intake-xarray`, there are plugins provided for reading data into [xarray](http://xarray.pydata.org/en/stable/) 12 | containers: 13 | - NetCDF (also handles other file formats which can be passed to 14 | [xarray.open_dataset](http://xarray.pydata.org/en/stable/generated/xarray.open_dataset.html) such as grib) 15 | - OPeNDAP 16 | - Rasterio 17 | - Zarr 18 | - images 19 | 20 | and it provides the ability to read xarray data from an Intake server. 
21 | 22 | ### Installation 23 | 24 | The conda install instructions are: 25 | 26 | ``` 27 | conda install -c conda-forge intake-xarray 28 | ``` 29 | 30 | To install optional dependencies: 31 | 32 | ``` 33 | conda install -c conda-forge pydap rasterio 34 | ``` 35 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | # Based on bokeh appveyor set up 2 | build: false 3 | 4 | platform: 5 | - x64 6 | 7 | environment: 8 | matrix: 9 | - MINICONDA: C:\Miniconda36-x64 10 | CONDA_ENV: py36 11 | - MINICONDA: C:\Miniconda36-x64 12 | CONDA_ENV: py36-defaults 13 | 14 | matrix: 15 | allow_failures: 16 | - CONDA_ENV: py36-defaults 17 | 18 | 19 | skip_branch_with_pr: true 20 | clone_depth: 5 21 | skip_tags: true 22 | 23 | init: 24 | - cmd: set PATH=%MINICONDA%;%MINICONDA%\\Scripts;%MINICONDA%\\Library\\bin;%PATH% 25 | - cmd: echo %path% 26 | 27 | install: 28 | - powershell .\\ci\\appveyor\\install.ps1 29 | - "conda env create -n test_env --file ./ci/environment-%CONDA_ENV%.yml" 30 | - "activate test_env" 31 | - "python setup.py install" 32 | - "conda list" 33 | 34 | test_script: 35 | - "pytest --verbose" 36 | -------------------------------------------------------------------------------- /ci/appveyor/build.ps1: -------------------------------------------------------------------------------- 1 | function build(){ 2 | conda install -c conda-forge conda-build conda-verify jinja2 intake>=0.4.1 xarray>=0.11.0 zarr dask netcdf4 3 | conda list 4 | conda build -c conda-forge ./conda 5 | } 6 | 7 | build -------------------------------------------------------------------------------- /ci/appveyor/install.ps1: -------------------------------------------------------------------------------- 1 | function install() { 2 | conda config --set auto_update_conda off --set always_yes yes --set changeps1 no --set show_channel_urls true 3 | } 4 | 5 | install 
-------------------------------------------------------------------------------- /ci/appveyor/test.ps1: -------------------------------------------------------------------------------- 1 | function test() { 2 | conda env create -n test_env --file ci/environment-py36.yml 3 | source activate test_env 4 | conda list 5 | pip install --no-deps -e . 6 | pytest --verbose 7 | } 8 | 9 | test -------------------------------------------------------------------------------- /ci/environment-py310.yml: -------------------------------------------------------------------------------- 1 | name: test_env 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.9 6 | - aiohttp 7 | - boto3 8 | - exifread 9 | - flask 10 | - h5netcdf 11 | - netcdf4 12 | - pip 13 | - pydap 14 | - pytest 15 | - rasterio 16 | - s3fs 17 | - scikit-image 18 | - rangehttpserver 19 | - xarray 20 | - zarr 21 | - moto 22 | - s3fs 23 | - rioxarray 24 | - werkzeug 25 | - dask 26 | - numpy <2 27 | - pip: 28 | - git+https://github.com/intake/intake 29 | -------------------------------------------------------------------------------- /ci/environment-py311.yml: -------------------------------------------------------------------------------- 1 | name: test_env 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.9 6 | - aiohttp 7 | - boto3 8 | - exifread 9 | - flask 10 | - h5netcdf 11 | - netcdf4 12 | - pip 13 | - pydap 14 | - pytest 15 | - rasterio 16 | - s3fs 17 | - scikit-image 18 | - rangehttpserver 19 | - xarray 20 | - zarr 21 | - moto 22 | - s3fs 23 | - rioxarray 24 | - werkzeug 25 | - dask 26 | - numpy <2 27 | - pip: 28 | - git+https://github.com/intake/intake 29 | -------------------------------------------------------------------------------- /ci/environment-py312.yml: -------------------------------------------------------------------------------- 1 | name: test_env 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.9 6 | - aiohttp 7 | - boto3 8 | - exifread 9 | - flask 10 | 
- h5netcdf 11 | - netcdf4 12 | - pip 13 | - pydap 14 | - pytest 15 | - rasterio 16 | - s3fs 17 | - scikit-image 18 | - rangehttpserver 19 | - xarray 20 | - zarr 21 | - moto 22 | - s3fs 23 | - rioxarray 24 | - werkzeug 25 | - dask 26 | - numpy <2 27 | - pip: 28 | - git+https://github.com/intake/intake 29 | -------------------------------------------------------------------------------- /ci/environment-py39.yml: -------------------------------------------------------------------------------- 1 | name: test_env 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.9 6 | - aiohttp 7 | - boto3 8 | - exifread 9 | - flask 10 | - h5netcdf 11 | - netcdf4 12 | - pip 13 | - pydap 14 | - pytest 15 | - rasterio 16 | - s3fs 17 | - scikit-image 18 | - rangehttpserver 19 | - xarray 20 | - zarr 21 | - moto 22 | - s3fs 23 | - rioxarray 24 | - werkzeug 25 | - dask 26 | - numpy <2 27 | - pip: 28 | - git+https://github.com/intake/intake 29 | -------------------------------------------------------------------------------- /ci/environment-upstream.yml: -------------------------------------------------------------------------------- 1 | name: test_env 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python==3.10 6 | - aiohttp 7 | - exifread 8 | - flask 9 | - h5netcdf 10 | - netcdf4 11 | - pip 12 | - pydap 13 | - pytest 14 | - rangehttpserver 15 | - rasterio 16 | - s3fs 17 | - scikit-image 18 | - entrypoints 19 | - pandas 20 | - tornado 21 | - zarr 22 | - moto 23 | - intake 24 | - rioxarray 25 | - gdal 26 | - werkzeug 27 | - rioxarray 28 | - dask 29 | - numpy <2 30 | - pip: 31 | - git+https://github.com/fsspec/filesystem_spec.git 32 | - git+https://github.com/intake/intake.git 33 | - git+https://github.com/pydata/xarray.git 34 | -------------------------------------------------------------------------------- /ci/travis/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e # exit on error 3 | 4 | echo 
"Configuring conda" 5 | conda config --set auto_update_conda off --set always_yes yes --set changeps1 no --set show_channel_urls true 6 | -------------------------------------------------------------------------------- /ci/travis/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e # exit on error 3 | set -x 4 | 5 | echo "Creating test env" 6 | conda env create -n test_env --file ci/environment-${CONDA_ENV}.yml 7 | source activate test_env 8 | # dev versions 9 | pip install git+https://github.com/intake/filesystem_spec --no-deps 10 | pip install git+https://github.com/intake/intake --no-deps 11 | conda list 12 | 13 | echo "Installing intake_xarray." 14 | pip install --no-deps -e . 15 | 16 | echo "Running tests" 17 | pytest --verbose 18 | -------------------------------------------------------------------------------- /conda/meta.yaml: -------------------------------------------------------------------------------- 1 | {% set data = load_setup_py_data() %} 2 | 3 | package: 4 | name: intake-xarray 5 | version: {{ data['version'] }} 6 | 7 | source: 8 | path: .. 
9 | git: 10 | depth: false 11 | 12 | build: 13 | number: {{ environ.get('GIT_DESCRIBE_NUMBER', 0) }} 14 | script: python setup.py install --single-version-externally-managed --record=record.txt 15 | noarch: python 16 | 17 | requirements: 18 | build: 19 | - python 20 | - jinja2 21 | run: 22 | - python 23 | 24 | {% for dep in data['install_requires'] %} 25 | - {{ dep.lower() }} 26 | {% endfor %} 27 | 28 | test: 29 | requires: 30 | - pytest 31 | commands: 32 | - py.test --verbose 33 | 34 | about: 35 | home: {{ data['url'] }} 36 | license: {{ data['license'] }} 37 | license_file: LICENSE 38 | summary: {{ data['description'] }} 39 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = intake_netcdf 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Building Documentation 2 | 3 | A basic python environment with packages listed in `./requirements.txt` is 4 | required to build the docs: 5 | 6 | ```bash 7 | pip install -r requirements.txt 8 | ``` 9 | 10 | To make HTML documentation: 11 | 12 | ```bash 13 | make html 14 | ``` 15 | 16 | Outputs to `build/html/index.html` 17 | -------------------------------------------------------------------------------- /docs/environment.yml: -------------------------------------------------------------------------------- 1 | name: intake 2 | channels: 3 | - intake 4 | - defaults 5 | dependencies: 6 | - python=3.6 7 | - intake 8 | - intake-xarray 9 | - pip: 10 | - sphinx 11 | - sphinx-rtd-theme 12 | - numpydoc 13 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | set SPHINXPROJ=intake_pcap 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 
24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinx_rtd_theme 3 | numpydoc 4 | 5 | -------------------------------------------------------------------------------- /docs/source/api.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============= 3 | 4 | .. autosummary:: 5 | intake_xarray.netcdf.NetCDFSource 6 | intake_xarray.opendap.OpenDapSource 7 | intake_xarray.xzarr.ZarrSource 8 | intake_xarray.raster.RasterIOSource 9 | intake_xarray.image.ImageSource 10 | 11 | .. autoclass:: intake_xarray.netcdf.NetCDFSource 12 | :members: 13 | 14 | .. autoclass:: intake_xarray.opendap.OpenDapSource 15 | :members: 16 | 17 | .. autoclass:: intake_xarray.xzarr.ZarrSource 18 | :members: 19 | 20 | .. autoclass:: intake_xarray.raster.RasterIOSource 21 | :members: 22 | 23 | .. autoclass:: intake_xarray.image.ImageSource 24 | :members: 25 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # intake_xarray documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Jan 15 18:11:02 2018. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 
11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # 19 | # import os 20 | # import sys 21 | # sys.path.insert(0, os.path.abspath('.')) 22 | 23 | # -- General configuration ------------------------------------------------ 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | # 27 | # needs_sphinx = '1.0' 28 | 29 | # Add any Sphinx extension module names here, as strings. They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 32 | extensions = [ 33 | 'sphinx.ext.autodoc', 34 | 'sphinx.ext.viewcode', 35 | 'sphinx.ext.autosummary', 36 | 'numpydoc', 37 | ] 38 | 39 | # Add any paths that contain templates here, relative to this directory. 40 | templates_path = ['_templates'] 41 | 42 | # The suffix(es) of source filenames. 43 | # You can specify multiple suffix as a list of string: 44 | # 45 | # source_suffix = ['.rst', '.md'] 46 | source_suffix = '.rst' 47 | 48 | # The master toctree document. 49 | master_doc = 'index' 50 | 51 | # General information about the project. 52 | project = u'intake_xarray' 53 | copyright = u'2018, Mike McCarty' 54 | author = u'Mike McCarty' 55 | 56 | # The version info for the project you're documenting, acts as replacement for 57 | # |version| and |release|, also used in various other places throughout the 58 | # built documents. 59 | # 60 | # The short X.Y version. 61 | import intake_xarray 62 | version = intake_xarray.__version__ 63 | # The full version, including alpha/beta/rc tags. 64 | release = intake_xarray.__version__ 65 | 66 | # The language for content autogenerated by Sphinx. 
Refer to documentation 67 | # for a list of supported languages. 68 | # 69 | # This is also used if you do content translation via gettext catalogs. 70 | # Usually you set "language" from the command line for these cases. 71 | language = None 72 | 73 | # List of patterns, relative to source directory, that match files and 74 | # directories to ignore when looking for source files. 75 | # This patterns also effect to html_static_path and html_extra_path 76 | exclude_patterns = [] 77 | 78 | # The name of the Pygments (syntax highlighting) style to use. 79 | pygments_style = 'sphinx' 80 | 81 | # If true, `todo` and `todoList` produce output, else they produce nothing. 82 | todo_include_todos = False 83 | 84 | # -- Options for HTML output ---------------------------------------------- 85 | 86 | # The theme to use for HTML and HTML Help pages. See the documentation for 87 | # a list of builtin themes. 88 | # 89 | html_theme = 'sphinx_rtd_theme' 90 | 91 | # Theme options are theme-specific and customize the look and feel of a theme 92 | # further. For a list of options available for each theme, see the 93 | # documentation. 94 | # 95 | # html_theme_options = {} 96 | 97 | # Add any paths that contain custom static files (such as style sheets) here, 98 | # relative to this directory. They are copied after the builtin static files, 99 | # so a file named "default.css" will overwrite the builtin "default.css". 100 | html_static_path = ['_static'] 101 | 102 | # Custom sidebar templates, must be a dictionary that maps document names 103 | # to template names. 
104 | # 105 | # This is required for the alabaster theme 106 | # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars 107 | html_sidebars = { 108 | '**': [ 109 | 'relations.html', # needs 'show_related': True theme option to display 110 | 'searchbox.html', 111 | ] 112 | } 113 | 114 | # -- Options for HTMLHelp output ------------------------------------------ 115 | 116 | # Output file base name for HTML help builder. 117 | htmlhelp_basename = 'intake_xarray' 118 | 119 | # -- Options for LaTeX output --------------------------------------------- 120 | 121 | latex_elements = { 122 | # The paper size ('letterpaper' or 'a4paper'). 123 | # 124 | # 'papersize': 'letterpaper', 125 | 126 | # The font size ('10pt', '11pt' or '12pt'). 127 | # 128 | # 'pointsize': '10pt', 129 | 130 | # Additional stuff for the LaTeX preamble. 131 | # 132 | # 'preamble': '', 133 | 134 | # Latex figure (float) alignment 135 | # 136 | # 'figure_align': 'htbp', 137 | } 138 | 139 | # Grouping the document tree into LaTeX files. List of tuples 140 | # (source start file, target name, title, 141 | # author, documentclass [howto, manual, or own class]). 142 | latex_documents = [ 143 | (master_doc, 'intake_xarray.tex', u'intake\\_xarray Documentation', 144 | u'Martin Durant', 'manual'), 145 | ] 146 | 147 | # -- Options for manual page output --------------------------------------- 148 | 149 | # One entry per manual page. List of tuples 150 | # (source start file, name, description, authors, manual section). 151 | man_pages = [(master_doc, 'intake_xarray', u'intake_xarray Documentation', 152 | [author], 1)] 153 | 154 | # -- Options for Texinfo output ------------------------------------------- 155 | 156 | # Grouping the document tree into Texinfo files. 
List of tuples 157 | # (source start file, target name, title, author, 158 | # dir menu entry, description, category) 159 | texinfo_documents = [ 160 | (master_doc, 'intake_xarray', u'intake_xarray Documentation', author, 161 | 'intake_xarray', 'One line description of project.', 'Miscellaneous'), 162 | ] 163 | -------------------------------------------------------------------------------- /docs/source/contributing.rst: -------------------------------------------------------------------------------- 1 | ============================ 2 | Contributing to intake-xarray 3 | ============================ 4 | 5 | Contributions are highly welcomed and appreciated. Every little help counts, 6 | so do not hesitate! 7 | 8 | .. contents:: Contribution links 9 | :depth: 2 10 | 11 | 12 | .. _submitfeedback: 13 | 14 | Feature requests and feedback 15 | ----------------------------- 16 | 17 | Do you like intake-xarray? Share some love on Twitter or in your blog posts! 18 | 19 | We'd also like to hear about your propositions and suggestions. Feel free to 20 | `submit them as issues `_ and: 21 | 22 | * Explain in detail how they should work. 23 | * Keep the scope as narrow as possible. This will make it easier to implement. 24 | 25 | 26 | .. _reportbugs: 27 | 28 | Report bugs 29 | ----------- 30 | 31 | Report bugs for intake-stac in the `issue tracker `_. 32 | 33 | If you are reporting a bug, please include: 34 | 35 | * Your operating system name and version. 36 | * Any details about your local setup that might be helpful in troubleshooting, 37 | specifically the Python interpreter version, installed libraries, and intake-stac 38 | version. 39 | * Detailed steps to reproduce the bug. 40 | 41 | If you can write a demonstration test that currently fails but should pass 42 | (xfail), that is a very useful commit to make as well, even if you cannot 43 | fix the bug itself. 44 | 45 | 46 | .. _fixbugs: 47 | 48 | Fix bugs 49 | -------- 50 | 51 | Look through the `GitHub issues for bugs `_. 
52 | 53 | Talk to developers to find out how you can fix specific bugs. 54 | 55 | 56 | Write documentation 57 | ------------------- 58 | 59 | intake-xarray could always use more documentation. What exactly is needed? 60 | 61 | * More complementary documentation. Have you perhaps found something unclear? 62 | * Docstrings. There can never be too many of them. 63 | * Blog posts, articles and such -- they're all very appreciated. 64 | 65 | You can also edit documentation files directly in the GitHub web interface, 66 | without using a local copy. This can be convenient for small fixes. 67 | 68 | .. note:: 69 | Build the documentation locally with the following command: 70 | 71 | .. code:: bash 72 | 73 | $ conda env create -f docs/environment.yml 74 | $ cd docs 75 | $ make html 76 | 77 | The built documentation should be available in the ``docs/_build/``. 78 | 79 | 80 | 81 | .. _`pull requests`: 82 | .. _pull-requests: 83 | 84 | 85 | Preparing Pull Requests 86 | ----------------------- 87 | 88 | 89 | #. Fork the 90 | `intake-xarray GitHub repository `__. It's 91 | fine to use ``intake-xarray`` as your fork repository name because it will live 92 | under your user. 93 | 94 | 95 | #. Clone your fork locally using `git `_ and create a branch:: 96 | 97 | $ git clone git@github.com:YOUR_GITHUB_USERNAME/intake-xarray.git 98 | $ cd intake-xarray 99 | 100 | # now, to fix a bug or add feature create your own branch off "master": 101 | 102 | $ git checkout -b your-bugfix-feature-branch-name master 103 | 104 | 105 | #. Install development version in a conda environment:: 106 | 107 | $ conda env create -f ci/environment-py39.yml 108 | $ conda activate test_env 109 | $ pip install -e . 110 | 111 | 112 | #. Run all the tests 113 | 114 | Now running tests is as simple as issuing this command:: 115 | 116 | $ pytest --verbose 117 | 118 | 119 | This command will run tests via the "pytest" tool 120 | 121 | 122 | #. 
Commit and push once your tests pass and you are happy with your change(s):: 123 | 124 | $ git commit -a -m "" 125 | $ git push -u 126 | 127 | 128 | #. Finally, submit a pull request through the GitHub website using this data:: 129 | 130 | head-fork: YOUR_GITHUB_USERNAME/intake-xarray 131 | compare: your-branch-name 132 | 133 | base-fork: intake/intake-xarray 134 | base: master 135 | 136 | 137 | .. _`release a new version`: 138 | .. _release-a-new-version: 139 | 140 | 141 | Release a new version 142 | --------------------- 143 | 144 | intake-xarray uses the pypipublish GitHub action to publish new versions on PYPI. Just create a new tag `git tag 0.4.1`, `git push upstream --tags`, then create a release by visiting https://github.com/intake/intake-xarray/releases/new. When the release is created the version will automatically be uploaded to https://pypi.org/project/intake-xarray/. 145 | 146 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to intake_xarray's documentation! 2 | ================================================ 3 | 4 | This package enables the set of data-loading methods from Xarray to be used within the Intake 5 | data access and cataloging system. 6 | 7 | .. toctree:: 8 | :maxdepth: 2 9 | :caption: Contents: 10 | 11 | quickstart.rst 12 | api.rst 13 | contributing.rst 14 | 15 | 16 | Indices and tables 17 | ================== 18 | 19 | * :ref:`genindex` 20 | * :ref:`modindex` 21 | * :ref:`search` 22 | -------------------------------------------------------------------------------- /docs/source/quickstart.rst: -------------------------------------------------------------------------------- 1 | Quickstart 2 | ========== 3 | 4 | ``intake-xarray`` provides quick and easy access to n dimensional data 5 | suitable for reading by `xarray`_. 6 | 7 | .. 
_xarray: https://xarray.pydata.org 8 | 9 | Installation 10 | ------------ 11 | 12 | To use this plugin for `intake`_, install with the following command:: 13 | 14 | conda install -c conda-forge intake-xarray 15 | 16 | .. _intake: https://github.com/ContinuumIO/intake 17 | 18 | Usage 19 | ----- 20 | 21 | 22 | Inline use 23 | ~~~~~~~~~~ 24 | 25 | After installation, the functions ``intake.open_netcdf``, 26 | ``intake.open_rasterio``, ``intake.open_zarr``, 27 | ``intake.open_xarray_image``, and ``intake.open_opendap`` will become available. 28 | They can be used to open data files as xarray objects. 29 | 30 | 31 | Creating Catalog Entries 32 | ~~~~~~~~~~~~~~~~~~~~~~~~ 33 | 34 | Catalog entries must specify ``driver: netcdf``, ``driver: rasterio``, 35 | ``driver: zarr``, ``driver: xarray_image``, or ``driver: opendap`` 36 | as appropriate. 37 | 38 | 39 | The zarr and image plugins allow access to remote data stores (s3 and gcs), 40 | settings relevant to those should be passed in using the parameter 41 | ``storage_options``. 42 | 43 | 44 | Choosing a Driver 45 | ~~~~~~~~~~~~~~~~~ 46 | 47 | While all the drivers in the ``intake-xarray`` plugin yield ``xarray`` 48 | objects, they do not all accept the same file formats. 49 | 50 | 51 | netcdf/grib/tif 52 | --------------- 53 | 54 | Supports any local or downloadable file that can be passed to 55 | `xarray.open_mfdataset `_. 56 | Works for: 57 | 58 | - ``netcdf`` when installing `netcdf4 `_ 59 | - ``tif`` when installing `rioxarray `_ 60 | - ``grib`` when installing `cfgrib `_ 61 | 62 | opendap 63 | ------- 64 | 65 | Supports OPeNDAP URLs, optionally with ``esgf``, ``urs`` or ``generic_http`` authentication. 66 | 67 | zarr 68 | ----- 69 | 70 | Supports ``.zarr`` directories. See https://zarr.readthedocs.io/ for more 71 | information. 72 | 73 | rasterio 74 | -------- 75 | 76 | Supports any file format supported by ``rasterio.open`` - most commonly 77 | geotiffs. 
78 | 79 | Note: Consider installing ``rioxarray`` and using the ``netcdf`` driver with ``engine="rasterio"``. 80 | 81 | 82 | xarray_image 83 | ------------ 84 | 85 | Supports any file format that can be passed to ``scikit-image.io.imread`` 86 | which includes all the common image formats (``jpg``, ``png``, ``tif``, ...) 87 | 88 | Caching 89 | ~~~~~~~ 90 | Remote files can be cached locally by `fsspec`_. 91 | Note that ``opendap`` does not support caching as the URL does not back a downloadable file. 92 | -------------------------------------------------------------------------------- /examples/OPeNDAP.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## OPeNDAP\n", 8 | "\n", 9 | "Here we have adapted the OPeNDAP example from the [xarray documentation](http://xarray.pydata.org/en/stable/io.html#opendap) for Intake." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "data": { 19 | "text/plain": [ 20 | "['esgf']" 21 | ] 22 | }, 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "output_type": "execute_result" 26 | } 27 | ], 28 | "source": [ 29 | "import intake\n", 30 | "cat = intake.open_catalog('catalog.yml')\n", 31 | "list(cat)" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 2, 37 | "metadata": {}, 38 | "outputs": [ 39 | { 40 | "name": "stderr", 41 | "output_type": "stream", 42 | "text": [ 43 | "/Users/mmccarty/anaconda3/envs/intake-xarray/lib/python3.6/site-packages/distributed/bokeh/core.py:55: UserWarning: \n", 44 | "Port 8787 is already in use. \n", 45 | "Perhaps you already have a cluster running?\n", 46 | "Hosting the diagnostics dashboard on a random port instead.\n", 47 | " warnings.warn('\\n' + msg)\n" 48 | ] 49 | }, 50 | { 51 | "data": { 52 | "text/html": [ 53 | "\n", 54 | "\n", 55 | "\n", 62 | "\n", 70 | "\n", 71 | "
\n", 56 | "

Client

\n", 57 | "\n", 61 | "
\n", 63 | "

Cluster

\n", 64 | "
    \n", 65 | "
  • Workers: 1
  • \n", 66 | "
  • Cores: 8
  • \n", 67 | "
  • Memory: 100.00 GB
  • \n", 68 | "
\n", 69 | "
" 72 | ], 73 | "text/plain": [ 74 | "" 75 | ] 76 | }, 77 | "execution_count": 2, 78 | "metadata": {}, 79 | "output_type": "execute_result" 80 | } 81 | ], 82 | "source": [ 83 | "# arbitrarily choose a small memory limit (4GB) to stress the \n", 84 | "# out of core processing infrastructure\n", 85 | "from dask.distributed import Client\n", 86 | "client = Client(memory_limit=10e10, processes=False) # Note: was 6e9 \n", 87 | "client" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 3, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "schema = cat.esgf._get_schema()" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 4, 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "data": { 106 | "text/plain": [ 107 | "\n", 108 | "Dimensions: (bnds: 2, lat: 241, lon: 480, time: 365)\n", 109 | "Coordinates:\n", 110 | " * time (time) float64 2.885e+04 2.886e+04 2.886e+04 2.886e+04 ...\n", 111 | " * lat (lat) float64 -90.0 -89.25 -88.5 -87.75 -87.0 -86.25 -85.5 ...\n", 112 | " * lon (lon) float64 0.0 0.75 1.5 2.25 3.0 3.75 4.5 5.25 6.0 6.75 ...\n", 113 | "Dimensions without coordinates: bnds\n", 114 | "Data variables:\n", 115 | " time_bnds (time, bnds) float64 dask.array\n", 116 | " lat_bnds (lat, bnds) float64 dask.array\n", 117 | " lon_bnds (lon, bnds) float64 dask.array\n", 118 | " pr (time, lat, lon) float32 dask.array\n", 119 | "Attributes:\n", 120 | " institution: European Centre for Medium-Range Weather...\n", 121 | " institute_id: ECMWF\n", 122 | " experiment_id: ERA-Interim\n", 123 | " source: ERA-Interim, 6-hourly, Full Resolution.\n", 124 | " model_id: IFS-Cy31r2\n", 125 | " contact: ECMWF, Dick Dee (dick.dee@ecmwf.int)\n", 126 | " references: http://www.ecmwf.int\n", 127 | " tracking_id: 2d387c17-89da-4757-9fcd-84479f986da1\n", 128 | " mip_specs: CMIP5\n", 129 | " source_id: ERA-Interim\n", 130 | " product: reanalysis\n", 131 | " frequency: day\n", 132 | " creation_date: 2018-08-02T13:43:29Z\n", 133 | " 
history: 2018-08-02T13:43:29Z CMOR rewrote data t...\n", 134 | " Conventions: CF-1.4\n", 135 | " project_id: CREATE-IP\n", 136 | " table_id: Table day (17 July 2013) 7c3c704d0ca8f4c...\n", 137 | " title: Reanalysis output prepared for CREATE-IP.\n", 138 | " modeling_realm: atmos\n", 139 | " cmor_version: 2.9.1\n", 140 | " DODS_EXTRA.Unlimited_Dimension: time" 141 | ] 142 | }, 143 | "execution_count": 4, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": [ 149 | "ds = cat.esgf.read_chunked()\n", 150 | "ds" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "tmax = remote_data['tmax'][:500, ::3, ::3]\n", 160 | "tmax" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "%matplotlib inline\n", 170 | "import matplotlib.pyplot as plt\n", 171 | "tmax[0].plot()" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [] 180 | } 181 | ], 182 | "metadata": { 183 | "kernelspec": { 184 | "display_name": "Python 3", 185 | "language": "python", 186 | "name": "python3" 187 | }, 188 | "language_info": { 189 | "codemirror_mode": { 190 | "name": "ipython", 191 | "version": 3 192 | }, 193 | "file_extension": ".py", 194 | "mimetype": "text/x-python", 195 | "name": "python", 196 | "nbconvert_exporter": "python", 197 | "pygments_lexer": "ipython3", 198 | "version": "3.6.6" 199 | } 200 | }, 201 | "nbformat": 4, 202 | "nbformat_minor": 2 203 | } 204 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intake/intake-xarray/719a7c84b58ec6783b07e648fbfaf988ea4c453a/examples/README.md 
-------------------------------------------------------------------------------- /examples/catalog.yml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake_xarray 4 | sources: 5 | esgf: 6 | description: CREATE sample 7 | driver: opendap 8 | #cache: 9 | # - argkey: urlpath 10 | # regex: '' 11 | # type: file 12 | args: 13 | urlpath: 'http://esgf.nccs.nasa.gov/thredds/dodsC/CREATE-IP/reanalysis/ECMWF/IFS-Cy31r2/day/atmos/pr/pr_day_reanalysis_IFS-Cy31r2_19790101-19791231.nc' 14 | chunks: {} 15 | xarray_kwargs: 16 | decode_times: False 17 | 18 | geotiff: 19 | description: Geotiff image of Landsat Surface Reflectance Level-2 Science Product L5. 20 | driver: rasterio 21 | cache: 22 | - argkey: urlpath 23 | regex: 'earth-data/landsat' 24 | type: file 25 | args: 26 | urlpath: 's3://earth-data/landsat/small/LT05_L1TP_042033_{collection_date:%Y%m%d}_{processing_date:%Y%m%d}_01_T1_sr_band{band:1d}.tif' 27 | chunks: 28 | band: 1 29 | x: 50 30 | y: 50 31 | concat_dim: band 32 | storage_options: {'anon': True} 33 | metadata: 34 | plots: 35 | band_image: 36 | kind: 'image' 37 | x: 'x' 38 | y: 'y' 39 | groupby: 'band' 40 | rasterize: True 41 | 42 | image: 43 | description: Labeled images from UCMerced_LandUse/Images 44 | origin: http://weegee.vision.ucmerced.edu/datasets/landuse.html 45 | driver: xarray_image 46 | cache: 47 | - argkey: urlpath 48 | regex: 'earth-data/UCMerced_LandUse' 49 | type: file 50 | parameters: 51 | landuse: 52 | description: one landuse to gather 53 | type: str 54 | default: 'airplane' 55 | id: 56 | description: one id to gather 57 | type: int 58 | default: 0 59 | args: 60 | urlpath: "s3://earth-data/UCMerced_LandUse/Images/{{ landuse }}/{{ landuse }}{{ '%02d' % id }}.tif" 61 | storage_options: {'anon': True} 62 | 63 | images_labelled: 64 | description: All the labeled images from UCMerced_LandUse/Images for one id 65 | origin: http://weegee.vision.ucmerced.edu/datasets/landuse.html 
66 | driver: xarray_image 67 | cache: 68 | - argkey: urlpath 69 | regex: 'earth-data/UCMerced_LandUse' 70 | type: file 71 | parameters: 72 | id: 73 | description: one id to gather 74 | type: int 75 | default: 0 76 | args: 77 | urlpath: "s3://earth-data/UCMerced_LandUse/Images/*/*{{ '%02d' % id }}.tif" 78 | path_as_pattern: "Images/{landuse}/{boo}{id:2d}.tif" 79 | storage_options: {'anon': True} 80 | concat_dim: [id, landuse] 81 | 82 | images_unlabelled: 83 | description: All the labeled images from UCMerced_LandUse/Images for one id 84 | origin: http://weegee.vision.ucmerced.edu/datasets/landuse.html 85 | driver: xarray_image 86 | cache: 87 | - argkey: urlpath 88 | regex: 'earth-data/UCMerced_LandUse' 89 | type: file 90 | parameters: 91 | id: 92 | description: one id to gather 93 | type: int 94 | default: 0 95 | args: 96 | urlpath: "s3://earth-data/UCMerced_LandUse/Images/*/*{{ '%02d' % id }}.tif" 97 | storage_options: {'anon': True} 98 | 99 | grib_thredds: 100 | description: Publicly available grib data available on thredds via opendap protocol 101 | driver: netcdf 102 | args: 103 | urlpath: 'http://thredds.ucar.edu/thredds/dodsC/grib/FNMOC/WW3/Global_1p0deg/Best' 104 | chunks: {} 105 | -------------------------------------------------------------------------------- /examples/intake_xarray.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Examples using intake with xarray\n", 8 | "\n", 9 | "In this notebook, we'll take a look at some of the file formats that can be written to `xarray` objects." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import intake\n", 19 | "intake.output_notebook()" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "## Inline example\n", 27 | "\n", 28 | "Anything that can be opened with `xarray.open_dataset` can be accessed with intake using `intake.open_netcdf`. \n", 29 | "\n", 30 | "### GRIB\n", 31 | "In this first example we will use an OpenDAP URL to lazily access a grib file. " 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "grib_da = intake.open_netcdf('http://thredds.ucar.edu/thredds/dodsC/grib/FNMOC/WW3/Global_1p0deg/Best', chunks={}).to_dask()\n", 41 | "grib_da" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "### Image\n", 49 | "\n", 50 | "Intake-xarray also provides easy access to other backends to support loading other file formats. Such as image data." 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "image = intake.open_xarray_image('https://s3.amazonaws.com/earth-data/UCMerced_LandUse/Images/tenniscourt/tenniscourt02.tif')\n", 60 | "image_da = image.to_dask()\n", 61 | "image_da" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "from PIL import Image" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "Image.fromarray(image_da.data.compute())" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "#### Multiple Images \n", 87 | "\n", 88 | "We can lazily load lots of images from s3 and other backends using glob notation. 
" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "multi_image = intake.open_xarray_image('s3://earth-data/UCMerced_LandUse/Images/*/*05.tif', \n", 98 | " storage_options=dict(anon=True))\n", 99 | "multi_image_da = multi_image.to_dask()\n", 100 | "multi_image_da" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "np_array = multi_image_da[1].dropna('x', 'all').dropna('y', 'all').data.compute().astype('uint8')\n", 110 | "Image.fromarray(np_array)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "## Catalog examples\n", 118 | "\n", 119 | "There is a sample catalog at `./catalog.yml` containing some examples of how to use the intake-xarray plugins. " 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "cat = intake.open_catalog('catalog.yml')\n", 129 | "list(cat)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "### Grib: from unauthenticated OpenDAP\n", 137 | "\n", 138 | "Grib files can be read from catalog specs just as in the inline example above." 
139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "grib_thredds = cat.grib_thredds\n", 148 | "grib_thredds_da = grib_thredds.to_dask()\n", 149 | "grib_thredds_da" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "import hvplot.xarray" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "var = 'sig_wav_ht_surface'\n", 168 | "time_dim = grib_thredds_da[var].dims[0]\n", 169 | "grib_thredds_da[var].hvplot(x='lon', y='lat', groupby=time_dim, rasterize=True)" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "## Image\n", 177 | "\n", 178 | "Image files can be read in using a variety of backends." 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "image = cat.image(landuse='airplane', id=0)\n", 188 | "image_da = image.to_dask()\n", 189 | "image_da" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "np_array = image_da.data.compute()\n", 199 | "Image.fromarray(np_array)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "#### Multiple Images\n", 207 | "\n", 208 | "When loading multiple images, if the images don't have exactly the same size, you will have nan slices in your data and the dtype will be floats. 
" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "%time labelled_da = cat.images_labelled(id=0).to_dask()\n", 218 | "labelled_da" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [ 227 | "np_array = (labelled_da.sel(landuse='beach', id=0)\n", 228 | " .dropna('x', 'all')\n", 229 | " .dropna('y', 'all')\n", 230 | " .data.compute().astype('uint8'))\n", 231 | "Image.fromarray(np_array)" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "If we aren't interested in labels, then it is even faster to read in the files. " 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [ 247 | "%time unlabelled_da = cat.images_unlabelled(id=5).to_dask()\n", 248 | "unlabelled_da" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": {}, 255 | "outputs": [], 256 | "source": [ 257 | "np_array = unlabelled_da[1].dropna('x', 'all').dropna('y', 'all').data.compute().astype('uint8')\n", 258 | "Image.fromarray(np_array)" 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": {}, 264 | "source": [ 265 | "### GeoTiff\n", 266 | "For geotiffs there is a special `rasterio` backend that support loading using gdal. 
" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": {}, 273 | "outputs": [], 274 | "source": [ 275 | "geotiff = cat.geotiff\n", 276 | "geotiff_da = geotiff.to_dask()\n", 277 | "geotiff_da" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "metadata": {}, 284 | "outputs": [], 285 | "source": [ 286 | "geotiff.plot.band_image()" 287 | ] 288 | } 289 | ], 290 | "metadata": { 291 | "language_info": { 292 | "name": "python", 293 | "pygments_lexer": "ipython3" 294 | } 295 | }, 296 | "nbformat": 4, 297 | "nbformat_minor": 2 298 | } 299 | -------------------------------------------------------------------------------- /intake_xarray/__init__.py: -------------------------------------------------------------------------------- 1 | from ._version import get_versions 2 | __version__ = get_versions()['version'] 3 | del get_versions 4 | 5 | import intake_xarray.base 6 | import intake 7 | from .netcdf import NetCDFSource 8 | from .opendap import OpenDapSource 9 | from .raster import RasterIOSource 10 | #from .xzarr import ZarrSource 11 | from .image import ImageSource 12 | -------------------------------------------------------------------------------- /intake_xarray/_version.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # This file helps to compute a version number in source trees obtained from 4 | # git-archive tarball (such as those provided by githubs download-from-tag 5 | # feature). Distribution tarballs (built by setup.py sdist) and build 6 | # directories (produced by setup.py build) will contain a much shorter file 7 | # that just contains the computed version number. 8 | 9 | # This file is released into the public domain. 
Generated by 10 | # versioneer-0.18 (https://github.com/warner/python-versioneer) 11 | """Git implementation of _version.py.""" 12 | 13 | import errno 14 | import os 15 | import re 16 | import subprocess 17 | import sys 18 | 19 | 20 | def get_keywords(): 21 | """Get the keywords needed to look up the version information.""" 22 | # these strings will be replaced by git during git-archive. 23 | # setup.py/versioneer.py will grep for the variable names, so they must 24 | # each be defined on a line of their own. _version.py will just call 25 | # get_keywords(). 26 | git_refnames = " (HEAD -> master, tag: 2.0.0)" 27 | git_full = "719a7c84b58ec6783b07e648fbfaf988ea4c453a" 28 | git_date = "2024-11-13 16:26:50 -0500" 29 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} 30 | return keywords 31 | 32 | 33 | class VersioneerConfig: 34 | """Container for Versioneer configuration parameters.""" 35 | 36 | 37 | def get_config(): 38 | """Create, populate and return the VersioneerConfig() object.""" 39 | # these strings are filled in when 'setup.py versioneer' creates 40 | # _version.py 41 | cfg = VersioneerConfig() 42 | cfg.VCS = "git" 43 | cfg.style = "pep440" 44 | cfg.tag_prefix = "" 45 | cfg.parentdir_prefix = "None" 46 | cfg.versionfile_source = "intake_xarray/_version.py" 47 | cfg.verbose = False 48 | return cfg 49 | 50 | 51 | class NotThisMethod(Exception): 52 | """Exception raised if a method is not valid for the current scenario.""" 53 | 54 | 55 | LONG_VERSION_PY = {} 56 | HANDLERS = {} 57 | 58 | 59 | def register_vcs_handler(vcs, method): # decorator 60 | """Decorator to mark a method as the handler for a particular VCS.""" 61 | 62 | def decorate(f): 63 | """Store f in HANDLERS[vcs][method].""" 64 | if vcs not in HANDLERS: 65 | HANDLERS[vcs] = {} 66 | HANDLERS[vcs][method] = f 67 | return f 68 | 69 | return decorate 70 | 71 | 72 | def run_command(commands, 73 | args, 74 | cwd=None, 75 | verbose=False, 76 | hide_stderr=False, 77 | env=None): 78 | 
"""Call the given command(s).""" 79 | assert isinstance(commands, list) 80 | p = None 81 | for c in commands: 82 | try: 83 | dispcmd = str([c] + args) 84 | # remember shell=False, so use git.cmd on windows, not just git 85 | p = subprocess.Popen( 86 | [c] + args, 87 | cwd=cwd, 88 | env=env, 89 | stdout=subprocess.PIPE, 90 | stderr=(subprocess.PIPE if hide_stderr else None)) 91 | break 92 | except EnvironmentError: 93 | e = sys.exc_info()[1] 94 | if e.errno == errno.ENOENT: 95 | continue 96 | if verbose: 97 | print("unable to run %s" % dispcmd) 98 | print(e) 99 | return None, None 100 | else: 101 | if verbose: 102 | print("unable to find command, tried %s" % (commands, )) 103 | return None, None 104 | stdout = p.communicate()[0].strip() 105 | if sys.version_info[0] >= 3: 106 | stdout = stdout.decode() 107 | if p.returncode != 0: 108 | if verbose: 109 | print("unable to run %s (error)" % dispcmd) 110 | print("stdout was %s" % stdout) 111 | return None, p.returncode 112 | return stdout, p.returncode 113 | 114 | 115 | def versions_from_parentdir(parentdir_prefix, root, verbose): 116 | """Try to determine the version from the parent directory name. 117 | 118 | Source tarballs conventionally unpack into a directory that includes both 119 | the project name and a version string. 
We will also support searching up 120 | two directory levels for an appropriately named parent directory 121 | """ 122 | rootdirs = [] 123 | 124 | for i in range(3): 125 | dirname = os.path.basename(root) 126 | if dirname.startswith(parentdir_prefix): 127 | return { 128 | "version": dirname[len(parentdir_prefix):], 129 | "full-revisionid": None, 130 | "dirty": False, 131 | "error": None, 132 | "date": None 133 | } 134 | else: 135 | rootdirs.append(root) 136 | root = os.path.dirname(root) # up a level 137 | 138 | if verbose: 139 | print("Tried directories %s but none started with prefix %s" % 140 | (str(rootdirs), parentdir_prefix)) 141 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 142 | 143 | 144 | @register_vcs_handler("git", "get_keywords") 145 | def git_get_keywords(versionfile_abs): 146 | """Extract version information from the given file.""" 147 | # the code embedded in _version.py can just fetch the value of these 148 | # keywords. When used from setup.py, we don't want to import _version.py, 149 | # so we do it with a regexp instead. This function is not used from 150 | # _version.py. 
151 | keywords = {} 152 | try: 153 | f = open(versionfile_abs, "r") 154 | for line in f.readlines(): 155 | if line.strip().startswith("git_refnames ="): 156 | mo = re.search(r'=\s*"(.*)"', line) 157 | if mo: 158 | keywords["refnames"] = mo.group(1) 159 | if line.strip().startswith("git_full ="): 160 | mo = re.search(r'=\s*"(.*)"', line) 161 | if mo: 162 | keywords["full"] = mo.group(1) 163 | if line.strip().startswith("git_date ="): 164 | mo = re.search(r'=\s*"(.*)"', line) 165 | if mo: 166 | keywords["date"] = mo.group(1) 167 | f.close() 168 | except EnvironmentError: 169 | pass 170 | return keywords 171 | 172 | 173 | @register_vcs_handler("git", "keywords") 174 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 175 | """Get version information from git keywords.""" 176 | if not keywords: 177 | raise NotThisMethod("no keywords at all, weird") 178 | date = keywords.get("date") 179 | if date is not None: 180 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant 181 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 182 | # -like" string, which we must then edit to make compliant), because 183 | # it's been around since git-1.5.3, and it's too difficult to 184 | # discover which version we're using, or to work around using an 185 | # older one. 186 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 187 | refnames = keywords["refnames"].strip() 188 | if refnames.startswith("$Format"): 189 | if verbose: 190 | print("keywords are unexpanded, not using") 191 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 192 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 193 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 194 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 195 | TAG = "tag: " 196 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 197 | if not tags: 198 | # Either we're using git < 1.8.3, or there really are no tags. 
We use 199 | # a heuristic: assume all version tags have a digit. The old git %d 200 | # expansion behaves like git log --decorate=short and strips out the 201 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 202 | # between branches and tags. By ignoring refnames without digits, we 203 | # filter out many common branch names like "release" and 204 | # "stabilization", as well as "HEAD" and "master". 205 | tags = set([r for r in refs if re.search(r'\d', r)]) 206 | if verbose: 207 | print("discarding '%s', no digits" % ",".join(refs - tags)) 208 | if verbose: 209 | print("likely tags: %s" % ",".join(sorted(tags))) 210 | for ref in sorted(tags): 211 | # sorting will prefer e.g. "2.0" over "2.0rc1" 212 | if ref.startswith(tag_prefix): 213 | r = ref[len(tag_prefix):] 214 | if verbose: 215 | print("picking %s" % r) 216 | return { 217 | "version": r, 218 | "full-revisionid": keywords["full"].strip(), 219 | "dirty": False, 220 | "error": None, 221 | "date": date 222 | } 223 | # no suitable tags, so version is "0+unknown", but full hex is still there 224 | if verbose: 225 | print("no suitable tags, using unknown + full revision id") 226 | return { 227 | "version": "0+unknown", 228 | "full-revisionid": keywords["full"].strip(), 229 | "dirty": False, 230 | "error": "no suitable tags", 231 | "date": None 232 | } 233 | 234 | 235 | @register_vcs_handler("git", "pieces_from_vcs") 236 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 237 | """Get version from 'git describe' in the root of the source tree. 238 | 239 | This only gets called if the git-archive 'subst' keywords were *not* 240 | expanded, and _version.py hasn't already been rewritten with a short 241 | version string, meaning we're inside a checked out source tree. 
242 | """ 243 | GITS = ["git"] 244 | if sys.platform == "win32": 245 | GITS = ["git.cmd", "git.exe"] 246 | 247 | out, rc = run_command( 248 | GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) 249 | if rc != 0: 250 | if verbose: 251 | print("Directory %s not under git control" % root) 252 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 253 | 254 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 255 | # if there isn't one, this yields HEX[-dirty] (no NUM) 256 | describe_out, rc = run_command( 257 | GITS, [ 258 | "describe", "--tags", "--dirty", "--always", "--long", "--match", 259 | "%s*" % tag_prefix 260 | ], 261 | cwd=root) 262 | # --long was added in git-1.5.5 263 | if describe_out is None: 264 | raise NotThisMethod("'git describe' failed") 265 | describe_out = describe_out.strip() 266 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 267 | if full_out is None: 268 | raise NotThisMethod("'git rev-parse' failed") 269 | full_out = full_out.strip() 270 | 271 | pieces = {} 272 | pieces["long"] = full_out 273 | pieces["short"] = full_out[:7] # maybe improved later 274 | pieces["error"] = None 275 | 276 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 277 | # TAG might have hyphens. 278 | git_describe = describe_out 279 | 280 | # look for -dirty suffix 281 | dirty = git_describe.endswith("-dirty") 282 | pieces["dirty"] = dirty 283 | if dirty: 284 | git_describe = git_describe[:git_describe.rindex("-dirty")] 285 | 286 | # now we have TAG-NUM-gHEX or HEX 287 | 288 | if "-" in git_describe: 289 | # TAG-NUM-gHEX 290 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 291 | if not mo: 292 | # unparseable. Maybe git-describe is misbehaving? 
293 | pieces["error"] = ( 294 | "unable to parse git-describe output: '%s'" % describe_out) 295 | return pieces 296 | 297 | # tag 298 | full_tag = mo.group(1) 299 | if not full_tag.startswith(tag_prefix): 300 | if verbose: 301 | fmt = "tag '%s' doesn't start with prefix '%s'" 302 | print(fmt % (full_tag, tag_prefix)) 303 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" % 304 | (full_tag, tag_prefix)) 305 | return pieces 306 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 307 | 308 | # distance: number of commits since tag 309 | pieces["distance"] = int(mo.group(2)) 310 | 311 | # commit: short hex revision ID 312 | pieces["short"] = mo.group(3) 313 | 314 | else: 315 | # HEX: no tags 316 | pieces["closest-tag"] = None 317 | count_out, rc = run_command( 318 | GITS, ["rev-list", "HEAD", "--count"], cwd=root) 319 | pieces["distance"] = int(count_out) # total number of commits 320 | 321 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 322 | date = run_command( 323 | GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() 324 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 325 | 326 | return pieces 327 | 328 | 329 | def plus_or_dot(pieces): 330 | """Return a + if we don't already have one, else return a .""" 331 | if "+" in pieces.get("closest-tag", ""): 332 | return "." 333 | return "+" 334 | 335 | 336 | def render_pep440(pieces): 337 | """Build up version string, with post-release "local version identifier". 338 | 339 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 340 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 341 | 342 | Exceptions: 343 | 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] 344 | """ 345 | if pieces["closest-tag"]: 346 | rendered = pieces["closest-tag"] 347 | if pieces["distance"] or pieces["dirty"]: 348 | rendered += plus_or_dot(pieces) 349 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 350 | if pieces["dirty"]: 351 | rendered += ".dirty" 352 | else: 353 | # exception #1 354 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) 355 | if pieces["dirty"]: 356 | rendered += ".dirty" 357 | return rendered 358 | 359 | 360 | def render_pep440_pre(pieces): 361 | """TAG[.post.devDISTANCE] -- No -dirty. 362 | 363 | Exceptions: 364 | 1: no tags. 0.post.devDISTANCE 365 | """ 366 | if pieces["closest-tag"]: 367 | rendered = pieces["closest-tag"] 368 | if pieces["distance"]: 369 | rendered += ".post.dev%d" % pieces["distance"] 370 | else: 371 | # exception #1 372 | rendered = "0.post.dev%d" % pieces["distance"] 373 | return rendered 374 | 375 | 376 | def render_pep440_post(pieces): 377 | """TAG[.postDISTANCE[.dev0]+gHEX] . 378 | 379 | The ".dev0" means dirty. Note that .dev0 sorts backwards 380 | (a dirty tree will appear "older" than the corresponding clean one), 381 | but you shouldn't be releasing software with -dirty anyways. 382 | 383 | Exceptions: 384 | 1: no tags. 0.postDISTANCE[.dev0] 385 | """ 386 | if pieces["closest-tag"]: 387 | rendered = pieces["closest-tag"] 388 | if pieces["distance"] or pieces["dirty"]: 389 | rendered += ".post%d" % pieces["distance"] 390 | if pieces["dirty"]: 391 | rendered += ".dev0" 392 | rendered += plus_or_dot(pieces) 393 | rendered += "g%s" % pieces["short"] 394 | else: 395 | # exception #1 396 | rendered = "0.post%d" % pieces["distance"] 397 | if pieces["dirty"]: 398 | rendered += ".dev0" 399 | rendered += "+g%s" % pieces["short"] 400 | return rendered 401 | 402 | 403 | def render_pep440_old(pieces): 404 | """TAG[.postDISTANCE[.dev0]] . 405 | 406 | The ".dev0" means dirty. 407 | 408 | Exceptions: 409 | 1: no tags.
0.postDISTANCE[.dev0] 410 | """ 411 | if pieces["closest-tag"]: 412 | rendered = pieces["closest-tag"] 413 | if pieces["distance"] or pieces["dirty"]: 414 | rendered += ".post%d" % pieces["distance"] 415 | if pieces["dirty"]: 416 | rendered += ".dev0" 417 | else: 418 | # exception #1 419 | rendered = "0.post%d" % pieces["distance"] 420 | if pieces["dirty"]: 421 | rendered += ".dev0" 422 | return rendered 423 | 424 | 425 | def render_git_describe(pieces): 426 | """TAG[-DISTANCE-gHEX][-dirty]. 427 | 428 | Like 'git describe --tags --dirty --always'. 429 | 430 | Exceptions: 431 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 432 | """ 433 | if pieces["closest-tag"]: 434 | rendered = pieces["closest-tag"] 435 | if pieces["distance"]: 436 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 437 | else: 438 | # exception #1 439 | rendered = pieces["short"] 440 | if pieces["dirty"]: 441 | rendered += "-dirty" 442 | return rendered 443 | 444 | 445 | def render_git_describe_long(pieces): 446 | """TAG-DISTANCE-gHEX[-dirty]. 447 | 448 | Like 'git describe --tags --dirty --always --long'. 449 | The distance/hash is unconditional. 450 | 451 | Exceptions: 452 | 1: no tags.
HEX[-dirty] (note: no 'g' prefix) 453 | """ 454 | if pieces["closest-tag"]: 455 | rendered = pieces["closest-tag"] 456 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 457 | else: 458 | # exception #1 459 | rendered = pieces["short"] 460 | if pieces["dirty"]: 461 | rendered += "-dirty" 462 | return rendered 463 | 464 | 465 | def render(pieces, style): 466 | """Render the given version pieces into the requested style.""" 467 | if pieces["error"]: 468 | return { 469 | "version": "unknown", 470 | "full-revisionid": pieces.get("long"), 471 | "dirty": None, 472 | "error": pieces["error"], 473 | "date": None 474 | } 475 | 476 | if not style or style == "default": 477 | style = "pep440" # the default 478 | 479 | if style == "pep440": 480 | rendered = render_pep440(pieces) 481 | elif style == "pep440-pre": 482 | rendered = render_pep440_pre(pieces) 483 | elif style == "pep440-post": 484 | rendered = render_pep440_post(pieces) 485 | elif style == "pep440-old": 486 | rendered = render_pep440_old(pieces) 487 | elif style == "git-describe": 488 | rendered = render_git_describe(pieces) 489 | elif style == "git-describe-long": 490 | rendered = render_git_describe_long(pieces) 491 | else: 492 | raise ValueError("unknown style '%s'" % style) 493 | 494 | return { 495 | "version": rendered, 496 | "full-revisionid": pieces["long"], 497 | "dirty": pieces["dirty"], 498 | "error": None, 499 | "date": pieces.get("date") 500 | } 501 | 502 | 503 | def get_versions(): 504 | """Get version information or return default if unable to do so.""" 505 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 506 | # __file__, we can work backwards from there to the root. Some 507 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 508 | # case we can only use expanded keywords. 
509 | 510 | cfg = get_config() 511 | verbose = cfg.verbose 512 | 513 | try: 514 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, 515 | verbose) 516 | except NotThisMethod: 517 | pass 518 | 519 | try: 520 | root = os.path.realpath(__file__) 521 | # versionfile_source is the relative path from the top of the source 522 | # tree (where the .git directory might live) to this file. Invert 523 | # this to find the root from __file__. 524 | for i in cfg.versionfile_source.split('/'): 525 | root = os.path.dirname(root) 526 | except NameError: 527 | return { 528 | "version": "0+unknown", 529 | "full-revisionid": None, 530 | "dirty": None, 531 | "error": "unable to find root of source tree", 532 | "date": None 533 | } 534 | 535 | try: 536 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 537 | return render(pieces, cfg.style) 538 | except NotThisMethod: 539 | pass 540 | 541 | try: 542 | if cfg.parentdir_prefix: 543 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 544 | except NotThisMethod: 545 | pass 546 | 547 | return { 548 | "version": "0+unknown", 549 | "full-revisionid": None, 550 | "dirty": None, 551 | "error": "unable to compute version", 552 | "date": None 553 | } 554 | -------------------------------------------------------------------------------- /intake_xarray/base.py: -------------------------------------------------------------------------------- 1 | class IntakeXarraySourceAdapter: 2 | """Adapter exposing the classic intake source API on top of an ``intake.readers`` reader stored as ``self.reader`` (set by subclasses): ``read``/``discover`` call the reader with ``chunks=None``, while ``to_dask``/``read_chunked`` ensure a dask-backed (chunked) load.""" container = "xarray" 3 | name = "xarray" 4 | version = "" 5 | 6 | def to_dask(self): 7 | if "chunks" not in self.reader.kwargs: 8 | return self.reader(chunks={}).read() 9 | else: 10 | return self.reader.read() 11 | 12 | def __call__(self, *args, **kwargs): 13 | return self 14 | 15 | get = __call__ 16 | 17 | def read(self): 18 | return self.reader(chunks=None).read() 19 | 20 | discover = read 21 | 22 | read_chunked = to_dask 23 | --------------------------------------------------------------------------------
/intake_xarray/image.py: -------------------------------------------------------------------------------- 1 | import fsspec 2 | 3 | from intake.source.utils import reverse_formats 4 | from intake import readers 5 | 6 | from intake_xarray.base import IntakeXarraySourceAdapter 7 | 8 | 9 | def _coerce_shape(array, shape): 10 | """ Trim or pad array to match desired shape""" 11 | import numpy as np 12 | 13 | if len(shape) != 2: 14 | raise ValueError('coerce_shape must be an iterable of len 2') 15 | 16 | target_shape = tuple(shape) 17 | actual_shape = array.shape 18 | ndims = len(actual_shape) 19 | 20 | if actual_shape[:2] == target_shape: 21 | # no trimming or padding needed 22 | return array 23 | 24 | # do any necessary trimming first 25 | for i, (a, t) in enumerate(zip(actual_shape[:2], target_shape)): 26 | if a > t: 27 | if i == 0: 28 | if ndims == 2: 29 | array = array[:t, :] 30 | else: 31 | array = array[:t, :, :] 32 | else: 33 | if ndims == 2: 34 | array = array[:, :t] 35 | else: 36 | array = array[:, :t, :] 37 | 38 | if array.shape[:2] == target_shape: 39 | # only needed trimming 40 | return array 41 | 42 | # create array of zeros and fill with trimmed value array 43 | if ndims == 2: 44 | new_array = np.zeros(target_shape, dtype=array.dtype) 45 | new_array[:array.shape[0], :array.shape[1]] = array 46 | else: 47 | new_array = np.zeros((target_shape[0], 48 | target_shape[1], 49 | actual_shape[2]), dtype=array.dtype) 50 | new_array[:array.shape[0], :array.shape[1], :] = array 51 | 52 | return new_array 53 | 54 | 55 | def _add_leading_dimension(x): 56 | """Add a new dimension to an array-like""" 57 | return x[None, ...] 
58 | 59 | 60 | def _dask_imread(files, imread=None, preprocess=None, coerce_shape=None): 61 | """ Read a stack of images into a dask array """ 62 | from dask.array import Array 63 | from dask.base import tokenize 64 | from functools import partial 65 | 66 | if not imread: 67 | from skimage.io import imread 68 | 69 | def _imread(open_file): 70 | with open_file as f: 71 | return imread(f) 72 | 73 | filenames = [f.path for f in files] 74 | 75 | name = 'imread-%s' % tokenize(filenames) 76 | 77 | if coerce_shape is not None: 78 | reshape = partial(_coerce_shape, shape=coerce_shape) 79 | 80 | with files[0] as f: 81 | sample = imread(f) 82 | if coerce_shape is not None: 83 | sample = reshape(sample) 84 | if preprocess: 85 | sample = preprocess(sample) 86 | 87 | keys = [(name, i) + (0,) * len(sample.shape) 88 | for i in range(len(files))] 89 | 90 | if coerce_shape is not None: 91 | if preprocess: 92 | values = [(_add_leading_dimension, 93 | (preprocess, 94 | (reshape, 95 | (_imread, f)))) 96 | for f in files] 97 | else: 98 | values = [(_add_leading_dimension, 99 | (reshape, 100 | (_imread, f))) 101 | for f in files] 102 | elif preprocess: 103 | values = [(_add_leading_dimension, 104 | (preprocess, 105 | (_imread, f))) 106 | for f in files] 107 | else: 108 | values = [(_add_leading_dimension, 109 | (_imread, f)) 110 | for f in files] 111 | dsk = dict(zip(keys, values)) 112 | 113 | chunks = ((1, ) * len(files), ) + tuple((d, ) for d in sample.shape) 114 | 115 | return Array(dsk, name, chunks, sample.dtype) 116 | 117 | 118 | def _dask_exifread(files, exif_tags): 119 | """Construct a dask Array to read each tag in `exif_tags` (list of 120 | str) from the EXIF data of the images in `files` 121 | """ 122 | from copy import copy 123 | from numpy import array 124 | from dask.array import Array 125 | from dask.base import tokenize 126 | from exifread import process_file as read_exif 127 | 128 | def _read_exif(open_file): 129 | # Take a fresh copy of open_file, to work around 
occasional 130 | # 'I/O operation on closed file' and similar errors when 131 | # open_file is also opened elsewhere 132 | with copy(open_file) as f: 133 | return read_exif(f) 134 | 135 | if not isinstance(exif_tags, list): 136 | sample = _read_exif(files[0]) 137 | exif_tags = sample.keys() 138 | 139 | ntags = len(exif_tags) 140 | 141 | def extract_tags(d): 142 | return array([d.get(tag) for tag in exif_tags]) 143 | 144 | filenames = [f.path for f in files] 145 | name = 'exifread-%s' % tokenize(filenames) 146 | 147 | keys = [(name, i, 0) for i in range(len(files))] 148 | values = [(_add_leading_dimension, 149 | (extract_tags, 150 | (_read_exif, f))) 151 | for f in files] 152 | 153 | dsk = dict(zip(keys, values)) 154 | 155 | chunks = ((1,) * len(files), (ntags,)) 156 | 157 | exif_data = Array(dsk, name, chunks, object) 158 | 159 | return {'EXIF ' + tag: exif_data[:,i] for i, tag in enumerate(exif_tags)} 160 | 161 | 162 | def multireader(files, chunks, concat_dim, exif_tags, **kwargs): 163 | """Read a stack of images into a dask xarray object 164 | 165 | NOTE: copied from dask.array.image.imread but altering the input to accept 166 | a list of file objects. 167 | 168 | Parameters 169 | ---------- 170 | files : iter 171 | List of file objects where each file contains data with the same 172 | shape. If this is not the case, use preprocess to coerce data into 173 | a shape 174 | chunks : int or dict 175 | Chunks is used to load the new dataset into dask 176 | arrays. ``chunks={}`` loads the dataset with dask using a single 177 | chunk for all arrays. 178 | concat_dim : str or iterable 179 | Dimension over which to concatenate. If iterable, all fields must be 180 | part of the pattern. 181 | imread : function (optional) 182 | Optionally provide custom imread function. 183 | Function should expect a file object and produce a numpy array. 184 | Defaults to ``skimage.io.imread``.
185 | preprocess : function (optional) 186 | Optionally provide custom function to preprocess the image. 187 | Function should expect a numpy array for a single image. 188 | coerce_shape : iterable of len 2 (optional) 189 | Optionally coerce the shape of the height and width of the image 190 | by setting `coerce_shape` to desired shape. 191 | exif_tags : boolean or list of str (optional) 192 | Controls whether exif tags are extracted from the images. If a 193 | list, the elements are treated as the particular tags to 194 | extract from each image. For any other truthy value, all tags 195 | that were able to be extracted from a sample image are used. 196 | When tags are extracted, an xarray Dataset is returned, with 197 | each exif tag in a corresponding data variable of the Dataset, 198 | (of type `Optional[exifread.classes.IfdTag]`), and the image 199 | data in a data variable 'raster'. 200 | 201 | Returns 202 | ------- 203 | A Dask xarray.DataArray or xarray.Dataset, of all images stacked 204 | along the first dimension, and (optionally) the value of any 205 | requested EXIF tags. All images will be treated as individual 206 | chunks unless chunks kwarg is specified. 
207 | """ 208 | import numpy as np 209 | from xarray import DataArray, Dataset 210 | 211 | dask_array = _dask_imread(files, **kwargs) 212 | 213 | ny, nx = dask_array.shape[1:3] 214 | coords = {'y': np.arange(ny), 215 | 'x': np.arange(nx)} 216 | if isinstance(concat_dim, list): 217 | dims = ('dim_0',) 218 | else: 219 | dims = (concat_dim,) 220 | coords = {concat_dim: np.arange(dask_array.shape[0]), 221 | **coords} 222 | 223 | raster_dims = dims + ('y', 'x') 224 | if len(dask_array.shape) == 4: 225 | nchannel = dask_array.shape[3] 226 | coords['channel'] = np.arange(nchannel) 227 | raster_dims += ('channel',) 228 | 229 | if exif_tags: 230 | exif_dict = _dask_exifread(files, exif_tags) 231 | exif_dict_ds = {tag: (dims, arr) for tag, arr in exif_dict.items()} 232 | return Dataset( 233 | { 234 | 'raster': (raster_dims, dask_array), 235 | **exif_dict_ds, 236 | }, 237 | coords=coords, 238 | ).chunk(chunks=chunks) 239 | else: 240 | return DataArray( 241 | dask_array, coords=coords, dims=raster_dims 242 | ).chunk(chunks=chunks) 243 | 244 | 245 | class ImageReader(readers.BaseReader): 246 | """Open a xarray dataset from image files. 247 | 248 | This creates an xarray.DataArray or an xarray.Dataset. 249 | See http://scikit-image.org/docs/dev/api/skimage.io.html#skimage.io.imread 250 | for the file formats supported. 251 | 252 | Parameters 253 | ---------- 254 | urlpath : str or iterable, location of data 255 | May be a local path, or remote path if including a protocol specifier 256 | such as ``'s3://'``. May include glob wildcards or format pattern 257 | strings. Must be a format supported by ``skimage.io.imread`` or 258 | user-supplied ``imread``. Some examples: 259 | - ``{{ CATALOG_DIR }}/data/RGB.tif`` 260 | - ``s3://data/*.jpeg`` 261 | - ``https://example.com/image.png`` 262 | - ``s3://data/Images/{{ landuse }}/{{ '%02d' % id }}.tif`` 263 | chunks : int or dict 264 | Chunks is used to load the new dataset into dask 265 | arrays. 
``chunks={}`` loads the dataset with dask using a single 266 | chunk for all arrays. 267 | path_as_pattern : bool or str, optional 268 | Whether to treat the path as a pattern (ie. ``data_{field}.tif``) 269 | and create new coordinates in the output corresponding to pattern 270 | fields. If str, is treated as pattern to match on. Default is True. 271 | concat_dim : str or iterable 272 | Dimension over which to concatenate. If iterable, all fields must be 273 | part of the pattern. 274 | preprocess : function (optional) 275 | Optionally provide custom function to preprocess the image. 276 | Function should expect a numpy array for a single image and return 277 | a numpy array. 278 | coerce_shape : iterable of len 2 (optional) 279 | Optionally coerce the shape of the height and width of the image 280 | by setting `coerce_shape` to desired shape. 281 | exif_tags : boolean or list of str (optional) 282 | Controls whether exif tags are extracted from the images. If a 283 | list, the elements are treated as the particular tags to 284 | extract from each image. For any other truthy value, all tags 285 | that were able to be extracted from a sample image are used. 286 | When tags are extracted, an xarray Dataset is returned, with 287 | each exif tag in a corresponding data variable of the Dataset, 288 | (of type `Optional[exifread.classes.IfdTag]`), and the image 289 | data in a data variable 'raster'. 290 | 291 | """ 292 | output_instance = "xarray:Dataset" 293 | 294 | 295 | def _read(self, urlpath, chunks=None, concat_dim='concat_dim', 296 | metadata=None, path_as_pattern=None, 297 | storage_options=None, exif_tags=None, **kwargs): 298 | """ 299 | This function is called when the data source refers to more 300 | than one file either as a list or a glob. It sets up the 301 | dask graph for opening the files.
302 | 303 | Parameters 304 | ---------- 305 | files : iter 306 | List of file objects 307 | """ 308 | import pandas as pd 309 | from xarray import DataArray 310 | path_as_pattern = path_as_pattern or (path_as_pattern is None and "{" in urlpath) 311 | 312 | if path_as_pattern: 313 | from intake.readers.utils import pattern_to_glob 314 | 315 | url = pattern_to_glob(urlpath) 316 | __, _, paths = fsspec.get_fs_token_paths(url, **(storage_options or {})) 317 | field_values = reverse_formats(urlpath, paths) 318 | paths = paths 319 | else: 320 | paths = urlpath 321 | 322 | files = fsspec.open_files(paths, **(storage_options or {})) 323 | 324 | out = multireader( 325 | files, chunks, concat_dim, exif_tags, **kwargs 326 | ) 327 | if isinstance(out, DataArray) and len(files) == 1 and isinstance(urlpath, str) and "*" not in urlpath: 328 | out = out[0] 329 | if not path_as_pattern: 330 | return out 331 | 332 | coords = {} 333 | filenames = [f.path for f in files] 334 | 335 | if isinstance(concat_dim, list): 336 | if not set(field_values.keys()).issuperset(set(concat_dim)): 337 | raise KeyError('All concat_dims should be in pattern.') 338 | index = pd.MultiIndex.from_tuples( 339 | zip(*(field_values[dim] for dim in concat_dim)), 340 | names=concat_dim) 341 | coords = { 342 | k: DataArray(v, dims=('dim_0')) 343 | for k, v in field_values.items() if k not in concat_dim 344 | } 345 | out = (out.assign_coords(dim_0=index, **coords) # use the index 346 | .unstack().chunk(chunks)) # unstack along new index 347 | return out.transpose(*concat_dim, # reorder dims 348 | *filter(lambda x: x not in concat_dim, 349 | out.dims)) 350 | else: 351 | coords = { 352 | k: DataArray(v, dims=concat_dim) 353 | for k, v in field_values.items() 354 | } 355 | return out.assign_coords(**coords).chunk(chunks).unify_chunks() 356 | 357 | 358 | class ImageSource(IntakeXarraySourceAdapter): 359 | name = 'xarray_image' 360 | container = "xarray" 361 | 362 | def __init__(self, *ar, **kw): 363 | self.reader = 
ImageReader(*ar, **kw) 364 | -------------------------------------------------------------------------------- /intake_xarray/netcdf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from intake import readers 3 | 4 | from intake_xarray.base import IntakeXarraySourceAdapter 5 | 6 | 7 | class NetCDFSource(IntakeXarraySourceAdapter): 8 | """Open a xarray file. 9 | 10 | Parameters 11 | ---------- 12 | urlpath : str, List[str] 13 | Path to source file. May include glob "*" characters, format 14 | pattern strings, or list. 15 | Some examples: 16 | - ``{{ CATALOG_DIR }}/data/air.nc`` 17 | - ``{{ CATALOG_DIR }}/data/*.nc`` 18 | - ``{{ CATALOG_DIR }}/data/air_{year}.nc`` 19 | chunks : int or dict, optional 20 | Chunks is used to load the new dataset into dask 21 | arrays. ``chunks={}`` loads the dataset with dask using a single 22 | chunk for all arrays. 23 | combine : ({'by_coords', 'nested'}, optional) 24 | Which function is used to concatenate all the files when urlpath 25 | has a wildcard. It is recommended to set this argument in all 26 | your catalogs because the default has changed and is going to change. 27 | It was "nested", and is now the default of xarray.open_mfdataset 28 | which is "auto_combine", and is planned to change from "auto" to 29 | "by_coords" in a near future. 30 | concat_dim : str, optional 31 | Name of dimension along which to concatenate the files. Can 32 | be new or pre-existing if combine is "nested". Must be None or new if 33 | combine is "by_coords". 34 | path_as_pattern : bool or str, optional 35 | Whether to treat the path as a pattern (ie. ``data_{field}.nc``) 36 | and create new coordinates in the output corresponding to pattern 37 | fields. If str, is treated as pattern to match on. Default is True. 38 | xarray_kwargs: dict 39 | Additional xarray kwargs for xr.open_dataset() or xr.open_mfdataset().
40 | storage_options: dict 41 | If using a remote fs (whether caching locally or not), these are 42 | the kwargs to pass to that FS. 43 | """ 44 | name = 'netcdf' 45 | 46 | def __init__(self, urlpath, 47 | xarray_kwargs=None, metadata=None, 48 | path_as_pattern=True, storage_options=None, **kwargs): 49 | data = readers.datatypes.NetCDF3(urlpath, storage_options=storage_options, 50 | metadata=metadata) 51 | if (path_as_pattern is True and "{" in urlpath) or isinstance(path_as_pattern, str): 52 | reader = readers.XArrayPatternReader(data, **(xarray_kwargs or {}), metadata=metadata, 53 | pattern=path_as_pattern, **kwargs) 54 | else: 55 | reader = readers.XArrayDatasetReader(data, **(xarray_kwargs or {}), metadata=metadata, **kwargs) 56 | self.reader = reader 57 | -------------------------------------------------------------------------------- /intake_xarray/opendap.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import requests 3 | import os 4 | 5 | from intake import readers 6 | from intake_xarray.base import IntakeXarraySourceAdapter 7 | 8 | class OpenDapSource(IntakeXarraySourceAdapter): 9 | """Open a OPeNDAP source. 10 | 11 | Parameters 12 | ---------- 13 | urlpath: str 14 | Path to source file. 15 | chunks: None, int or dict 16 | Chunks is used to load the new dataset into dask 17 | arrays. ``chunks={}`` loads the dataset with dask using a single 18 | chunk for all arrays. 19 | auth: None, "esgf" or "urs" 20 | Method of authenticating to the OPeNDAP server. 21 | Choose from one of the following: 22 | None - [Default] Anonymous access. 23 | 'esgf' - Earth System Grid Federation. 24 | 'urs' - NASA Earthdata Login, also known as URS. 25 | 'generic_http' - OPeNDAP servers which support plain HTTP authentication 26 | None - No authentication. 27 | Note that you will need to set your username and password respectively using the 28 | environment variables DAP_USER and DAP_PASSWORD. 
29 | engine: str 30 | Engine used for reading OPeNDAP URL. Should be one of 'pydap' or 'netcdf4'. 31 | """ 32 | name = 'opendap' 33 | 34 | def __init__(self, urlpath, chunks=None, engine="pydap", xarray_kwargs=None, metadata=None, 35 | **kwargs): 36 | data = readers.datatypes.OpenDAP(urlpath) 37 | self.reader = readers.XArrayDatasetReader( 38 | data, engine=engine, **(xarray_kwargs or {}), metadata=metadata, **kwargs 39 | ) 40 | -------------------------------------------------------------------------------- /intake_xarray/raster.py: -------------------------------------------------------------------------------- 1 | from intake import readers 2 | from intake.readers.utils import pattern_to_glob 3 | from intake.source.utils import reverse_formats 4 | 5 | from intake_xarray.base import IntakeXarraySourceAdapter 6 | 7 | 8 | class RasterIOSource(IntakeXarraySourceAdapter): 9 | """Open a xarray dataset via RasterIO. 10 | 11 | This creates an xarray.array, not a dataset (i.e., there is exactly one 12 | variable). 13 | 14 | See https://rasterio.readthedocs.io/en/latest/ for the file formats 15 | supported, particularly GeoTIFF, and 16 | http://xarray.pydata.org/en/stable/generated/xarray.open_rasterio.html#xarray.open_rasterio 17 | for possible extra arguments 18 | 19 | Parameters 20 | ---------- 21 | urlpath: str or iterable, location of data 22 | May be a local path, or remote path if including a protocol specifier 23 | such as ``'s3://'``. May include glob wildcards or format pattern strings. 24 | Must be a format supported by rasterIO (normally GeoTiff). 25 | Some examples: 26 | - ``{{ CATALOG_DIR }}data/RGB.tif`` 27 | - ``s3://data/*.tif`` 28 | - ``s3://data/landsat8_band{band}.tif`` 29 | - ``s3://data/{location}/landsat8_band{band}.tif`` 30 | - ``{{ CATALOG_DIR }}data/landsat8_{start_date:%Y%m%d}_band{band}.tif`` 31 | chunks: None or int or dict, optional 32 | Chunks is used to load the new dataset into dask 33 | arrays. 
``chunks={}`` loads the dataset with dask using a single 34 | chunk for all arrays. default `None` loads numpy arrays. 35 | path_as_pattern: bool or str, optional 36 | Whether to treat the path as a pattern (ie. ``data_{field}.tif``) 37 | and create new coordinates in the output corresponding to pattern 38 | fields. If str, is treated as pattern to match on. Default is True. 39 | """ 40 | name = 'rasterio' 41 | container = "xarray" 42 | 43 | def __init__(self, urlpath, 44 | xarray_kwargs=None, metadata=None, path_as_pattern=True, 45 | storage_options=None, **kwargs): 46 | data = readers.datatypes.TIFF(urlpath, storage_options=storage_options) 47 | if (path_as_pattern is True and "{" in urlpath) or isinstance(path_as_pattern, str): 48 | reader = readers.XArrayPatternReader(data, **(xarray_kwargs or {}), metadata=metadata, engine="rasterio", 49 | pattern=path_as_pattern, **kwargs) 50 | else: 51 | reader = readers.XArrayDatasetReader(data, **(xarray_kwargs or {}), metadata=metadata, engine="rasterio", **kwargs) 52 | self.reader = reader 53 | -------------------------------------------------------------------------------- /intake_xarray/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /intake_xarray/tests/conftest.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import posixpath 4 | import pytest 5 | import shutil 6 | import tempfile 7 | import xarray as xr 8 | 9 | import intake_xarray.base 10 | from intake_xarray.netcdf import NetCDFSource 11 | from intake_xarray.xzarr import ZarrSource 12 | 13 | TEST_DATA_DIR = 'data' 14 | TEST_DATA = 'example_1.nc' 15 | 16 | here = posixpath.dirname(__file__) 17 | TEST_URLPATH = posixpath.join(here, TEST_DATA_DIR, TEST_DATA) 18 | 19 | 20 | @pytest.fixture 21 | def netcdf_source(): 22 | return
NetCDFSource(TEST_URLPATH, {}) 23 | 24 | 25 | @pytest.fixture 26 | def dataset(): 27 | return xr.open_dataset(TEST_URLPATH) 28 | 29 | 30 | @pytest.fixture(scope='module') 31 | def zarr_source(): 32 | pytest.importorskip('zarr') 33 | try: 34 | tdir = tempfile.mkdtemp() 35 | data = xr.open_dataset(TEST_URLPATH) 36 | data.to_zarr(tdir) 37 | yield ZarrSource(tdir) 38 | finally: 39 | shutil.rmtree(tdir) 40 | 41 | 42 | @pytest.fixture 43 | def mock_import_xarray(): 44 | import sys 45 | import xarray 46 | try: 47 | sys.modules['xarray'] = None 48 | yield 49 | finally: 50 | sys.modules['xarray'] = xarray 51 | -------------------------------------------------------------------------------- /intake_xarray/tests/data/RGB.byte.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intake/intake-xarray/719a7c84b58ec6783b07e648fbfaf988ea4c453a/intake_xarray/tests/data/RGB.byte.tif -------------------------------------------------------------------------------- /intake_xarray/tests/data/bears.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intake/intake-xarray/719a7c84b58ec6783b07e648fbfaf988ea4c453a/intake_xarray/tests/data/bears.nc -------------------------------------------------------------------------------- /intake_xarray/tests/data/blank.zarr/.zattrs: -------------------------------------------------------------------------------- 1 | { 2 | "source": "Fictional Model Output" 3 | } -------------------------------------------------------------------------------- /intake_xarray/tests/data/blank.zarr/.zgroup: -------------------------------------------------------------------------------- 1 | { 2 | "zarr_format": 2 3 | } -------------------------------------------------------------------------------- /intake_xarray/tests/data/blank.zarr/lat/.zarray: -------------------------------------------------------------------------------- 1 | { 2 | 
"chunks": [ 3 | 5 4 | ], 5 | "compressor": { 6 | "blocksize": 0, 7 | "clevel": 5, 8 | "cname": "lz4", 9 | "id": "blosc", 10 | "shuffle": 1 11 | }, 12 | "dtype": ".tif" 54 | driver: rasterio 55 | args: 56 | urlpath: '{{ CATALOG_DIR }}little_{color}.tif' 57 | concat_dim: new_dim 58 | open_local: True 59 | pattern_tiff_source_path_pattern_field_as_band: 60 | description: "https://github.com/mapbox/rasterio/blob/master/tests/data/.tif" 61 | driver: rasterio 62 | args: 63 | urlpath: '{{ CATALOG_DIR }}little_{color}.tif' 64 | pattern_tiff_source_path_pattern_as_str: 65 | description: "https://github.com/mapbox/rasterio/blob/master/tests/data/.tif" 66 | driver: rasterio 67 | args: 68 | urlpath: ['{{ CATALOG_DIR }}/little_red.tif', '{{ CATALOG_DIR }}/little_green.tif'] 69 | concat_dim: color 70 | path_as_pattern: '{{ CATALOG_DIR }}/little_{color}.tif' 71 | blank: 72 | desciption: little zarr set 73 | driver: zarr 74 | args: 75 | urlpath: "{{CATALOG_DIR}}/blank.zarr" 76 | opendap_source_pydap: 77 | description: example OPeNDAP source 78 | driver: opendap 79 | args: 80 | urlpath: http://test.opendap.org/opendap/hyrax/data/nc/data.nc 81 | auth: null 82 | engine: pydap 83 | opendap_source_netcdf4: 84 | description: example OPeNDAP source 85 | driver: opendap 86 | args: 87 | urlpath: http://test.opendap.org/opendap/hyrax/data/nc/data.nc 88 | auth: null 89 | engine: netcdf4 90 | -------------------------------------------------------------------------------- /intake_xarray/tests/data/color_with_special_2.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intake/intake-xarray/719a7c84b58ec6783b07e648fbfaf988ea4c453a/intake_xarray/tests/data/color_with_special_2.tif -------------------------------------------------------------------------------- /intake_xarray/tests/data/color_with_special{}.tif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/intake/intake-xarray/719a7c84b58ec6783b07e648fbfaf988ea4c453a/intake_xarray/tests/data/color_with_special{}.tif -------------------------------------------------------------------------------- /intake_xarray/tests/data/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intake/intake-xarray/719a7c84b58ec6783b07e648fbfaf988ea4c453a/intake_xarray/tests/data/dog.jpg -------------------------------------------------------------------------------- /intake_xarray/tests/data/example_1.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intake/intake-xarray/719a7c84b58ec6783b07e648fbfaf988ea4c453a/intake_xarray/tests/data/example_1.nc -------------------------------------------------------------------------------- /intake_xarray/tests/data/example_2.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intake/intake-xarray/719a7c84b58ec6783b07e648fbfaf988ea4c453a/intake_xarray/tests/data/example_2.nc -------------------------------------------------------------------------------- /intake_xarray/tests/data/images/beach01.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intake/intake-xarray/719a7c84b58ec6783b07e648fbfaf988ea4c453a/intake_xarray/tests/data/images/beach01.tif -------------------------------------------------------------------------------- /intake_xarray/tests/data/images/beach57.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intake/intake-xarray/719a7c84b58ec6783b07e648fbfaf988ea4c453a/intake_xarray/tests/data/images/beach57.tif -------------------------------------------------------------------------------- /intake_xarray/tests/data/images/buildings96.tif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/intake/intake-xarray/719a7c84b58ec6783b07e648fbfaf988ea4c453a/intake_xarray/tests/data/images/buildings96.tif -------------------------------------------------------------------------------- /intake_xarray/tests/data/little_green.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intake/intake-xarray/719a7c84b58ec6783b07e648fbfaf988ea4c453a/intake_xarray/tests/data/little_green.tif -------------------------------------------------------------------------------- /intake_xarray/tests/data/little_red.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intake/intake-xarray/719a7c84b58ec6783b07e648fbfaf988ea4c453a/intake_xarray/tests/data/little_red.tif -------------------------------------------------------------------------------- /intake_xarray/tests/data/next_example_1.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intake/intake-xarray/719a7c84b58ec6783b07e648fbfaf988ea4c453a/intake_xarray/tests/data/next_example_1.nc -------------------------------------------------------------------------------- /intake_xarray/tests/data/wafsgfs_L_t06z_intdsk60.grib2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intake/intake-xarray/719a7c84b58ec6783b07e648fbfaf988ea4c453a/intake_xarray/tests/data/wafsgfs_L_t06z_intdsk60.grib2 -------------------------------------------------------------------------------- /intake_xarray/tests/data/wafsgfs_L_t06z_intdsk61.grib2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intake/intake-xarray/719a7c84b58ec6783b07e648fbfaf988ea4c453a/intake_xarray/tests/data/wafsgfs_L_t06z_intdsk61.grib2 
-------------------------------------------------------------------------------- /intake_xarray/tests/test_catalog.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import numpy as np 3 | import os 4 | import pytest 5 | 6 | from intake import open_catalog 7 | 8 | 9 | @pytest.fixture 10 | def catalog1(): 11 | path = os.path.dirname(__file__) 12 | return open_catalog(os.path.join(path, 'data', 'catalog.yaml')) 13 | 14 | 15 | def test_catalog(catalog1, dataset): 16 | source = catalog1['xarray_source'].get() 17 | ds = source.read() 18 | 19 | assert ds.dims == dataset.dims 20 | assert np.all(ds.temp == dataset.temp) 21 | assert np.all(ds.rh == dataset.rh) 22 | 23 | 24 | def test_import_error(mock_import_xarray, catalog1): 25 | s = catalog1['xarray_source']() # this is OK 26 | with pytest.raises(ImportError): 27 | s.discover() 28 | -------------------------------------------------------------------------------- /intake_xarray/tests/test_image.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import numpy as np 4 | import pytest 5 | 6 | from intake_xarray.image import _coerce_shape, ImageSource 7 | 8 | here = os.path.dirname(__file__) 9 | 10 | 11 | @pytest.mark.parametrize('im', [ 12 | [[1, 2], 13 | [3, 4]], 14 | [[1, 2, 7], 15 | [3, 4, 6]], 16 | [[1, 2, 7], 17 | [3, 4, 6], 18 | [5, 6, 8]], 19 | [[1, 2], 20 | [3, 4], 21 | [5, 6], 22 | [7, 8]], 23 | ]) 24 | def test_coerce_shape_2d_trim_only(im): 25 | shape = (2, 2) 26 | array = np.array(im) 27 | expected = np.array([[1, 2], 28 | [3, 4]]) 29 | actual = _coerce_shape(array, shape) 30 | assert (expected == actual).all() 31 | 32 | 33 | def test_coerce_shape_2d_pad_only(): 34 | shape = (3, 4) 35 | array = np.array([[1, 2], 36 | [3, 4]]) 37 | expected = np.array([[1, 2, 0, 0], 38 | [3, 4, 0, 0], 39 | [0, 0, 0, 0]]) 40 | actual = _coerce_shape(array, shape) 41 | assert (expected == 
actual).all() 42 | 43 | 44 | def test_coerce_shape_2d_pad_nrows_and_trim_ncols(): 45 | shape = (4, 2) 46 | array = np.array([[1, 2, 7], 47 | [3, 4, 6]]) 48 | expected = np.array([[1, 2], 49 | [3, 4], 50 | [0, 0], 51 | [0, 0]]) 52 | actual = _coerce_shape(array, shape) 53 | assert (expected == actual).all() 54 | 55 | 56 | def test_coerce_shape_2d_pad_ncols_and_trim_nrows(): 57 | shape = (2, 4) 58 | array = np.array([[1, 2], 59 | [3, 4], 60 | [5, 6], 61 | [7, 8]]) 62 | expected = np.array([[1, 2, 0, 0], 63 | [3, 4, 0, 0]]) 64 | actual = _coerce_shape(array, shape) 65 | assert (expected == actual).all() 66 | 67 | 68 | def test_coerce_shape_3d_no_change(): 69 | shape = (3, 3) 70 | array = np.arange(3**3).reshape(3, 3, 3) 71 | actual = _coerce_shape(array, shape) 72 | assert (array == actual).all() 73 | 74 | 75 | def test_coerce_shape_3d_pad_nrows_and_trim_ncols(): 76 | shape = (5, 2) 77 | array = np.arange(2*4*3).reshape(2, 4, 3) 78 | expected = np.array([[[0, 1, 2], 79 | [3, 4, 5]], 80 | 81 | [[12, 13, 14], 82 | [15, 16, 17]], 83 | 84 | [[0, 0, 0], 85 | [0, 0, 0]], 86 | 87 | [[0, 0, 0], 88 | [0, 0, 0]], 89 | 90 | [[0, 0, 0], 91 | [0, 0, 0]]]) 92 | actual = _coerce_shape(array, shape) 93 | assert (expected == actual).all() 94 | 95 | 96 | def test_coerce_shape_3d_pad_ncols_and_trim_nrows(): 97 | shape = (2, 5) 98 | array = np.arange(3*2*4).reshape(3, 2, 4) 99 | expected = np.array([[[0, 1, 2, 3], 100 | [4, 5, 6, 7], 101 | [0, 0, 0, 0], 102 | [0, 0, 0, 0], 103 | [0, 0, 0, 0]], 104 | 105 | [[8, 9, 10, 11], 106 | [12, 13, 14, 15], 107 | [0, 0, 0, 0], 108 | [0, 0, 0, 0], 109 | [0, 0, 0, 0]]]) 110 | actual = _coerce_shape(array, shape) 111 | assert (expected == actual).all() 112 | 113 | 114 | def test_coerce_shape_raises_error_if_shape_not_len_2(): 115 | shape = (2, 3, 3) 116 | array = np.arange(3**3).reshape(3, 3, 3) 117 | with pytest.raises(ValueError, 118 | match='coerce_shape must be an iterable of len 2'): 119 | _coerce_shape(array, shape) 120 | 121 | 122 | def 
test_coerce_shape_array_non_int(): 123 | shape = (2, 3) 124 | array = np.random.random((3, 2)) 125 | expected = np.append(array[:2, :], [[0], [0]], axis=1) 126 | actual = _coerce_shape(array, shape) 127 | assert (expected == actual).all() 128 | assert expected.dtype == np.float64 129 | 130 | 131 | def test_read_image(): 132 | pytest.importorskip('skimage') 133 | urlpath = os.path.join(here, 'data', 'images', 'beach57.tif') 134 | source = ImageSource(urlpath=urlpath) 135 | array = source.read() 136 | assert array.shape == (256, 252, 3) 137 | assert array.dtype == np.uint8 138 | 139 | 140 | def test_read_image_and_exif(): 141 | pytest.importorskip('skimage') 142 | urlpath = os.path.join(here, 'data', 'images', 'beach57.tif') 143 | source = ImageSource(urlpath=urlpath, exif_tags=True) 144 | ds = source.read() 145 | assert ds['raster'].shape == (1, 256, 252, 3) 146 | assert ds['raster'].dtype == np.uint8 147 | assert ds['EXIF Image ImageWidth'].values[0].values == [252] 148 | assert ds['EXIF Image ImageLength'].values[0].values == [256] 149 | 150 | 151 | def test_read_image_and_given_exif_tag(): 152 | pytest.importorskip('skimage') 153 | urlpath = os.path.join(here, 'data', 'images', 'beach57.tif') 154 | source = ImageSource(urlpath=urlpath, exif_tags=['Image ImageWidth']) 155 | ds = source.read() 156 | assert ds['raster'].shape == (1, 256, 252, 3) 157 | assert ds['raster'].dtype == np.uint8 158 | assert ds['EXIF Image ImageWidth'].values[0].values == [252] 159 | with pytest.raises(KeyError): 160 | ds['EXIF Image ImageLength'] 161 | 162 | 163 | def test_read_images_as_glob_with_coerce(): 164 | pytest.importorskip('skimage') 165 | urlpath = os.path.join(here, 'data', 'images', '*') 166 | source = ImageSource(urlpath=urlpath, coerce_shape=(256, 256)) 167 | array = source.read() 168 | assert array.shape == (3, 256, 256, 3) 169 | 170 | 171 | def test_read_images_and_exif_as_glob_with_coerce(): 172 | pytest.importorskip('skimage') 173 | urlpath = os.path.join(here, 'data', 
'images', '*') 174 | source = ImageSource(urlpath=urlpath, coerce_shape=(256, 256), exif_tags=True) 175 | ds = source.read() 176 | assert ds['raster'].shape == (3, 256, 256, 3) 177 | assert ds['EXIF Image ImageWidth'].shape == (3,) 178 | -------------------------------------------------------------------------------- /intake_xarray/tests/test_intake_xarray.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | from unittest.mock import patch 4 | import tempfile 5 | 6 | import numpy as np 7 | import pytest 8 | import xarray as xr 9 | 10 | import intake 11 | 12 | here = os.path.dirname(__file__).rstrip('/') 13 | 14 | 15 | @pytest.mark.parametrize('source', ['netcdf', 'zarr']) 16 | def test_read(source, netcdf_source, zarr_source, dataset): 17 | source = {'netcdf': netcdf_source, 'zarr': zarr_source}[source] 18 | 19 | ds = source.read_chunked() 20 | assert ds.temp.chunks 21 | 22 | ds = source.read() 23 | assert ds.dims == dataset.dims 24 | assert np.all(ds.temp == dataset.temp) 25 | assert np.all(ds.rh == dataset.rh) 26 | 27 | 28 | def test_read_list_of_netcdf_files_with_combine_nested(): 29 | from intake_xarray.netcdf import NetCDFSource 30 | source = NetCDFSource([ 31 | os.path.join(here, 'data', 'example_1.nc'), 32 | os.path.join(here, 'data', 'example_2.nc'), 33 | ], 34 | combine='nested', 35 | concat_dim='concat_dim' 36 | ) 37 | d = source.to_dask() 38 | assert d.dims == {'lat': 5, 'lon': 10, 'level': 4, 'time': 1, 39 | 'concat_dim': 2} 40 | 41 | 42 | def test_read_glob_pattern_of_netcdf_files(): 43 | """If xarray is old, prompt user to update to use pattern""" 44 | from intake_xarray.netcdf import NetCDFSource 45 | source = NetCDFSource(os.path.join(here, 'data', 'example_{num: d}.nc'), 46 | concat_dim='num', combine='nested') 47 | d = source.to_dask() 48 | print(d.dims) 49 | assert d.dims == {'lat': 5, 'lon': 10, 'level': 4, 'time': 1, 50 | 'num': 2} 51 | assert (d.num.data == 
np.array([1, 2])).all() 52 | 53 | 54 | @pytest.mark.parametrize('source', ['netcdf', 'zarr']) 55 | def test_to_dask(source, netcdf_source, zarr_source, dataset): 56 | source = {'netcdf': netcdf_source, 'zarr': zarr_source}[source] 57 | ds = source.to_dask() 58 | 59 | assert ds.dims == dataset.dims 60 | assert np.all(ds.temp == dataset.temp) 61 | assert np.all(ds.rh == dataset.rh) 62 | 63 | 64 | def test_grib_dask(): 65 | pytest.importorskip('Nio') 66 | import dask.array as da 67 | cat = intake.open_catalog(os.path.join(here, 'data', 'catalog.yaml')) 68 | x = cat.grib.to_dask() 69 | assert len(x.fileno) == 2 70 | assert isinstance(x.APCP_P8_L1_GLL0_acc6h.data, da.Array) 71 | values = x.APCP_P8_L1_GLL0_acc6h.data.compute() 72 | x2 = cat.grib.read() 73 | assert (values == x2.APCP_P8_L1_GLL0_acc6h.values).all() 74 | 75 | 76 | def test_rasterio(): 77 | import dask.array as da 78 | pytest.importorskip('rasterio') 79 | cat = intake.open_catalog(os.path.join(here, 'data', 'catalog.yaml')) 80 | s = cat.tiff_source 81 | x = s.to_dask() 82 | assert isinstance(x.band_data.data, da.Array) 83 | x = s.read() 84 | assert x.band_data.shape == (3, 718, 791) 85 | 86 | 87 | def test_rasterio_glob(): 88 | import dask.array as da 89 | pytest.importorskip('rasterio') 90 | cat = intake.open_catalog(os.path.join(here, 'data', 'catalog.yaml')) 91 | s = cat.tiff_glob_source 92 | x = s.to_dask() 93 | assert isinstance(x.band_data.data, da.Array) 94 | x = s.read() 95 | assert x.band_data.shape == (3, 718, 791) 96 | 97 | 98 | def test_rasterio_empty_glob(): 99 | pytest.importorskip('rasterio') 100 | cat = intake.open_catalog(os.path.join(here, 'data', 'catalog.yaml')) 101 | s = cat.empty_glob 102 | with pytest.raises(Exception): 103 | s.read() 104 | 105 | 106 | def test_read_pattern_concat_on_new_dim(): 107 | pytest.importorskip('rasterio') 108 | cat = intake.open_catalog(os.path.join(here, 'data', 'catalog.yaml')) 109 | colors = cat.pattern_tiff_source_concat_on_new_dim 110 | 111 | da = 
colors.read().band_data 112 | assert da.shape == (2, 3, 64, 64) 113 | assert len(da.new_dim) == 2 114 | assert set(da.new_dim.data) == set(['red', 'green']) 115 | assert da[da.new_dim == 'red'].shape == (1, 3, 64, 64) 116 | 117 | rgb = {'red': [204, 17, 17], 'green': [17, 204, 17]} 118 | for color, values in rgb.items(): 119 | for i, v in enumerate(values): 120 | assert (da[da.new_dim == color][0].sel(band=i+1).values == v).all() 121 | 122 | 123 | def test_read_pattern_field_as_band(): 124 | pytest.importorskip('rasterio') 125 | cat = intake.open_catalog(os.path.join(here, 'data', 'catalog.yaml')) 126 | colors = cat.pattern_tiff_source_path_pattern_field_as_band 127 | 128 | da = colors.read().band_data 129 | assert da.shape == (2, 3, 64, 64) 130 | assert set(da.color.data) == set(['red', 'green']) 131 | assert da[da.color == 'red'].squeeze().shape == (3, 64, 64) 132 | 133 | rgb = {'red': [204, 17, 17], 'green': [17, 204, 17]} 134 | for color, values in rgb.items(): 135 | for i, v in enumerate(values): 136 | assert (da[da.color == color][0][i].values == v).all() 137 | 138 | 139 | def test_read_pattern_path_as_pattern_as_str_with_list_of_urlpaths(): 140 | pytest.importorskip('rasterio') 141 | cat = intake.open_catalog(os.path.join(here, 'data', 'catalog.yaml')) 142 | colors = cat.pattern_tiff_source_path_pattern_as_str 143 | 144 | da = colors.read().band_data 145 | assert da.shape == (2, 3, 64, 64) 146 | assert len(da.color) == 2 147 | assert set(da.color.data) == set(['red', 'green']) 148 | 149 | assert da.sel(color='red').shape == (3, 64, 64) 150 | 151 | rgb = {'red': [204, 17, 17], 'green': [17, 204, 17]} 152 | for color, values in rgb.items(): 153 | for i, v in enumerate(values): 154 | assert (da.sel(color=color).sel(band=i+1).values == v).all() 155 | 156 | 157 | def test_read_image(): 158 | pytest.importorskip('skimage') 159 | from intake_xarray.image import ImageSource 160 | im = ImageSource(os.path.join(here, 'data', 'little_red.tif')) 161 | da = im.read() 162 
| assert da.shape == (64, 64, 3) 163 | 164 | 165 | def test_read_images(): 166 | pytest.importorskip('skimage') 167 | from intake_xarray.image import ImageSource 168 | im = ImageSource(os.path.join(here, 'data', 'little_*.tif')) 169 | da = im.read() 170 | assert da.shape == (2, 64, 64, 3) 171 | assert da.dims == ('concat_dim', 'y', 'x', 'channel') 172 | 173 | 174 | def test_read_images_with_pattern(): 175 | pytest.importorskip('skimage') 176 | from intake_xarray.image import ImageSource 177 | path = os.path.join(here, 'data', 'little_{color}.tif') 178 | im = ImageSource(path, concat_dim='color') 179 | da = im.read() 180 | assert da.shape == (2, 64, 64, 3) 181 | assert len(da.color) == 2 182 | assert set(da.color.data) == set(['red', 'green']) 183 | 184 | 185 | def test_read_images_with_multiple_concat_dims_with_pattern(): 186 | pytest.importorskip('skimage') 187 | from intake_xarray.image import ImageSource 188 | path = os.path.join(here, 'data', '{size}_{color}.tif') 189 | im = ImageSource(path, concat_dim=['size', 'color']) 190 | ds = im.read() 191 | assert ds.sel(color='red', size='little').shape == (64, 64, 3) 192 | 193 | 194 | def test_read_jpg_image(): 195 | pytest.importorskip('skimage') 196 | from intake_xarray.image import ImageSource 197 | im = ImageSource(os.path.join(here, 'data', 'dog.jpg')) 198 | da = im.read() 199 | assert da.shape == (192, 192) 200 | 201 | 202 | @pytest.mark.parametrize("engine", ["pydap", "netcdf4"]) 203 | def test_read_opendap_no_auth(engine): 204 | pytest.importorskip("pydap") 205 | cat = intake.open_catalog(os.path.join(here, "data", "catalog.yaml")) 206 | source = cat["opendap_source_{}".format(engine)] 207 | x = source.read() 208 | assert x.TIME.shape == (12,) 209 | 210 | 211 | @pytest.mark.parametrize("auth", ["esgf", "urs"]) 212 | def test_read_opendap_with_auth(auth): 213 | pytest.importorskip("pydap") 214 | from intake_xarray.opendap import OpenDapSource 215 | 216 | os.environ["DAP_USER"] = "username" 217 | 
os.environ["DAP_PASSWORD"] = "password" 218 | urlpath = "http://test.opendap.org/opendap/hyrax/data/nc/123.nc" 219 | 220 | with patch( 221 | f"pydap.cas.{auth}.setup_session", return_value=None 222 | ) as mock_setup_session: 223 | source = OpenDapSource(urlpath=urlpath, chunks={}, auth=auth, engine="pydap") 224 | source.discover() 225 | mock_setup_session.assert_called_once_with( 226 | os.environ["DAP_USER"], os.environ["DAP_PASSWORD"], check_url=urlpath 227 | ) 228 | 229 | 230 | def test_read_opendap_mfdataset_with_engine(): 231 | pytest.importorskip("pydap") 232 | from intake_xarray.opendap import OpenDapSource 233 | urls = [ 234 | 'http://example.com/opendap/fake1.nc', 235 | 'http://example.com/opendap/fake2.nc', 236 | ] 237 | with patch('xarray.open_mfdataset') as open_mfdataset_mock: 238 | open_mfdataset_mock.return_value = 'dataset' 239 | source = OpenDapSource(urlpath=urls, chunks={}, auth=None, engine='fake-engine') 240 | retval = source.read() 241 | assert open_mfdataset_mock.called_with(urls, chunks={}, engine='fake-engine') 242 | assert retval == 'dataset' 243 | 244 | 245 | @pytest.mark.parametrize("auth", ["esgf", "urs"]) 246 | def test_read_opendap_with_auth_netcdf4(auth): 247 | from intake_xarray.opendap import OpenDapSource 248 | 249 | os.environ["DAP_USER"] = "username" 250 | os.environ["DAP_PASSWORD"] = "password" 251 | urlpath = "http://test.opendap.org/opendap/hyrax/data/nc/123.nc" 252 | 253 | with patch( 254 | f"pydap.cas.{auth}.setup_session", return_value=1 255 | ) as mock_setup_session: 256 | source = OpenDapSource(urlpath=urlpath, chunks={}, auth=auth, engine="pydap") 257 | with pytest.raises(Exception): 258 | source.discover() 259 | 260 | 261 | def test_read_opendap_invalid_auth(): 262 | pytest.importorskip("pydap") 263 | from intake_xarray.opendap import OpenDapSource 264 | 265 | source = OpenDapSource(urlpath="https://test.url", chunks={}, auth="abcd", engine="pydap") 266 | with pytest.raises(Exception): 267 | source.discover() 268 | 269 
| 270 | def test_read_opendap_invalid_engine(): 271 | from intake_xarray.opendap import OpenDapSource 272 | 273 | source = OpenDapSource(urlpath="https://test.url", chunks={}, auth=None, engine="abcd") 274 | with pytest.raises(ValueError): 275 | source.discover() 276 | 277 | 278 | def test_cached_list_netcdf(): 279 | tempd = str(tempfile.mkdtemp()) 280 | from intake_xarray.netcdf import NetCDFSource 281 | source = NetCDFSource([ 282 | 'filecache://' + os.path.join(here, 'data', 'example_1.nc'), 283 | 'filecache://' + os.path.join(here, 'data', 'example_2.nc'), 284 | ], 285 | combine='nested', 286 | concat_dim='concat_dim', 287 | storage_options={'cache_storage': tempd, 'target_protocol': 'file'}, 288 | xarray_kwargs={"engine": "scipy"}, 289 | open_local=True, 290 | ) 291 | d = source.read() 292 | assert d.dims == {'lat': 5, 'lon': 10, 'level': 4, 'time': 1, 293 | 'concat_dim': 2} 294 | assert os.listdir(tempd) 295 | -------------------------------------------------------------------------------- /intake_xarray/tests/test_network.py: -------------------------------------------------------------------------------- 1 | # Tests that read public data over the internet 2 | import intake 3 | import pytest 4 | import xarray as xr 5 | 6 | 7 | # RasterIOSource 8 | @pytest.mark.xfail 9 | def test_open_rasterio_http(): 10 | prefix = 'https://landsat-pds.s3.us-west-2.amazonaws.com/L8/139/045' 11 | image = 'LC81390452014295LGN00/LC81390452014295LGN00_B1.TIF' 12 | url = f'{prefix}/{image}' 13 | source = intake.open_rasterio(url, 14 | chunks=dict(band=1)) 15 | ds = source.to_dask() 16 | assert isinstance(ds, xr.core.dataarray.DataArray) 17 | 18 | 19 | def test_open_rasterio_s3(): 20 | bucket = 's3://landsat-pds' 21 | key = 'L8/139/045/LC81390452014295LGN00/LC81390452014295LGN00_B1.TIF' 22 | url = f'{bucket}/{key}' 23 | source = intake.open_rasterio(url, 24 | chunks=dict(band=1), 25 | storage_options = dict(anon=True)) 26 | ds = source.to_dask() 27 | assert isinstance(ds, 
xr.core.dataarray.DataArray) 28 | 29 | 30 | # NETCDFSource 31 | def test_open_netcdf_gs(): 32 | bucket = 'gs://ldeo-glaciology' 33 | key = 'bedmachine/BedMachineAntarctica_2019-11-05_v01.nc' 34 | url = f'{bucket}/{key}' 35 | source = intake.open_netcdf(url, 36 | chunks=3000, 37 | xarray_kwargs=dict(engine='h5netcdf'), 38 | ) 39 | ds = source.to_dask() 40 | assert isinstance(ds, xr.core.dataarray.Dataset) 41 | 42 | 43 | @pytest.mark.xfail 44 | def test_open_netcdf_s3(): 45 | bucket = 's3://its-live-data.jpl.nasa.gov' 46 | key = 'icesat2/alt06/rel003/ATL06_20181230162257_00340206_003_01.h5' 47 | url = f'{bucket}/{key}' 48 | source = intake.open_netcdf(url, 49 | xarray_kwargs=dict(group='gt1l/land_ice_segments', engine='h5netcdf'), 50 | storage_options=dict(anon=True), 51 | ) 52 | ds = source.to_dask() 53 | assert isinstance(ds._file_obj, xr.backends.h5netcdf_.H5NetCDFStore) 54 | assert isinstance(ds, xr.core.dataarray.Dataset) 55 | 56 | 57 | @pytest.mark.xfail 58 | def test_open_netcdf_s3_simplecache(): 59 | bucket = 's3://its-live-data.jpl.nasa.gov' 60 | key = 'icesat2/alt06/rel003/ATL06_20181230162257_00340206_003_01.h5' 61 | url = f'simplecache::{bucket}/{key}' 62 | source = intake.open_netcdf(url, 63 | xarray_kwargs=dict(group='gt1l/land_ice_segments', engine='h5netcdf'), 64 | storage_options=dict(s3={'anon': True}), 65 | ) 66 | ds = source.to_dask() 67 | assert isinstance(ds._file_obj, xr.backends.h5netcdf_.H5NetCDFStore) 68 | assert isinstance(ds, xr.core.dataarray.Dataset) 69 | 70 | 71 | def test_open_opendap(): 72 | url = 'https://www.ncei.noaa.gov/thredds/dodsC/model-cbofs-files/2021/12/nos.cbofs.fields.n001.20211231.t18z.nc' 73 | 74 | source = intake.open_opendap(url, engine='netcdf4', chunks={'time': 1}) 75 | ds = source.to_dask() 76 | assert isinstance(ds, xr.core.dataarray.Dataset) 77 | 78 | 79 | def test_open_list_opendap(): 80 | url1 = 'https://www.ncei.noaa.gov/thredds/dodsC/model-cbofs-files/2021/12/nos.cbofs.fields.n001.20211231.t18z.nc' 81 | url2 = 
class ZarrSource(IntakeXarraySourceAdapter):
    """Open a xarray dataset.

    Multifile open is triggered automatically when the path is a list, or a
    string containing ``"*"``.

    The implicit default of the ``chunks`` kwarg is ``{}``: dask opens the
    dataset with the chunk sizes inherent in the store.  Pass ``chunks=None``
    to bypass dask entirely (e.g. when only ``.read()`` is needed).

    Parameters
    ----------
    urlpath: str
        Path to source. This can be a local directory or a remote data
        service (i.e., with a protocol specifier like ``'s3://``).
    storage_options: dict
        Parameters passed to the backend file-system
    kwargs:
        Further parameters are passed to xarray
    """
    name = 'zarr'

    def __init__(self, urlpath, storage_options=None, metadata=None, **kwargs):
        zarr_data = readers.datatypes.Zarr(
            urlpath,
            storage_options=storage_options,
            metadata=metadata,
        )
        self.reader = readers.XArrayDatasetReader(zarr_data, **kwargs)
#-----------------------------------------------------------------------------

from setuptools import setup, find_packages
import versioneer

# Runtime requirements.  xarray and fsspec use calendar versioning
# (YYYY.M.D), so ">=2022" means "any release from 2022 onwards".
# The original pin 'xarray >=02022' carried a stray leading zero.
INSTALL_REQUIRES = ['intake >=2', 'xarray >=2022', 'zarr', 'dask >=2.2',
                    'netcdf4', 'fsspec >2022', 'msgpack', 'requests']

# Read the long description up front with an explicit encoding; the old
# inline open() leaked the file handle.
with open('README.md', encoding='utf-8') as f:
    LONG_DESCRIPTION = f.read()

setup(
    name='intake-xarray',
    version=versioneer.get_version(),
    cmdclass=versioneer.get_cmdclass(),
    description='xarray plugins for Intake',
    url='https://github.com/intake/intake-xarray',
    maintainer='Martin Durant',
    maintainer_email='mdurant@anaconda.com',
    license='BSD',
    py_modules=['intake_xarray'],
    packages=find_packages(),
    entry_points={
        'intake.drivers': [
            'netcdf = intake_xarray.netcdf:NetCDFSource',
            'zarr = intake_xarray.xzarr:ZarrSource',
            'opendap = intake_xarray.opendap:OpenDapSource',
            'xarray_image = intake_xarray.image:ImageSource',
            'rasterio = intake_xarray.raster:RasterIOSource',
        ]
    },
    package_data={'': ['*.csv', '*.yml', '*.html']},
    include_package_data=True,
    install_requires=INSTALL_REQUIRES,
    long_description=LONG_DESCRIPTION,
    long_description_content_type="text/markdown",
    zip_safe=False,
)
10 | * https://github.com/warner/python-versioneer 11 | * Brian Warner 12 | * License: Public Domain 13 | * Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy 14 | * [![Latest Version] 15 | (https://pypip.in/version/versioneer/badge.svg?style=flat) 16 | ](https://pypi.python.org/pypi/versioneer/) 17 | * [![Build Status] 18 | (https://travis-ci.org/warner/python-versioneer.png?branch=master) 19 | ](https://travis-ci.org/warner/python-versioneer) 20 | 21 | This is a tool for managing a recorded version number in distutils-based 22 | python projects. The goal is to remove the tedious and error-prone "update 23 | the embedded version string" step from your release process. Making a new 24 | release should be as easy as recording a new tag in your version-control 25 | system, and maybe making new tarballs. 26 | 27 | 28 | ## Quick Install 29 | 30 | * `pip install versioneer` to somewhere to your $PATH 31 | * add a `[versioneer]` section to your setup.cfg (see below) 32 | * run `versioneer install` in your source tree, commit the results 33 | 34 | ## Version Identifiers 35 | 36 | Source trees come from a variety of places: 37 | 38 | * a version-control system checkout (mostly used by developers) 39 | * a nightly tarball, produced by build automation 40 | * a snapshot tarball, produced by a web-based VCS browser, like github's 41 | "tarball from tag" feature 42 | * a release tarball, produced by "setup.py sdist", distributed through PyPI 43 | 44 | Within each source tree, the version identifier (either a string or a number, 45 | this tool is format-agnostic) can come from a variety of places: 46 | 47 | * ask the VCS tool itself, e.g. 
"git describe" (for checkouts), which knows 48 | about recent "tags" and an absolute revision-id 49 | * the name of the directory into which the tarball was unpacked 50 | * an expanded VCS keyword ($Id$, etc) 51 | * a `_version.py` created by some earlier build step 52 | 53 | For released software, the version identifier is closely related to a VCS 54 | tag. Some projects use tag names that include more than just the version 55 | string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool 56 | needs to strip the tag prefix to extract the version identifier. For 57 | unreleased software (between tags), the version identifier should provide 58 | enough information to help developers recreate the same tree, while also 59 | giving them an idea of roughly how old the tree is (after version 1.2, before 60 | version 1.3). Many VCS systems can report a description that captures this, 61 | for example `git describe --tags --dirty --always` reports things like 62 | "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the 63 | 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has 64 | uncommitted changes. 65 | 66 | The version identifier is used for multiple purposes: 67 | 68 | * to allow the module to self-identify its version: `myproject.__version__` 69 | * to choose a name and prefix for a 'setup.py sdist' tarball 70 | 71 | ## Theory of Operation 72 | 73 | Versioneer works by adding a special `_version.py` file into your source 74 | tree, where your `__init__.py` can import it. This `_version.py` knows how to 75 | dynamically ask the VCS tool for version information at import time. 76 | 77 | `_version.py` also contains `$Revision$` markers, and the installation 78 | process marks `_version.py` to have this marker rewritten with a tag name 79 | during the `git archive` command. As a result, generated tarballs will 80 | contain enough information to get the proper version. 
81 | 82 | To allow `setup.py` to compute a version too, a `versioneer.py` is added to 83 | the top level of your source tree, next to `setup.py` and the `setup.cfg` 84 | that configures it. This overrides several distutils/setuptools commands to 85 | compute the version when invoked, and changes `setup.py build` and `setup.py 86 | sdist` to replace `_version.py` with a small static file that contains just 87 | the generated version data. 88 | 89 | ## Installation 90 | 91 | See [INSTALL.md](./INSTALL.md) for detailed installation instructions. 92 | 93 | ## Version-String Flavors 94 | 95 | Code which uses Versioneer can learn about its version string at runtime by 96 | importing `_version` from your main `__init__.py` file and running the 97 | `get_versions()` function. From the "outside" (e.g. in `setup.py`), you can 98 | import the top-level `versioneer.py` and run `get_versions()`. 99 | 100 | Both functions return a dictionary with different flavors of version 101 | information: 102 | 103 | * `['version']`: A condensed version string, rendered using the selected 104 | style. This is the most commonly used value for the project's version 105 | string. The default "pep440" style yields strings like `0.11`, 106 | `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section 107 | below for alternative styles. 108 | 109 | * `['full-revisionid']`: detailed revision identifier. For Git, this is the 110 | full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". 111 | 112 | * `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the 113 | commit date in ISO 8601 format. This will be None if the date is not 114 | available. 115 | 116 | * `['dirty']`: a boolean, True if the tree has uncommitted changes. 
Note that 117 | this is only accurate if run in a VCS checkout, otherwise it is likely to 118 | be False or None 119 | 120 | * `['error']`: if the version string could not be computed, this will be set 121 | to a string describing the problem, otherwise it will be None. It may be 122 | useful to throw an exception in setup.py if this is set, to avoid e.g. 123 | creating tarballs with a version string of "unknown". 124 | 125 | Some variants are more useful than others. Including `full-revisionid` in a 126 | bug report should allow developers to reconstruct the exact code being tested 127 | (or indicate the presence of local changes that should be shared with the 128 | developers). `version` is suitable for display in an "about" box or a CLI 129 | `--version` output: it can be easily compared against release notes and lists 130 | of bugs fixed in various releases. 131 | 132 | The installer adds the following text to your `__init__.py` to place a basic 133 | version in `YOURPROJECT.__version__`: 134 | 135 | from ._version import get_versions 136 | __version__ = get_versions()['version'] 137 | del get_versions 138 | 139 | ## Styles 140 | 141 | The setup.cfg `style=` configuration controls how the VCS information is 142 | rendered into a version string. 143 | 144 | The default style, "pep440", produces a PEP440-compliant string, equal to the 145 | un-prefixed tag name for actual releases, and containing an additional "local 146 | version" section with more detail for in-between builds. For Git, this is 147 | TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags 148 | --dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the 149 | tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and 150 | that this commit is two revisions ("+2") beyond the "0.11" tag. For released 151 | software (exactly equal to a known tag), the identifier will only contain the 152 | stripped tag, e.g. "0.11". 
153 | 154 | Other styles are available. See [details.md](details.md) in the Versioneer 155 | source tree for descriptions. 156 | 157 | ## Debugging 158 | 159 | Versioneer tries to avoid fatal errors: if something goes wrong, it will tend 160 | to return a version of "0+unknown". To investigate the problem, run `setup.py 161 | version`, which will run the version-lookup code in a verbose mode, and will 162 | display the full contents of `get_versions()` (including the `error` string, 163 | which may help identify what went wrong). 164 | 165 | ## Known Limitations 166 | 167 | Some situations are known to cause problems for Versioneer. This details the 168 | most significant ones. More can be found on Github 169 | [issues page](https://github.com/warner/python-versioneer/issues). 170 | 171 | ### Subprojects 172 | 173 | Versioneer has limited support for source trees in which `setup.py` is not in 174 | the root directory (e.g. `setup.py` and `.git/` are *not* siblings). There are 175 | two common reasons why `setup.py` might not be in the root: 176 | 177 | * Source trees which contain multiple subprojects, such as 178 | [Buildbot](https://github.com/buildbot/buildbot), which contains both 179 | "master" and "slave" subprojects, each with their own `setup.py`, 180 | `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI 181 | distributions (and upload multiple independently-installable tarballs). 182 | * Source trees whose main purpose is to contain a C library, but which also 183 | provide bindings to Python (and perhaps other languages) in subdirectories. 184 | 185 | Versioneer will look for `.git` in parent directories, and most operations 186 | should get the right version string. However `pip` and `setuptools` have bugs 187 | and implementation details which frequently cause `pip install .` from a 188 | subproject directory to fail to find a correct version string (so it usually 189 | defaults to `0+unknown`). 
190 | 191 | `pip install --editable .` should work correctly. `setup.py install` might 192 | work too. 193 | 194 | Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in 195 | some later version. 196 | 197 | [Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking 198 | this issue. The discussion in 199 | [PR #61](https://github.com/warner/python-versioneer/pull/61) describes the 200 | issue from the Versioneer side in more detail. 201 | [pip PR#3176](https://github.com/pypa/pip/pull/3176) and 202 | [pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve 203 | pip to let Versioneer work correctly. 204 | 205 | Versioneer-0.16 and earlier only looked for a `.git` directory next to the 206 | `setup.cfg`, so subprojects were completely unsupported with those releases. 207 | 208 | ### Editable installs with setuptools <= 18.5 209 | 210 | `setup.py develop` and `pip install --editable .` allow you to install a 211 | project into a virtualenv once, then continue editing the source code (and 212 | test) without re-installing after every change. 213 | 214 | "Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a 215 | convenient way to specify executable scripts that should be installed along 216 | with the python package. 217 | 218 | These both work as expected when using modern setuptools. When using 219 | setuptools-18.5 or earlier, however, certain operations will cause 220 | `pkg_resources.DistributionNotFound` errors when running the entrypoint 221 | script, which must be resolved by re-installing the package. This happens 222 | when the install happens with one version, then the egg_info data is 223 | regenerated while a different version is checked out. Many setup.py commands 224 | cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into 225 | a different virtualenv), so this can be surprising. 
226 | 227 | [Bug #83](https://github.com/warner/python-versioneer/issues/83) describes 228 | this one, but upgrading to a newer version of setuptools should probably 229 | resolve it. 230 | 231 | ### Unicode version strings 232 | 233 | While Versioneer works (and is continually tested) with both Python 2 and 234 | Python 3, it is not entirely consistent with bytes-vs-unicode distinctions. 235 | Newer releases probably generate unicode version strings on py2. It's not 236 | clear that this is wrong, but it may be surprising for applications when then 237 | write these strings to a network connection or include them in bytes-oriented 238 | APIs like cryptographic checksums. 239 | 240 | [Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates 241 | this question. 242 | 243 | 244 | ## Updating Versioneer 245 | 246 | To upgrade your project to a new release of Versioneer, do the following: 247 | 248 | * install the new Versioneer (`pip install -U versioneer` or equivalent) 249 | * edit `setup.cfg`, if necessary, to include any new configuration settings 250 | indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. 251 | * re-run `versioneer install` in your source tree, to replace 252 | `SRC/_version.py` 253 | * commit any changed files 254 | 255 | ## Future Directions 256 | 257 | This tool is designed to make it easily extended to other version-control 258 | systems: all VCS-specific components are in separate directories like 259 | src/git/ . The top-level `versioneer.py` script is assembled from these 260 | components by running make-versioneer.py . In the future, make-versioneer.py 261 | will take a VCS name as an argument, and will construct a version of 262 | `versioneer.py` that is specific to the given VCS. It might also take the 263 | configuration arguments that are currently provided manually during 264 | installation by editing setup.py . 
Alternatively, it might go the other 265 | direction and include code from all supported VCS systems, reducing the 266 | number of intermediate scripts. 267 | 268 | 269 | ## License 270 | 271 | To make Versioneer easier to embed, all its code is dedicated to the public 272 | domain. The `_version.py` that it creates is also in the public domain. 273 | Specifically, both are released under the Creative Commons "Public Domain 274 | Dedication" license (CC0-1.0), as described in 275 | https://creativecommons.org/publicdomain/zero/1.0/ . 276 | 277 | """ 278 | 279 | from __future__ import print_function 280 | try: 281 | import configparser 282 | except ImportError: 283 | import ConfigParser as configparser 284 | import errno 285 | import json 286 | import os 287 | import re 288 | import subprocess 289 | import sys 290 | 291 | 292 | class VersioneerConfig: 293 | """Container for Versioneer configuration parameters.""" 294 | 295 | 296 | def get_root(): 297 | """Get the project root directory. 298 | 299 | We require that all commands are run from the project root, i.e. the 300 | directory that contains setup.py, setup.cfg, and versioneer.py . 301 | """ 302 | root = os.path.realpath(os.path.abspath(os.getcwd())) 303 | setup_py = os.path.join(root, "setup.py") 304 | versioneer_py = os.path.join(root, "versioneer.py") 305 | if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): 306 | # allow 'python path/to/setup.py COMMAND' 307 | root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) 308 | setup_py = os.path.join(root, "setup.py") 309 | versioneer_py = os.path.join(root, "versioneer.py") 310 | if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): 311 | err = ("Versioneer was unable to run the project root directory. 
" 312 | "Versioneer requires setup.py to be executed from " 313 | "its immediate directory (like 'python setup.py COMMAND'), " 314 | "or in a way that lets it use sys.argv[0] to find the root " 315 | "(like 'python path/to/setup.py COMMAND').") 316 | raise VersioneerBadRootError(err) 317 | try: 318 | # Certain runtime workflows (setup.py install/develop in a setuptools 319 | # tree) execute all dependencies in a single python process, so 320 | # "versioneer" may be imported multiple times, and python's shared 321 | # module-import table will cache the first one. So we can't use 322 | # os.path.dirname(__file__), as that will find whichever 323 | # versioneer.py was first imported, even in later projects. 324 | me = os.path.realpath(os.path.abspath(__file__)) 325 | me_dir = os.path.normcase(os.path.splitext(me)[0]) 326 | vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) 327 | if me_dir != vsr_dir: 328 | print("Warning: build in %s is using versioneer.py from %s" % 329 | (os.path.dirname(me), versioneer_py)) 330 | except NameError: 331 | pass 332 | return root 333 | 334 | 335 | def get_config_from_root(root): 336 | """Read the project setup.cfg file to determine Versioneer config.""" 337 | # This might raise EnvironmentError (if setup.cfg is missing), or 338 | # configparser.NoSectionError (if it lacks a [versioneer] section), or 339 | # configparser.NoOptionError (if it lacks "VCS="). See the docstring at 340 | # the top of versioneer.py for instructions on writing your setup.cfg . 
341 | setup_cfg = os.path.join(root, "setup.cfg") 342 | parser = configparser.SafeConfigParser() 343 | with open(setup_cfg, "r") as f: 344 | parser.readfp(f) 345 | VCS = parser.get("versioneer", "VCS") # mandatory 346 | 347 | def get(parser, name): 348 | if parser.has_option("versioneer", name): 349 | return parser.get("versioneer", name) 350 | return None 351 | 352 | cfg = VersioneerConfig() 353 | cfg.VCS = VCS 354 | cfg.style = get(parser, "style") or "" 355 | cfg.versionfile_source = get(parser, "versionfile_source") 356 | cfg.versionfile_build = get(parser, "versionfile_build") 357 | cfg.tag_prefix = get(parser, "tag_prefix") 358 | if cfg.tag_prefix in ("''", '""'): 359 | cfg.tag_prefix = "" 360 | cfg.parentdir_prefix = get(parser, "parentdir_prefix") 361 | cfg.verbose = get(parser, "verbose") 362 | return cfg 363 | 364 | 365 | class NotThisMethod(Exception): 366 | """Exception raised if a method is not valid for the current scenario.""" 367 | 368 | 369 | # these dictionaries contain VCS-specific tools 370 | LONG_VERSION_PY = {} 371 | HANDLERS = {} 372 | 373 | 374 | def register_vcs_handler(vcs, method): # decorator 375 | """Decorator to mark a method as the handler for a particular VCS.""" 376 | 377 | def decorate(f): 378 | """Store f in HANDLERS[vcs][method].""" 379 | if vcs not in HANDLERS: 380 | HANDLERS[vcs] = {} 381 | HANDLERS[vcs][method] = f 382 | return f 383 | 384 | return decorate 385 | 386 | 387 | def run_command(commands, 388 | args, 389 | cwd=None, 390 | verbose=False, 391 | hide_stderr=False, 392 | env=None): 393 | """Call the given command(s).""" 394 | assert isinstance(commands, list) 395 | p = None 396 | for c in commands: 397 | try: 398 | dispcmd = str([c] + args) 399 | # remember shell=False, so use git.cmd on windows, not just git 400 | p = subprocess.Popen( 401 | [c] + args, 402 | cwd=cwd, 403 | env=env, 404 | stdout=subprocess.PIPE, 405 | stderr=(subprocess.PIPE if hide_stderr else None)) 406 | break 407 | except EnvironmentError: 408 | e 
= sys.exc_info()[1] 409 | if e.errno == errno.ENOENT: 410 | continue 411 | if verbose: 412 | print("unable to run %s" % dispcmd) 413 | print(e) 414 | return None, None 415 | else: 416 | if verbose: 417 | print("unable to find command, tried %s" % (commands, )) 418 | return None, None 419 | stdout = p.communicate()[0].strip() 420 | if sys.version_info[0] >= 3: 421 | stdout = stdout.decode() 422 | if p.returncode != 0: 423 | if verbose: 424 | print("unable to run %s (error)" % dispcmd) 425 | print("stdout was %s" % stdout) 426 | return None, p.returncode 427 | return stdout, p.returncode 428 | 429 | 430 | LONG_VERSION_PY['git'] = ''' 431 | # This file helps to compute a version number in source trees obtained from 432 | # git-archive tarball (such as those provided by githubs download-from-tag 433 | # feature). Distribution tarballs (built by setup.py sdist) and build 434 | # directories (produced by setup.py build) will contain a much shorter file 435 | # that just contains the computed version number. 436 | 437 | # This file is released into the public domain. Generated by 438 | # versioneer-0.18 (https://github.com/warner/python-versioneer) 439 | 440 | """Git implementation of _version.py.""" 441 | 442 | import errno 443 | import os 444 | import re 445 | import subprocess 446 | import sys 447 | 448 | 449 | def get_keywords(): 450 | """Get the keywords needed to look up the version information.""" 451 | # these strings will be replaced by git during git-archive. 452 | # setup.py/versioneer.py will grep for the variable names, so they must 453 | # each be defined on a line of their own. _version.py will just call 454 | # get_keywords(). 
455 | git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" 456 | git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" 457 | git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" 458 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} 459 | return keywords 460 | 461 | 462 | class VersioneerConfig: 463 | """Container for Versioneer configuration parameters.""" 464 | 465 | 466 | def get_config(): 467 | """Create, populate and return the VersioneerConfig() object.""" 468 | # these strings are filled in when 'setup.py versioneer' creates 469 | # _version.py 470 | cfg = VersioneerConfig() 471 | cfg.VCS = "git" 472 | cfg.style = "%(STYLE)s" 473 | cfg.tag_prefix = "%(TAG_PREFIX)s" 474 | cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" 475 | cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" 476 | cfg.verbose = False 477 | return cfg 478 | 479 | 480 | class NotThisMethod(Exception): 481 | """Exception raised if a method is not valid for the current scenario.""" 482 | 483 | 484 | LONG_VERSION_PY = {} 485 | HANDLERS = {} 486 | 487 | 488 | def register_vcs_handler(vcs, method): # decorator 489 | """Decorator to mark a method as the handler for a particular VCS.""" 490 | def decorate(f): 491 | """Store f in HANDLERS[vcs][method].""" 492 | if vcs not in HANDLERS: 493 | HANDLERS[vcs] = {} 494 | HANDLERS[vcs][method] = f 495 | return f 496 | return decorate 497 | 498 | 499 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, 500 | env=None): 501 | """Call the given command(s).""" 502 | assert isinstance(commands, list) 503 | p = None 504 | for c in commands: 505 | try: 506 | dispcmd = str([c] + args) 507 | # remember shell=False, so use git.cmd on windows, not just git 508 | p = subprocess.Popen([c] + args, cwd=cwd, env=env, 509 | stdout=subprocess.PIPE, 510 | stderr=(subprocess.PIPE if hide_stderr 511 | else None)) 512 | break 513 | except EnvironmentError: 514 | e = sys.exc_info()[1] 515 | if e.errno == errno.ENOENT: 516 | continue 517 | if verbose: 518 | 
print("unable to run %%s" %% dispcmd) 519 | print(e) 520 | return None, None 521 | else: 522 | if verbose: 523 | print("unable to find command, tried %%s" %% (commands,)) 524 | return None, None 525 | stdout = p.communicate()[0].strip() 526 | if sys.version_info[0] >= 3: 527 | stdout = stdout.decode() 528 | if p.returncode != 0: 529 | if verbose: 530 | print("unable to run %%s (error)" %% dispcmd) 531 | print("stdout was %%s" %% stdout) 532 | return None, p.returncode 533 | return stdout, p.returncode 534 | 535 | 536 | def versions_from_parentdir(parentdir_prefix, root, verbose): 537 | """Try to determine the version from the parent directory name. 538 | 539 | Source tarballs conventionally unpack into a directory that includes both 540 | the project name and a version string. We will also support searching up 541 | two directory levels for an appropriately named parent directory 542 | """ 543 | rootdirs = [] 544 | 545 | for i in range(3): 546 | dirname = os.path.basename(root) 547 | if dirname.startswith(parentdir_prefix): 548 | return {"version": dirname[len(parentdir_prefix):], 549 | "full-revisionid": None, 550 | "dirty": False, "error": None, "date": None} 551 | else: 552 | rootdirs.append(root) 553 | root = os.path.dirname(root) # up a level 554 | 555 | if verbose: 556 | print("Tried directories %%s but none started with prefix %%s" %% 557 | (str(rootdirs), parentdir_prefix)) 558 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 559 | 560 | 561 | @register_vcs_handler("git", "get_keywords") 562 | def git_get_keywords(versionfile_abs): 563 | """Extract version information from the given file.""" 564 | # the code embedded in _version.py can just fetch the value of these 565 | # keywords. When used from setup.py, we don't want to import _version.py, 566 | # so we do it with a regexp instead. This function is not used from 567 | # _version.py. 
568 | keywords = {} 569 | try: 570 | f = open(versionfile_abs, "r") 571 | for line in f.readlines(): 572 | if line.strip().startswith("git_refnames ="): 573 | mo = re.search(r'=\s*"(.*)"', line) 574 | if mo: 575 | keywords["refnames"] = mo.group(1) 576 | if line.strip().startswith("git_full ="): 577 | mo = re.search(r'=\s*"(.*)"', line) 578 | if mo: 579 | keywords["full"] = mo.group(1) 580 | if line.strip().startswith("git_date ="): 581 | mo = re.search(r'=\s*"(.*)"', line) 582 | if mo: 583 | keywords["date"] = mo.group(1) 584 | f.close() 585 | except EnvironmentError: 586 | pass 587 | return keywords 588 | 589 | 590 | @register_vcs_handler("git", "keywords") 591 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 592 | """Get version information from git keywords.""" 593 | if not keywords: 594 | raise NotThisMethod("no keywords at all, weird") 595 | date = keywords.get("date") 596 | if date is not None: 597 | # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant 598 | # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 599 | # -like" string, which we must then edit to make compliant), because 600 | # it's been around since git-1.5.3, and it's too difficult to 601 | # discover which version we're using, or to work around using an 602 | # older one. 603 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 604 | refnames = keywords["refnames"].strip() 605 | if refnames.startswith("$Format"): 606 | if verbose: 607 | print("keywords are unexpanded, not using") 608 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 609 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 610 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 611 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 612 | TAG = "tag: " 613 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 614 | if not tags: 615 | # Either we're using git < 1.8.3, or there really are no tags. 
We use 616 | # a heuristic: assume all version tags have a digit. The old git %%d 617 | # expansion behaves like git log --decorate=short and strips out the 618 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 619 | # between branches and tags. By ignoring refnames without digits, we 620 | # filter out many common branch names like "release" and 621 | # "stabilization", as well as "HEAD" and "master". 622 | tags = set([r for r in refs if re.search(r'\d', r)]) 623 | if verbose: 624 | print("discarding '%%s', no digits" %% ",".join(refs - tags)) 625 | if verbose: 626 | print("likely tags: %%s" %% ",".join(sorted(tags))) 627 | for ref in sorted(tags): 628 | # sorting will prefer e.g. "2.0" over "2.0rc1" 629 | if ref.startswith(tag_prefix): 630 | r = ref[len(tag_prefix):] 631 | if verbose: 632 | print("picking %%s" %% r) 633 | return {"version": r, 634 | "full-revisionid": keywords["full"].strip(), 635 | "dirty": False, "error": None, 636 | "date": date} 637 | # no suitable tags, so version is "0+unknown", but full hex is still there 638 | if verbose: 639 | print("no suitable tags, using unknown + full revision id") 640 | return {"version": "0+unknown", 641 | "full-revisionid": keywords["full"].strip(), 642 | "dirty": False, "error": "no suitable tags", "date": None} 643 | 644 | 645 | @register_vcs_handler("git", "pieces_from_vcs") 646 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 647 | """Get version from 'git describe' in the root of the source tree. 648 | 649 | This only gets called if the git-archive 'subst' keywords were *not* 650 | expanded, and _version.py hasn't already been rewritten with a short 651 | version string, meaning we're inside a checked out source tree. 
652 | """ 653 | GITS = ["git"] 654 | if sys.platform == "win32": 655 | GITS = ["git.cmd", "git.exe"] 656 | 657 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, 658 | hide_stderr=True) 659 | if rc != 0: 660 | if verbose: 661 | print("Directory %%s not under git control" %% root) 662 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 663 | 664 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 665 | # if there isn't one, this yields HEX[-dirty] (no NUM) 666 | describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", 667 | "--always", "--long", 668 | "--match", "%%s*" %% tag_prefix], 669 | cwd=root) 670 | # --long was added in git-1.5.5 671 | if describe_out is None: 672 | raise NotThisMethod("'git describe' failed") 673 | describe_out = describe_out.strip() 674 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 675 | if full_out is None: 676 | raise NotThisMethod("'git rev-parse' failed") 677 | full_out = full_out.strip() 678 | 679 | pieces = {} 680 | pieces["long"] = full_out 681 | pieces["short"] = full_out[:7] # maybe improved later 682 | pieces["error"] = None 683 | 684 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 685 | # TAG might have hyphens. 686 | git_describe = describe_out 687 | 688 | # look for -dirty suffix 689 | dirty = git_describe.endswith("-dirty") 690 | pieces["dirty"] = dirty 691 | if dirty: 692 | git_describe = git_describe[:git_describe.rindex("-dirty")] 693 | 694 | # now we have TAG-NUM-gHEX or HEX 695 | 696 | if "-" in git_describe: 697 | # TAG-NUM-gHEX 698 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 699 | if not mo: 700 | # unparseable. Maybe git-describe is misbehaving? 
701 | pieces["error"] = ("unable to parse git-describe output: '%%s'" 702 | %% describe_out) 703 | return pieces 704 | 705 | # tag 706 | full_tag = mo.group(1) 707 | if not full_tag.startswith(tag_prefix): 708 | if verbose: 709 | fmt = "tag '%%s' doesn't start with prefix '%%s'" 710 | print(fmt %% (full_tag, tag_prefix)) 711 | pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" 712 | %% (full_tag, tag_prefix)) 713 | return pieces 714 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 715 | 716 | # distance: number of commits since tag 717 | pieces["distance"] = int(mo.group(2)) 718 | 719 | # commit: short hex revision ID 720 | pieces["short"] = mo.group(3) 721 | 722 | else: 723 | # HEX: no tags 724 | pieces["closest-tag"] = None 725 | count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], 726 | cwd=root) 727 | pieces["distance"] = int(count_out) # total number of commits 728 | 729 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 730 | date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], 731 | cwd=root)[0].strip() 732 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 733 | 734 | return pieces 735 | 736 | 737 | def plus_or_dot(pieces): 738 | """Return a + if we don't already have one, else return a .""" 739 | if "+" in pieces.get("closest-tag", ""): 740 | return "." 741 | return "+" 742 | 743 | 744 | def render_pep440(pieces): 745 | """Build up version string, with post-release "local version identifier". 746 | 747 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 748 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 749 | 750 | Exceptions: 751 | 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] 752 | """ 753 | if pieces["closest-tag"]: 754 | rendered = pieces["closest-tag"] 755 | if pieces["distance"] or pieces["dirty"]: 756 | rendered += plus_or_dot(pieces) 757 | rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) 758 | if pieces["dirty"]: 759 | rendered += ".dirty" 760 | else: 761 | # exception #1 762 | rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], 763 | pieces["short"]) 764 | if pieces["dirty"]: 765 | rendered += ".dirty" 766 | return rendered 767 | 768 | 769 | def render_pep440_pre(pieces): 770 | """TAG[.post.devDISTANCE] -- No -dirty. 771 | 772 | Exceptions: 773 | 1: no tags. 0.post.devDISTANCE 774 | """ 775 | if pieces["closest-tag"]: 776 | rendered = pieces["closest-tag"] 777 | if pieces["distance"]: 778 | rendered += ".post.dev%%d" %% pieces["distance"] 779 | else: 780 | # exception #1 781 | rendered = "0.post.dev%%d" %% pieces["distance"] 782 | return rendered 783 | 784 | 785 | def render_pep440_post(pieces): 786 | """TAG[.postDISTANCE[.dev0]+gHEX] . 787 | 788 | The ".dev0" means dirty. Note that .dev0 sorts backwards 789 | (a dirty tree will appear "older" than the corresponding clean one), 790 | but you shouldn't be releasing software with -dirty anyways. 791 | 792 | Exceptions: 793 | 1: no tags. 0.postDISTANCE[.dev0] 794 | """ 795 | if pieces["closest-tag"]: 796 | rendered = pieces["closest-tag"] 797 | if pieces["distance"] or pieces["dirty"]: 798 | rendered += ".post%%d" %% pieces["distance"] 799 | if pieces["dirty"]: 800 | rendered += ".dev0" 801 | rendered += plus_or_dot(pieces) 802 | rendered += "g%%s" %% pieces["short"] 803 | else: 804 | # exception #1 805 | rendered = "0.post%%d" %% pieces["distance"] 806 | if pieces["dirty"]: 807 | rendered += ".dev0" 808 | rendered += "+g%%s" %% pieces["short"] 809 | return rendered 810 | 811 | 812 | def render_pep440_old(pieces): 813 | """TAG[.postDISTANCE[.dev0]] . 814 | 815 | The ".dev0" means dirty. 816 | 817 | Eexceptions: 818 | 1: no tags. 
0.postDISTANCE[.dev0] 819 | """ 820 | if pieces["closest-tag"]: 821 | rendered = pieces["closest-tag"] 822 | if pieces["distance"] or pieces["dirty"]: 823 | rendered += ".post%%d" %% pieces["distance"] 824 | if pieces["dirty"]: 825 | rendered += ".dev0" 826 | else: 827 | # exception #1 828 | rendered = "0.post%%d" %% pieces["distance"] 829 | if pieces["dirty"]: 830 | rendered += ".dev0" 831 | return rendered 832 | 833 | 834 | def render_git_describe(pieces): 835 | """TAG[-DISTANCE-gHEX][-dirty]. 836 | 837 | Like 'git describe --tags --dirty --always'. 838 | 839 | Exceptions: 840 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 841 | """ 842 | if pieces["closest-tag"]: 843 | rendered = pieces["closest-tag"] 844 | if pieces["distance"]: 845 | rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) 846 | else: 847 | # exception #1 848 | rendered = pieces["short"] 849 | if pieces["dirty"]: 850 | rendered += "-dirty" 851 | return rendered 852 | 853 | 854 | def render_git_describe_long(pieces): 855 | """TAG-DISTANCE-gHEX[-dirty]. 856 | 857 | Like 'git describe --tags --dirty --always -long'. 858 | The distance/hash is unconditional. 859 | 860 | Exceptions: 861 | 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) 862 | """ 863 | if pieces["closest-tag"]: 864 | rendered = pieces["closest-tag"] 865 | rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) 866 | else: 867 | # exception #1 868 | rendered = pieces["short"] 869 | if pieces["dirty"]: 870 | rendered += "-dirty" 871 | return rendered 872 | 873 | 874 | def render(pieces, style): 875 | """Render the given version pieces into the requested style.""" 876 | if pieces["error"]: 877 | return {"version": "unknown", 878 | "full-revisionid": pieces.get("long"), 879 | "dirty": None, 880 | "error": pieces["error"], 881 | "date": None} 882 | 883 | if not style or style == "default": 884 | style = "pep440" # the default 885 | 886 | if style == "pep440": 887 | rendered = render_pep440(pieces) 888 | elif style == "pep440-pre": 889 | rendered = render_pep440_pre(pieces) 890 | elif style == "pep440-post": 891 | rendered = render_pep440_post(pieces) 892 | elif style == "pep440-old": 893 | rendered = render_pep440_old(pieces) 894 | elif style == "git-describe": 895 | rendered = render_git_describe(pieces) 896 | elif style == "git-describe-long": 897 | rendered = render_git_describe_long(pieces) 898 | else: 899 | raise ValueError("unknown style '%%s'" %% style) 900 | 901 | return {"version": rendered, "full-revisionid": pieces["long"], 902 | "dirty": pieces["dirty"], "error": None, 903 | "date": pieces.get("date")} 904 | 905 | 906 | def get_versions(): 907 | """Get version information or return default if unable to do so.""" 908 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 909 | # __file__, we can work backwards from there to the root. Some 910 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 911 | # case we can only use expanded keywords. 
912 | 913 | cfg = get_config() 914 | verbose = cfg.verbose 915 | 916 | try: 917 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, 918 | verbose) 919 | except NotThisMethod: 920 | pass 921 | 922 | try: 923 | root = os.path.realpath(__file__) 924 | # versionfile_source is the relative path from the top of the source 925 | # tree (where the .git directory might live) to this file. Invert 926 | # this to find the root from __file__. 927 | for i in cfg.versionfile_source.split('/'): 928 | root = os.path.dirname(root) 929 | except NameError: 930 | return {"version": "0+unknown", "full-revisionid": None, 931 | "dirty": None, 932 | "error": "unable to find root of source tree", 933 | "date": None} 934 | 935 | try: 936 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 937 | return render(pieces, cfg.style) 938 | except NotThisMethod: 939 | pass 940 | 941 | try: 942 | if cfg.parentdir_prefix: 943 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 944 | except NotThisMethod: 945 | pass 946 | 947 | return {"version": "0+unknown", "full-revisionid": None, 948 | "dirty": None, 949 | "error": "unable to compute version", "date": None} 950 | ''' 951 | 952 | 953 | @register_vcs_handler("git", "get_keywords") 954 | def git_get_keywords(versionfile_abs): 955 | """Extract version information from the given file.""" 956 | # the code embedded in _version.py can just fetch the value of these 957 | # keywords. When used from setup.py, we don't want to import _version.py, 958 | # so we do it with a regexp instead. This function is not used from 959 | # _version.py. 
960 | keywords = {} 961 | try: 962 | f = open(versionfile_abs, "r") 963 | for line in f.readlines(): 964 | if line.strip().startswith("git_refnames ="): 965 | mo = re.search(r'=\s*"(.*)"', line) 966 | if mo: 967 | keywords["refnames"] = mo.group(1) 968 | if line.strip().startswith("git_full ="): 969 | mo = re.search(r'=\s*"(.*)"', line) 970 | if mo: 971 | keywords["full"] = mo.group(1) 972 | if line.strip().startswith("git_date ="): 973 | mo = re.search(r'=\s*"(.*)"', line) 974 | if mo: 975 | keywords["date"] = mo.group(1) 976 | f.close() 977 | except EnvironmentError: 978 | pass 979 | return keywords 980 | 981 | 982 | @register_vcs_handler("git", "keywords") 983 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 984 | """Get version information from git keywords.""" 985 | if not keywords: 986 | raise NotThisMethod("no keywords at all, weird") 987 | date = keywords.get("date") 988 | if date is not None: 989 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant 990 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 991 | # -like" string, which we must then edit to make compliant), because 992 | # it's been around since git-1.5.3, and it's too difficult to 993 | # discover which version we're using, or to work around using an 994 | # older one. 995 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 996 | refnames = keywords["refnames"].strip() 997 | if refnames.startswith("$Format"): 998 | if verbose: 999 | print("keywords are unexpanded, not using") 1000 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 1001 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 1002 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 1003 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 
1004 | TAG = "tag: " 1005 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 1006 | if not tags: 1007 | # Either we're using git < 1.8.3, or there really are no tags. We use 1008 | # a heuristic: assume all version tags have a digit. The old git %d 1009 | # expansion behaves like git log --decorate=short and strips out the 1010 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 1011 | # between branches and tags. By ignoring refnames without digits, we 1012 | # filter out many common branch names like "release" and 1013 | # "stabilization", as well as "HEAD" and "master". 1014 | tags = set([r for r in refs if re.search(r'\d', r)]) 1015 | if verbose: 1016 | print("discarding '%s', no digits" % ",".join(refs - tags)) 1017 | if verbose: 1018 | print("likely tags: %s" % ",".join(sorted(tags))) 1019 | for ref in sorted(tags): 1020 | # sorting will prefer e.g. "2.0" over "2.0rc1" 1021 | if ref.startswith(tag_prefix): 1022 | r = ref[len(tag_prefix):] 1023 | if verbose: 1024 | print("picking %s" % r) 1025 | return { 1026 | "version": r, 1027 | "full-revisionid": keywords["full"].strip(), 1028 | "dirty": False, 1029 | "error": None, 1030 | "date": date 1031 | } 1032 | # no suitable tags, so version is "0+unknown", but full hex is still there 1033 | if verbose: 1034 | print("no suitable tags, using unknown + full revision id") 1035 | return { 1036 | "version": "0+unknown", 1037 | "full-revisionid": keywords["full"].strip(), 1038 | "dirty": False, 1039 | "error": "no suitable tags", 1040 | "date": None 1041 | } 1042 | 1043 | 1044 | @register_vcs_handler("git", "pieces_from_vcs") 1045 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 1046 | """Get version from 'git describe' in the root of the source tree. 
1047 | 1048 | This only gets called if the git-archive 'subst' keywords were *not* 1049 | expanded, and _version.py hasn't already been rewritten with a short 1050 | version string, meaning we're inside a checked out source tree. 1051 | """ 1052 | GITS = ["git"] 1053 | if sys.platform == "win32": 1054 | GITS = ["git.cmd", "git.exe"] 1055 | 1056 | out, rc = run_command( 1057 | GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) 1058 | if rc != 0: 1059 | if verbose: 1060 | print("Directory %s not under git control" % root) 1061 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 1062 | 1063 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 1064 | # if there isn't one, this yields HEX[-dirty] (no NUM) 1065 | describe_out, rc = run_command( 1066 | GITS, [ 1067 | "describe", "--tags", "--dirty", "--always", "--long", "--match", 1068 | "%s*" % tag_prefix 1069 | ], 1070 | cwd=root) 1071 | # --long was added in git-1.5.5 1072 | if describe_out is None: 1073 | raise NotThisMethod("'git describe' failed") 1074 | describe_out = describe_out.strip() 1075 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 1076 | if full_out is None: 1077 | raise NotThisMethod("'git rev-parse' failed") 1078 | full_out = full_out.strip() 1079 | 1080 | pieces = {} 1081 | pieces["long"] = full_out 1082 | pieces["short"] = full_out[:7] # maybe improved later 1083 | pieces["error"] = None 1084 | 1085 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 1086 | # TAG might have hyphens. 
1087 | git_describe = describe_out 1088 | 1089 | # look for -dirty suffix 1090 | dirty = git_describe.endswith("-dirty") 1091 | pieces["dirty"] = dirty 1092 | if dirty: 1093 | git_describe = git_describe[:git_describe.rindex("-dirty")] 1094 | 1095 | # now we have TAG-NUM-gHEX or HEX 1096 | 1097 | if "-" in git_describe: 1098 | # TAG-NUM-gHEX 1099 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 1100 | if not mo: 1101 | # unparseable. Maybe git-describe is misbehaving? 1102 | pieces["error"] = ("unable to parse git-describe output: '%s'" % 1103 | describe_out) 1104 | return pieces 1105 | 1106 | # tag 1107 | full_tag = mo.group(1) 1108 | if not full_tag.startswith(tag_prefix): 1109 | if verbose: 1110 | fmt = "tag '%s' doesn't start with prefix '%s'" 1111 | print(fmt % (full_tag, tag_prefix)) 1112 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" % 1113 | (full_tag, tag_prefix)) 1114 | return pieces 1115 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 1116 | 1117 | # distance: number of commits since tag 1118 | pieces["distance"] = int(mo.group(2)) 1119 | 1120 | # commit: short hex revision ID 1121 | pieces["short"] = mo.group(3) 1122 | 1123 | else: 1124 | # HEX: no tags 1125 | pieces["closest-tag"] = None 1126 | count_out, rc = run_command( 1127 | GITS, ["rev-list", "HEAD", "--count"], cwd=root) 1128 | pieces["distance"] = int(count_out) # total number of commits 1129 | 1130 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 1131 | date = run_command( 1132 | GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() 1133 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 1134 | 1135 | return pieces 1136 | 1137 | 1138 | def do_vcs_install(manifest_in, versionfile_source, ipy): 1139 | """Git-specific installation logic for Versioneer. 1140 | 1141 | For Git, this means creating/changing .gitattributes to mark _version.py 1142 | for export-subst keyword substitution. 
1143 | """ 1144 | GITS = ["git"] 1145 | if sys.platform == "win32": 1146 | GITS = ["git.cmd", "git.exe"] 1147 | files = [manifest_in, versionfile_source] 1148 | if ipy: 1149 | files.append(ipy) 1150 | try: 1151 | me = __file__ 1152 | if me.endswith(".pyc") or me.endswith(".pyo"): 1153 | me = os.path.splitext(me)[0] + ".py" 1154 | versioneer_file = os.path.relpath(me) 1155 | except NameError: 1156 | versioneer_file = "versioneer.py" 1157 | files.append(versioneer_file) 1158 | present = False 1159 | try: 1160 | f = open(".gitattributes", "r") 1161 | for line in f.readlines(): 1162 | if line.strip().startswith(versionfile_source): 1163 | if "export-subst" in line.strip().split()[1:]: 1164 | present = True 1165 | f.close() 1166 | except EnvironmentError: 1167 | pass 1168 | if not present: 1169 | f = open(".gitattributes", "a+") 1170 | f.write("%s export-subst\n" % versionfile_source) 1171 | f.close() 1172 | files.append(".gitattributes") 1173 | run_command(GITS, ["add", "--"] + files) 1174 | 1175 | 1176 | def versions_from_parentdir(parentdir_prefix, root, verbose): 1177 | """Try to determine the version from the parent directory name. 1178 | 1179 | Source tarballs conventionally unpack into a directory that includes both 1180 | the project name and a version string. 
We will also support searching up 1181 | two directory levels for an appropriately named parent directory 1182 | """ 1183 | rootdirs = [] 1184 | 1185 | for i in range(3): 1186 | dirname = os.path.basename(root) 1187 | if dirname.startswith(parentdir_prefix): 1188 | return { 1189 | "version": dirname[len(parentdir_prefix):], 1190 | "full-revisionid": None, 1191 | "dirty": False, 1192 | "error": None, 1193 | "date": None 1194 | } 1195 | else: 1196 | rootdirs.append(root) 1197 | root = os.path.dirname(root) # up a level 1198 | 1199 | if verbose: 1200 | print("Tried directories %s but none started with prefix %s" % 1201 | (str(rootdirs), parentdir_prefix)) 1202 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 1203 | 1204 | 1205 | SHORT_VERSION_PY = """ 1206 | # This file was generated by 'versioneer.py' (0.18) from 1207 | # revision-control system data, or from the parent directory name of an 1208 | # unpacked source archive. Distribution tarballs contain a pre-generated copy 1209 | # of this file. 
1210 | 1211 | import json 1212 | 1213 | version_json = ''' 1214 | %s 1215 | ''' # END VERSION_JSON 1216 | 1217 | 1218 | def get_versions(): 1219 | return json.loads(version_json) 1220 | """ 1221 | 1222 | 1223 | def versions_from_file(filename): 1224 | """Try to determine the version from _version.py if present.""" 1225 | try: 1226 | with open(filename) as f: 1227 | contents = f.read() 1228 | except EnvironmentError: 1229 | raise NotThisMethod("unable to read _version.py") 1230 | mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", contents, 1231 | re.M | re.S) 1232 | if not mo: 1233 | mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", 1234 | contents, re.M | re.S) 1235 | if not mo: 1236 | raise NotThisMethod("no version_json in _version.py") 1237 | return json.loads(mo.group(1)) 1238 | 1239 | 1240 | def write_to_version_file(filename, versions): 1241 | """Write the given version number to the given _version.py file.""" 1242 | os.unlink(filename) 1243 | contents = json.dumps( 1244 | versions, sort_keys=True, indent=1, separators=(",", ": ")) 1245 | with open(filename, "w") as f: 1246 | f.write(SHORT_VERSION_PY % contents) 1247 | 1248 | print("set %s to '%s'" % (filename, versions["version"])) 1249 | 1250 | 1251 | def plus_or_dot(pieces): 1252 | """Return a + if we don't already have one, else return a .""" 1253 | if "+" in pieces.get("closest-tag", ""): 1254 | return "." 1255 | return "+" 1256 | 1257 | 1258 | def render_pep440(pieces): 1259 | """Build up version string, with post-release "local version identifier". 1260 | 1261 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 1262 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 1263 | 1264 | Exceptions: 1265 | 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] 1266 | """ 1267 | if pieces["closest-tag"]: 1268 | rendered = pieces["closest-tag"] 1269 | if pieces["distance"] or pieces["dirty"]: 1270 | rendered += plus_or_dot(pieces) 1271 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 1272 | if pieces["dirty"]: 1273 | rendered += ".dirty" 1274 | else: 1275 | # exception #1 1276 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) 1277 | if pieces["dirty"]: 1278 | rendered += ".dirty" 1279 | return rendered 1280 | 1281 | 1282 | def render_pep440_pre(pieces): 1283 | """TAG[.post.devDISTANCE] -- No -dirty. 1284 | 1285 | Exceptions: 1286 | 1: no tags. 0.post.devDISTANCE 1287 | """ 1288 | if pieces["closest-tag"]: 1289 | rendered = pieces["closest-tag"] 1290 | if pieces["distance"]: 1291 | rendered += ".post.dev%d" % pieces["distance"] 1292 | else: 1293 | # exception #1 1294 | rendered = "0.post.dev%d" % pieces["distance"] 1295 | return rendered 1296 | 1297 | 1298 | def render_pep440_post(pieces): 1299 | """TAG[.postDISTANCE[.dev0]+gHEX] . 1300 | 1301 | The ".dev0" means dirty. Note that .dev0 sorts backwards 1302 | (a dirty tree will appear "older" than the corresponding clean one), 1303 | but you shouldn't be releasing software with -dirty anyways. 1304 | 1305 | Exceptions: 1306 | 1: no tags. 0.postDISTANCE[.dev0] 1307 | """ 1308 | if pieces["closest-tag"]: 1309 | rendered = pieces["closest-tag"] 1310 | if pieces["distance"] or pieces["dirty"]: 1311 | rendered += ".post%d" % pieces["distance"] 1312 | if pieces["dirty"]: 1313 | rendered += ".dev0" 1314 | rendered += plus_or_dot(pieces) 1315 | rendered += "g%s" % pieces["short"] 1316 | else: 1317 | # exception #1 1318 | rendered = "0.post%d" % pieces["distance"] 1319 | if pieces["dirty"]: 1320 | rendered += ".dev0" 1321 | rendered += "+g%s" % pieces["short"] 1322 | return rendered 1323 | 1324 | 1325 | def render_pep440_old(pieces): 1326 | """TAG[.postDISTANCE[.dev0]] . 1327 | 1328 | The ".dev0" means dirty. 
1329 | 1330 | Eexceptions: 1331 | 1: no tags. 0.postDISTANCE[.dev0] 1332 | """ 1333 | if pieces["closest-tag"]: 1334 | rendered = pieces["closest-tag"] 1335 | if pieces["distance"] or pieces["dirty"]: 1336 | rendered += ".post%d" % pieces["distance"] 1337 | if pieces["dirty"]: 1338 | rendered += ".dev0" 1339 | else: 1340 | # exception #1 1341 | rendered = "0.post%d" % pieces["distance"] 1342 | if pieces["dirty"]: 1343 | rendered += ".dev0" 1344 | return rendered 1345 | 1346 | 1347 | def render_git_describe(pieces): 1348 | """TAG[-DISTANCE-gHEX][-dirty]. 1349 | 1350 | Like 'git describe --tags --dirty --always'. 1351 | 1352 | Exceptions: 1353 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 1354 | """ 1355 | if pieces["closest-tag"]: 1356 | rendered = pieces["closest-tag"] 1357 | if pieces["distance"]: 1358 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 1359 | else: 1360 | # exception #1 1361 | rendered = pieces["short"] 1362 | if pieces["dirty"]: 1363 | rendered += "-dirty" 1364 | return rendered 1365 | 1366 | 1367 | def render_git_describe_long(pieces): 1368 | """TAG-DISTANCE-gHEX[-dirty]. 1369 | 1370 | Like 'git describe --tags --dirty --always -long'. 1371 | The distance/hash is unconditional. 1372 | 1373 | Exceptions: 1374 | 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) 1375 | """ 1376 | if pieces["closest-tag"]: 1377 | rendered = pieces["closest-tag"] 1378 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 1379 | else: 1380 | # exception #1 1381 | rendered = pieces["short"] 1382 | if pieces["dirty"]: 1383 | rendered += "-dirty" 1384 | return rendered 1385 | 1386 | 1387 | def render(pieces, style): 1388 | """Render the given version pieces into the requested style.""" 1389 | if pieces["error"]: 1390 | return { 1391 | "version": "unknown", 1392 | "full-revisionid": pieces.get("long"), 1393 | "dirty": None, 1394 | "error": pieces["error"], 1395 | "date": None 1396 | } 1397 | 1398 | if not style or style == "default": 1399 | style = "pep440" # the default 1400 | 1401 | if style == "pep440": 1402 | rendered = render_pep440(pieces) 1403 | elif style == "pep440-pre": 1404 | rendered = render_pep440_pre(pieces) 1405 | elif style == "pep440-post": 1406 | rendered = render_pep440_post(pieces) 1407 | elif style == "pep440-old": 1408 | rendered = render_pep440_old(pieces) 1409 | elif style == "git-describe": 1410 | rendered = render_git_describe(pieces) 1411 | elif style == "git-describe-long": 1412 | rendered = render_git_describe_long(pieces) 1413 | else: 1414 | raise ValueError("unknown style '%s'" % style) 1415 | 1416 | return { 1417 | "version": rendered, 1418 | "full-revisionid": pieces["long"], 1419 | "dirty": pieces["dirty"], 1420 | "error": None, 1421 | "date": pieces.get("date") 1422 | } 1423 | 1424 | 1425 | class VersioneerBadRootError(Exception): 1426 | """The project root directory is unknown or missing key files.""" 1427 | 1428 | 1429 | def get_versions(verbose=False): 1430 | """Get the project version from whatever source is available. 1431 | 1432 | Returns dict with two keys: 'version' and 'full'. 
1433 | """ 1434 | if "versioneer" in sys.modules: 1435 | # see the discussion in cmdclass.py:get_cmdclass() 1436 | del sys.modules["versioneer"] 1437 | 1438 | root = get_root() 1439 | cfg = get_config_from_root(root) 1440 | 1441 | assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" 1442 | handlers = HANDLERS.get(cfg.VCS) 1443 | assert handlers, "unrecognized VCS '%s'" % cfg.VCS 1444 | verbose = verbose or cfg.verbose 1445 | assert cfg.versionfile_source is not None, \ 1446 | "please set versioneer.versionfile_source" 1447 | assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" 1448 | 1449 | versionfile_abs = os.path.join(root, cfg.versionfile_source) 1450 | 1451 | # extract version from first of: _version.py, VCS command (e.g. 'git 1452 | # describe'), parentdir. This is meant to work for developers using a 1453 | # source checkout, for users of a tarball created by 'setup.py sdist', 1454 | # and for users of a tarball/zipball created by 'git archive' or github's 1455 | # download-from-tag feature or the equivalent in other VCSes. 
1456 | 1457 | get_keywords_f = handlers.get("get_keywords") 1458 | from_keywords_f = handlers.get("keywords") 1459 | if get_keywords_f and from_keywords_f: 1460 | try: 1461 | keywords = get_keywords_f(versionfile_abs) 1462 | ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) 1463 | if verbose: 1464 | print("got version from expanded keyword %s" % ver) 1465 | return ver 1466 | except NotThisMethod: 1467 | pass 1468 | 1469 | try: 1470 | ver = versions_from_file(versionfile_abs) 1471 | if verbose: 1472 | print("got version from file %s %s" % (versionfile_abs, ver)) 1473 | return ver 1474 | except NotThisMethod: 1475 | pass 1476 | 1477 | from_vcs_f = handlers.get("pieces_from_vcs") 1478 | if from_vcs_f: 1479 | try: 1480 | pieces = from_vcs_f(cfg.tag_prefix, root, verbose) 1481 | ver = render(pieces, cfg.style) 1482 | if verbose: 1483 | print("got version from VCS %s" % ver) 1484 | return ver 1485 | except NotThisMethod: 1486 | pass 1487 | 1488 | try: 1489 | if cfg.parentdir_prefix: 1490 | ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 1491 | if verbose: 1492 | print("got version from parentdir %s" % ver) 1493 | return ver 1494 | except NotThisMethod: 1495 | pass 1496 | 1497 | if verbose: 1498 | print("unable to compute version") 1499 | 1500 | return { 1501 | "version": "0+unknown", 1502 | "full-revisionid": None, 1503 | "dirty": None, 1504 | "error": "unable to compute version", 1505 | "date": None 1506 | } 1507 | 1508 | 1509 | def get_version(): 1510 | """Get the short version string for this project.""" 1511 | return get_versions()["version"] 1512 | 1513 | 1514 | def get_cmdclass(): 1515 | """Get the custom setuptools/distutils subclasses used by Versioneer.""" 1516 | if "versioneer" in sys.modules: 1517 | del sys.modules["versioneer"] 1518 | # this fixes the "python setup.py develop" case (also 'install' and 1519 | # 'easy_install .'), in which subdependencies of the main project are 1520 | # built (using setup.py bdist_egg) in the same 
python process. Assume 1521 | # a main project A and a dependency B, which use different versions 1522 | # of Versioneer. A's setup.py imports A's Versioneer, leaving it in 1523 | # sys.modules by the time B's setup.py is executed, causing B to run 1524 | # with the wrong versioneer. Setuptools wraps the sub-dep builds in a 1525 | # sandbox that restores sys.modules to it's pre-build state, so the 1526 | # parent is protected against the child's "import versioneer". By 1527 | # removing ourselves from sys.modules here, before the child build 1528 | # happens, we protect the child from the parent's versioneer too. 1529 | # Also see https://github.com/warner/python-versioneer/issues/52 1530 | 1531 | cmds = {} 1532 | 1533 | # we add "version" to both distutils and setuptools 1534 | from distutils.core import Command 1535 | 1536 | class cmd_version(Command): 1537 | description = "report generated version string" 1538 | user_options = [] 1539 | boolean_options = [] 1540 | 1541 | def initialize_options(self): 1542 | pass 1543 | 1544 | def finalize_options(self): 1545 | pass 1546 | 1547 | def run(self): 1548 | vers = get_versions(verbose=True) 1549 | print("Version: %s" % vers["version"]) 1550 | print(" full-revisionid: %s" % vers.get("full-revisionid")) 1551 | print(" dirty: %s" % vers.get("dirty")) 1552 | print(" date: %s" % vers.get("date")) 1553 | if vers["error"]: 1554 | print(" error: %s" % vers["error"]) 1555 | 1556 | cmds["version"] = cmd_version 1557 | 1558 | # we override "build_py" in both distutils and setuptools 1559 | # 1560 | # most invocation pathways end up running build_py: 1561 | # distutils/build -> build_py 1562 | # distutils/install -> distutils/build ->.. 1563 | # setuptools/bdist_wheel -> distutils/install ->.. 1564 | # setuptools/bdist_egg -> distutils/install_lib -> build_py 1565 | # setuptools/install -> bdist_egg ->.. 1566 | # setuptools/develop -> ? 
1567 | # pip install: 1568 | # copies source tree to a tempdir before running egg_info/etc 1569 | # if .git isn't copied too, 'git describe' will fail 1570 | # then does setup.py bdist_wheel, or sometimes setup.py install 1571 | # setup.py egg_info -> ? 1572 | 1573 | # we override different "build_py" commands for both environments 1574 | if "setuptools" in sys.modules: 1575 | from setuptools.command.build_py import build_py as _build_py 1576 | else: 1577 | from distutils.command.build_py import build_py as _build_py 1578 | 1579 | class cmd_build_py(_build_py): 1580 | def run(self): 1581 | root = get_root() 1582 | cfg = get_config_from_root(root) 1583 | versions = get_versions() 1584 | _build_py.run(self) 1585 | # now locate _version.py in the new build/ directory and replace 1586 | # it with an updated value 1587 | if cfg.versionfile_build: 1588 | target_versionfile = os.path.join(self.build_lib, 1589 | cfg.versionfile_build) 1590 | print("UPDATING %s" % target_versionfile) 1591 | write_to_version_file(target_versionfile, versions) 1592 | 1593 | cmds["build_py"] = cmd_build_py 1594 | 1595 | if "cx_Freeze" in sys.modules: # cx_freeze enabled? 1596 | from cx_Freeze.dist import build_exe as _build_exe 1597 | 1598 | # nczeczulin reports that py2exe won't like the pep440-style string 1599 | # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. 1600 | # setup(console=[{ 1601 | # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION 1602 | # "product_version": versioneer.get_version(), 1603 | # ... 
1604 | 1605 | class cmd_build_exe(_build_exe): 1606 | def run(self): 1607 | root = get_root() 1608 | cfg = get_config_from_root(root) 1609 | versions = get_versions() 1610 | target_versionfile = cfg.versionfile_source 1611 | print("UPDATING %s" % target_versionfile) 1612 | write_to_version_file(target_versionfile, versions) 1613 | 1614 | _build_exe.run(self) 1615 | os.unlink(target_versionfile) 1616 | with open(cfg.versionfile_source, "w") as f: 1617 | LONG = LONG_VERSION_PY[cfg.VCS] 1618 | f.write(LONG % { 1619 | "DOLLAR": "$", 1620 | "STYLE": cfg.style, 1621 | "TAG_PREFIX": cfg.tag_prefix, 1622 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1623 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1624 | }) 1625 | 1626 | cmds["build_exe"] = cmd_build_exe 1627 | del cmds["build_py"] 1628 | 1629 | if 'py2exe' in sys.modules: # py2exe enabled? 1630 | try: 1631 | from py2exe.distutils_buildexe import py2exe as _py2exe # py3 1632 | except ImportError: 1633 | from py2exe.build_exe import py2exe as _py2exe # py2 1634 | 1635 | class cmd_py2exe(_py2exe): 1636 | def run(self): 1637 | root = get_root() 1638 | cfg = get_config_from_root(root) 1639 | versions = get_versions() 1640 | target_versionfile = cfg.versionfile_source 1641 | print("UPDATING %s" % target_versionfile) 1642 | write_to_version_file(target_versionfile, versions) 1643 | 1644 | _py2exe.run(self) 1645 | os.unlink(target_versionfile) 1646 | with open(cfg.versionfile_source, "w") as f: 1647 | LONG = LONG_VERSION_PY[cfg.VCS] 1648 | f.write(LONG % { 1649 | "DOLLAR": "$", 1650 | "STYLE": cfg.style, 1651 | "TAG_PREFIX": cfg.tag_prefix, 1652 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1653 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1654 | }) 1655 | 1656 | cmds["py2exe"] = cmd_py2exe 1657 | 1658 | # we override different "sdist" commands for both environments 1659 | if "setuptools" in sys.modules: 1660 | from setuptools.command.sdist import sdist as _sdist 1661 | else: 1662 | from distutils.command.sdist import sdist as 
_sdist 1663 | 1664 | class cmd_sdist(_sdist): 1665 | def run(self): 1666 | versions = get_versions() 1667 | self._versioneer_generated_versions = versions 1668 | # unless we update this, the command will keep using the old 1669 | # version 1670 | self.distribution.metadata.version = versions["version"] 1671 | return _sdist.run(self) 1672 | 1673 | def make_release_tree(self, base_dir, files): 1674 | root = get_root() 1675 | cfg = get_config_from_root(root) 1676 | _sdist.make_release_tree(self, base_dir, files) 1677 | # now locate _version.py in the new base_dir directory 1678 | # (remembering that it may be a hardlink) and replace it with an 1679 | # updated value 1680 | target_versionfile = os.path.join(base_dir, cfg.versionfile_source) 1681 | print("UPDATING %s" % target_versionfile) 1682 | write_to_version_file(target_versionfile, 1683 | self._versioneer_generated_versions) 1684 | 1685 | cmds["sdist"] = cmd_sdist 1686 | 1687 | return cmds 1688 | 1689 | 1690 | CONFIG_ERROR = """ 1691 | setup.cfg is missing the necessary Versioneer configuration. You need 1692 | a section like: 1693 | 1694 | [versioneer] 1695 | VCS = git 1696 | style = pep440 1697 | versionfile_source = src/myproject/_version.py 1698 | versionfile_build = myproject/_version.py 1699 | tag_prefix = 1700 | parentdir_prefix = myproject- 1701 | 1702 | You will also need to edit your setup.py to use the results: 1703 | 1704 | import versioneer 1705 | setup(version=versioneer.get_version(), 1706 | cmdclass=versioneer.get_cmdclass(), ...) 1707 | 1708 | Please read the docstring in ./versioneer.py for configuration instructions, 1709 | edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. 1710 | """ 1711 | 1712 | SAMPLE_CONFIG = """ 1713 | # See the docstring in versioneer.py for instructions. Note that you must 1714 | # re-run 'versioneer.py setup' after changing this section, and commit the 1715 | # resulting files. 
1716 | 1717 | [versioneer] 1718 | #VCS = git 1719 | #style = pep440 1720 | #versionfile_source = 1721 | #versionfile_build = 1722 | #tag_prefix = 1723 | #parentdir_prefix = 1724 | 1725 | """ 1726 | 1727 | INIT_PY_SNIPPET = """ 1728 | from ._version import get_versions 1729 | __version__ = get_versions()['version'] 1730 | del get_versions 1731 | """ 1732 | 1733 | 1734 | def do_setup(): 1735 | """Main VCS-independent setup function for installing Versioneer.""" 1736 | root = get_root() 1737 | try: 1738 | cfg = get_config_from_root(root) 1739 | except (EnvironmentError, configparser.NoSectionError, 1740 | configparser.NoOptionError) as e: 1741 | if isinstance(e, (EnvironmentError, configparser.NoSectionError)): 1742 | print( 1743 | "Adding sample versioneer config to setup.cfg", file=sys.stderr) 1744 | with open(os.path.join(root, "setup.cfg"), "a") as f: 1745 | f.write(SAMPLE_CONFIG) 1746 | print(CONFIG_ERROR, file=sys.stderr) 1747 | return 1 1748 | 1749 | print(" creating %s" % cfg.versionfile_source) 1750 | with open(cfg.versionfile_source, "w") as f: 1751 | LONG = LONG_VERSION_PY[cfg.VCS] 1752 | f.write(LONG % { 1753 | "DOLLAR": "$", 1754 | "STYLE": cfg.style, 1755 | "TAG_PREFIX": cfg.tag_prefix, 1756 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1757 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1758 | }) 1759 | 1760 | ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") 1761 | if os.path.exists(ipy): 1762 | try: 1763 | with open(ipy, "r") as f: 1764 | old = f.read() 1765 | except EnvironmentError: 1766 | old = "" 1767 | if INIT_PY_SNIPPET not in old: 1768 | print(" appending to %s" % ipy) 1769 | with open(ipy, "a") as f: 1770 | f.write(INIT_PY_SNIPPET) 1771 | else: 1772 | print(" %s unmodified" % ipy) 1773 | else: 1774 | print(" %s doesn't exist, ok" % ipy) 1775 | ipy = None 1776 | 1777 | # Make sure both the top-level "versioneer.py" and versionfile_source 1778 | # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so 1779 | # 
they'll be copied into source distributions. Pip won't be able to 1780 | # install the package without this. 1781 | manifest_in = os.path.join(root, "MANIFEST.in") 1782 | simple_includes = set() 1783 | try: 1784 | with open(manifest_in, "r") as f: 1785 | for line in f: 1786 | if line.startswith("include "): 1787 | for include in line.split()[1:]: 1788 | simple_includes.add(include) 1789 | except EnvironmentError: 1790 | pass 1791 | # That doesn't cover everything MANIFEST.in can do 1792 | # (http://docs.python.org/2/distutils/sourcedist.html#commands), so 1793 | # it might give some false negatives. Appending redundant 'include' 1794 | # lines is safe, though. 1795 | if "versioneer.py" not in simple_includes: 1796 | print(" appending 'versioneer.py' to MANIFEST.in") 1797 | with open(manifest_in, "a") as f: 1798 | f.write("include versioneer.py\n") 1799 | else: 1800 | print(" 'versioneer.py' already in MANIFEST.in") 1801 | if cfg.versionfile_source not in simple_includes: 1802 | print(" appending versionfile_source ('%s') to MANIFEST.in" % 1803 | cfg.versionfile_source) 1804 | with open(manifest_in, "a") as f: 1805 | f.write("include %s\n" % cfg.versionfile_source) 1806 | else: 1807 | print(" versionfile_source already in MANIFEST.in") 1808 | 1809 | # Make VCS-specific changes. For git, this means creating/changing 1810 | # .gitattributes to mark _version.py for export-subst keyword 1811 | # substitution. 
1812 | do_vcs_install(manifest_in, cfg.versionfile_source, ipy) 1813 | return 0 1814 | 1815 | 1816 | def scan_setup_py(): 1817 | """Validate the contents of setup.py against Versioneer's expectations.""" 1818 | found = set() 1819 | setters = False 1820 | errors = 0 1821 | with open("setup.py", "r") as f: 1822 | for line in f.readlines(): 1823 | if "import versioneer" in line: 1824 | found.add("import") 1825 | if "versioneer.get_cmdclass()" in line: 1826 | found.add("cmdclass") 1827 | if "versioneer.get_version()" in line: 1828 | found.add("get_version") 1829 | if "versioneer.VCS" in line: 1830 | setters = True 1831 | if "versioneer.versionfile_source" in line: 1832 | setters = True 1833 | if len(found) != 3: 1834 | print("") 1835 | print("Your setup.py appears to be missing some important items") 1836 | print("(but I might be wrong). Please make sure it has something") 1837 | print("roughly like the following:") 1838 | print("") 1839 | print(" import versioneer") 1840 | print(" setup( version=versioneer.get_version(),") 1841 | print(" cmdclass=versioneer.get_cmdclass(), ...)") 1842 | print("") 1843 | errors += 1 1844 | if setters: 1845 | print("You should remove lines like 'versioneer.VCS = ' and") 1846 | print("'versioneer.versionfile_source = ' . This configuration") 1847 | print("now lives in setup.cfg, and should be removed from setup.py") 1848 | print("") 1849 | errors += 1 1850 | return errors 1851 | 1852 | 1853 | if __name__ == "__main__": 1854 | cmd = sys.argv[1] 1855 | if cmd == "setup": 1856 | errors = do_setup() 1857 | errors += scan_setup_py() 1858 | if errors: 1859 | sys.exit(1) 1860 | --------------------------------------------------------------------------------