├── .github ├── ISSUE_TEMPLATE │ └── bug_report.md ├── release-template.yml └── workflows │ ├── ci-tests.yml │ ├── publish-to-pypi.yml │ └── release-drafter.yml ├── .gitignore ├── LICENSE.md ├── README.md ├── docs ├── .readthedocs.yaml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── _config.yml ├── _toc.yml ├── api.md ├── changelog.md ├── chipping.md ├── index.md ├── multi-resolution.md ├── object-detection-boxes.md ├── stacking.md ├── vector-segmentation-masks.md └── walkthrough.md ├── poetry.lock ├── pyproject.toml └── zen3geo ├── __init__.py ├── datapipes ├── __init__.py ├── datashader.py ├── geopandas.py ├── pyogrio.py ├── pystac.py ├── pystac_client.py ├── rioxarray.py ├── stackstac.py ├── xbatcher.py └── xpystac.py └── tests ├── test_datapipes_datashader.py ├── test_datapipes_geopandas.py ├── test_datapipes_pyogrio.py ├── test_datapipes_pystac.py ├── test_datapipes_pystac_client.py ├── test_datapipes_rioxarray.py ├── test_datapipes_stackstac.py ├── test_datapipes_xbatcher.py ├── test_datapipes_xpystac.py └── test_zen3geo.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go do '...' 16 | 2. Run the following code 17 | 18 | ```python 19 | # Insert your code here 20 | ``` 21 | 22 | 3. See error `...` 23 | 24 | **Expected behavior** 25 | A clear and concise description of what you expected to happen. 26 | 27 | **System details (please complete the following information):** 28 | - OS: [e.g. Linux, macOS, Windows] 29 | - Python Version [e.g. 3.11] 30 | 31 | **Additional context** 32 | Add any other context about the problem here. 
33 | -------------------------------------------------------------------------------- /.github/release-template.yml: -------------------------------------------------------------------------------- 1 | name-template: 'v$RESOLVED_VERSION 🌈' 2 | tag-template: 'v$RESOLVED_VERSION' 3 | categories: 4 | - title: '🚀 Features' 5 | label: 'feature' 6 | - title: '🐛 Bug Fixes' 7 | label: 'bug' 8 | - title: '📖 Documentation' 9 | label: 'documentation' 10 | - title: '🧰 Maintenance' 11 | label: 'maintenance' 12 | version-resolver: 13 | minor: 14 | labels: 15 | - 'feature' 16 | default: patch 17 | exclude-labels: 18 | - 'skip-changelog' 19 | category-template: '### $TITLE' 20 | change-template: '* $TITLE ([#$NUMBER]($URL))' 21 | template: | 22 | ## Release v$RESOLVED_VERSION (20YY/MM/DD) 23 | 24 | ### 💫 Highlights 25 | 26 | * 27 | 28 | $CHANGES 29 | 30 | ### 🧑‍🤝‍🧑 Contributors 31 | 32 | $CONTRIBUTORS 33 | -------------------------------------------------------------------------------- /.github/workflows/ci-tests.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 3 | 4 | name: Tests 5 | 6 | on: 7 | push: 8 | branches: [ "main" ] 9 | pull_request: 10 | types: [opened, ready_for_review, reopened, synchronize] 11 | branches: [ "main" ] 12 | 13 | permissions: 14 | contents: read 15 | 16 | jobs: 17 | test: 18 | name: ${{ matrix.os }} - Python ${{ matrix.python-version }} 19 | runs-on: ${{ matrix.os }} 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python-version: ["3.8", "3.10", "3.11.8"] 24 | os: [ubuntu-22.04] 25 | # Is it a draft Pull Request (true or false)? 
26 | isDraft: 27 | - ${{ github.event.pull_request.draft }} 28 | # Exclude Ubuntu + Python 3.8 and 3.11 jobs for draft PRs 29 | exclude: 30 | - python-version: '3.8' 31 | isDraft: true 32 | - python-version: '3.11.8' 33 | isDraft: true 34 | # Only install optional packages on Ubuntu-22.04/Python 3.10 and 3.11 35 | include: 36 | - os: 'ubuntu-22.04' 37 | python-version: '3.10' 38 | extra-packages: '--extras "raster spatial stac vector"' 39 | - os: 'ubuntu-22.04' 40 | python-version: '3.11.8' 41 | extra-packages: '--extras "raster spatial stac vector"' 42 | 43 | steps: 44 | # Checkout current git repository 45 | - name: Checkout 46 | uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 47 | 48 | # Install Python 49 | - name: Set up Python ${{ matrix.python-version }} 50 | uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 51 | with: 52 | python-version: ${{ matrix.python-version }} 53 | 54 | # Install poetry package manager and dependencies from poetry.lock 55 | - name: Install Poetry python dependencies 56 | run: | 57 | pip install poetry==1.6.1 58 | poetry install ${{ matrix.extra-packages }} 59 | poetry self add poetry-dynamic-versioning[plugin] 60 | poetry show 61 | 62 | # Run the unit tests and doctests 63 | - name: Test with pytest 64 | run: poetry run pytest --verbose --doctest-modules zen3geo/ 65 | -------------------------------------------------------------------------------- /.github/workflows/publish-to-pypi.yml: -------------------------------------------------------------------------------- 1 | # Publish archives to PyPI and TestPyPI using GitHub Actions 2 | # https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ 3 | 4 | name: Publish to PyPI 5 | 6 | # Only run for pushes to the main branch and releases. 
7 | on: 8 | push: 9 | branches: 10 | - main 11 | release: 12 | types: 13 | - published 14 | # Runs for pull requests should be disabled other than for testing purposes 15 | #pull_request: 16 | # branches: 17 | # - main 18 | 19 | permissions: 20 | contents: read 21 | 22 | jobs: 23 | publish-pypi: 24 | name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI 25 | runs-on: ubuntu-22.04 26 | permissions: 27 | # This permission is mandatory for OIDC publishing 28 | id-token: write 29 | if: github.repository == 'weiji14/zen3geo' 30 | 31 | steps: 32 | - name: Checkout 33 | uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 34 | with: 35 | # fetch all history so that poetry-dynamic-versioning works 36 | fetch-depth: 0 37 | 38 | - name: Set up Python 3.11 39 | uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 40 | with: 41 | python-version: '3.11.8' 42 | 43 | - name: Install Poetry and dynamic-versioning plugin 44 | run: | 45 | pip install poetry==1.6.1 46 | poetry self add poetry-dynamic-versioning[plugin] 47 | poetry show 48 | 49 | - name: Fix up version string for TestPyPI and PyPI 50 | run: | 51 | # Change poetry-dynamic-versioning to use metadata=false so that the 52 | # local part of the version isn't included, making the version string 53 | # compatible with PyPI. 
54 | sed --in-place "s/metadata = true/metadata = false/g" pyproject.toml 55 | 56 | - name: Build a binary wheel and a source tarball 57 | run: | 58 | poetry build -vvv 59 | echo "" 60 | echo "Generated files:" 61 | ls -lh dist/ 62 | 63 | - name: Publish distribution 📦 to Test PyPI 64 | uses: pypa/gh-action-pypi-publish@a56da0b891b3dc519c7ee3284aff1fad93cc8598 # v1.8.6 65 | with: 66 | repository-url: https://test.pypi.org/legacy/ 67 | skip-existing: true 68 | 69 | - name: Publish distribution 📦 to PyPI 70 | if: startsWith(github.ref, 'refs/tags') 71 | uses: pypa/gh-action-pypi-publish@a56da0b891b3dc519c7ee3284aff1fad93cc8598 # v1.8.6 72 | -------------------------------------------------------------------------------- /.github/workflows/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name: Release Drafter 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | permissions: 9 | contents: read 10 | 11 | jobs: 12 | update_release_draft: 13 | permissions: 14 | contents: write # for release-drafter/release-drafter to create a github release 15 | runs-on: ubuntu-22.04 16 | steps: 17 | # Drafts your next Release notes as Pull Requests are merged into "main" 18 | - uses: release-drafter/release-drafter@569eb7ee3a85817ab916c8f8ff03a5bd96c9c83e # v5.23.0 19 | with: 20 | # (Optional) specify config name to use, relative to .github/. 
Default: release-drafter.yml 21 | config-name: release-template.yml 22 | env: 23 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # Distribution / packaging 6 | build/ 7 | dist/ 8 | *.egg 9 | *.egg-info/ 10 | .eggs/ 11 | MANIFEST 12 | 13 | # Unit test / coverage reports 14 | .pytest_cache/ 15 | 16 | # Jupyter Book 17 | /docs/_build/ 18 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | 2 | GNU LESSER GENERAL PUBLIC LICENSE 3 | Version 3, 29 June 2007 4 | 5 | Copyright (C) 2007 Free Software Foundation, Inc. 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | 10 | This version of the GNU Lesser General Public License incorporates 11 | the terms and conditions of version 3 of the GNU General Public 12 | License, supplemented by the additional permissions listed below. 13 | 14 | 0. Additional Definitions. 15 | 16 | As used herein, "this License" refers to version 3 of the GNU Lesser 17 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 18 | General Public License. 19 | 20 | "The Library" refers to a covered work governed by this License, 21 | other than an Application or a Combined Work as defined below. 22 | 23 | An "Application" is any work that makes use of an interface provided 24 | by the Library, but which is not otherwise based on the Library. 25 | Defining a subclass of a class defined by the Library is deemed a mode 26 | of using an interface provided by the Library. 27 | 28 | A "Combined Work" is a work produced by combining or linking an 29 | Application with the Library. 
The particular version of the Library 30 | with which the Combined Work was made is also called the "Linked 31 | Version". 32 | 33 | The "Minimal Corresponding Source" for a Combined Work means the 34 | Corresponding Source for the Combined Work, excluding any source code 35 | for portions of the Combined Work that, considered in isolation, are 36 | based on the Application, and not on the Linked Version. 37 | 38 | The "Corresponding Application Code" for a Combined Work means the 39 | object code and/or source code for the Application, including any data 40 | and utility programs needed for reproducing the Combined Work from the 41 | Application, but excluding the System Libraries of the Combined Work. 42 | 43 | 1. Exception to Section 3 of the GNU GPL. 44 | 45 | You may convey a covered work under sections 3 and 4 of this License 46 | without being bound by section 3 of the GNU GPL. 47 | 48 | 2. Conveying Modified Versions. 49 | 50 | If you modify a copy of the Library, and, in your modifications, a 51 | facility refers to a function or data to be supplied by an Application 52 | that uses the facility (other than as an argument passed when the 53 | facility is invoked), then you may convey a copy of the modified 54 | version: 55 | 56 | a) under this License, provided that you make a good faith effort to 57 | ensure that, in the event an Application does not supply the 58 | function or data, the facility still operates, and performs 59 | whatever part of its purpose remains meaningful, or 60 | 61 | b) under the GNU GPL, with none of the additional permissions of 62 | this License applicable to that copy. 63 | 64 | 3. Object Code Incorporating Material from Library Header Files. 65 | 66 | The object code form of an Application may incorporate material from 67 | a header file that is part of the Library. 
You may convey such object 68 | code under terms of your choice, provided that, if the incorporated 69 | material is not limited to numerical parameters, data structure 70 | layouts and accessors, or small macros, inline functions and templates 71 | (ten or fewer lines in length), you do both of the following: 72 | 73 | a) Give prominent notice with each copy of the object code that the 74 | Library is used in it and that the Library and its use are 75 | covered by this License. 76 | 77 | b) Accompany the object code with a copy of the GNU GPL and this license 78 | document. 79 | 80 | 4. Combined Works. 81 | 82 | You may convey a Combined Work under terms of your choice that, 83 | taken together, effectively do not restrict modification of the 84 | portions of the Library contained in the Combined Work and reverse 85 | engineering for debugging such modifications, if you also do each of 86 | the following: 87 | 88 | a) Give prominent notice with each copy of the Combined Work that 89 | the Library is used in it and that the Library and its use are 90 | covered by this License. 91 | 92 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 93 | document. 94 | 95 | c) For a Combined Work that displays copyright notices during 96 | execution, include the copyright notice for the Library among 97 | these notices, as well as a reference directing the user to the 98 | copies of the GNU GPL and this license document. 99 | 100 | d) Do one of the following: 101 | 102 | 0) Convey the Minimal Corresponding Source under the terms of this 103 | License, and the Corresponding Application Code in a form 104 | suitable for, and under terms that permit, the user to 105 | recombine or relink the Application with a modified version of 106 | the Linked Version to produce a modified Combined Work, in the 107 | manner specified by section 6 of the GNU GPL for conveying 108 | Corresponding Source. 
109 | 110 | 1) Use a suitable shared library mechanism for linking with the 111 | Library. A suitable mechanism is one that (a) uses at run time 112 | a copy of the Library already present on the user's computer 113 | system, and (b) will operate properly with a modified version 114 | of the Library that is interface-compatible with the Linked 115 | Version. 116 | 117 | e) Provide Installation Information, but only if you would otherwise 118 | be required to provide such information under section 6 of the 119 | GNU GPL, and only to the extent that such information is 120 | necessary to install and execute a modified version of the 121 | Combined Work produced by recombining or relinking the 122 | Application with a modified version of the Linked Version. (If 123 | you use option 4d0, the Installation Information must accompany 124 | the Minimal Corresponding Source and Corresponding Application 125 | Code. If you use option 4d1, you must provide the Installation 126 | Information in the manner specified by section 6 of the GNU GPL 127 | for conveying Corresponding Source.) 128 | 129 | 5. Combined Libraries. 130 | 131 | You may place library facilities that are a work based on the 132 | Library side by side in a single library together with other library 133 | facilities that are not Applications and are not covered by this 134 | License, and convey such a combined library under terms of your 135 | choice, if you do both of the following: 136 | 137 | a) Accompany the combined library with a copy of the same work based 138 | on the Library, uncombined with any other library facilities, 139 | conveyed under the terms of this License. 140 | 141 | b) Give prominent notice with the combined library that part of it 142 | is a work based on the Library, and explaining where to find the 143 | accompanying uncombined form of the same work. 144 | 145 | 6. Revised Versions of the GNU Lesser General Public License. 
146 | 147 | The Free Software Foundation may publish revised and/or new versions 148 | of the GNU Lesser General Public License from time to time. Such new 149 | versions will be similar in spirit to the present version, but may 150 | differ in detail to address new problems or concerns. 151 | 152 | Each version is given a distinguishing version number. If the 153 | Library as you received it specifies that a certain numbered version 154 | of the GNU Lesser General Public License "or any later version" 155 | applies to it, you have the option of following the terms and 156 | conditions either of that published version or of any later version 157 | published by the Free Software Foundation. If the Library as you 158 | received it does not specify a version number of the GNU Lesser 159 | General Public License, you may choose any version of the GNU Lesser 160 | General Public License ever published by the Free Software Foundation. 161 | 162 | If the Library as you received it specifies that a proxy can decide 163 | whether future versions of the GNU Lesser General Public License shall 164 | apply, that proxy's public statement of acceptance of any version is 165 | permanent authorization for you to choose that version for the 166 | Library. 167 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # zen3geo 2 | 3 | The 🌏 data science library you've been waiting for~ 4 | 5 | > 君の前前前世から僕は 君を探しはじめたよ 6 | > 7 | > Since your past life, I have been searching for you 8 | 9 | ## 公案 10 | 11 | ``` 12 | Geography is difficult, but easy it can also be 13 | Deep Learning, you hope, has an answer to all 14 | Too this, too that, where to though, where to? 
15 | Look out, sense within, and now you must know 16 | ``` 17 | 18 | ## Installation 19 | 20 | To install the development version from GitHub, do: 21 | 22 | pip install git+https://github.com/weiji14/zen3geo.git 23 | 24 | Or the stable version from [PyPI](https://pypi.org/project/zen3geo): 25 | 26 | pip install zen3geo 27 | 28 | If instead, [conda-forge](https://anaconda.org/conda-forge/zen3geo) you desire: 29 | 30 | mamba install --channel conda-forge zen3geo 31 | 32 | Other instructions, see https://zen3geo.readthedocs.io/en/latest/#installation 33 | -------------------------------------------------------------------------------- /docs/.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file for Sphinx projects 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the OS, Python version and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3.11" 12 | apt_packages: 13 | - graphviz 14 | jobs: 15 | pre_build: 16 | # Generate the Sphinx configuration for this Jupyter Book so it builds. 
17 | # https://jupyterbook.org/en/stable/publish/readthedocs.html 18 | - "jupyter-book config sphinx docs/" 19 | post_install: 20 | # Install stackstac=0.4.4 instead of 0.5.0 to prevent 21 | # TypeError: Unsupported data type float16 22 | # because stackstac casts to float16 at read-in instead of post-read 23 | # see https://github.com/gjoseph92/stackstac/pull/208 24 | # Need to wait for rasterio/GDAL to support float16 25 | # see https://gdal.org/api/raster_c_api.html#_CPPv412GDALDataType 26 | # Install dask<2024.3.0 to prevent 27 | # ModuleNotFoundError: No module named 'dask_expr' 28 | # ImportError: Dask dataframe requirements are not installed 29 | # https://github.com/holoviz/datashader/issues/1319 30 | - "pip install stackstac==0.4.4 dask==2024.2.1" 31 | 32 | # Optional but recommended, declare the Python requirements required 33 | # to build your documentation 34 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 35 | python: 36 | install: 37 | - method: pip 38 | path: . 39 | extra_requirements: 40 | - docs 41 | 42 | sphinx: 43 | builder: html 44 | fail_on_warning: true 45 | -------------------------------------------------------------------------------- /docs/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, caste, color, religion, or sexual 10 | identity and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the overall 26 | community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or advances of 31 | any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email address, 35 | without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 
55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | te6s3z67 at duck dot com. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series of 86 | actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or permanent 93 | ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 
99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within the 113 | community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.1, available at 119 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 120 | 121 | Community Impact Guidelines were inspired by 122 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 123 | 124 | For answers to common questions about this code of conduct, see the FAQ at 125 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 126 | [https://www.contributor-covenant.org/translations][translations]. 
127 | 128 | [homepage]: https://www.contributor-covenant.org 129 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html 130 | [Mozilla CoC]: https://github.com/mozilla/diversity 131 | [FAQ]: https://www.contributor-covenant.org/faq 132 | [translations]: https://www.contributor-covenant.org/translations 133 | -------------------------------------------------------------------------------- /docs/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | We accept different types of contributions, 4 | including some that don't require you to write a single line of code. 5 | 6 | ## 📝 Types of contributions 7 | 8 | ### Discussions 🎉 9 | 10 | Discussions are where we have conversations. 11 | 12 | If you have a great new idea, or want to share something amazing with the community, 13 | join us in [discussions](https://github.com/weiji14/zen3geo/discussions). 14 | 15 | ### Issues 🐞 16 | 17 | [Issues](https://docs.github.com/en/github/managing-your-work-on-github/about-issues) 18 | are used to track tasks that contributors can help with. 19 | 20 | If you've found something in the content or the website that should be updated, 21 | search open issues to see if someone else has reported the same thing. If it's 22 | something new, [open an issue](https://github.com/weiji14/zen3geo/issues/new/choose)! 23 | We'll use the issue to have a conversation about the problem you want to fix. 24 | 25 | ### Pull requests 🛠️ 26 | 27 | A [pull request](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) 28 | is a way to suggest changes in our repository. 29 | 30 | When we merge those changes, they should be deployed to the live site within a few minutes. 31 | To learn more about opening a pull request in this repo, 32 | see [Opening a pull request](#opening-a-pull-request) below. 
33 | 34 | ### Translations 🌏 35 | 36 | 人虽有南北之分,但佛性本无南北。 37 | 38 | Yes, the source content in this repository is mostly written in English, 39 | but we welcome folks from across the world! Please reach out if you have experience in translations and are interested in contributing! 40 | 41 | --- 42 | 43 | ## 👐 Opening a Pull Request 44 | 45 | 1. [Login](https://github.com/login) to your GitHub account, 46 | or sign up for a new one at https://github.com/signup. 47 | 48 | 2. Navigate to the file you want to modify, e.g. the 49 | [API docs file](https://github.com/weiji14/zen3geo/blob/main/docs/api.md). 50 | 51 | 3. Click on the pen 🖊️ icon on the top right corner that says "Edit this file" 52 | 53 | 4. This should bring you to a page similar to 54 | https://github.com/weiji14/zen3geo/edit/main/docs/api.md 55 | where you can make edits to the text using a web-based editor. 56 | Feel free to switch between the "Edit file" and "Preview changes" tabs as 57 | you modify the content to make sure things look ok. 58 | 59 | 5. Once you're happy with your changes, scroll down to the bottom where it says 60 | **Commit changes**. This is where you will add a short summary of the 61 | changes you have made. 62 | 63 | ![The place to commit changes](https://user-images.githubusercontent.com/23487320/172029885-947e4e24-675a-4498-a2d8-f1fa4c26b934.png) 64 | 65 | Specifically, in the first box, you will need to give a short title (e.g. 66 | "Fixed typo in api.md file") that describes the changes you've made. 67 | Optionally, you can write a few extra sentences in the second box to explain 68 | things in more detail. 69 | 70 | 6. Select the "Create a new branch for this commit and start a pull request" 71 | option and provide a new branch name (e.g. "fix-api-typo"). 
What this 72 | does is to ensure your changes are made in an independent manner or 'branch' 73 | away from the main trunk, and those changes will have the opportunity to be 74 | double checked and openly reviewed by other people. 75 | 76 | 7. Click on the green 'Propose changes' button. This will bring you to a new 77 | page. 78 | 79 | 8. Almost there! This "Open a pull request" page is where you can finalize 80 | things for the 'pull request' (a request to make changes) you will be 81 | opening soon. Again you will need to provide a title (e.g. 'Minor changes to 82 | the API markdown file') and a description. 83 | 84 | ![Pull request dialog page](https://user-images.githubusercontent.com/23487320/172030066-63dbdaa3-c7d4-403f-a3b6-5bccd966d038.png) 85 | 86 | Be sure to provide any context on **why** you are making the changes, and 87 | **how** you are doing so. This will make it easier for other people to 88 | know what is happening when they review your changes. 89 | 90 | 9. Ready? Click on the green 'Create pull request' button! This will make your 91 | changes available for everyone to see and review publicly. The maintainers 92 | will be notified about your great new addition and will get back to you on 93 | the next steps. 94 | 95 | --- 96 | 97 | (contributing:running:locally)= 98 | ## 🏠 Running things locally 99 | 100 | This project uses [``poetry``](https://python-poetry.org/docs/master/) for 101 | installing Python dependencies required in ``zen3geo``, as well as the 102 | development and documentation-related dependencies. 
103 | 104 | ### Cloning the repository ♊ 105 | 106 | ``` 107 | git clone git@github.com:weiji14/zen3geo.git 108 | cd zen3geo 109 | ``` 110 | 111 | ### Setup virtual environment ☁️ 112 | 113 | ``` 114 | mamba create --name zen3geo python=3.11 115 | mamba activate zen3geo 116 | 117 | pip install poetry==1.6.1 118 | poetry install --extras "raster spatial stac vector" 119 | ``` 120 | 121 | ### Building documentation 📖 122 | 123 | ``` 124 | poetry install --extras=docs # or `pip install .[docs]` 125 | sudo apt install graphviz # if rendering graphviz plots 126 | jupyter-book build docs/ 127 | ``` 128 | 129 | Then open ``docs/_build/html/index.html`` in your browser to see the docs. 130 | 131 | --- 132 | 133 | ## 🥳 And that's it! 134 | 135 | You're now part of the zen3geo community ✨ 136 | 137 | ```{admonition} Credits 138 | :class: seealso 139 | *This contributing guide was adapted from* 140 | [GitHub docs](https://github.com/github/docs/blob/main/contributing/types-of-contributions.md) 141 | and the [APECS-Earth-Observation/Polar-EO-Database](https://github.com/APECS-Earth-Observation/Polar-EO-Database/blob/main/CONTRIBUTING.md) project. 142 | ``` 143 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | # Book settings 2 | # Learn more at https://jupyterbook.org/customize/config.html 3 | 4 | title: zen3geo 5 | author: The zen3geo Team 6 | 7 | # Cache execution outputs of notebooks on each build. 
8 | # See https://jupyterbook.org/content/execute.html 9 | execute: 10 | execute_notebooks: cache 11 | # https://jupyterbook.org/en/latest/content/execute.html#setting-execution-timeout 12 | timeout: 300 13 | 14 | # Define the name of the latex output file for PDF builds 15 | latex: 16 | latex_documents: 17 | targetname: zen3geo.tex 18 | 19 | # Information about where the book exists on the web 20 | repository: 21 | url: https://github.com/weiji14/zen3geo # Online location of your book 22 | path_to_book: docs # Optional path to your book, relative to the repository root 23 | branch: main # Which branch of the repository should be used when creating links (optional) 24 | 25 | # Add GitHub buttons to your book 26 | # See https://jupyterbook.org/customize/config.html#add-a-link-to-your-repository 27 | html: 28 | use_edit_page_button: true 29 | use_issues_button: true 30 | use_repository_button: true 31 | 32 | sphinx: 33 | config: 34 | autodoc_typehints: 'description' 35 | myst_all_links_external: true 36 | nb_execution_show_tb: true 37 | html_show_copyright: false 38 | html_theme_options: 39 | # https://sphinx-book-theme.readthedocs.io/en/stable/customize/sidebar-secondary.html 40 | show_toc_level: 3 41 | intersphinx_mapping: 42 | contextily: 43 | - 'https://contextily.readthedocs.io/en/latest/' 44 | - null 45 | dask: 46 | - 'https://docs.dask.org/en/latest/' 47 | - null 48 | datashader: 49 | - 'https://datashader.org/' 50 | - null 51 | datatree: 52 | - 'https://xarray-datatree.readthedocs.io/en/latest/' 53 | - null 54 | geopandas: 55 | - 'https://geopandas.org/en/latest/' 56 | - null 57 | mmdetection: 58 | - 'https://mmdetection.readthedocs.io/zh_CN/latest/' 59 | - null 60 | numpy: 61 | - 'https://numpy.org/doc/stable/' 62 | - null 63 | pyogrio: 64 | - 'https://pyogrio.readthedocs.io/en/latest/' 65 | - null 66 | pystac: 67 | - 'https://pystac.readthedocs.io/en/latest/' 68 | - null 69 | pystac_client: 70 | - 'https://pystac-client.readthedocs.io/en/latest/' 71 | - 
null 72 | python: 73 | - 'https://docs.python.org/3/' 74 | - null 75 | rasterio: 76 | - 'https://rasterio.readthedocs.io/en/stable/' 77 | - null 78 | rioxarray: 79 | - 'https://corteva.github.io/rioxarray/stable/' 80 | - null 81 | shapely: 82 | - 'https://shapely.readthedocs.io/en/latest/' 83 | - null 84 | stackstac: 85 | - 'https://stackstac.readthedocs.io/en/latest/' 86 | - null 87 | torch: 88 | - 'https://pytorch.org/docs/stable/' 89 | - null 90 | torchdata: 91 | - 'https://pytorch.org/data/main/' 92 | - null 93 | torchvision: 94 | - 'https://pytorch.org/vision/main/' 95 | - null 96 | xarray: 97 | - 'https://docs.xarray.dev/en/stable/' 98 | - null 99 | xbatcher: 100 | - 'https://xbatcher.readthedocs.io/en/latest/' 101 | - null 102 | zarr: 103 | - 'https://zarr.readthedocs.io/en/latest/' 104 | - null 105 | extra_extensions: 106 | - 'sphinx.ext.autodoc' 107 | - 'sphinx.ext.intersphinx' 108 | - 'sphinx.ext.napoleon' 109 | - 'sphinx.ext.viewcode' 110 | -------------------------------------------------------------------------------- /docs/_toc.yml: -------------------------------------------------------------------------------- 1 | # Table of contents 2 | # Learn more at https://jupyterbook.org/customize/toc.html 3 | 4 | format: jb-book 5 | root: index 6 | chapters: 7 | - title: 🦮 Walkthrough 8 | file: walkthrough 9 | sections: 10 | - title: 🀄 Chipping and Batching 11 | file: chipping 12 | - title: 🫧 Vector Segmentation Masks 13 | file: vector-segmentation-masks 14 | - title: 🥡 Object Detection Boxes 15 | file: object-detection-boxes 16 | - title: 🏳️‍🌈 Stacking layers 17 | file: stacking 18 | - title: 📶 Multi-resolution 19 | file: multi-resolution 20 | - title: 📖 API Reference 21 | file: api 22 | - title: 📆 Changelog 23 | file: changelog 24 | - title: 🫶 Code of Conduct 25 | file: CODE_OF_CONDUCT 26 | - title: 🧑‍🤝‍🧑 Contributing 27 | file: CONTRIBUTING 28 | -------------------------------------------------------------------------------- /docs/api.md: 
-------------------------------------------------------------------------------- 1 | # API Reference 2 | 3 | ## DataPipes 4 | 5 | ```{eval-rst} 6 | .. automodule:: zen3geo.datapipes 7 | :members: 8 | ``` 9 | 10 | ### Datashader 11 | 12 | ```{eval-rst} 13 | .. automodule:: zen3geo.datapipes.datashader 14 | .. autoclass:: zen3geo.datapipes.DatashaderRasterizer 15 | .. autoclass:: zen3geo.datapipes.datashader.DatashaderRasterizerIterDataPipe 16 | .. autoclass:: zen3geo.datapipes.XarrayCanvas 17 | .. autoclass:: zen3geo.datapipes.datashader.XarrayCanvasIterDataPipe 18 | :show-inheritance: 19 | ``` 20 | 21 | ### Geopandas 22 | 23 | ```{eval-rst} 24 | .. automodule:: zen3geo.datapipes.geopandas 25 | .. autoclass:: zen3geo.datapipes.GeoPandasRectangleClipper 26 | .. autoclass:: zen3geo.datapipes.geopandas.GeoPandasRectangleClipperIterDataPipe 27 | :show-inheritance: 28 | ``` 29 | 30 | ### Pyogrio 31 | 32 | ```{eval-rst} 33 | .. automodule:: zen3geo.datapipes.pyogrio 34 | .. autoclass:: zen3geo.datapipes.PyogrioReader 35 | .. autoclass:: zen3geo.datapipes.pyogrio.PyogrioReaderIterDataPipe 36 | :show-inheritance: 37 | ``` 38 | 39 | ### PySTAC 40 | 41 | ```{eval-rst} 42 | .. automodule:: zen3geo.datapipes.pystac 43 | .. autoclass:: zen3geo.datapipes.PySTACItemReader 44 | .. autoclass:: zen3geo.datapipes.pystac.PySTACItemReaderIterDataPipe 45 | :show-inheritance: 46 | ``` 47 | 48 | ### PySTAC Client 49 | 50 | ```{eval-rst} 51 | .. automodule:: zen3geo.datapipes.pystac_client 52 | .. autoclass:: zen3geo.datapipes.PySTACAPISearcher 53 | .. autoclass:: zen3geo.datapipes.pystac_client.PySTACAPISearcherIterDataPipe 54 | .. autoclass:: zen3geo.datapipes.PySTACAPIItemLister 55 | .. autoclass:: zen3geo.datapipes.pystac_client.PySTACAPIItemListerIterDataPipe 56 | :show-inheritance: 57 | ``` 58 | 59 | ### Rioxarray 60 | 61 | ```{eval-rst} 62 | .. automodule:: zen3geo.datapipes.rioxarray 63 | .. autoclass:: zen3geo.datapipes.RioXarrayReader 64 | .. 
autoclass:: zen3geo.datapipes.rioxarray.RioXarrayReaderIterDataPipe 65 | :show-inheritance: 66 | ``` 67 | 68 | ### Stackstac 69 | 70 | ```{eval-rst} 71 | .. automodule:: zen3geo.datapipes.stackstac 72 | .. autoclass:: zen3geo.datapipes.StackSTACMosaicker 73 | .. autoclass:: zen3geo.datapipes.stackstac.StackSTACMosaickerIterDataPipe 74 | .. autoclass:: zen3geo.datapipes.StackSTACStacker 75 | .. autoclass:: zen3geo.datapipes.stackstac.StackSTACStackerIterDataPipe 76 | :show-inheritance: 77 | ``` 78 | 79 | ### Xbatcher 80 | 81 | ```{eval-rst} 82 | .. automodule:: zen3geo.datapipes.xbatcher 83 | .. autoclass:: zen3geo.datapipes.XbatcherSlicer 84 | .. autoclass:: zen3geo.datapipes.xbatcher.XbatcherSlicerIterDataPipe 85 | :show-inheritance: 86 | ``` 87 | 88 | ### XpySTAC 89 | 90 | ```{eval-rst} 91 | .. automodule:: zen3geo.datapipes.xpystac 92 | .. autoclass:: zen3geo.datapipes.XpySTACAssetReader 93 | .. autoclass:: zen3geo.datapipes.xpystac.XpySTACAssetReaderIterDataPipe 94 | :show-inheritance: 95 | ``` 96 | -------------------------------------------------------------------------------- /docs/changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## Release v0.6.2 (2023/06/29) 4 | 5 | ### 💫 Highlights 6 | 7 | * 🎉 **Patch release for zen3geo** 🎉 8 | * 🚀 Quick addition of a new PySTACAPIItemLister DataPipe 9 | 10 | ### 🚀 Features 11 | 12 | * ✨ PySTACAPIItemLister to list STAC Items matching STAC API search ([#111](https://github.com/weiji14/zen3geo/pull/111)) 13 | 14 | ### 🧰 Maintenance 15 | 16 | * ⬆️ Bump poetry from 1.4.2 to 1.5.1 ([#110](https://github.com/weiji14/zen3geo/pull/110)) 17 | 18 | ### 🧑‍🤝‍🧑 Contributors 19 | 20 | [@dependabot[bot]](https://github.com/dependabot-bot) and [@weiji14](https://github.com/weiji14) 21 | 22 | --- 23 | 24 | ## Release v0.6.1 (2023/05/31) 25 | 26 | ### 💫 Highlights 27 | 28 | * 🎉 **Patch release for zen3geo** 🎉 29 | * 😎 Full Python 3.11 support and a couple of bug fixes 
for DatashaderRasterizer 30 | 31 | ### 🚀 Features 32 | 33 | * 🥚 Allow using XpySTACAssetReader without xpystac when engine!=stac ([#100](https://github.com/weiji14/zen3geo/pull/100)) 34 | 35 | ### 🐛 Bug Fixes 36 | 37 | * 🐛 Fix DatashaderRasterizer for GeoDataFrame wrapped in StreamWrapper ([#104](https://github.com/weiji14/zen3geo/pull/104)) 38 | * 🐛 Fix DatashaderRasterizer to allow N:1 instead of just 1:1 ([#98](https://github.com/weiji14/zen3geo/pull/98)) 39 | 40 | ### 📖 Documentation 41 | 42 | * 👽️ Handle ms-buildings 20230425 update in Object Detection tutorial ([#106](https://github.com/weiji14/zen3geo/pull/106)) 43 | 44 | ### 🧰 Maintenance 45 | 46 | * 👷 NEP29: Run CI and Docs build on Python 3.11 ([#103](https://github.com/weiji14/zen3geo/pull/103)) 47 | * ⬆️ Bump poetry from 1.3.0 to 1.4.2 ([#99](https://github.com/weiji14/zen3geo/pull/99)) 48 | 49 | ### 🧑‍🤝‍🧑 Contributors 50 | 51 | [@dependabot[bot]](https://github.com/dependabot-bot) and [@weiji14](https://github.com/weiji14) 52 | 53 | --- 54 | 55 | ## Release v0.6.0 (2023/04/18) 56 | 57 | ### 💫 Highlights 58 | 59 | * 🎉 **Sixth release of zen3geo** 🎉 60 | * 🚸 Walkthrough on handling multi-resolution climate data ([#91](https://github.com/weiji14/zen3geo/pull/91)) 61 | 62 | ### 🚀 Features 63 | 64 | * ✨ XpySTACAssetReader for reading COG, NetCDF & Zarr STAC assets ([#87](https://github.com/weiji14/zen3geo/pull/87)) 65 | * ✨ Implement len function for XbatcherSlicerIterDataPipe ([#75](https://github.com/weiji14/zen3geo/pull/75)) 66 | 67 | ### 📖 Documentation 68 | 69 | * ♻️ Use xarray.merge with join="override" in collate functions ([#72](https://github.com/weiji14/zen3geo/pull/72)) 70 | 71 | ### 🧰 Maintenance 72 | 73 | * ⬆️ Bump jupyter-book from 0.14.0 to 0.15.1 ([#94](https://github.com/weiji14/zen3geo/pull/94)) 74 | * 📦️ Publish to TestPyPI and PyPI via OpenID Connect token ([#90](https://github.com/weiji14/zen3geo/pull/90)) 75 | * 👷 NEP29: Run Continuous Integration on Python 3.11 
([#89](https://github.com/weiji14/zen3geo/pull/89)) 76 | * ⬆️ Bump jupyter-book from 0.13.0 to 0.14.0 ([#85](https://github.com/weiji14/zen3geo/pull/85)) 77 | * 📌 Pin maximum python version to <4.0 ([#78](https://github.com/weiji14/zen3geo/pull/78)) 78 | * ⬆️ Bump poetry from 1.2.0 to 1.3.0 ([#77](https://github.com/weiji14/zen3geo/pull/77)) 79 | * 📌 Pin minimum xbatcher version to 0.2.0 ([#73](https://github.com/weiji14/zen3geo/pull/73)) 80 | 81 | ### 🧑‍🤝‍🧑 Contributors 82 | 83 | [@dependabot[bot]](https://github.com/dependabot-bot) and [@weiji14](https://github.com/weiji14) 84 | 85 | --- 86 | 87 | ## Release v0.5.0 (2022/09/26) 88 | 89 | ### 💫 Highlights 90 | 91 | * 🎉 **Fifth release of zen3geo** 🎉 92 | * 🚸 Walkthrough on stacking time-series earth observation data ([#62](https://github.com/weiji14/zen3geo/pull/62)) 93 | 94 | ### 🚀 Features 95 | 96 | * ✨ StackSTACMosaicIterDataPipe to mosaic tiles into one piece ([#63](https://github.com/weiji14/zen3geo/pull/63)) 97 | * ✨ StackSTACStackerIterDataPipe for stacking STAC items ([#61](https://github.com/weiji14/zen3geo/pull/61)) 98 | * ✨ PySTACAPISearchIterDataPipe to query dynamic STAC Catalogs ([#59](https://github.com/weiji14/zen3geo/pull/59)) 99 | * ✨ PySTACItemReaderIterDataPipe for reading STAC Items ([#46](https://github.com/weiji14/zen3geo/pull/46)) 100 | 101 | ### 📖 Documentation 102 | 103 | * 🚚 Rename to PySTACAPISearcher and StackSTACMosaicker ([#64](https://github.com/weiji14/zen3geo/pull/64)) 104 | 105 | ### 🧰 Maintenance 106 | 107 | * 📌 Pin min pystac-client and stackstac to v0.4.0, pystac to 1.4.0 ([#66](https://github.com/weiji14/zen3geo/pull/66)) 108 | * 📦️ Exclude tests from source distribution and binary wheel ([#58](https://github.com/weiji14/zen3geo/pull/58)) 109 | 110 | ### 🧑‍🤝‍🧑 Contributors 111 | 112 | [@dependabot[bot]](https://github.com/dependabot-bot) and [@weiji14](https://github.com/weiji14) 113 | 114 | --- 115 | 116 | ## Release v0.4.0 (2022/09/08) 117 | 118 | ### 💫 Highlights 119 | 120 
| * 🎉 **Fourth release of zen3geo** 🎉 121 | * 🚸 Walkthrough on object detection with bounding boxes ([#49](https://github.com/weiji14/zen3geo/pull/49)) 122 | 123 | ### 🚀 Features 124 | 125 | * ✨ GeoPandasRectangleClipper for spatially subsetting vectors ([#52](https://github.com/weiji14/zen3geo/pull/52)) 126 | 127 | ### 📖 Documentation 128 | 129 | * 📝 Add install from conda-forge instructions ([#55](https://github.com/weiji14/zen3geo/pull/55)) 130 | * ✏️ Edit docs to use OGC:CRS84 lon/lat instead of EPSG:4326 ([#45](https://github.com/weiji14/zen3geo/pull/45)) 131 | * 💡 Warn about overlapping strides if followed by train/val split ([#43](https://github.com/weiji14/zen3geo/pull/43)) 132 | 133 | ### 🧰 Maintenance 134 | 135 | * ⬆️ Bump poetry from 1.2.0rc1 to 1.2.0 ([#47](https://github.com/weiji14/zen3geo/pull/47)) 136 | * ⬆️ Bump poetry from 1.2.0b3 to 1.2.0rc1 ([#44](https://github.com/weiji14/zen3geo/pull/44)) 137 | 138 | ### 🧑‍🤝‍🧑 Contributors 139 | 140 | [@dependabot[bot]](https://github.com/dependabot-bot) and [@weiji14](https://github.com/weiji14) 141 | 142 | --- 143 | 144 | ## Release v0.3.0 (2022/08/19) 145 | 146 | ### 💫 Highlights 147 | 148 | * 🎉 **Third release of zen3geo** 🎉 149 | * 🚸 Walkthrough on rasterizing vector polygons into label masks ([#31](https://github.com/weiji14/zen3geo/pull/31)) 150 | 151 | ### 🚀 Features 152 | 153 | * ✨ DatashaderRasterizer for burning vector shapes to xarray grids ([#35](https://github.com/weiji14/zen3geo/pull/35)) 154 | * ✨ XarrayCanvasIterDataPipe for creating blank datashader canvas ([#34](https://github.com/weiji14/zen3geo/pull/34)) 155 | * ♻️ Let PyogrioReader return geodataframe only instead of tuple ([#33](https://github.com/weiji14/zen3geo/pull/33)) 156 | 157 | ### 🐛 Bug Fixes 158 | 159 | * ♻️ Refactor DatashaderRasterizer to be up front about datapipe lengths ([#39](https://github.com/weiji14/zen3geo/pull/39)) 160 | * 🩹 Raise ModuleNotFoundError when xbatcher not installed 
([#37](https://github.com/weiji14/zen3geo/pull/37)) 161 | 162 | ### 📖 Documentation 163 | 164 | * 📝 Improve pip install zen3geo instructions with extras dependencies ([#40](https://github.com/weiji14/zen3geo/pull/40)) 165 | * 🔍 Show more levels for the in-page table of contents ([#36](https://github.com/weiji14/zen3geo/pull/36)) 166 | 167 | ### 🧑‍🤝‍🧑 Contributors 168 | 169 | [@weiji14](https://github.com/weiji14) 170 | 171 | --- 172 | 173 | ## Release v0.2.0 (2022/07/17) 174 | 175 | ### 💫 Highlights 176 | 177 | * 🎉 **Second release of zen3geo** 🎉 178 | * 🚸 Walkthrough on creating batches of data chips ([#20](https://github.com/weiji14/zen3geo/pull/20)) 179 | 180 | ### 🚀 Features 181 | 182 | * ♻️ Let RioXarrayReader return dataarray only instead of tuple ([#24](https://github.com/weiji14/zen3geo/pull/24)) 183 | * ✨ XbatcherSlicerIterDataPipe for slicing xarray.DataArray ([#22](https://github.com/weiji14/zen3geo/pull/22)) 184 | * ✨ PyogrioReaderIterDataPipe for reading vector OGR files ([#19](https://github.com/weiji14/zen3geo/pull/19)) 185 | 186 | ### 📖 Documentation 187 | 188 | * 🎨 Extra subsection for rioxarray datapipes ([#18](https://github.com/weiji14/zen3geo/pull/18)) 189 | 190 | ### 🧰 Maintenance 191 | 192 | * 👷 NEP29: Run CI and Docs build on Python 3.10 ([#29](https://github.com/weiji14/zen3geo/pull/29)) 193 | * ⬆️ Bump poetry from 1.2.0b2 to 1.2.0b3 ([#28](https://github.com/weiji14/zen3geo/pull/28)) 194 | * 📌 Pin minimum torchdata version to 0.4.0 ([#25](https://github.com/weiji14/zen3geo/pull/25)) 195 | * 📌 Pin minimum pyogrio version to 0.4.0 ([#21](https://github.com/weiji14/zen3geo/pull/21)) 196 | 197 | ### 🧑‍🤝‍🧑 Contributors 198 | 199 | [@weiji14](https://github.com/weiji14) 200 | 201 | --- 202 | 203 | ## Release v0.1.0 (2022/06/08) 204 | 205 | ### 💫 Highlights 206 | 207 | * 🎉 **First release of zen3geo** 🎉 208 | * 🚸 Walkthrough on using RioXarray IterDataPipes at https://zen3geo.readthedocs.io/en/latest/walkthrough.html 
([#8](https://github.com/weiji14/zen3geo/pull/8)) 209 | 210 | ### 🚀 Features 211 | 212 | * ✨ Introducing RioXarrayReaderIterDataPipe for reading GeoTIFFs ([#6](https://github.com/weiji14/zen3geo/pull/6)) 213 | 214 | ### 📖 Documentation 215 | 216 | * 🔧 Configure readthedocs documentation build ([#13](https://github.com/weiji14/zen3geo/pull/13)) 217 | * 💬 Show how to convert xarray.DataArray to torch.Tensor ([#9](https://github.com/weiji14/zen3geo/pull/9)) 218 | * 📝 Add basic installation instructions ([#7](https://github.com/weiji14/zen3geo/pull/7)) 219 | * 👥 Healthy community standards ([#4](https://github.com/weiji14/zen3geo/pull/4)) 220 | 221 | ### 🧰 Maintenance 222 | 223 | * 📦 Publish to TestPyPI and PyPI using GitHub Actions ([#14](https://github.com/weiji14/zen3geo/pull/14)) 224 | * 🧑‍💻 Draft changelog with Release Drafter GitHub Actions ([#11](https://github.com/weiji14/zen3geo/pull/11)) 225 | * 👷 Setup GitHub Actions Continuous Integration tests ([#2](https://github.com/weiji14/zen3geo/pull/2)) 226 | * 🌱 Initialize pyproject.toml file ([#1](https://github.com/weiji14/zen3geo/pull/1)) 227 | 228 | ### 🧑‍🤝‍🧑 Contributors 229 | 230 | [@weiji14](https://github.com/weiji14) 231 | -------------------------------------------------------------------------------- /docs/chipping.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | formats: md:myst 4 | text_representation: 5 | extension: .md 6 | format_name: myst 7 | kernelspec: 8 | display_name: Python 3 9 | language: python 10 | name: python3 11 | --- 12 | 13 | # Chipping and batching data 14 | 15 | > What is separation? 16 | > 17 | > What isn't? 18 | 19 | Following on from the previous tutorial, 20 | let's 🧑‍🎓 learn more about creating a more complicated 🌈 raster data pipeline. 
21 | Specifically, we'll go through the following: 22 | - Loading Cloud-Optimized GeoTIFFs (COGs) from different geographic regions 🌏 23 | - Cut up each large GeoTIFF into several 512 x 512 pixel chips 🥨 24 | - Create batches of chips/tensors to feed into a DataLoader 🏋️ 25 | 26 | Some terminology 📜 disambiguation: 27 | - scene - the big image (e.g. 10000x10000 pixels) from a satellite 🛰️ (e.g. a GeoTIFF) 28 | - chip - the small image (e.g. 512x512 pixels) cut ✂️ out from a satellite scene to be loaded as a tensor 29 | 30 | See also: 31 | - https://github.com/microsoft/torchgeo/wiki/Design-Decisions#chip-vs-tile-vs-region 32 | - https://github.com/cogeotiff/cog-spec/blob/master/spec.md 33 | 34 | ## 🎉 **Getting started** 35 | 36 | Load up them libraries! 37 | 38 | ```{code-cell} 39 | import pystac 40 | import planetary_computer 41 | import rioxarray 42 | 43 | import torch 44 | import torchdata 45 | import zen3geo 46 | ``` 47 | 48 | ## 0️⃣ Find [Cloud-Optimized GeoTIFFs](https://www.cogeo.org) ☁️ 49 | 50 | Synthetic-Aperture Radar (SAR) from a [STAC](https://stacspec.org) catalog! 51 | We'll get some Sentinel-1 Ground-Range Detected (GRD) data over Osaka and Tokyo 52 | in Japan 🇯🇵. 
53 | 54 | 🔗 Links: 55 | - [Official Sentinel-1 description page at ESA](https://sentinel.esa.int/web/sentinel/missions/sentinel-1) 56 | - [Microsoft Planetary Computer STAC Explorer](https://planetarycomputer.microsoft.com/explore?c=137.4907%2C35.0014&z=7.94&v=2&d=sentinel-1-grd&s=false%3A%3A100%3A%3Atrue&ae=0&m=cql%3A08211c0dd907a5066c41422c75629d5f&r=VV%2C+VH+False-color+composite) 57 | - [AWS Sentinel-1 Cloud-Optimized GeoTIFFs](https://registry.opendata.aws/sentinel-1) 58 | 59 | 60 | ```{code-cell} 61 | item_urls = [ 62 | # Osaka 63 | "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd/items/S1A_IW_GRDH_1SDV_20220614T210034_20220614T210059_043664_05368A", 64 | # Tokyo 65 | "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd/items/S1A_IW_GRDH_1SDV_20220616T204349_20220616T204414_043693_053764", 66 | ] 67 | 68 | # Load each STAC item's metadata and sign the assets 69 | items = [pystac.Item.from_file(item_url) for item_url in item_urls] 70 | signed_items = [planetary_computer.sign(item) for item in items] 71 | signed_items 72 | ``` 73 | 74 | ### Inspect one of the data assets 🍱 75 | 76 | The Sentinel-1 STAC item contains several assets. 77 | These include different 〰️ polarizations (e.g. 'VH', 'VV'). 78 | Let's just take the 'thumbnail' product for now which is an RGB preview, with 79 | the red 🟥 channel (R) representing the co-polarization (VV or HH), the green 80 | 🟩 channel (G) representing the cross-polarization (VH or HV) and the blue 🟦 81 | channel (B) representing the ratio of the cross and co-polarizations. 82 | 83 | ```{code-cell} 84 | url: str = signed_items[0].assets["thumbnail"].href 85 | da = rioxarray.open_rasterio(filename=url) 86 | da 87 | ``` 88 | 89 | This is how the Sentinel-1 radar image looks over Osaka on 14 June 2022.
90 | 91 | ![Sentinel-1 GRD image over Osaka, Japan on 20220614](https://planetarycomputer.microsoft.com/api/data/v1/item/preview.png?collection=sentinel-1-grd&item=S1A_IW_GRDH_1SDV_20220614T210034_20220614T210059_043664_05368A&assets=vv&assets=vh&expression=vv%3Bvh%3Bvv%2Fvh&rescale=0%2C600&rescale=0%2C270&rescale=0%2C9&asset_as_band=True&tile_format=png&format=png) 92 | 93 | ## 1️⃣ Creating 512x512 chips from large satellite scenes 🪟 94 | 95 | Unless you have a lot of RAM, it is common to cut ✂️ a large satellite scene 96 | into multiple smaller chips (or patches, tiles 🀄, etc) first. 97 | This is typically done in a rolling or sliding window 🪟 fashion, 98 | via a nested loop through the y-dimension and x-dimension in strides of say, 99 | 512 pixels x 512 pixels. 100 | 101 | Let's begin by setting up the first part of the DataPipe, 102 | which is to read the satellite scene 🖼️ using `rioxarray`. 103 | 104 | ```{code-cell} 105 | # Just get the VV polarization for now from Sentinel-1 106 | urls = [item.assets["vv"].href for item in signed_items] 107 | dp = torchdata.datapipes.iter.IterableWrapper(iterable=urls) 108 | dp_rioxarray = dp.read_from_rioxarray(overview_level=3) 109 | dp_rioxarray 110 | ``` 111 | 112 | ### Slicing with XbatcherSlicer 🍕 113 | 114 | To create the chips, we'll be using ``xbatcher`` which allows slicing 🔪 of an 115 | n-dimensional datacube along any dimension (e.g. longitude, latitude, time 🕛). 116 | This ``xbatcher`` library is integrated into ☯ ``zen3geo`` as a DataPipe called 117 | {py:class}`zen3geo.datapipes.XbatcherSlicer` (functional name: 118 | `slice_with_xbatcher`), which can be used as follows: 119 | 120 | ```{code-cell} 121 | dp_xbatcher = dp_rioxarray.slice_with_xbatcher(input_dims={"y": 512, "x": 512}) 122 | dp_xbatcher 123 | ``` 124 | 125 | This should give us about 12 chips in total, 6 from each of the 2 Sentinel-1 126 | images that were passed in. 
127 | 128 | ```{code-cell} 129 | print(f"Number of chips: {len(dp_xbatcher)}") 130 | ``` 131 | 132 | Now, if you want to customize the sliding window (e.g. do overlapping strides), 133 | pass in extra parameters to ``slice_with_xbatcher``, and it will be handled by 134 | {py:class}`xbatcher.BatchGenerator`. 135 | 136 | ```{code-cell} 137 | dp_xbatcher = dp_rioxarray.slice_with_xbatcher( 138 | input_dims={"y": 512, "x": 512}, input_overlap={"y": 256, "x": 256} 139 | ) 140 | dp_xbatcher 141 | ``` 142 | 143 | Great, and this overlapping stride method should give us more 512x512 chips 🧮 144 | than before. 145 | 146 | ```{code-cell} 147 | print(f"Number of chips: {len(dp_xbatcher)}") 148 | ``` 149 | 150 | Double-check that single chips are of the correct dimensions 151 | (band: 1, y: 512, x: 512). 152 | 153 | ```{code-cell} 154 | chips = list(dp_xbatcher) 155 | sample = chips[0] 156 | sample 157 | ``` 158 | 159 | ```{danger} 160 | Please do not use overlapping strides (i.e. `input_overlap` < `input_dim`) if 161 | you will be 🪓 splitting your chips into training, validation and test sets 162 | later! If you have say 60 overlapping chips and then go on to divide those 🍪 163 | chips randomly into train/val/test sets of 30/20/10, you will have information 164 | leakage 🚰 between the 30 training chips and 20 validation plus 10 test chips, 165 | so your model's reported validation and test metrics 📈 will be overestimating 166 | the actual performance 😲! 167 | 168 | Ideally, your train/val/test chips should be situated independently within 169 | spatially contiguous blocks 🧱. See these links for more information on why: 170 | 171 | - Kattenborn, T., Schiefer, F., Frey, J., Feilhauer, H., Mahecha, M. D., & 172 | Dormann, C. F. (2022). Spatially autocorrelated training and validation 173 | samples inflate performance assessment of convolutional neural networks. 174 | ISPRS Open Journal of Photogrammetry and Remote Sensing, 5, 100018. 
175 | https://doi.org/10.1016/j.ophoto.2022.100018 176 | - https://github.com/pangeo-data/xbatcher/discussions/78#discussioncomment-3387295 177 | 178 | Yes, spatial statistics 🧮 matter, geography is special 🤓. 179 | ``` 180 | 181 | 182 | ## 2️⃣ Pool chips into mini-batches ⚙️ 183 | 184 | In total, we now have a set of 30 🍪 chips of size 512 x 512 pixels each. 185 | These chips can be divided into batches that are of a reasonable size. 186 | Let's use {py:class}`torchdata.datapipes.iter.Batcher` 187 | (functional name: `batch`) to do so. 188 | 189 | ```{code-cell} 190 | dp_batch = dp_xbatcher.batch(batch_size=10) 191 | print(f"Number of items in first batch: {len(list(dp_batch)[0])}") 192 | ``` 193 | 194 | Now each batch will have 10 chips of size 512 x 512, with 195 | each chip being an {py:class}``xarray.DataArray``. 196 | 197 | ```{note} 198 | Notice how neither mosaicking nor reprojection was done for the two satellite 199 | scenes. This is the beauty of zen3geo - full flexibility of combining 200 | geospatial datasets 😎. Respect the native coordinate system and let the data 201 | flow directly into your models! 202 | 203 | Oh, and to be super clear, of the 3 batches of 10 chips each: 204 | - The first batch has 10 chips from the 1st satellite scene over Osaka 205 | - The second batch has 5 chips over Osaka, and 5 chips over Tokyo 206 | - The third batch has 10 chips from the 2nd satellite scene over Tokyo 207 | ``` 208 | 209 | ### Stack many chips in mini-batches into a single tensor 🥞 210 | 211 | Let's now stack all these chips into a single tensor per batch, with a 212 | (number, channel, height, width) shape like (10, 1, 512, 512). We'll need a 213 | custom 🪄 collate function to do the conversion 214 | (from {py:class}``xarray.DataArray`` to {py:class}``torch.Tensor``) and 215 | stacking.
216 | 217 | ```{code-cell} 218 | def xr_collate_fn(samples) -> torch.Tensor: 219 | """ 220 | Converts individual xarray.DataArray objects to a torch.Tensor (int16 221 | dtype), and stacks them all into a single torch.Tensor. 222 | """ 223 | tensors = [ 224 | torch.as_tensor(data=sample.data.astype(dtype="int16")) for sample in samples 225 | ] 226 | return torch.stack(tensors=tensors) 227 | ``` 228 | 229 | Then, pass this collate function to 230 | {py:class}`torchdata.datapipes.iter.Collator` (functional name: `collate`). 231 | 232 | ```{code-cell} 233 | dp_collate = dp_batch.collate(collate_fn=xr_collate_fn) 234 | print(f"Number of mini-batches: {len(dp_collate)}") 235 | print(f"Mini-batch tensor shape: {list(dp_collate)[0].shape}") 236 | ``` 237 | 238 | ### Into a DataLoader 🏋️ 239 | 240 | One more thing 🍎, throw the DataPipe into 241 | {py:class}`torch.utils.data.DataLoader`! 242 | Set `batch_size` to `None`, since we've handled the batching manually in the 243 | above sections already. 244 | 245 | ```{code-cell} 246 | dataloader = torch.utils.data.DataLoader(dataset=dp_collate, batch_size=None) 247 | for i, batch in enumerate(dataloader): 248 | tensor = batch 249 | print(f"Batch {i}: {tensor.shape}") 250 | ``` 251 | 252 | Lights, camera, action 💥 253 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | #
☯ *zen3geo* - The 🌏 data science library you've been waiting for
2 | 3 | ## Installation 4 | 5 | Get what you need, not more, not less: 6 | 7 | | Command | Dependencies | 8 | |:-------------------------------|---------------| 9 | | `pip install zen3geo` | rioxarray, torchdata | 10 | | `pip install zen3geo[raster]` | rioxarray, torchdata, xbatcher, zarr | 11 | | `pip install zen3geo[spatial]` | rioxarray, torchdata, datashader, spatialpandas | 12 | | `pip install zen3geo[stac]` | rioxarray, torchdata, pystac, pystac-client, stackstac, xpystac | 13 | | `pip install zen3geo[vector]` | rioxarray, torchdata, pyogrio[geopandas] | 14 | 15 | Retrieve more ['extras'](https://github.com/weiji14/zen3geo/blob/main/pyproject.toml) using 16 | 17 | pip install zen3geo[raster,spatial,stac,vector] 18 | 19 | To install the development version from [TestPyPI](https://test.pypi.org/project/zen3geo), do: 20 | 21 | pip install --pre --extra-index-url https://test.pypi.org/simple/ zen3geo 22 | 23 | May [conda-forge](https://anaconda.org/conda-forge/zen3geo) be with you, 24 | though optional dependencies it has not. 25 | 26 | mamba install --channel conda-forge zen3geo 27 | 28 | For the eager ones, {ref}`contributing ` will take you further. 29 | -------------------------------------------------------------------------------- /docs/multi-resolution.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | formats: md:myst 4 | text_representation: 5 | extension: .md 6 | format_name: myst 7 | kernelspec: 8 | display_name: Python 3 9 | language: python 10 | name: python3 11 | --- 12 | 13 | # Multi-resolution 14 | 15 | > On top of a hundred foot pole you linger 16 | > 17 | > Clinging to the first mark of the scale 18 | > 19 | > How do you proceed higher? 20 | > 21 | > It will take more than a leap of faith 22 | 23 | Earth Observation 🛰️ and climate projection 🌡️ data can be captured at 24 | different levels of detail. 
In this lesson, we'll work with a multitude of 25 | spatial resolutions 📏, learning to respect the ground sampling distance or 26 | native resolution 🔬 of the physical variable being measured, while 🪶 27 | minimizing memory usage. By the end of the lesson, you should be able to: 28 | 29 | - Find 🔍 low and high spatial resolution climate datasets and load them from 30 | {doc}`Zarr ` stores 31 | - Stack 🥞 and subset time-series datasets with different spatial resolutions 32 | stored in a hierarchical {py:class}`datatree.DataTree` structure 33 | - Slice 🔪 the multi-resolution dataset along the time-axis into monthly bins 34 | 35 | 🔗 Links: 36 | - https://carbonplan.org/research/cmip6-downscaling-explainer 37 | - https://github.com/carbonplan/cmip6-downscaling/blob/1.0/notebooks/accessing_data_example.ipynb 38 | - https://github.com/xarray-contrib/xbatcher/issues/93 39 | 40 | 41 | ## 🎉 **Getting started** 42 | 43 | These are the tools 🛠️ you'll need. 44 | 45 | ```{code-cell} 46 | import matplotlib.pyplot as plt 47 | import pandas as pd 48 | import torchdata.dataloader2 49 | import xarray as xr 50 | import xpystac 51 | import zen3geo 52 | 53 | from datatree import DataTree 54 | ``` 55 | 56 | ## 0️⃣ Find climate model datasets 🪸 57 | 58 | The two datasets we'll be working with are 🌐 gridded climate projections, one 59 | that is in its original low 🔅 spatial resolution, and another one of a 60 | higher 🔆 spatial resolution. Specifically, we'll be looking at the maximum 61 | temperature 🌡️ (tasmax) variable from one of the Coupled Model Intercomparison 62 | Project Phase 6 (CMIP6) global coupled ocean-atmosphere general circulation 63 | model (GCM) 💨 outputs that is of low-resolution (67.5 arcminute), and a 64 | super-resolution product from DeepSD 🤔 that is of a higher resolution (15 65 | arcminute). 66 | 67 | ```{note} 68 | The following tutorial will mostly use the term super-resolution 🔭 from 69 | Computer Vision instead of downscaling ⏬. 
It's just that the term 70 | downscaling ⏬ (going from low to high resolution) can get confused with 71 | downsampling 🙃 (going from high to low resolution), whereas 72 | super-resolution 🔭 is unambiguously about going from low 🔅 to high 🔆 73 | resolution. 74 | ``` 75 | 76 | 🔖 References: 77 | - https://carbonplan.org/research/cmip6-downscaling 78 | - https://github.com/tjvandal/deepsd 79 | - https://tutorial.xarray.dev/intermediate/cmip6-cloud.html 80 | 81 | ```{code-cell} 82 | lowres_raw = "https://cpdataeuwest.blob.core.windows.net/cp-cmip/cmip6/ScenarioMIP/MRI/MRI-ESM2-0/ssp585/r1i1p1f1/Amon/tasmax/gn/v20191108" 83 | highres_deepsd = "https://cpdataeuwest.blob.core.windows.net/cp-cmip/version1/data/DeepSD/ScenarioMIP.MRI.MRI-ESM2-0.ssp585.r1i1p1f1.month.DeepSD.tasmax.zarr" 84 | ``` 85 | 86 | This is how the projected maximum temperature 🥵 for August 2089 looks like over 87 | South Asia 🪷 for a low-resolution 🔅 Global Climate Model (left) and a 88 | high-resolution 🔆 downscaled product (right). 
89 | 90 | ```{code-cell} 91 | :tags: [hide-input] 92 | # Zarr datasets from https://github.com/carbonplan/research/blob/d05d148fd716ba6304e3833d765069dd890eaf4a/articles/cmip6-downscaling-explainer/components/downscaled-data.js#L97-L122 93 | ds_gcm = xr.open_dataset( 94 | filename_or_obj="https://cmip6downscaling.blob.core.windows.net/vis/article/fig1/regions/india/gcm-tasmax.zarr" 95 | ) 96 | ds_gcm -= 273.15 # convert from Kelvin to Celsius 97 | ds_downscaled = xr.open_dataset( 98 | filename_or_obj="https://cmip6downscaling.blob.core.windows.net/vis/article/fig1/regions/india/downscaled-tasmax.zarr" 99 | ) 100 | ds_downscaled -= 273.15 # convert from Kelvin to Celsius 101 | 102 | # Plot projected maximum temperature over South Asia from GCM and GARD-MV 103 | fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 3), sharey=True) 104 | 105 | img1 = ds_gcm.tasmax.plot.imshow( 106 | ax=ax[0], cmap="inferno", vmin=16, vmax=48, add_colorbar=False 107 | ) 108 | ax[0].set_title("Global Climate Model (67.5 arcminute)") 109 | 110 | img2 = ds_downscaled.tasmax.plot.imshow( 111 | ax=ax[1], cmap="inferno", vmin=16, vmax=48, add_colorbar=False 112 | ) 113 | ax[1].set_title("Downscaled result (15 arcminute)") 114 | 115 | cbar = fig.colorbar(mappable=img1, ax=ax.ravel().tolist(), extend="both") 116 | cbar.set_label(label="Daily Max Near-Surface Air\nTemperature in Aug 2089 (°C)") 117 | 118 | plt.show() 119 | ``` 120 | 121 | ### Load Zarr stores 📦 122 | 123 | The {doc}`Zarr ` stores 🧊 can be loaded into an 124 | {py:class}`xarray.Dataset` via {py:class}`zen3geo.datapipes.XpySTACAssetReader` 125 | (functional name: ``read_from_xpystac``) with the `engine="zarr"` keyword 126 | argument. 
127 | 128 | ```{code-cell} 129 | dp_lowres = torchdata.datapipes.iter.IterableWrapper(iterable=[lowres_raw]) 130 | dp_highres = torchdata.datapipes.iter.IterableWrapper(iterable=[highres_deepsd]) 131 | 132 | dp_lowres_dataset = dp_lowres.read_from_xpystac(engine="zarr", chunks="auto") 133 | dp_highres_dataset = dp_highres.read_from_xpystac(engine="zarr", chunks="auto") 134 | ``` 135 | 136 | ### Inspect the climate datasets 🔥 137 | 138 | Let's now preview 👀 the low-resolution 🔅 and high-resolution 🔆 temperature 139 | datasets. 140 | 141 | ```{code-cell} 142 | it = iter(dp_lowres_dataset) 143 | ds_lowres = next(it) 144 | ds_lowres 145 | ``` 146 | 147 | ```{code-cell} 148 | it = iter(dp_highres_dataset) 149 | ds_highres = next(it) 150 | ds_highres 151 | ``` 152 | 153 | Notice that the low-resolution 🔅 dataset has lon/lat pixels of shape 154 | (320, 160), whereas the high-resolution 🔆 dataset is of shape (1440, 720). So 155 | there has been a 4.5x increase 📈 in spatial resolution going from the raw GCM 156 | 🌐 grid to the super-resolution 🔭 DeepSD grid. 157 | 158 | ### Shift from 0-360 to -180-180 🌐 159 | 160 | A sharp eye 👁️ would have noticed that the longitudinal range of the 161 | low-resolution 🔅 and high-resolution 🔆 dataset are offset ↔️ by 180°, going 162 | from 0° to 360° and -180° to +180° respectively. Let's shift the coordinates 📍 163 | of the low-resolution grid 🌍 from 0-360 to -180-180 using a custom 164 | {py:class}`torchdata.datapipes.iter.Mapper` (functional name: `map`) function. 
165 | 166 | 🔖 References: 167 | - https://discourse.pangeo.io/t/handling-slicing-with-circular-longitude-coordinates-in-xarray/1608/3 168 | - https://gis.stackexchange.com/questions/416091/converting-a-netcdf-from-0-to-360-to-180-to-180-via-xarray 169 | 170 | ```{code-cell} 171 | def shift_longitude_360_to_180(ds: xr.Dataset) -> xr.Dataset: 172 | ds = ds.assign_coords(lon=(((ds.lon + 180) % 360) - 180)) 173 | ds = ds.roll(lon=int(len(ds.lon) / 2), roll_coords=True) 174 | return ds 175 | ``` 176 | 177 | ```{code-cell} 178 | dp_lowres_dataset_180 = dp_lowres_dataset.map(fn=shift_longitude_360_to_180) 179 | dp_lowres_dataset_180 180 | ``` 181 | 182 | Double check that the low-resolution 🔆 grid's longitude coordinates 🔢 are now 183 | in the -180° to +180° range. 184 | 185 | ```{code-cell} 186 | it = iter(dp_lowres_dataset_180) 187 | ds_lowres_180 = next(it) 188 | ds_lowres_180 189 | ``` 190 | 191 | 192 | ## Spatiotemporal stack and subset 🍱 193 | 194 | Following on from {doc}`./stacking` where multiple 🥞 layers with the **same** 195 | spatial resolution were stacked together into an {py:class}`xarray.DataArray` 196 | object, this section will teach 🧑‍🏫 you about stacking datasets with 197 | **different** spatial resolutions 📶 into a {py:class}`datatree.DataTree` 198 | object that has a nested/hierarchical structure. That 199 | {py:class}`datatree.DataTree` can then be subsetted 🥮 to the desired spatial 200 | and temporal extent in one go 😎. 201 | 202 | ### Stack multi-resolution datasets 📚 203 | 204 | First, we'll need to combine 🪢 the low-resolution GCM and high-resolution 205 | DeepSD {py:class}`xarray.Dataset` objects into a tuple 🎵 using 206 | {py:class}`torchdata.datapipes.iter.Zipper` (functional name: zip). 
207 | 208 | ```{code-cell} 209 | dp_lowres_highres = dp_lowres_dataset_180.zip(dp_highres_dataset) 210 | dp_lowres_highres 211 | ``` 212 | 213 | Next, use {py:class}`torchdata.datapipes.iter.Collator` (functional name: 214 | `collate`) to convert 🤸 the tuple of {py:class}`xarray.Dataset` objects into 215 | an {py:class}`datatree.DataTree` 🎋, similar to what was done in 216 | {doc}`./stacking`. Note that we'll only take the 'tasmax' ♨️ (Daily Maximum 217 | Near-Surface Air Temperature) {py:class}`xarray.DataArray` variable from each 218 | of the {py:class}`xarray.Dataset` objects. 219 | 220 | ```{code-cell} 221 | def multires_collate_fn(lowres_and_highres: tuple) -> DataTree: 222 | """ 223 | Combine a pair of xarray.Dataset (lowres, highres) inputs into a 224 | datatree.DataTree with groups named 'lowres' and 'highres'. 225 | """ 226 | # Turn 2 xr.Dataset objects into 1 xr.DataTree with multiple groups 227 | ds_lowres, ds_highres = lowres_and_highres 228 | 229 | # Create DataTree with lowres and highres groups 230 | datatree: DataTree = DataTree.from_dict( 231 | d={"lowres": ds_lowres.tasmax, "highres": ds_highres.tasmax} 232 | ) 233 | 234 | return datatree 235 | ``` 236 | 237 | ```{code-cell} 238 | dp_datatree = dp_lowres_highres.collate(collate_fn=multires_collate_fn) 239 | dp_datatree 240 | ``` 241 | 242 | See the nested 🪆 structure of the {py:class}`datatree.DataTree`. The 243 | low-resolution 🔅 GCM and high-resolution 🔆 DeepSD outputs have been placed in 244 | separate groups 🖖. 245 | 246 | ```{code-cell} 247 | it = iter(dp_datatree) 248 | datatree = next(it) 249 | datatree 250 | ``` 251 | 252 | ### Subset multi-resolution layers 🥮 253 | 254 | The climate model outputs above are a global 🗺️ one covering a timespan from 255 | January 2015 to December 2100 📅. If you're only interested in a particular 256 | region 🌏 or timespan ⌚, then the {py:class}`datatree.DataTree` will need to 257 | be trimmed 💇 down. 
Let's use {py:meth}`datatree.DataTree.sel` to subset the 258 | multi-resolution data to just the Philippines 🇵🇭 for the period 2015 to 2030. 259 | 260 | ```{code-cell} 261 | def spatiotemporal_subset(dt: DataTree) -> DataTree: 262 | dt_subset = dt.sel( 263 | lon=slice(116.4375, 126.5625), 264 | lat=slice(5.607445, 19.065325), 265 | time=slice("2015-01-01", "2030-12-31"), 266 | ) 267 | return dt_subset 268 | ``` 269 | 270 | ```{code-cell} 271 | dp_datatree_subset = dp_datatree.map(fn=spatiotemporal_subset) 272 | dp_datatree_subset 273 | ``` 274 | 275 | Inspect the subsetted climate dataset 🕵️ 276 | 277 | ```{code-cell} 278 | it = iter(dp_datatree_subset) 279 | datatree_subset = next(it) 280 | datatree_subset 281 | ``` 282 | 283 | Let's plot the projected temperature 🌡️ for Dec 2030 over the Philippine 284 | Archipelago to ensure things look ok. 285 | 286 | ```{code-cell} 287 | ds_lowres = ( 288 | datatree_subset["lowres/tasmax"] 289 | .sel(time=slice("2030-12-01", "2030-12-31")) 290 | .squeeze() 291 | ) 292 | ds_lowres -= 273.15 # convert from Kelvin to Celsius 293 | ds_highres = ( 294 | datatree_subset["highres/tasmax"] 295 | .sel(time=slice("2030-12-01", "2030-12-31")) 296 | .squeeze() 297 | ) 298 | ds_highres -= 273.15 # convert from Kelvin to Celsius 299 | 300 | # Plot projected maximum temperature over the Philippines from GCM and DeepSD 301 | fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 8), sharey=True) 302 | 303 | img1 = ds_lowres.plot.imshow( 304 | ax=ax[0], cmap="inferno", vmin=22, vmax=33, add_colorbar=False 305 | ) 306 | ax[0].set_title("Global Climate Model (67.5 arcminute)") 307 | 308 | img2 = ds_highres.plot.imshow( 309 | ax=ax[1], cmap="inferno", vmin=22, vmax=33, add_colorbar=False 310 | ) 311 | ax[1].set_title("DeepSD output (15 arcminute)") 312 | 313 | cbar = fig.colorbar(mappable=img1, ax=ax.ravel().tolist(), extend="max") 314 | cbar.set_label(label="Daily Max Near-Surface Air\nTemperature in Dec 2030 (°C)") 315 | 316 | plt.show() 317 | 
``` 318 | 319 | ```{important} 320 | When slicing ✂️ different spatial resolution grids, put some 🧠 thought into the 321 | process. Do some 🧮 math to ensure the coordinates of the bounding box (min/max 322 | lon/lat) cut through the pixels exactly at the 📐 pixel boundaries whenever 323 | possible. 324 | 325 | If your multi-resolution 📶 layers have spatial resolutions that are 326 | round multiples ✖️ of each other (e.g. 10m, 20m, 60m), it is advisable to align 327 | 🎯 the pixel corners, such that the high-resolution 🔆 pixels fit within the 328 | low-resolution 🔅 pixels (e.g. one 20m pixel should contain four 10m pixels). 329 | This can be done by resampling 🖌️ or interpolating the grid (typically the 330 | higher resolution one) onto a new reference frame 🖼️. 331 | 332 | For datasets ℹ️ that come from different sources and need to be reprojected 🔁, 333 | you can do the reprojection and pixel alignment in a single step 🔂. Be extra 334 | careful about resampling, as certain datasets (e.g. complex SAR 📡 data that 335 | has been collected off-nadir) may require special 🌷 treatment. 336 | ``` 337 | 338 | 339 | ## Time to slice again ⌛ 340 | 341 | So, we now have a {py:class}`datatree.DataTree` with two 💕 groups/nodes called 342 | 'lowres' and 'highres' that have tensor shapes `(lat: 12, lon: 9, time: 192)` 343 | and `(lat: 54, lon: 40, time: 192)` respectively. While the time dimension ⏱️ 344 | is of the same length, the timestamp values between the low-resolution 🔅 GCM 345 | and high-resolution 🔆 DeepSD output are different. Specifically, the GCM 346 | output dates at the middle of the month 📅, while the DeepSD output has dates 347 | at the start of the month. Let's see how this can be handled 🫖. 348 | 349 | ### Slicing by month 🗓️ 350 | 351 | Assuming that the roughly two week offset ↔️ between the monthly resolution GCM 352 | and DeepSD time-series is negligible 🤏, we can split the dataset on the time 353 | dimension at the start/end of each month 📆. 
Let's write a function and use 354 | {py:class}`torchdata.datapipes.iter.FlatMapper` (functional name: `flatmap`) 355 | for this. 356 | 357 | ```{code-cell} 358 | def split_on_month(dt: DataTree, node:str = "highres/tasmax") -> DataTree: 359 | """ 360 | Return a slice of data for every month in a datatree.DataTree time-series. 361 | """ 362 | for t in dt[node].time.to_pandas(): 363 | dt_slice = dt.sel( 364 | time=slice(t + pd.offsets.MonthBegin(0), t + pd.offsets.MonthEnd(0)) 365 | ) 366 | yield dt_slice.squeeze(dim="time") 367 | ``` 368 | 369 | ```{code-cell} 370 | dp_datatree_timeslices = dp_datatree_subset.flatmap(fn=split_on_month) 371 | dp_datatree_timeslices 372 | ``` 373 | 374 | The datapipe should yield a {py:class}`datatree.DataTree` with just one 375 | month's 📅 worth of temperature 🌡️ data per iteration. 376 | 377 | ```{code-cell} 378 | it = iter(dp_datatree_timeslices) 379 | datatree_timeslice = next(it) 380 | datatree_timeslice 381 | ``` 382 | 383 | ```{seealso} 384 | Those interested in slicing multi-resolution arrays spatially can keep an eye 385 | on the 🚧 ongoing implementation at 386 | https://github.com/xarray-contrib/xbatcher/pull/171 and the discussion at 387 | https://github.com/xarray-contrib/xbatcher/issues/93. This 🧑‍🏫 tutorial will be 388 | updated ♻️ once there's a clean way to generate multi-resolution 389 | {py:class}`datatree.DataTree` slices in a newer release of 390 | {doc}`xbatcher ` 😉 391 | ``` 392 | 393 | Visualize the final DataPipe graph ⛓️. 394 | 395 | ```{code-cell} 396 | torchdata.datapipes.utils.to_graph(dp=dp_datatree_timeslices) 397 | ``` 398 | 399 | ### Into a DataLoader 🏋️ 400 | 401 | Ready to populate the {py:class}`torchdata.dataloader2.DataLoader2` 🏭! 
402 | 403 | ```{code-cell} 404 | dataloader = torchdata.dataloader2.DataLoader2(datapipe=dp_datatree_timeslices) 405 | for i, batch in enumerate(dataloader): 406 | ds_lowres = batch["lowres/tasmax"] 407 | ds_highres = batch["highres/tasmax"] 408 | print(f"Batch {i} - lowres: {ds_lowres.shape}, highres: {ds_highres.shape}") 409 | if i > 8: 410 | break 411 | ``` 412 | 413 | Do super-resolution, but make no illusion 🧚 414 | 415 | ```{seealso} 416 | Credits to [CarbonPlan](https://github.com/carbonplan) for making the code and 417 | data for their 418 | [CMIP6 downscaling](https://github.com/carbonplan/cmip6-downscaling) work 419 | openly available. Find out more at 420 | https://docs.carbonplan.org/cmip6-downscaling! 421 | ``` 422 | -------------------------------------------------------------------------------- /docs/object-detection-boxes.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | formats: md:myst 4 | text_representation: 5 | extension: .md 6 | format_name: myst 7 | kernelspec: 8 | display_name: Python 3 9 | language: python 10 | name: python3 11 | --- 12 | 13 | # Object detection boxes 14 | 15 | > *You shouldn't set up limits in boundless openness, 16 | > but if you set up limitlessness as boundless openness, 17 | > you've trapped yourself* 18 | 19 | Boxes are quick to draw ✏️, but finicky to train a neural network with. 20 | This time, we'll show you a geospatial object detection 🕵️ problem, where the 21 | objects are defined by a bounding box 🔲 with a specific class. 
22 | By the end of this lesson, you should be able to: 23 | 24 | - Read OGR supported vector files and obtain the bounding boxes 🟨 of each 25 | geometry 26 | - Convert bounding boxes from geographic coordinates to 🖼️ image coordinates 27 | while clipping to the image extent 28 | - Use an affine transform to convert boxes in image coordinates to 🌐 29 | geographic coordinates 30 | 31 | 🔗 Links: 32 | - https://planetarycomputer.microsoft.com/dataset/ms-buildings#Example-Notebook 33 | - https://github.com/microsoft/GlobalMLBuildingFootprints/ 34 | - https://mlhub.earth/datasets?tags=object+detection 35 | 36 | ## 🎉 **Getting started** 37 | 38 | These are the tools 🛠️ you'll need. 39 | 40 | ```{code-cell} 41 | import contextily 42 | import numpy as np 43 | import geopandas as gpd 44 | import matplotlib.patches 45 | import matplotlib.pyplot as plt 46 | import pandas as pd 47 | import planetary_computer 48 | import pystac_client 49 | import rioxarray 50 | import shapely.affinity 51 | import shapely.geometry 52 | import torch 53 | import torchdata 54 | import torchdata.dataloader2 55 | import xarray as xr 56 | import zen3geo 57 | ``` 58 | 59 | ## 0️⃣ Find high-resolution imagery and building footprints 🌇 60 | 61 | Let's take a look at buildings over 62 | [Kampong Ayer](https://en.wikipedia.org/wiki/Kampong_Ayer), Brunei 🇧🇳! We'll 63 | use {py:func}`contextily.bounds2img` to get some 4-band RGBA 64 | 🌈 [optical imagery](https://www.arcgis.com/home/item.html?id=10df2279f9684e4a9f6a7f08febac2a9) 65 | in a {py:class}`numpy.ndarray` format. 66 | 67 | ```{code-cell} 68 | image, extent = contextily.bounds2img( 69 | w=114.94, 70 | s=4.88, 71 | e=114.95, 72 | n=4.89, 73 | ll=True, 74 | source=contextily.providers.Esri.WorldImagery, 75 | ) 76 | print(f"Spatial extent in EPSG:3857: {extent}") 77 | print(f"Image dimensions (height, width, channels): {image.shape}") 78 | ``` 79 | 80 | This is how Brunei's 🚣 Venice of the East looks like from above. 
81 | 82 | ```{code-cell} 83 | fig, ax = plt.subplots(nrows=1, figsize=(9, 9)) 84 | plt.imshow(X=image, extent=extent) 85 | ``` 86 | 87 | ```{tip} 88 | For more raster basemaps, check out: 89 | - https://xyzservices.readthedocs.io/en/stable/introduction.html#overview-of-built-in-providers 90 | - https://leaflet-extras.github.io/leaflet-providers/preview/ 91 | ``` 92 | 93 | ### Georeference image using rioxarray 🌐 94 | 95 | To enable slicing 🔪 with xbatcher later, we'll need to turn the 96 | {py:class}`numpy.ndarray` image 🖼️ into an {py:class}`xarray.DataArray` grid 97 | with coordinates 🖼️. If you already have a georeferenced grid (e.g. from 98 | {py:class}`zen3geo.datapipes.RioXarrayReader`), this step can be skipped ⏭️. 99 | 100 | 101 | ```{code-cell} 102 | # Turn RGBA image from channel-last to channel-first and get 3-band RGB only 103 | _image = image.transpose(2, 0, 1) # Change image from (H, W, C) to (C, H, W) 104 | rgb_image = _image[0:3, :, :] # Get just RGB by dropping RGBA's alpha channel 105 | print(f"RGB image shape: {rgb_image.shape}") 106 | ``` 107 | 108 | Georeferencing is done by putting the 🚦 RGB image into an 109 | {py:class}`xarray.DataArray` object with (band, y, x) coordinates, and then 110 | setting a coordinate reference system 📐 using 111 | {py:meth}`rioxarray.rioxarray.XRasterBase.set_crs`. 112 | 113 | ```{code-cell} 114 | left, right, bottom, top = extent # xmin, xmax, ymin, ymax 115 | dataarray = xr.DataArray( 116 | data=rgb_image, 117 | coords=dict( 118 | band=[0, 1, 2], # Red, Green, Blue 119 | y=np.linspace(start=top, stop=bottom, num=rgb_image.shape[1]), 120 | x=np.linspace(start=left, stop=right, num=rgb_image.shape[2]), 121 | ), 122 | dims=("band", "y", "x"), 123 | ) 124 | dataarray = dataarray.rio.write_crs(input_crs="EPSG:3857") 125 | dataarray 126 | ``` 127 | 128 | ### Load cloud-native vector files 💠 129 | 130 | Now to pull in some building footprints 🛖. 
Let's make a STAC API query to get 131 | a [GeoParquet](https://github.com/opengeospatial/geoparquet) file (a 132 | cloud-native columnar 🀤 geospatial vector file format) that intersects our 133 | study area. 134 | 135 | ```{code-cell} 136 | catalog = pystac_client.Client.open( 137 | url="https://planetarycomputer.microsoft.com/api/stac/v1", 138 | modifier=planetary_computer.sign_inplace, 139 | ) 140 | search = catalog.search( 141 | collections=["ms-buildings"], 142 | query={"msbuildings:region": {"eq": "Brunei"}}, 143 | intersects=shapely.geometry.box(minx=114.94, miny=4.88, maxx=114.95, maxy=4.89), 144 | ) 145 | item = next(search.items()) 146 | item 147 | ``` 148 | 149 | ```{note} 150 | Accessing the building footprint STAC Assets from Planetary Computer will 151 | require signing 🔏 the URL. This can be done with a `modifier` function in the 152 | {py:meth}`pystac_client.Client.open` call. See also 'Automatically modifying 153 | results' under {doc}`PySTAC-Client Usage `). 154 | ``` 155 | 156 | Next, we'll load ⤵️ the GeoParquet file using 157 | {py:func}`geopandas.read_parquet`. 158 | 159 | ```{code-cell} 160 | asset = item.assets["data"] 161 | 162 | geodataframe = gpd.read_parquet( 163 | path=asset.href, storage_options=asset.extra_fields["table:storage_options"] 164 | ) 165 | geodataframe 166 | ``` 167 | 168 | This {py:class}`geopandas.GeoDataFrame` contains building outlines across 169 | Brunei 🇧🇳 that intersects and extends beyond our study area. Let's do a spatial 170 | subset ✂️ to just the Kampong Ayer study area using 171 | {py:attr}`geopandas.GeoDataFrame.cx`, and reproject the polygon coordinates 172 | using {py:meth}`geopandas.GeoDataFrame.to_crs` to match the coordinate 173 | reference system of the optical image. 
174 | 175 | ```{code-cell} 176 | _gdf_kpgayer = geodataframe.cx[114.94:114.95, 4.88:4.89] 177 | gdf_kpgayer = _gdf_kpgayer.to_crs(crs="EPSG:3857") 178 | gdf_kpgayer 179 | ``` 180 | 181 | Preview 👀 the building footprints to check that things are in the right place. 182 | 183 | ```{code-cell} 184 | ax = gdf_kpgayer.plot(figsize=(9, 9)) 185 | contextily.add_basemap( 186 | ax=ax, 187 | source=contextily.providers.CartoDB.Voyager, 188 | crs=gdf_kpgayer.crs.to_string(), 189 | ) 190 | ax 191 | ``` 192 | 193 | Cool, we see that there are some building are on water as expected 😁. 194 | 195 | 196 | ## 1️⃣ Pair image chips with bounding boxes 🧑‍🤝‍🧑 197 | 198 | Here comes the fun 🛝 part! This section is all about generating 128x128 chips 199 | 🫶 paired with bounding boxes. Let's go 🚲! 200 | 201 | ### Create 128x128 raster chips and clip vector geometries with it ✂️ 202 | 203 | From the large 1280x1280 scene 🖽️, we will first slice out a hundred 128x128 204 | chips 🍕 using {py:class}`zen3geo.datapipes.XbatcherSlicer` (functional name: 205 | `slice_with_xbatcher`). 206 | 207 | ```{code-cell} 208 | dp_raster = torchdata.datapipes.iter.IterableWrapper(iterable=[dataarray]) 209 | dp_xbatcher = dp_raster.slice_with_xbatcher(input_dims={"y": 128, "x": 128}) 210 | dp_xbatcher 211 | ``` 212 | 213 | For each 128x128 chip 🍕, we'll then find the vector geometries 🌙 that fit 214 | within the chip's spatial extent. This will be 🤸 done using 215 | {py:class}`zen3geo.datapipes.GeoPandasRectangleClipper` (functional name: 216 | `clip_vector_with_rectangle`). 
217 | 218 | ```{code-cell} 219 | dp_vector = torchdata.datapipes.iter.IterableWrapper(iterable=[gdf_kpgayer]) 220 | dp_clipped = dp_vector.clip_vector_with_rectangle(mask_datapipe=dp_xbatcher) 221 | dp_clipped 222 | ``` 223 | 224 | ```{important} 225 | When using {py:class}`zen3geo.datapipes.GeoPandasRectangleClipper` 💇, there 226 | should only be one 'global' 🌐 vector {py:class}`geopandas.GeoSeries` or 227 | {py:class}`geopandas.GeoDataFrame`. 228 | 229 | If your raster DataPipe has chips 🍕 with different coordinate reference 230 | systems (e.g. multiple UTM Zones 🌏🌍🌎), 231 | {py:class}`zen3geo.datapipes.GeoPandasRectangleClipper` will actually reproject 232 | 🔄 the 'global' vector to the coordinate reference system of each chip, and 233 | clip ✂️ the geometries accordingly to the chip's bounding box extent 😎. 234 | ``` 235 | 236 | This ``dp_clipped`` DataPipe will yield 🤲 a tuple of ``(vector, raster)`` 237 | objects for each 128x128 chip. Let's inspect 🧐 one to see how they look like. 238 | 239 | ```{code-cell} 240 | # Get one chip with over 10 building footprint geometries 241 | for vector, raster in dp_clipped: 242 | if len(vector) > 10: 243 | break 244 | ``` 245 | 246 | These are the spatially subsetted vector geometries 🌙 in one 128x128 chip. 247 | 248 | ```{code-cell} 249 | vector 250 | ``` 251 | 252 | This is the raster chip/mask 🤿 used to clip the vector. 253 | 254 | ```{code-cell} 255 | raster 256 | ``` 257 | 258 | And here's a side by side visualization of the 🌈 RGB chip image (left) and 259 | 🔷 vector building footprint polygons (right). 260 | 261 | ```{code-cell} 262 | fig, ax = plt.subplots(ncols=2, figsize=(18, 9), sharex=True, sharey=True) 263 | raster.plot.imshow(ax=ax[0]) 264 | vector.plot(ax=ax[1]) 265 | ``` 266 | 267 | Cool, these buildings are part of the 🏬 268 | [Yayasan Shopping Complex](https://web.archive.org/web/20220906020248/http://www.yayasancomplex.com) 269 | in Bandar Seri Begawan 🌆. 
We can see that the raster image 🖼️ on the left 270 | aligns ok with the vector polygons 💠 on the right. 271 | 272 | ```{note} 273 | The optical 🛰️ imagery shown here is **not** the imagery used to digitize the 274 | [building footprints](https://planetarycomputer.microsoft.com/dataset/ms-buildings) 275 | 🏢! This is an example tutorial using two different data sources, that we just 276 | so happened to have plotted in the same geographic space 😝. 277 | ``` 278 | 279 | ### From polygons in geographic coordinates to boxes in image coordinates ↕️ 280 | 281 | Up to this point, we still have the actual 🛖 building footprint polygons. In 282 | this step 📶, we'll convert these polygons into a format suitable for 'basic' 283 | object detection 🥅 models in computer vision. Specifically: 284 | 285 | 1. The polygons 🌙 (with multiple vertices) will be simplified to a horizontal 286 | bounding box 🔲 with 4 corner vertices only. 287 | 2. The 🌐 geographic coordinates of the box which use lower left corner and 288 | upper right corner (i.e. y increases from South to North ⬆️) will be 289 | converted to 🖼️ image coordinates (0-128) which use the top left corner and 290 | bottom right corner (i.e y increases from Top to Bottom ⬇️). 291 | 292 | Let's start by using {py:attr}`geopandas.GeoSeries.bounds` to get the 293 | geographic bounds 🗺️ of each building footprint geometry 📐 in each 128x128 294 | chip. 295 | 296 | ```{code-cell} 297 | def polygon_to_bbox(geom_and_chip) -> (gpd.GeoDataFrame, xr.DataArray): 298 | """ 299 | Get bounding box (minx, miny, maxx, maxy) coordinates for each geometry in 300 | a geopandas.GeoDataFrame. 
301 | 302 | (maxx,maxy) 303 | ul-------ur 304 | ^ | | 305 | | | geo | y increases going up, x increases going right 306 | y | | 307 | ll-------lr 308 | (minx,miny) x--> 309 | 310 | """ 311 | gdf, chip = geom_and_chip 312 | bounds: gpd.GeoDataFrame = gdf.bounds 313 | assert tuple(bounds.columns) == ("minx", "miny", "maxx", "maxy") 314 | 315 | return bounds, chip 316 | ``` 317 | 318 | ```{code-cell} 319 | dp_bbox = dp_clipped.map(fn=polygon_to_bbox) 320 | ``` 321 | 322 | Next, the geographic 🗺️ bounding box coordinates (in EPSG:3857) will be 323 | converted to image 🖼️ or pixel coordinates (0-128 scale). The y-direction will 324 | be flipped 🤸 upside down, and we'll be using the spatial bounds (or corner 325 | coordinates) of the 128x128 image chip as a reference 📍. 326 | 327 | ```{code-cell} 328 | def geobox_to_imgbox(bbox_and_chip) -> (pd.DataFrame, xr.DataArray): 329 | """ 330 | Convert bounding boxes in a pandas.DataFrame from geographic coordinates 331 | (minx, miny, maxx, maxy) to image coordinates (x1, y1, x2, y2) based on the 332 | spatial extent of a raster image chip. 
333 | 334 | (x1,y1) 335 | ul-------ur 336 | y | | 337 | | | img | y increases going down, x increases going right 338 | v | | 339 | ll-------lr 340 | x--> (x2,y2) 341 | 342 | """ 343 | geobox, chip = bbox_and_chip 344 | 345 | x_res, y_res = chip.rio.resolution() 346 | assert y_res < 0 347 | 348 | left, bottom, right, top = chip.rio.bounds() 349 | assert top > bottom 350 | 351 | imgbox = pd.DataFrame() 352 | imgbox["x1"] = (geobox.minx - left) / x_res # left 353 | imgbox["y1"] = (top - geobox.maxy) / -y_res # top 354 | imgbox["x2"] = (geobox.maxx - left) / x_res # right 355 | imgbox["y2"] = (top - geobox.miny) / -y_res # bottom 356 | 357 | assert all(imgbox.x2 > imgbox.x1) 358 | assert all(imgbox.y2 > imgbox.y1) 359 | 360 | return imgbox, chip 361 | ``` 362 | 363 | ```{code-cell} 364 | dp_ibox = dp_bbox.map(fn=geobox_to_imgbox) 365 | ``` 366 | 367 | Now to plot 🎨 and double check that the boxes are positioned correctly in 368 | 0-128 image space 🌌. 369 | 370 | ```{code-cell} 371 | # Get one chip with over 10 building footprint geometries 372 | for ibox, ichip in dp_ibox: 373 | if len(ibox) > 10: 374 | break 375 | ibox 376 | ``` 377 | 378 | ```{code-cell} 379 | fig, ax = plt.subplots(ncols=2, figsize=(18, 9), sharex=True, sharey=True) 380 | ax[0].imshow(X=ichip.transpose("y", "x", "band")) 381 | for i, row in ibox.iterrows(): 382 | rectangle = matplotlib.patches.Rectangle( 383 | xy=(row.x1, row.y1), 384 | width=row.x2 - row.x1, 385 | height=row.y2 - row.y1, 386 | edgecolor="blue", 387 | linewidth=1, 388 | facecolor="none", 389 | ) 390 | ax[1].add_patch(rectangle) 391 | ``` 392 | 393 | Cool, the 🟦 bounding boxes on the right subplot are correctly positioned 🧭 394 | (compare it with the figure in the previous subsection). 395 | 396 | ```{hint} 397 | Instead of a bounding box 🥡 object detection task, you can also use the 398 | building polygons 🏘️ for a segmentation task 🧑‍🎨 following 399 | {doc}`./vector-segmentation-masks`. 
400 | 401 | If you still prefer doing object detection 🕵️, but want a different box format 402 | (see options in {py:func}`torchvision.ops.box_convert`), 403 | like 🎌 centre-based coordinates with width and height (`cxcywh`), or 404 | 📨 oriented/rotated bounding box coordinates, feel free to implement your own 405 | function and DataPipe for it 🤗! 406 | ``` 407 | 408 | 409 | ## 2️⃣ There and back again 🧙 410 | 411 | What follows on from here requires focus 🤫. To start, we'll pool the hundred 412 | 💯 128x128 chips into 10 batches (10 chips per batch) using 413 | {py:class}`torchdata.datapipes.iter.Batcher` (functional name: `batch`). 414 | 415 | ```{code-cell} 416 | dp_batch = dp_ibox.batch(batch_size=10) 417 | print(f"Number of items in first batch: {len(list(dp_batch)[0])}") 418 | ``` 419 | 420 | ### Batch boxes with variable lengths 📏 421 | 422 | Next, we'll stack 🥞 all the image chips into a single tensor (recall 423 | {doc}`./chipping`), and concatenate 📚 the bounding boxes into a list of 424 | tensors using {py:class}`torchdata.datapipes.iter.Collator` (functional name: 425 | `collate`). 426 | 427 | ```{code-cell} 428 | def boximg_collate_fn(samples) -> (list[torch.Tensor], torch.Tensor, list[dict]): 429 | """ 430 | Converts bounding boxes and raster images to tensor objects and keeps 431 | geographic metadata (spatial extent, coordinate reference system and 432 | spatial resolution). 433 | 434 | Specifically, the bounding boxes in pandas.DataFrame format are each 435 | converted to a torch.Tensor and collated into a list, while the raster 436 | images in xarray.DataArray format are converted to a torch.Tensor (int16 437 | dtype) and stacked into a single torch.Tensor. 
438 | """ 439 | box_tensors: list[torch.Tensor] = [ 440 | torch.as_tensor(sample[0].to_numpy(dtype=np.float32)) for sample in samples 441 | ] 442 | 443 | tensors: list[torch.Tensor] = [ 444 | torch.as_tensor(data=sample[1].data.astype(dtype="int16")) for sample in samples 445 | ] 446 | img_tensors = torch.stack(tensors=tensors) 447 | 448 | metadata: list[dict] = [ 449 | { 450 | "bbox": sample[1].rio.bounds(), 451 | "crs": sample[1].rio.crs, 452 | "resolution": sample[1].rio.resolution(), 453 | } 454 | for sample in samples 455 | ] 456 | 457 | return box_tensors, img_tensors, metadata 458 | ``` 459 | 460 | ```{code-cell} 461 | dp_collate = dp_batch.collate(collate_fn=boximg_collate_fn) 462 | print(f"Number of mini-batches: {len(dp_collate)}") 463 | mini_batch_box, mini_batch_img, mini_batch_metadata = list(dp_collate)[1] 464 | print(f"Mini-batch image tensor shape: {mini_batch_img.shape}") 465 | print(f"Mini-batch box tensors: {mini_batch_box}") 466 | print(f"Mini-batch metadata: {mini_batch_metadata}") 467 | ``` 468 | 469 | The DataPipe is complete 🙌, let's visualize the entire data pipeline graph. 470 | 471 | ```{code-cell} 472 | torchdata.datapipes.utils.to_graph(dp=dp_collate) 473 | ``` 474 | 475 | ### Into a DataLoader 🏋️ 476 | 477 | Loop over the DataPipe using {py:class}`torch.utils.data.DataLoader` ⚙️! 478 | 479 | ```{code-cell} 480 | dataloader = torchdata.dataloader2.DataLoader2(datapipe=dp_collate) 481 | for i, batch in enumerate(dataloader): 482 | box, img, metadata = batch 483 | print(f"Batch {i} - img: {img.shape}, box sizes: {[len(b) for b in box]}") 484 | ``` 485 | 486 | There's probably hundreds of models you can 🍜 feed this data into, from 487 | mmdetection's {doc}`mmdetection:model_zoo` 🐼 to torchvision's 488 | {doc}`torchvision:models`). But are we out of the woods yet? 
489 | 490 | ### Georeference image boxes 📍 491 | 492 | To turn the model's predicted bounding boxes in image space 🌌 back to 493 | geographic coordinates 🌐, you'll need to use an 494 | [affine transform](https://web.archive.org/web/20210506173651/https://www.perrygeo.com/python-affine-transforms.html). 495 | Assuming you've kept your 🏷️ metadata intact, here's an example on how to do 496 | the georeferencing: 497 | 498 | ```{code-cell} 499 | for batch in dataloader: 500 | pred_boxes, images, metadata = batch 501 | 502 | objs: list = [] 503 | for idx in range(0, len(images)): 504 | left, bottom, right, top = metadata[idx]["bbox"] 505 | crs = metadata[idx]["crs"] 506 | x_res, y_res = metadata[idx]["resolution"] 507 | 508 | gdf = gpd.GeoDataFrame( 509 | geometry=[ 510 | shapely.affinity.affine_transform( 511 | geom=shapely.geometry.box(*coords), 512 | matrix=[x_res, 0, 0, y_res, left, top], 513 | ) 514 | for coords in pred_boxes[idx] 515 | ], 516 | crs=crs, 517 | ) 518 | objs.append(gdf.to_crs(crs=crs)) 519 | 520 | geodataframe: gpd.GeoDataFrame = pd.concat(objs=objs, ignore_index=True) 521 | geodataframe.set_crs(crs=crs, inplace=True) 522 | break 523 | 524 | geodataframe 525 | ``` 526 | 527 | Back at square one, or are we? 528 | -------------------------------------------------------------------------------- /docs/vector-segmentation-masks.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | formats: md:myst 4 | text_representation: 5 | extension: .md 6 | format_name: myst 7 | kernelspec: 8 | display_name: Python 3 9 | language: python 10 | name: python3 11 | --- 12 | 13 | # Vector segmentation masks 14 | 15 | > *Clouds float by, water flows on; 16 | > in movement there is no grasping, in Chan there is no settling* 17 | 18 | For 🧑‍🏫 supervised machine learning, labels 🏷️ are needed in addition to the 19 | input image 🖼️. 
Here, we'll step through an example workflow on matching vector 20 | 🚏 label data (points, lines, polygons) to 🛰️ Earth Observation data inputs. 21 | Specifically, this tutorial will cover: 22 | 23 | - Reading shapefiles 📁 directly from the web via {doc}`pyogrio ` 24 | - Rasterizing vector polygons from a {py:class}`geopandas.GeoDataFrame` to an {py:class}`xarray.DataArray` 25 | - Pairing 🛰️ satellite images with the rasterized label masks and feeding them into a DataLoader 26 | 27 | 28 | ## 🎉 **Getting started** 29 | 30 | These are the tools 🛠️ you'll need. 31 | 32 | ```{code-cell} 33 | import matplotlib.pyplot as plt 34 | import numpy as np 35 | import planetary_computer 36 | import pyogrio 37 | import pystac 38 | import torch 39 | import torchdata 40 | import xarray as xr 41 | import zen3geo 42 | ``` 43 | 44 | ## 0️⃣ Find cloud-hosted raster and vector data ⛳ 45 | 46 | In this case study, we'll look at the flood water extent over the Narathiwat Province 47 | in Thailand 🇹🇭 and the Northern Kelantan State in Malaysia 🇲🇾 on 04 Jan 2017 that were 48 | digitized by 🇺🇳 UNITAR-UNOSAT's rapid mapping service over Synthetic Aperture Radar 49 | (SAR) 🛰️ images. Specifically, we'll be using the 🇪🇺 Sentinel-1 Ground Range Detected 50 | (GRD) product's VV polarization channel. 51 | 52 | 🔗 Links: 53 | - https://www.unitar.org/maps 54 | - https://unitar.org/maps/all-maps 55 | - [Microsoft Planetary Computer STAC Explorer](https://planetarycomputer.microsoft.com/explore?c=102.7555%2C5.7222&z=7.92&v=2&d=sentinel-1-grd&m=cql%3Afdba821238c1a390e7c75d7ced805b2e&r=VV%2C+VH+False-color+composite&s=false%3A%3A100%3A%3Atrue&sr=desc&ae=0) 56 | 57 | To start, let's get the 🛰️ satellite scene we'll be using for this tutorial. 
58 | 59 | ```{code-cell} 60 | item_url = "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd/items/S1A_IW_GRDH_1SDV_20170104T225443_20170104T225512_014688_017E5D" 61 | 62 | # Load the individual item metadata and sign the assets 63 | item = pystac.Item.from_file(item_url) 64 | signed_item = planetary_computer.sign(item) 65 | signed_item 66 | ``` 67 | 68 | This is how the Sentinel-1 🩻 image looks like over Southern Thailand / Northern 69 | Peninsular Malaysia on 04 Jan 2017. 70 | 71 | ![Sentinel-1 GRD image over Southern Thailand and Northern Peninsular Malaysia on 20170104](https://planetarycomputer.microsoft.com/api/data/v1/item/preview.png?collection=sentinel-1-grd&item=S1A_IW_GRDH_1SDV_20170104T225443_20170104T225512_014688_017E5D&assets=vv&assets=vh&expression=vv%3Bvh%3Bvv%2Fvh&rescale=0%2C600&rescale=0%2C270&rescale=0%2C9&asset_as_band=True&tile_format=png&format=png) 72 | 73 | ### Load and reproject image data 🔄 74 | 75 | To keep things simple, we'll load just the VV channel into a DataPipe via 76 | {py:class}`zen3geo.datapipes.RioXarrayReader` (functional name: 77 | `read_from_rioxarray`) 😀. 78 | 79 | ```{code-cell} 80 | url = signed_item.assets["vv"].href 81 | dp = torchdata.datapipes.iter.IterableWrapper(iterable=[url]) 82 | # Reading lower resolution grid using overview_level=3 83 | dp_rioxarray = dp.read_from_rioxarray(overview_level=3) 84 | dp_rioxarray 85 | ``` 86 | 87 | The Sentinel-1 image from Planetary Computer comes in longitude/latitude 🌐 88 | geographic coordinates by default (OGC:CRS84). To make the pixels more equal 🔲 89 | area, we can project it to a 🌏 local projected coordinate system instead. 90 | 91 | ```{code-cell} 92 | def reproject_to_local_utm(dataarray: xr.DataArray, resolution: float=80.0) -> xr.DataArray: 93 | """ 94 | Reproject an xarray.DataArray grid from OGC:CRS84 to a local UTM coordinate 95 | reference system. 
96 | """ 97 | # Estimate UTM coordinate reference from a single pixel 98 | pixel = dataarray.isel(y=slice(0, 1), x=slice(0,1)) 99 | new_crs = pixel.rio.reproject(dst_crs="OGC:CRS84").rio.estimate_utm_crs() 100 | 101 | return dataarray.rio.reproject(dst_crs=new_crs, resolution=resolution) 102 | ``` 103 | 104 | ```{code-cell} 105 | dp_reprojected = dp_rioxarray.map(fn=reproject_to_local_utm) 106 | ``` 107 | 108 | ```{note} 109 | Universal Transverse Mercator (UTM) isn't actually an equal-area projection 110 | system. However, Sentinel-1 🛰️ satellite scenes from Copernicus are usually 111 | distributed in a UTM coordinate reference system, and UTM is typically a close 112 | enough 🤏 approximation to the local geographic area, or at least it won't 113 | matter much when we're looking at spatial resolutions over several 10s of 114 | metres 🙂. 115 | ``` 116 | 117 | ```{hint} 118 | For those wondering what `OGC:CRS84` is, it is the longitude/latitude version 119 | of [`EPSG:4326`](https://epsg.io/4326) 🌐 (latitude/longitude). I.e., it's a 120 | matter of axis order, with `OGC:CRS84` being x/y and `EPSG:4326` being y/x. 121 | 122 | 🔖 References: 123 | - https://gis.stackexchange.com/questions/54073/what-is-crs84-projection 124 | - https://github.com/opengeospatial/geoparquet/issues/52 125 | ``` 126 | 127 | ### Transform and visualize raster data 🔎 128 | 129 | Let's visualize 👀 the Sentinel-1 image, but before that, we'll transform 🔄 130 | the VV data from linear to [decibel](https://en.wikipedia.org/wiki/Decibel) 131 | scale. 132 | 133 | ```{code-cell} 134 | def linear_to_decibel(dataarray: xr.DataArray) -> xr.DataArray: 135 | """ 136 | Transforming the input xarray.DataArray's VV or VH values from linear to 137 | decibel scale using the formula ``10 * log_10(x)``.
138 | """ 139 | # Mask out areas with 0 so that np.log10 is not undefined 140 | da_linear = dataarray.where(cond=dataarray != 0) 141 | da_decibel = 10 * np.log10(da_linear) 142 | return da_decibel 143 | ``` 144 | 145 | ```{code-cell} 146 | dp_decibel = dp_reprojected.map(fn=linear_to_decibel) 147 | dp_decibel 148 | ``` 149 | 150 | As an aside, we'll be using the Sentinel-1 image datapipe twice later, once as 151 | a template to create a blank canvas 🎞️, and another time by itself 🪞. This 152 | requires forking 🍴 the DataPipe into two branches, which can be achieved using 153 | {py:class}`torchdata.datapipes.iter.Forker` (functional name: `fork`). 154 | 155 | ```{code-cell} 156 | dp_decibel_canvas, dp_decibel_image = dp_decibel.fork(num_instances=2) 157 | dp_decibel_canvas, dp_decibel_image 158 | ``` 159 | 160 | Now to visualize the transformed Sentinel-1 image 🖼️. Let's zoom in 🔭 to one 161 | of the analysis extent areas we'll be working on later. 162 | 163 | ```{code-cell} 164 | it = iter(dp_decibel_image) 165 | dataarray = next(it) 166 | 167 | da_clip = dataarray.rio.clip_box(minx=125718, miny=523574, maxx=326665, maxy=722189) 168 | da_clip.isel(band=0).plot.imshow(figsize=(11.5, 9), cmap="Blues_r", vmin=18, vmax=26) 169 | ``` 170 | 171 | Notice how the darker blue areas 🔵 tend to correlate more with water features 172 | like the meandering rivers and the 🐚 sea on the NorthEast. This is because the 173 | SAR 🛰️ signal which is side looking reflects off flat water bodies like a 174 | mirror 🪞, with little energy getting reflected 🙅 back directly to the sensor 175 | (hence why it looks darker ⚫). 176 | 177 | ### Load and visualize cloud-hosted vector files 💠 178 | 179 | Let's now load some vector data from the web 🕸️. These are polygons of the 180 | segmented 🌊 water extent digitized by UNOSAT's AI Based Rapid Mapping Service. 181 | We'll be converting these vector polygons to 🌈 raster masks later. 
182 | 183 | 🔗 Links: 184 | - https://github.com/UNITAR-UNOSAT/UNOSAT-AI-Based-Rapid-Mapping-Service 185 | - [UNOSAT link to polygon dataset](https://unosat.org/products/2460) 186 | - [Disaster Risk Monitoring Using Satellite Imagery online course](https://courses.nvidia.com/courses/course-v1:DLI+S-ES-01+V1) 187 | 188 | ```{code-cell} 189 | # https://gdal.org/user/virtual_file_systems.html#vsizip-zip-archives 190 | shape_url = "/vsizip/vsicurl/https://web.archive.org/web/20240411214446/https://unosat.org/static/unosat_filesystem/2460/FL20170106THA_SHP.zip/ST20170104_SatelliteDetectedWaterAndSaturatedSoil.shp" 191 | ``` 192 | 193 | This is a shapefile containing 🔷 polygons of the mapped water extent. Let's 194 | put it into a DataPipe called {py:class}`zen3geo.datapipes.PyogrioReader` 195 | (functional name: ``read_from_pyogrio``). 196 | 197 | ```{code-cell} 198 | dp_shapes = torchdata.datapipes.iter.IterableWrapper(iterable=[shape_url]) 199 | dp_pyogrio = dp_shapes.read_from_pyogrio() 200 | dp_pyogrio 201 | ``` 202 | 203 | This will take care of loading the shapefile into a 204 | {py:class}`geopandas.GeoDataFrame` object. Let's take a look at the data table 205 | 📊 to see what attributes are inside. 206 | 207 | ```{code-cell} 208 | it = iter(dp_pyogrio) 209 | geodataframe = next(it) 210 | geodataframe.dropna(axis="columns") 211 | ``` 212 | 213 | Cool, and we can also visualize the polygons 🔷 on a 2D map. To align the 214 | coordinates with the 🛰️ Sentinel-1 image above, we'll first use 215 | {py:meth}`geopandas.GeoDataFrame.to_crs` to reproject the vector from 🌐 216 | EPSG:9707 (WGS 84 + EGM96 height, latitude/longitude) to 🌏 EPSG:32648 (UTM 217 | Zone 48N). 218 | 219 | ```{code-cell} 220 | print(f"Original bounds in EPSG:9707:\n{geodataframe.bounds}") 221 | gdf = geodataframe.to_crs(crs="EPSG:32648") 222 | print(f"New bounds in EPSG:32648:\n{gdf.bounds}") 223 | ``` 224 | 225 | Plot it with {py:meth}`geopandas.GeoDataFrame.plot`. 
This vector map 🗺️ should 226 | correspond to the zoomed in Sentinel-1 image plotted earlier above. 227 | 228 | ```{code-cell} 229 | gdf.plot(figsize=(11.5, 9)) 230 | ``` 231 | 232 | ```{tip} 233 | Make sure to understand your raster and vector datasets well first! Open the 234 | files up in your favourite 🌐 Geographic Information System (GIS) tool, see how 235 | they actually look like spatially. Then you'll have a better idea to decide on 236 | how to create your data pipeline. The zen3geo way puts you as the Master 🧙 in 237 | control. 238 | ``` 239 | 240 | 241 | ## 1️⃣ Create a canvas to paint on 🎨 242 | 243 | In this section, we'll work on converting the flood water 🌊 polygons above 244 | from a 🚩 vector to a 🌈 raster format, i.e. rasterization. This will be done 245 | in two steps 📶: 246 | 247 | 1. Defining a blank canvas 🎞️ 248 | 2. Paint the polygons onto this blank canvas 🧑‍🎨 249 | 250 | For this, we'll be using tools from {py:meth}`zen3geo.datapipes.datashader`. 251 | Let's see how this can be done. 252 | 253 | ### Blank canvas from template raster 🖼️ 254 | 255 | A canvas represents a 2D area with a height and a width 📏. For us, we'll be 256 | using a {py:class}`datashader.Canvas`, which also defines the range of y-values 257 | (ymin to ymax) and x-values (xmin to xmax), essentially coordinates for 258 | every unit 🇾 height and 🇽 width. 259 | 260 | Since we already have a Sentinel-1 🛰️ raster grid with defined height/width 261 | and y/x coordinates, let's use it as a 📄 template to define our canvas. This 262 | is done via {py:class}`zen3geo.datapipes.XarrayCanvas` (functional name: 263 | ``canvas_from_xarray``). 264 | 265 | ```{code-cell} 266 | dp_canvas = dp_decibel_canvas.canvas_from_xarray() 267 | dp_canvas 268 | ``` 269 | 270 | Cool, and here's a quick inspection 👀 of the canvas dimensions and metadata. 
271 | 272 | ```{code-cell} 273 | it = iter(dp_canvas) 274 | canvas = next(it) 275 | print(f"Canvas height: {canvas.plot_height}, width: {canvas.plot_width}") 276 | print(f"Y-range: {canvas.y_range}") 277 | print(f"X-range: {canvas.x_range}") 278 | print(f"Coordinate reference system: {canvas.crs}") 279 | ``` 280 | 281 | This information should match the template Sentinel-1 dataarray 🏁. 282 | 283 | ```{code-cell} 284 | print(f"Dimensions: {dict(dataarray.sizes)}") 285 | print(f"Affine transform: {dataarray.rio.transform()}") 286 | print(f"Bounding box: {dataarray.rio.bounds()}") 287 | print(f"Coordinate reference system: {dataarray.rio.crs}") 288 | ``` 289 | 290 | ### Rasterize vector polygons onto canvas 🖌️ 291 | 292 | Now's the time to paint or rasterize the 293 | vector {py:class}`geopandas.GeoDataFrame` polygons 🔷 onto the blank 294 | {py:class}`datashader.Canvas`! This would enable us to have a direct pixel-wise 295 | X -> Y mapping ↔️ between the Sentinel-1 image (X) and target flood label (Y). 296 | 297 | The vector polygons can be rasterized or painted 🖌️ onto the template canvas 298 | using {py:class}`zen3geo.datapipes.DatashaderRasterizer` (functional name: 299 | ``rasterize_with_datashader``). 300 | 301 | ```{code-cell} 302 | dp_datashader = dp_canvas.rasterize_with_datashader(vector_datapipe=dp_pyogrio) 303 | dp_datashader 304 | ``` 305 | 306 | This will turn the vector {py:class}`geopandas.GeoDataFrame` into a 307 | raster {py:class}`xarray.DataArray` grid, with the spatial coordinates and 308 | bounds matching exactly with the template Sentinel-1 image 😎. 309 | 310 | ```{note} 311 | Since we have just one Sentinel-1 🛰️ image and one raster 💧 flood 312 | mask, we have an easy 1:1 mapping. There are two other scenarios supported by 313 | {py:class}`zen3geo.datapipes.DatashaderRasterizer`: 314 | 315 | 1. N:1 - Many {py:class}`datashader.Canvas` objects to one vector 316 | {py:class}`geopandas.GeoDataFrame`. 
The single vector geodataframe will be 317 | broadcasted to match the length of the canvas list. This is useful for 318 | situations when you have a 🌐 'global' vector database that you want to pair 319 | with multiple 🛰️ satellite images. 320 | 2. N:N - Many {py:class}`datashader.Canvas` objects to many vector 321 | {py:class}`geopandas.GeoDataFrame` objects. In this case, the list of grids 322 | **must** ❗ have the same length as the list of vector geodataframes. E.g. 323 | if you have 5 grids, there must also be 5 vector files. This is so that a 324 | 1:1 pairing can be done, useful when each raster tile 🖽 has its own 325 | associated vector annotation. 326 | ``` 327 | 328 | ```{seealso} 329 | For more details on how rasterization of polygons work behind the scenes 🎦, 330 | check out {doc}`Datashader `'s documentation on: 331 | 332 | - {doc}`The datashader pipeline ` 333 | (especially the section on Aggregation). 334 | - {doc}`Rendering large collections of polygons ` 335 | ``` 336 | 337 | 338 | ## 2️⃣ Combine and conquer ⚔️ 339 | 340 | So far, we've got two datapipes that should be 🧑‍🤝‍🧑 paired up in an X -> Y 341 | manner: 342 | 343 | 1. The pre-processed Sentinel-1 🌈 raster image in ``dp_decibel_image`` 344 | 2. The rasterized 💧 flood segmentation masks in ``dp_datashader`` 345 | 346 | One way to get these two pieces in a Machine Learning ready chip format is via 347 | a stack, slice and split ™️ approach. Think of it like a sandwich 🥪, we first 348 | stack the bread 🍞 and lettuce 🥬, and then slice the pieces 🍕 through the 349 | layers once. Ok, that was a bad analogy, let's just stick with tensors 🤪. 350 | 351 | ### Stacking the raster layers 🥞 352 | 353 | Each of our 🌈 raster inputs are {py:class}`xarray.DataArray` objects with the 354 | same spatial resolution and extent 🪟, so these can be stacked into an 355 | {py:class}`xarray.Dataset` with multiple data variables. 
First, we'll zip 🤐 356 | the two datapipes together using {py:class}`torchdata.datapipes.iter.Zipper` 357 | (functional name: ``zip``) 358 | 359 | ```{code-cell} 360 | dp_zip = dp_decibel_image.zip(dp_datashader) 361 | dp_zip 362 | ``` 363 | 364 | This will result in a DataPipe where each item is a tuple of (X, Y) pairs 🧑‍🤝‍🧑. 365 | Just to illustrate what we've done so far, we can use 366 | {py:class}`torchdata.datapipes.utils.to_graph` to visualize the data pipeline 367 | ⛓️. 368 | 369 | ```{code-cell} 370 | torchdata.datapipes.utils.to_graph(dp=dp_zip) 371 | ``` 372 | 373 | Next, let's combine 🖇️ the two (X, Y) {py:class}`xarray.DataArray` objects in 374 | the tuple into an {py:class}`xarray.Dataset` using 375 | {py:class}`torchdata.datapipes.iter.Collator` (functional name: `collate`). 376 | We'll also ✂️ clip the dataset to a bounding box area where the target water 377 | mask has no 0 or NaN values. 378 | 379 | ```{code-cell} 380 | def xr_collate_fn(image_and_mask: tuple) -> xr.Dataset: 381 | """ 382 | Combine a pair of xarray.DataArray (image, mask) inputs into an 383 | xarray.Dataset with two data variables named 'image' and 'mask'. 384 | """ 385 | # Turn 2 xr.DataArray objects into 1 xr.Dataset with multiple data vars 386 | image, mask = image_and_mask 387 | dataset: xr.Dataset = xr.merge( 388 | objects=[image.isel(band=0).rename("image"), mask.rename("mask")], 389 | join="override", 390 | ) 391 | 392 | # Clip dataset to bounding box extent of where labels are 393 | mask_extent: tuple = mask.where(cond=mask == 1, drop=True).rio.bounds() 394 | clipped_dataset: xr.Dataset = dataset.rio.clip_box(*mask_extent) 395 | 396 | return clipped_dataset 397 | ``` 398 | 399 | ```{code-cell} 400 | dp_dataset = dp_zip.collate(collate_fn=xr_collate_fn) 401 | dp_dataset 402 | ``` 403 | 404 | Double check to see that resulting {py:class}`xarray.Dataset`'s image and mask 405 | looks ok 🙆‍♂️. 
406 | 407 | ```{code-cell} 408 | it = iter(dp_dataset) 409 | dataset = next(it) 410 | 411 | # Create subplot with VV image on the left and Water mask on the right 412 | fig, axs = plt.subplots(ncols=2, figsize=(11.5, 4.5), sharey=True) 413 | dataset.image.plot.imshow(ax=axs[0], cmap="Blues_r") 414 | axs[0].set_title("Sentinel-1 VV channel") 415 | dataset.mask.plot.imshow(ax=axs[1], cmap="Blues") 416 | axs[1].set_title("Water mask") 417 | plt.show() 418 | ``` 419 | 420 | ### Slice into chips and turn into tensors 🗡️ 421 | 422 | To cut 🔪 the {py:class}`xarray.Dataset` into 512x512 sized chips, we'll use 423 | {py:class}`zen3geo.datapipes.XbatcherSlicer` (functional name: 424 | `slice_with_xbatcher`). Refer to {doc}`./chipping` if you need a 🧑‍🎓 refresher. 425 | 426 | ```{code-cell} 427 | dp_xbatcher = dp_dataset.slice_with_xbatcher(input_dims={"y": 512, "x": 512}) 428 | dp_xbatcher 429 | ``` 430 | 431 | Next step is to convert the 512x512 chips into a {py:class}`torch.Tensor` via 432 | {py:class}`torchdata.datapipes.iter.Mapper` (functional name: `map`). The 🛰️ 433 | Sentinel-1 image and 💧 water mask will be split out at this point too. 434 | 435 | ```{code-cell} 436 | def dataset_to_tensors(chip: xr.Dataset) -> (torch.Tensor, torch.Tensor): 437 | """ 438 | Converts an xarray.Dataset into to two torch.Tensor objects, the first one 439 | being the satellite image, and the second one being the target mask. 440 | """ 441 | image: torch.Tensor = torch.as_tensor(chip.image.data) 442 | mask: torch.Tensor = torch.as_tensor(chip.mask.data.astype("uint8")) 443 | 444 | return image, mask 445 | ``` 446 | 447 | ```{code-cell} 448 | dp_map = dp_xbatcher.map(fn=dataset_to_tensors) 449 | dp_map 450 | ``` 451 | 452 | At this point, we could do some batching and collating, but we'll point you 453 | again to {doc}`./chipping` to figure it out 😝. Let's take a look at a graph 454 | of the complete data pipeline. 
455 | 456 | ```{code-cell} 457 | torchdata.datapipes.utils.to_graph(dp=dp_map) 458 | ``` 459 | 460 | Sweet, time for the final step ⏩. 461 | 462 | ### Into a DataLoader 🏋️ 463 | 464 | Pass the DataPipe into {py:class}`torch.utils.data.DataLoader` 🤾! 465 | 466 | ```{code-cell} 467 | dataloader = torch.utils.data.DataLoader(dataset=dp_map) 468 | for i, batch in enumerate(dataloader): 469 | image, mask = batch 470 | print(f"Batch {i} - image: {image.shape}, mask: {mask.shape}") 471 | ``` 472 | 473 | Now go train some flood water detection models 🌊🌊🌊 474 | 475 | ```{seealso} 476 | To learn more about AI-based flood mapping with SAR, check out these resources: 477 | 478 | - [UNOSAT/NVIDIA Disaster Risk Monitoring Using Satellite Imagery online course](https://event.unitar.org/full-catalog/disaster-risk-monitoring-using-satellite-imagery) 479 | - [Code to train a Convolutional Neural Network for flood segmentation](https://github.com/UNITAR-UNOSAT/UNOSAT-AI-Based-Rapid-Mapping-Service/blob/master/Fastai%20training.ipynb) 480 | ``` 481 | -------------------------------------------------------------------------------- /docs/walkthrough.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | formats: md:myst 4 | text_representation: 5 | extension: .md 6 | format_name: myst 7 | kernelspec: 8 | display_name: Python 3 9 | language: python 10 | name: python3 11 | --- 12 | 13 | # Walkthrough 14 | 15 | > *To get it, you first see it, and then let it go* 16 | 17 | In this tutorial 🧑‍🏫, we'll step through an Earth Observation 🛰️ data pipeline 18 | using ``torchdata`` and by the end of this lesson, you should be able to: 19 | - Find Cloud-Optimized GeoTIFFs (COGs) from STAC catalogs 🥞 20 | - Construct a DataPipe that iteratively reads several COGs in a stream 🌊 21 | - Loop through batches of images in a DataPipe with a DataLoader 🏋️ 22 | 23 | ## 🎉 **Getting started** 24 | 25 | These are the tools 🛠️ you'll need. 
26 | 27 | ```{code-cell} 28 | # Geospatial libraries 29 | import pystac 30 | import planetary_computer 31 | import rioxarray 32 | # Deep Learning libraries 33 | import torch 34 | import torchdata 35 | import zen3geo 36 | ``` 37 | 38 | Just to make sure we’re on the same page 📃, 39 | let’s check that we’ve got compatible versions installed. 40 | 41 | ```{code-cell} 42 | print(f"pystac version: {pystac.__version__}") 43 | print(f"planetary-computer version: {planetary_computer.__version__}") 44 | print(f"torch version: {torch.__version__}") 45 | 46 | print(f"torchdata version: {torchdata.__version__}") 47 | print(f"zen3geo version: {zen3geo.__version__}") 48 | rioxarray.show_versions() 49 | ``` 50 | 51 | ## 0️⃣ Find [Cloud-Optimized GeoTIFFs](https://www.cogeo.org) 🗺️ 52 | 53 | Let's get some optical satellite data using [STAC](https://stacspec.org)! 54 | How about Sentinel-2 L2A data over Singapore 🇸🇬? 55 | 56 | 🔗 Links: 57 | - [Official Sentinel-2 description page at ESA](https://sentinel.esa.int/web/sentinel/missions/sentinel-2) 58 | - [Microsoft Planetary Computer STAC Explorer](https://planetarycomputer.microsoft.com/explore?c=103.8152%2C1.3338&z=10.08&v=2&d=sentinel-2-l2a&s=false%3A%3A100%3A%3Atrue&ae=0&m=cql%3A2ff1401acb50731fa0a6d1e2a46f3064&r=Natural+color) 59 | - [AWS Sentinel-2 Cloud-Optimized GeoTIFFs](https://registry.opendata.aws/sentinel-2-l2a-cogs) 60 | 61 | 62 | ```{code-cell} 63 | item_url = "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-2-l2a/items/S2A_MSIL2A_20220115T032101_R118_T48NUG_20220115T170435" 64 | 65 | # Load the individual item metadata and sign the assets 66 | item = pystac.Item.from_file(item_url) 67 | signed_item = planetary_computer.sign(item) 68 | signed_item 69 | ``` 70 | 71 | ### Inspect one of the data assets 🍱 72 | 73 | The Sentinel-2 STAC item contains several assets. 74 | These include different 🌈 bands (e.g. 'B02', 'B03', 'B04'). 
75 | Let's just use the 'visual' product for now which includes the RGB bands. 76 | 77 | ```{code-cell} 78 | url: str = signed_item.assets["visual"].href 79 | da = rioxarray.open_rasterio(filename=url) 80 | da 81 | ``` 82 | 83 | This is how the Sentinel-2 image looks like over Singapore on 15 Jan 2022. 84 | 85 | ![Sentinel-2 L2A image over Singapore on 20220115](https://planetarycomputer.microsoft.com/api/data/v1/item/preview.png?collection=sentinel-2-l2a&item=S2A_MSIL2A_20220115T032101_R118_T48NUG_20220115T170435&assets=visual&asset_bidx=visual%7C1%2C2%2C3&nodata=0&format=png) 86 | 87 | ## 1️⃣ Construct [DataPipe](https://github.com/pytorch/data/tree/v0.6.1#what-are-datapipes) 📡 88 | 89 | A torch `DataPipe` is a way of composing data (rather than inheriting data). 90 | Yes, I don't know what it really means either, so here's some extra reading. 91 | 92 | 🔖 References: 93 | - https://pytorch.org/blog/pytorch-1.11-released/#introducing-torchdata 94 | - https://github.com/pytorch/data/tree/v0.6.1#what-are-datapipes 95 | - https://realpython.com/inheritance-composition-python 96 | 97 | ### Create an Iterable 📏 98 | 99 | Start by wrapping a list of URLs to the Cloud-Optimized GeoTIFF files. 100 | We only have 1 item so we'll use ``[url]``, but if you have more, you can do 101 | ``[url1, url2, url3]``, etc. Pass this iterable list into 102 | {py:class}`torchdata.datapipes.iter.IterableWrapper`: 103 | 104 | ```{code-cell} 105 | dp = torchdata.datapipes.iter.IterableWrapper(iterable=[url]) 106 | dp 107 | ``` 108 | 109 | The ``dp`` variable is the DataPipe! 110 | Now to apply some more transformations/functions on it. 111 | 112 | ### Read using RioXarrayReader 🌐 113 | 114 | This is where ☯ ``zen3geo`` comes in. We'll be using the 115 | {py:class}`zen3geo.datapipes.rioxarray.RioXarrayReaderIterDataPipe` class, or 116 | rather, the short alias {py:class}`zen3geo.datapipes.RioXarrayReader`. 
117 | 118 | Confusingly, there are two ways or forms of applying ``RioXarrayReader``, 119 | a class-based method and a functional method. 120 | 121 | ```{code-cell} 122 | # Using class constructors 123 | dp_rioxarray = zen3geo.datapipes.RioXarrayReader(source_datapipe=dp) 124 | dp_rioxarray 125 | ``` 126 | 127 | ```{code-cell} 128 | # Using functional form (recommended) 129 | dp_rioxarray = dp.read_from_rioxarray() 130 | dp_rioxarray 131 | ``` 132 | 133 | Note that both ways are equivalent (they produce the same IterDataPipe output), 134 | but the latter (functional) form is preferred, see also 135 | https://pytorch.org/data/0.4/tutorial.html#registering-datapipes-with-the-functional-api 136 | 137 | What if you don't want the whole Sentinel-2 scene at the full 10m resolution? 138 | Since we're using Cloud-Optimized GeoTIFFs, you could set an ``overview_level`` 139 | (following https://corteva.github.io/rioxarray/stable/examples/COG.html). 140 | 141 | ```{code-cell} 142 | dp_rioxarray_zoom3 = dp.read_from_rioxarray(overview_level=3) 143 | dp_rioxarray_zoom3 144 | ``` 145 | 146 | Extra keyword arguments will be handled by {py:func}`rioxarray.open_rasterio` 147 | or {py:func}`rasterio.open`. 148 | 149 | ```{note} 150 | Other DataPipe classes/functions can be stacked or joined to this basic GeoTIFF 151 | reader. For example, clipping by bounding box or reprojecting to a certain 152 | Coordinate Reference System. If you would like to implement this, check out the 153 | [Contributing Guidelines](./CONTRIBUTING) to get started! 154 | ``` 155 | 156 | ## 2️⃣ Loop through DataPipe ⚙️ 157 | 158 | A DataPipe describes a flow of information. 159 | Through a series of steps it goes, 160 | as one piece comes in, another might follow. 
161 | 162 | ### Basic iteration ♻️ 163 | 164 | At the most basic level, you could iterate through the DataPipe like so: 165 | 166 | ```{code-cell} 167 | it = iter(dp_rioxarray_zoom3) 168 | dataarray = next(it) 169 | dataarray 170 | ``` 171 | 172 | Or if you're more familiar with a for-loop, here it is: 173 | 174 | ```{code-cell} 175 | for dataarray in dp_rioxarray_zoom3: 176 | print(dataarray) 177 | # Run model on this data batch 178 | ``` 179 | 180 | ### Into a DataLoader 🏋️ 181 | 182 | For the deep learning folks, you might need one extra step. 183 | The {py:class}``xarray.DataArray`` needs to be converted to a tensor. 184 | In the Pytorch world, that can happen via {py:func}``torch.as_tensor``. 185 | 186 | ```{code-cell} 187 | def fn(da): 188 | return torch.as_tensor(da.data) 189 | ``` 190 | 191 | Using {py:class}`torchdata.datapipes.iter.Mapper` (functional name: `map`), 192 | we'll apply the tensor conversion function to each dataarray in the DataPipe. 193 | 194 | ```{code-cell} 195 | dp_tensor = dp_rioxarray_zoom3.map(fn=fn) 196 | dp_tensor 197 | ``` 198 | 199 | Finally, let's put our DataPipe into a {py:class}`torch.utils.data.DataLoader`! 200 | 201 | ```{code-cell} 202 | dataloader = torch.utils.data.DataLoader(dataset=dp_tensor) 203 | for batch in dataloader: 204 | tensor = batch 205 | print(tensor) 206 | ``` 207 | 208 | And so it begins 🌄 209 | 210 | --- 211 | 212 | That’s all 🎉! For more information on how to use DataPipes, check out: 213 | 214 | - {doc}`TorchData DataPipe Tutorial ` 215 | - {doc}`TorchData Usage Examples ` 216 | 217 | If you have any questions 🙋, feel free to ask us anything at 218 | https://github.com/weiji14/zen3geo/discussions or visit the Pytorch forums at 219 | https://discuss.pytorch.org/c/data/37. 220 | 221 | Cheers! 
222 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "zen3geo" 3 | version = "0.6.2" 4 | description = "The 🌏 data science library you've been waiting for~" 5 | authors = ["Wei Ji <23487320+weiji14@users.noreply.github.com>"] 6 | license = "LGPL-3.0-or-later" 7 | readme = "README.md" 8 | classifiers = [ 9 | "Development Status :: 4 - Beta", 10 | "Intended Audience :: Science/Research", 11 | "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)", 12 | "Topic :: Scientific/Engineering", 13 | "Topic :: Scientific/Engineering :: GIS", 14 | "Topic :: Scientific/Engineering :: Image Processing", 15 | "Topic :: Software Development :: Libraries", 16 | "Programming Language :: Python :: 3.8", 17 | "Programming Language :: Python :: 3.9", 18 | "Programming Language :: Python :: 3.10", 19 | "Programming Language :: Python :: 3.11", 20 | ] 21 | exclude = ["zen3geo/tests"] 22 | 23 | [tool.poetry.urls] 24 | "Homepage" = "https://github.com/weiji14/zen3geo/discussions" 25 | "Changelog" = "https://zen3geo.readthedocs.io/en/latest/changelog.html" 26 | "Documentation" = "https://zen3geo.readthedocs.io" 27 | "Download" = "https://anaconda.org/conda-forge/zen3geo" 28 | "Source Code" = "https://github.com/weiji14/zen3geo" 29 | "Sponsor" = "https://github.com/sponsors/weiji14" 30 | 31 | [tool.poetry.dependencies] 32 | # Required 33 | python = ">=3.8, <4.0" 34 | rioxarray = ">=0.10.0" 35 | torchdata = ">=0.4.0" 36 | # Optional 37 | datashader = {version = ">=0.14.0", optional = true} 38 | pyogrio = {version = ">=0.4.0", extras = ["geopandas"], optional = true} 39 | pystac = {version=">=1.4.0", optional=true} 40 | pystac-client = {version = ">=0.4.0", optional = true} 41 | spatialpandas = {version = ">=0.4.0", optional = true} 42 | stackstac = {version = ">=0.4.0", optional = true} 43 | xbatcher = 
{version = ">=0.2.0", optional = true} 44 | xpystac = {version = ">=0.0.1", optional = true} 45 | zarr = {version = ">=2.13.0", optional = true} 46 | # Docs 47 | adlfs = {version = "*", optional = true} 48 | contextily = {version = "*", optional = true} 49 | graphviz = {version = "*", optional = true} 50 | jupyter-book = {version="*", optional=true} 51 | matplotlib = {version = "*", optional = true} 52 | planetary-computer = {version="*", optional=true} 53 | xarray-datatree = {version="*", optional=true} 54 | 55 | [tool.poetry.group.dev.dependencies] 56 | aiohttp = "*" 57 | black = "*" 58 | pytest = "*" 59 | 60 | [tool.poetry.extras] 61 | docs = [ 62 | "adlfs", 63 | "contextily", 64 | "datashader", 65 | "graphviz", 66 | "jupyter-book", 67 | "matplotlib", 68 | "planetary-computer", 69 | "pyogrio", 70 | "pystac", 71 | "pystac_client", 72 | "spatialpandas", 73 | "stackstac", 74 | "xarray-datatree", 75 | "xbatcher", 76 | "xpystac", 77 | "zarr" 78 | ] 79 | raster = [ 80 | "xbatcher", 81 | "zarr" 82 | ] 83 | spatial = [ 84 | "datashader", 85 | "spatialpandas" 86 | ] 87 | stac = [ 88 | "pystac", 89 | "pystac_client", 90 | "stackstac", 91 | "xpystac" 92 | ] 93 | vector = ["pyogrio"] 94 | 95 | [tool.poetry-dynamic-versioning] 96 | bump = true 97 | enable = true 98 | metadata = true 99 | style = "pep440" 100 | 101 | [build-system] 102 | requires = ["poetry-core>=1.7.0", "poetry-dynamic-versioning"] 103 | build-backend = "poetry.core.masonry.api" 104 | -------------------------------------------------------------------------------- /zen3geo/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | zen3geo - The 🌏 data science library you've been waiting for~ 3 | """ 4 | 5 | from importlib.metadata import version 6 | 7 | from zen3geo import datapipes 8 | 9 | __version__ = version("zen3geo") # e.g. 
0.1.2.dev3+g0ab3cd78 10 | -------------------------------------------------------------------------------- /zen3geo/datapipes/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Iterable-style DataPipes for geospatial raster 🌈 and vector 🚏 data. 3 | """ 4 | 5 | from zen3geo.datapipes.datashader import ( 6 | DatashaderRasterizerIterDataPipe as DatashaderRasterizer, 7 | XarrayCanvasIterDataPipe as XarrayCanvas, 8 | ) 9 | from zen3geo.datapipes.geopandas import ( 10 | GeoPandasRectangleClipperIterDataPipe as GeoPandasRectangleClipper, 11 | ) 12 | from zen3geo.datapipes.pyogrio import PyogrioReaderIterDataPipe as PyogrioReader 13 | from zen3geo.datapipes.pystac import PySTACItemReaderIterDataPipe as PySTACItemReader 14 | from zen3geo.datapipes.pystac_client import ( 15 | PySTACAPIItemListerIterDataPipe as PySTACAPIItemLister, 16 | PySTACAPISearcherIterDataPipe as PySTACAPISearcher, 17 | ) 18 | from zen3geo.datapipes.rioxarray import RioXarrayReaderIterDataPipe as RioXarrayReader 19 | from zen3geo.datapipes.stackstac import ( 20 | StackSTACMosaickerIterDataPipe as StackSTACMosaicker, 21 | StackSTACStackerIterDataPipe as StackSTACStacker, 22 | ) 23 | from zen3geo.datapipes.xbatcher import XbatcherSlicerIterDataPipe as XbatcherSlicer 24 | from zen3geo.datapipes.xpystac import ( 25 | XpySTACAssetReaderIterDataPipe as XpySTACAssetReader, 26 | ) 27 | -------------------------------------------------------------------------------- /zen3geo/datapipes/datashader.py: -------------------------------------------------------------------------------- 1 | """ 2 | DataPipes for :doc:`datashader `. 
@functional_datapipe("rasterize_with_datashader")
class DatashaderRasterizerIterDataPipe(IterDataPipe):
    """
    Takes vector :py:class:`geopandas.GeoSeries` or
    :py:class:`geopandas.GeoDataFrame` geometries and rasterizes them using
    :py:class:`datashader.Canvas` to yield an :py:class:`xarray.DataArray`
    raster with the input geometries aggregated into a fixed-sized grid
    (functional name: ``rasterize_with_datashader``).

    Parameters
    ----------
    source_datapipe : IterDataPipe[datashader.Canvas]
        A DataPipe that contains :py:class:`datashader.Canvas` objects with a
        ``.crs`` attribute. This will be the template defining the output
        raster's spatial extent and x/y range.

    vector_datapipe : IterDataPipe[geopandas.GeoDataFrame]
        A DataPipe that contains :py:class:`geopandas.GeoSeries` or
        :py:class:`geopandas.GeoDataFrame` vector geometries with a
        :py:attr:`.crs <geopandas.GeoDataFrame.crs>` property.

    agg : Optional[datashader.reductions.Reduction]
        Reduction operation to compute. Default depends on the input vector
        type:

        - For points, default is :py:class:`datashader.reductions.count`
        - For lines, default is :py:class:`datashader.reductions.any`
        - For polygons, default is :py:class:`datashader.reductions.any`

        For more information, refer to the section on Aggregation under
        datashader's :doc:`datashader:getting_started/Pipeline` docs.

    kwargs : Optional
        Extra keyword arguments to pass to the :py:class:`datashader.Canvas`
        class's aggregation methods such as ``datashader.Canvas.points``.

    Yields
    ------
    raster : xarray.DataArray
        An :py:class:`xarray.DataArray` object containing the raster data. This
        raster will have a :py:attr:`rioxarray.rioxarray.XRasterBase.crs`
        property and a proper affine transform viewable with
        :py:meth:`rioxarray.rioxarray.XRasterBase.transform`.

    Raises
    ------
    ModuleNotFoundError
        If ``spatialpandas`` is not installed. Please install it (e.g. via
        ``pip install spatialpandas``) before using this class.

    ValueError
        If either the length of the ``vector_datapipe`` is not 1, or if the
        length of the ``vector_datapipe`` is not equal to the length of the
        ``source_datapipe``. I.e. the ratio of vector:canvas must be 1:N or
        be exactly N:N.

    AttributeError
        If either the canvas in ``source_datapipe`` or vector geometry in
        ``vector_datapipe`` is missing a ``.crs`` attribute. Please set the
        coordinate reference system (e.g. using ``canvas.crs = 'OGC:CRS84'``
        for the :py:class:`datashader.Canvas` input or
        ``vector = vector.set_crs(crs='OGC:CRS84')`` for the
        :py:class:`geopandas.GeoSeries` or :py:class:`geopandas.GeoDataFrame`
        input) before passing them into the datapipe.

    NotImplementedError
        If the input vector geometry type to ``vector_datapipe`` is not
        supported, typically when a
        :py:class:`shapely.geometry.GeometryCollection` is used. Supported
        types include `Point`, `LineString`, and `Polygon`, plus their
        multipart equivalents `MultiPoint`, `MultiLineString`, and
        `MultiPolygon`.
    """

    def __init__(
        self,
        source_datapipe: IterDataPipe,
        vector_datapipe: IterDataPipe,
        agg: Optional[Any] = None,
        **kwargs: Optional[Dict[str, Any]],
    ) -> None:
        if spatialpandas is None:
            raise ModuleNotFoundError(
                "Package `spatialpandas` is required to be installed to use this datapipe. "
                "Please use `pip install spatialpandas` or "
                "`conda install -c conda-forge spatialpandas` "
                "to install the package"
            )
        self.source_datapipe: IterDataPipe = source_datapipe  # datashader.Canvas
        self.vector_datapipe: IterDataPipe = vector_datapipe  # geopandas.GeoDataFrame
        # Datashader Aggregation/Reduction function (bare `Optional` replaced
        # with `Optional[Any]`, which is what the annotation actually means)
        self.agg: Optional[Any] = agg
        self.kwargs = kwargs

        # Fail fast if lengths cannot broadcast (vector:canvas must be 1:N or N:N)
        len_vector_datapipe: int = len(self.vector_datapipe)
        len_canvas_datapipe: int = len(self.source_datapipe)
        if len_vector_datapipe != 1 and len_vector_datapipe != len_canvas_datapipe:
            raise ValueError(
                f"Unmatched lengths for the canvas datapipe ({self.source_datapipe}) "
                f"and vector datapipe ({self.vector_datapipe}). \n"
                f"The vector datapipe's length ({len_vector_datapipe}) should either "
                f"be (1) to allow for broadcasting, or match the canvas datapipe's "
                f"length of ({len_canvas_datapipe})."
            )

    def __iter__(self) -> Iterator[xr.DataArray]:
        # Broadcast vector iterator to match length of raster iterator; when
        # the vector datapipe holds a single element, that element is reused
        # as the fill value for every extra canvas.
        fill_vector = list(self.vector_datapipe).pop()
        for canvas, vector in self.source_datapipe.zip_longest(
            self.vector_datapipe, fill_value=fill_vector
        ):
            # If canvas has no CRS attribute, set one to prevent AttributeError
            canvas.crs = getattr(canvas, "crs", None)
            if canvas.crs is None:
                raise AttributeError(
                    "Missing crs information for datashader.Canvas with "
                    f"x_range: {canvas.x_range} and y_range: {canvas.y_range}. "
                    "Please set crs using e.g. `canvas.crs = 'OGC:CRS84'`."
                )

            # Reproject vector geometries to coordinate reference system
            # of the raster canvas if both are different
            try:
                if vector.crs != canvas.crs:
                    vector = vector.to_crs(crs=canvas.crs)
            except (AttributeError, ValueError) as e:
                raise AttributeError(
                    f"Missing crs information for input {vector.__class__} object "
                    f"with the following bounds: \n {vector.bounds} \n"
                    f"Please set crs using e.g. `vector = vector.set_crs(crs='OGC:CRS84')`."
                ) from e

            # Convert vector to spatialpandas format to allow datashader's
            # rasterization methods to work
            try:
                _vector = spatialpandas.GeoDataFrame(data=vector.geometry)
            except ValueError as e:
                if str(e) == "Unable to convert data argument to a GeometryList array":
                    raise NotImplementedError(
                        f"Unsupported geometry type(s) {set(vector.geom_type)} detected, "
                        "only point, line or polygon vector geometry types "
                        "(or their multi- equivalents) are supported."
                    ) from e
                else:
                    raise e

            # Determine geometry type to know which rasterization method to use
            vector_dtype: spatialpandas.geometry.GeometryDtype = _vector.geometry.dtype

            if isinstance(vector_dtype, (PointDtype, MultiPointDtype)):
                raster: xr.DataArray = canvas.points(
                    source=_vector, agg=self.agg, geometry="geometry", **self.kwargs
                )
            elif isinstance(vector_dtype, (LineDtype, MultiLineDtype)):
                raster: xr.DataArray = canvas.line(
                    source=_vector, agg=self.agg, geometry="geometry", **self.kwargs
                )
            elif isinstance(vector_dtype, (PolygonDtype, MultiPolygonDtype)):
                raster: xr.DataArray = canvas.polygons(
                    source=_vector, agg=self.agg, geometry="geometry", **self.kwargs
                )
            else:
                # Guard against `raster` being unbound (NameError) should
                # spatialpandas ever yield a geometry dtype not handled above;
                # raise the documented NotImplementedError instead.
                raise NotImplementedError(
                    f"Unsupported geometry dtype {vector_dtype} detected, "
                    "only point, line or polygon vector geometry types "
                    "(or their multi- equivalents) are supported."
                )

            # Convert boolean dtype rasters to uint8 to enable reprojection
            if raster.dtype == "bool":
                raster: xr.DataArray = raster.astype(dtype="uint8")
            # Set coordinate transform for raster and ensure affine
            # transform is correct (the y-coordinate goes from North to South)
            raster: xr.DataArray = raster.rio.set_crs(input_crs=canvas.crs)
            # assert raster.rio.transform().e > 0  # y goes South to North
            _raster: xr.DataArray = raster.rio.reproject(
                dst_crs=canvas.crs, shape=raster.rio.shape
            )
            # assert _raster.rio.transform().e < 0  # y goes North to South

            yield _raster

    def __len__(self) -> int:
        return len(self.source_datapipe)
@functional_datapipe("canvas_from_xarray")
class XarrayCanvasIterDataPipe(IterDataPipe[Union[xr.DataArray, xr.Dataset]]):
    """
    Takes an :py:class:`xarray.DataArray` or :py:class:`xarray.Dataset`
    and creates a blank :py:class:`datashader.Canvas` based on the spatial
    extent and coordinates of the input (functional name:
    ``canvas_from_xarray``).

    Parameters
    ----------
    source_datapipe : IterDataPipe[xarray.DataArray]
        A DataPipe that contains :py:class:`xarray.DataArray` or
        :py:class:`xarray.Dataset` objects. These data objects need to have
        both a ``.rio.x_dim`` and ``.rio.y_dim`` attribute, which is present
        if the original dataset was opened using
        :py:func:`rioxarray.open_rasterio`, or by setting it manually using
        :py:meth:`rioxarray.rioxarray.XRasterBase.set_spatial_dims`.

    kwargs : Optional
        Extra keyword arguments to pass to :py:class:`datashader.Canvas`.

    Yields
    ------
    canvas : datashader.Canvas
        A :py:class:`datashader.Canvas` object covering the same spatial
        extent and x/y coordinates as the input raster grid. The canvas also
        carries a ``.crs`` attribute copied from the input xarray object's
        :py:attr:`rioxarray.rioxarray.XRasterBase.crs` property.

    Raises
    ------
    ModuleNotFoundError
        If ``datashader`` is not installed. Follow
        :doc:`install instructions for datashader <datashader:getting_started/index>`
        before using this class.
    """

    def __init__(
        self,
        source_datapipe: IterDataPipe[Union[xr.DataArray, xr.Dataset]],
        **kwargs: Optional[Dict[str, Any]],
    ) -> None:
        if datashader is None:
            raise ModuleNotFoundError(
                "Package `datashader` is required to be installed to use this datapipe. "
                "Please use `pip install datashader` or "
                "`conda install -c conda-forge datashader` "
                "to install the package"
            )
        self.source_datapipe: IterDataPipe[
            Union[xr.DataArray, xr.Dataset]
        ] = source_datapipe
        self.kwargs = kwargs

    def __iter__(self) -> Iterator:
        for grid in self.source_datapipe:
            # Canvas pixel dimensions mirror the raster grid's x/y sizes
            width: int = len(grid[grid.rio.x_dim])
            height: int = len(grid[grid.rio.y_dim])
            left, bottom, right, top = grid.rio.bounds()

            blank_canvas = datashader.Canvas(
                plot_width=width,
                plot_height=height,
                x_range=(left, right),
                y_range=(bottom, top),
                **self.kwargs,
            )
            # Carry the CRS over so downstream rasterizers can georeference
            blank_canvas.crs = grid.rio.crs
            yield blank_canvas

    def __len__(self) -> int:
        return len(self.source_datapipe)
@functional_datapipe("clip_vector_with_rectangle")
class GeoPandasRectangleClipperIterDataPipe(IterDataPipe):
    """
    Takes vector :py:class:`geopandas.GeoSeries` or
    :py:class:`geopandas.GeoDataFrame` geometries and clips them with the
    rectangular extent of an :py:class:`xarray.DataArray` or
    :py:class:`xarray.Dataset` grid to yield tuples of spatially subsetted
    :py:class:`geopandas.GeoSeries` or :py:class:`geopandas.GeoDataFrame`
    vectors and the corresponding :py:class:`xarray.DataArray` or
    :py:class:`xarray.Dataset` raster object used as the clip mask (functional
    name: ``clip_vector_with_rectangle``).

    Uses the rectangular clip algorithm of :py:func:`geopandas.clip`, with the
    bounding box rectangle (minx, miny, maxx, maxy) derived from input raster
    mask's bounding box extent.

    Note
    ----
    If the input vector's coordinate reference system (``crs``) is different to
    the raster mask's coordinate reference system (``rio.crs``), the vector
    will be reprojected using :py:meth:`geopandas.GeoDataFrame.to_crs` to match
    the raster's coordinate reference system.

    Parameters
    ----------
    source_datapipe : IterDataPipe[geopandas.GeoDataFrame]
        A DataPipe that contains :py:class:`geopandas.GeoSeries` or
        :py:class:`geopandas.GeoDataFrame` vector geometries with a
        :py:attr:`.crs <geopandas.GeoDataFrame.crs>` property.

    mask_datapipe : IterDataPipe[xarray.DataArray]
        A DataPipe that contains :py:class:`xarray.DataArray` or
        :py:class:`xarray.Dataset` objects with a
        :py:attr:`.rio.crs <rioxarray.rioxarray.XRasterBase.crs>` property and
        :py:meth:`.rio.bounds <rioxarray.rioxarray.XRasterBase.bounds>` method.

    kwargs : Optional
        Extra keyword arguments to pass to :py:func:`geopandas.clip`.

    Yields
    ------
    paired_obj : Tuple[geopandas.GeoDataFrame, xarray.DataArray]
        A tuple consisting of the spatially subsetted
        :py:class:`geopandas.GeoSeries` or :py:class:`geopandas.GeoDataFrame`
        vector, and the corresponding :py:class:`xarray.DataArray` or
        :py:class:`xarray.Dataset` raster used as the clip mask.

    Raises
    ------
    ModuleNotFoundError
        If ``geopandas`` is not installed. See
        :doc:`install instructions for geopandas <geopandas:getting_started/install>`
        (e.g. via ``pip install geopandas``) before using this class.

    NotImplementedError
        If the length of the vector ``source_datapipe`` is not 1. Currently,
        all of the vector geometries have to be merged into a single
        :py:class:`geopandas.GeoSeries` or :py:class:`geopandas.GeoDataFrame`.
        Refer to the section on Appending under geopandas'
        :doc:`geopandas:docs/user_guide/mergingdata` docs.
    """

    def __init__(
        self,
        source_datapipe: IterDataPipe,
        mask_datapipe: IterDataPipe[Union[xr.DataArray, xr.Dataset]],
        **kwargs: Optional[Dict[str, Any]],
    ) -> None:
        if gpd is None:
            raise ModuleNotFoundError(
                "Package `geopandas` is required to be installed to use this datapipe. "
                "Please use `pip install geopandas` or "
                "`conda install -c conda-forge geopandas` "
                "to install the package"
            )
        self.source_datapipe: IterDataPipe = source_datapipe
        self.mask_datapipe: IterDataPipe[xr.DataArray] = mask_datapipe
        self.kwargs = kwargs

        # Only a single (broadcastable) vector is supported for now
        len_vector_datapipe: int = len(self.source_datapipe)
        if len_vector_datapipe != 1:
            raise NotImplementedError(
                f"The vector datapipe's length can only be (1) for now, but got "
                f"({len_vector_datapipe}) instead. Consider merging your vector data "
                f"into a single `geopandas.GeoSeries` or `geopandas.GeoDataFrame`, "
                f"e.g. using `geodataframe0.append(geodataframe2)`."
            )

    def __iter__(self) -> Iterator:
        # Materialize the single vector element; it is reused for every mask
        geodataframe = list(self.source_datapipe).pop()

        for raster in self.mask_datapipe:
            # Bounding box (minx, miny, maxx, maxy) of the raster mask
            mask = raster.rio.bounds()

            # Reproject the vector to the raster's CRS only when they differ.
            # Note: an explicit comparison is used instead of the previous
            # `try/assert/except AssertionError` pattern, which silently
            # skipped reprojection when running under `python -O` (asserts
            # are stripped by the optimizer).
            if geodataframe.crs != raster.rio.crs:
                _geodataframe = geodataframe.to_crs(crs=raster.rio.crs)
            else:
                _geodataframe = geodataframe

            clipped_geodataframe = _geodataframe.clip(mask=mask, **self.kwargs)

            yield clipped_geodataframe, raster

    def __len__(self) -> int:
        return len(self.mask_datapipe)
@functional_datapipe("read_from_pyogrio")
class PyogrioReaderIterDataPipe(IterDataPipe[StreamWrapper]):
    """
    Takes vector files (e.g. FlatGeoBuf, GeoPackage, GeoJSON) from local disk
    or URLs (as long as they can be read by pyogrio) and yields
    :py:class:`geopandas.GeoDataFrame` objects (functional name:
    ``read_from_pyogrio``).

    Based on
    https://github.com/pytorch/data/blob/v0.4.0/torchdata/datapipes/iter/load/iopath.py#L42-L97

    Parameters
    ----------
    source_datapipe : IterDataPipe[str]
        A DataPipe that contains filepaths or URL links to vector files such as
        FlatGeoBuf, GeoPackage, GeoJSON, etc.

    kwargs : Optional
        Extra keyword arguments to pass to :py:func:`pyogrio.read_dataframe`.

    Yields
    ------
    stream_obj : geopandas.GeoDataFrame
        A :py:class:`geopandas.GeoDataFrame` object containing the vector
        data, wrapped in a :py:class:`torchdata.datapipes.utils.StreamWrapper`.

    Raises
    ------
    ModuleNotFoundError
        If ``pyogrio`` is not installed. See
        :doc:`install instructions for pyogrio <pyogrio:install>`, and ensure
        that ``geopandas`` is installed too (e.g. via
        ``pip install pyogrio[geopandas]``) before using this class.
    """

    def __init__(
        self, source_datapipe: IterDataPipe[str], **kwargs: Optional[Dict[str, Any]]
    ) -> None:
        if pyogrio is None:
            raise ModuleNotFoundError(
                "Package `pyogrio` is required to be installed to use this datapipe. "
                "Please use `pip install pyogrio[geopandas]` or "
                "`conda install -c conda-forge pyogrio` "
                "to install the package"
            )
        self.source_datapipe: IterDataPipe[str] = source_datapipe
        self.kwargs = kwargs

    def __iter__(self) -> Iterator[StreamWrapper]:
        for vector_path in self.source_datapipe:
            # Delegate the actual parsing to pyogrio; extra keyword arguments
            # (e.g. layer selection) are forwarded unchanged
            geodataframe = pyogrio.read_dataframe(vector_path, **self.kwargs)
            yield StreamWrapper(geodataframe)

    def __len__(self) -> int:
        return len(self.source_datapipe)
@functional_datapipe("read_to_pystac_item")
class PySTACItemReaderIterDataPipe(IterDataPipe):
    """
    Takes files from local disk or URLs (as long as they can be read by pystac)
    and yields :py:class:`pystac.Item` objects (functional name:
    ``read_to_pystac_item``).

    Parameters
    ----------
    source_datapipe : IterDataPipe[str]
        A DataPipe that contains filepaths or URL links to STAC items.

    kwargs : Optional
        Extra keyword arguments to pass to :py:meth:`pystac.Item.from_file`.

    Yields
    ------
    stac_item : pystac.Item
        A :py:class:`pystac.Item` object containing the specific
        :py:class:`pystac.STACObject` implementation class represented in a
        JSON format.

    Raises
    ------
    ModuleNotFoundError
        If ``pystac`` is not installed. See
        :doc:`install instructions for pystac <pystac:installation>`, (e.g. via
        ``pip install pystac``) before using this class.
    """

    def __init__(
        self, source_datapipe: IterDataPipe[str], **kwargs: Optional[Dict[str, Any]]
    ) -> None:
        if pystac is None:
            raise ModuleNotFoundError(
                "Package `pystac` is required to be installed to use this datapipe. "
                "Please use `pip install pystac` or "
                "`conda install -c conda-forge pystac` "
                "to install the package"
            )
        self.source_datapipe: IterDataPipe[str] = source_datapipe
        self.kwargs = kwargs

    def __iter__(self) -> Iterator:
        for item_href in self.source_datapipe:
            # Each href is resolved lazily, one STAC Item at a time
            stac_item = pystac.Item.from_file(href=item_href, **self.kwargs)
            yield stac_item

    def __len__(self) -> int:
        return len(self.source_datapipe)
Results will be filtered to only those 31 | intersecting the bounding box. 32 | - **datetime** - Either a single datetime or datetime range used to 33 | filter results. You may express a single datetime using a 34 | :py:class:`datetime.datetime` instance, a 35 | `RFC 3339-compliant `_ 36 | timestamp, or a simple date string. 37 | - **collections** - List of one or more Collection IDs or 38 | :py:class:`pystac.Collection` instances. Only Items in one of the 39 | provided Collections will be searched. 40 | 41 | catalog_url : str 42 | The URL of a STAC Catalog. 43 | 44 | kwargs : Optional 45 | Extra keyword arguments to pass to 46 | :py:meth:`pystac_client.Client.open`. For example: 47 | 48 | - **headers** - A dictionary of additional headers to use in all 49 | requests made to any part of this Catalog/API. 50 | - **parameters** - Optional dictionary of query string parameters to 51 | include in all requests. 52 | - **modifier** - A callable that modifies the children collection and 53 | items returned by this Client. This can be useful for injecting 54 | authentication parameters into child assets to access data from 55 | non-public sources. 56 | 57 | Yields 58 | ------ 59 | item_search : pystac_client.ItemSearch 60 | A :py:class:`pystac_client.ItemSearch` object instance that represents 61 | a deferred query to a STAC search endpoint as described in the 62 | `STAC API - Item Search spec `_. 63 | 64 | Raises 65 | ------ 66 | ModuleNotFoundError 67 | If ``pystac_client`` is not installed. See 68 | :doc:`install instructions for pystac-client `, 69 | (e.g. via ``pip install pystac-client``) before using this class. 70 | 71 | Example 72 | ------- 73 | >>> import pytest 74 | >>> pystac_client = pytest.importorskip("pystac_client") 75 | ... 76 | >>> from torchdata.datapipes.iter import IterableWrapper 77 | >>> from zen3geo.datapipes import PySTACAPISearcher 78 | ... 79 | >>> # Peform STAC API query using DataPipe 80 | >>> query = dict( 81 | ... 
bbox=[174.5, -41.37, 174.9, -41.19], # xmin, ymin, xmax, ymax 82 | ... datetime=["2012-02-20T00:00:00Z", "2022-12-22T00:00:00Z"], 83 | ... collections=["cop-dem-glo-30"], 84 | ... ) 85 | >>> dp = IterableWrapper(iterable=[query]) 86 | >>> dp_pystac_client = dp.search_for_pystac_item( 87 | ... catalog_url="https://planetarycomputer.microsoft.com/api/stac/v1", 88 | ... # modifier=planetary_computer.sign_inplace, 89 | ... ) 90 | >>> # Loop or iterate over the DataPipe stream 91 | >>> it = iter(dp_pystac_client) 92 | >>> stac_item_search = next(it) 93 | >>> stac_items = list(stac_item_search.items()) 94 | >>> stac_items 95 | [] 96 | >>> stac_items[0].properties # doctest: +NORMALIZE_WHITESPACE 97 | {'gsd': 30, 98 | 'datetime': '2021-04-22T00:00:00Z', 99 | 'platform': 'TanDEM-X', 100 | 'proj:epsg': 4326, 101 | 'proj:shape': [3600, 3600], 102 | 'proj:transform': [0.0002777777777777778, 103 | 0.0, 104 | 173.9998611111111, 105 | 0.0, 106 | -0.0002777777777777778, 107 | -40.99986111111111]} 108 | """ 109 | 110 | def __init__( 111 | self, 112 | source_datapipe: IterDataPipe[dict], 113 | catalog_url: str, 114 | **kwargs: Optional[Dict[str, Any]] 115 | ) -> None: 116 | if pystac_client is None: 117 | raise ModuleNotFoundError( 118 | "Package `pystac_client` is required to be installed to use this datapipe. 
" 119 | "Please use `pip install pystac-client` or " 120 | "`conda install -c conda-forge pystac-client` " 121 | "to install the package" 122 | ) 123 | self.source_datapipe: IterDataPipe[dict] = source_datapipe 124 | self.catalog_url: str = catalog_url 125 | self.kwargs = kwargs 126 | 127 | def __iter__(self) -> Iterator: 128 | catalog = pystac_client.Client.open(url=self.catalog_url, **self.kwargs) 129 | 130 | for query in self.source_datapipe: 131 | search = catalog.search(**query) 132 | yield search 133 | 134 | def __len__(self) -> int: 135 | return len(self.source_datapipe) 136 | 137 | 138 | @functional_datapipe("list_pystac_items_by_search") 139 | class PySTACAPIItemListerIterDataPipe(IterDataPipe): 140 | """ 141 | Lists the :py:class:`pystac.Item` objects that match the provided STAC API 142 | search parameters (functional name: ``list_pystac_items_by_search``). 143 | 144 | Parameters 145 | ---------- 146 | source_datapipe : IterDataPipe[pystac_client.ItemSearch] 147 | A DataPipe that contains :py:class:`pystac_client.ItemSearch` object 148 | instances that represents 149 | a deferred query to a STAC search endpoint as described in the 150 | `STAC API - Item Search spec `_. 151 | 152 | Yields 153 | ------ 154 | stac_item : pystac.Item 155 | A :py:class:`pystac.Item` object containing the specific 156 | :py:class:`pystac.STACObject` implementation class represented in a 157 | JSON format. 158 | 159 | Raises 160 | ------ 161 | ModuleNotFoundError 162 | If ``pystac_client`` is not installed. See 163 | :doc:`install instructions for pystac-client `, 164 | (e.g. via ``pip install pystac-client``) before using this class. 165 | 166 | Example 167 | ------- 168 | >>> import pytest 169 | >>> pystac_client = pytest.importorskip("pystac_client") 170 | ... 171 | >>> from torchdata.datapipes.iter import IterableWrapper 172 | >>> from zen3geo.datapipes import PySTACAPIItemLister 173 | ... 
174 | >>> # List STAC Items from a STAC API query 175 | >>> catalog = pystac_client.Client.open( 176 | ... url="https://explorer.digitalearth.africa/stac/" 177 | ... ) 178 | >>> search = catalog.search( 179 | ... bbox=[57.2, -20.6, 57.9, -19.9], # xmin, ymin, xmax, ymax 180 | ... datetime=["2023-01-01T00:00:00Z", "2023-01-31T00:00:00Z"], 181 | ... collections=["s2_l2a"], 182 | ... ) 183 | >>> dp = IterableWrapper(iterable=[search]) 184 | >>> dp_pystac_item_list = dp.list_pystac_items_by_search() 185 | ... 186 | >>> # Loop or iterate over the DataPipe stream 187 | >>> it = iter(dp_pystac_item_list) 188 | >>> stac_item = next(it) 189 | >>> stac_item 190 | 191 | >>> stac_item.properties # doctest: +NORMALIZE_WHITESPACE 192 | {'title': 'S2B_MSIL2A_20230103T062449_N0509_R091_T40KED_20230103T075000', 193 | 'gsd': 10, 194 | 'proj:epsg': 32740, 195 | 'platform': 'sentinel-2b', 196 | 'view:off_nadir': 0, 197 | 'instruments': ['msi'], 198 | 'eo:cloud_cover': 0.02, 199 | 'odc:file_format': 'GeoTIFF', 200 | 'odc:region_code': '40KED', 201 | 'constellation': 'sentinel-2', 202 | 'sentinel:sequence': '0', 203 | 'sentinel:utm_zone': 40, 204 | 'sentinel:product_id': 'S2B_MSIL2A_20230103T062449_N0509_R091_T40KED_20230103T075000', 205 | 'sentinel:grid_square': 'ED', 206 | 'sentinel:data_coverage': 28.61, 207 | 'sentinel:latitude_band': 'K', 208 | 'created': '2023-01-03T06:24:53Z', 209 | 'sentinel:valid_cloud_cover': True, 210 | 'sentinel:boa_offset_applied': True, 211 | 'sentinel:processing_baseline': '05.09', 212 | 'proj:shape': [10980, 10980], 213 | 'proj:transform': [10.0, 0.0, 499980.0, 0.0, -10.0, 7900000.0, 0.0, 0.0, 1.0], 214 | 'cubedash:region_code': '40KED', 215 | 'datetime': '2023-01-03T06:24:53Z'} 216 | """ 217 | 218 | def __init__(self, source_datapipe): 219 | if pystac_client is None: 220 | raise ModuleNotFoundError( 221 | "Package `pystac_client` is required to be installed to use this datapipe. 
" 222 | "Please use `pip install pystac-client` or " 223 | "`conda install -c conda-forge pystac-client` " 224 | "to install the package" 225 | ) 226 | self.source_datapipe = source_datapipe 227 | 228 | def __iter__(self): 229 | for item_search in self.source_datapipe: 230 | yield from item_search.items() 231 | 232 | def __len__(self): 233 | return sum(item_search.matched() for item_search in self.source_datapipe) 234 | -------------------------------------------------------------------------------- /zen3geo/datapipes/rioxarray.py: -------------------------------------------------------------------------------- 1 | """ 2 | DataPipes for :doc:`rioxarray `. 3 | """ 4 | from typing import Any, Dict, Iterator, Optional 5 | 6 | import rioxarray 7 | from torchdata.datapipes import functional_datapipe 8 | from torchdata.datapipes.iter import IterDataPipe 9 | from torchdata.datapipes.utils import StreamWrapper 10 | 11 | 12 | @functional_datapipe("read_from_rioxarray") 13 | class RioXarrayReaderIterDataPipe(IterDataPipe[StreamWrapper]): 14 | """ 15 | Takes raster files (e.g. GeoTIFFs) from local disk or URLs 16 | (as long as they can be read by rioxarray and/or rasterio) 17 | and yields :py:class:`xarray.DataArray` objects (functional name: 18 | ``read_from_rioxarray``). 19 | 20 | Based on 21 | https://github.com/pytorch/data/blob/v0.4.0/torchdata/datapipes/iter/load/online.py#L55-L96 22 | 23 | Parameters 24 | ---------- 25 | source_datapipe : IterDataPipe[str] 26 | A DataPipe that contains filepaths or URL links to raster files such as 27 | GeoTIFFs. 28 | 29 | kwargs : Optional 30 | Extra keyword arguments to pass to :py:func:`rioxarray.open_rasterio` 31 | and/or :py:func:`rasterio.open`. 32 | 33 | Yields 34 | ------ 35 | stream_obj : xarray.DataArray 36 | An :py:class:`xarray.DataArray` object containing the raster data. 
37 | 38 | Example 39 | ------- 40 | >>> from torchdata.datapipes.iter import IterableWrapper 41 | >>> from zen3geo.datapipes import RioXarrayReader 42 | ... 43 | >>> # Read in GeoTIFF data using DataPipe 44 | >>> file_url: str = "https://github.com/GenericMappingTools/gmtserver-admin/raw/master/cache/earth_day_HD.tif" 45 | >>> dp = IterableWrapper(iterable=[file_url]) 46 | >>> dp_rioxarray = dp.read_from_rioxarray() 47 | ... 48 | >>> # Loop or iterate over the DataPipe stream 49 | >>> it = iter(dp_rioxarray) 50 | >>> dataarray = next(it) 51 | >>> dataarray.encoding["source"] 52 | 'https://github.com/GenericMappingTools/gmtserver-admin/raw/master/cache/earth_day_HD.tif' 53 | >>> dataarray 54 | StreamWrapper< 55 | [1843200 values with dtype=uint8] 56 | Coordinates: 57 | * band (band) int64 1 58 | * x (x) float64 -179.9 -179.7 -179.5 -179.3 ... 179.5 179.7 179.9 59 | * y (y) float64 89.91 89.72 89.53 89.34 ... -89.53 -89.72 -89.91 60 | spatial_ref int64 0 61 | ... 62 | """ 63 | 64 | def __init__( 65 | self, source_datapipe: IterDataPipe[str], **kwargs: Optional[Dict[str, Any]] 66 | ) -> None: 67 | self.source_datapipe: IterDataPipe[str] = source_datapipe 68 | self.kwargs = kwargs 69 | 70 | def __iter__(self) -> Iterator[StreamWrapper]: 71 | for filename in self.source_datapipe: 72 | yield StreamWrapper( 73 | rioxarray.open_rasterio(filename=filename, **self.kwargs) 74 | ) 75 | 76 | def __len__(self) -> int: 77 | return len(self.source_datapipe) 78 | -------------------------------------------------------------------------------- /zen3geo/datapipes/stackstac.py: -------------------------------------------------------------------------------- 1 | """ 2 | DataPipes for :doc:`stackstac `. 
3 | """ 4 | from typing import Any, Dict, Iterator, Optional 5 | 6 | import xarray as xr 7 | 8 | try: 9 | import stackstac 10 | except ImportError: 11 | stackstac = None 12 | from torchdata.datapipes import functional_datapipe 13 | from torchdata.datapipes.iter import IterDataPipe 14 | 15 | 16 | @functional_datapipe("mosaic_dataarray") 17 | class StackSTACMosaickerIterDataPipe(IterDataPipe[xr.DataArray]): 18 | """ 19 | Takes :py:class:`xarray.DataArray` objects, flattens a dimension by picking 20 | the first valid pixel, to yield mosaicked :py:class:`xarray.DataArray` 21 | objects (functional name: ``mosaic_dataarray``). 22 | 23 | Parameters 24 | ---------- 25 | source_datapipe : IterDataPipe[xarray.DataArray] 26 | A DataPipe that contains :py:class:`xarray.DataArray` objects, with 27 | e.g. dimensions ("time", "band", "y", "x"). 28 | 29 | kwargs : Optional 30 | Extra keyword arguments to pass to :py:func:`stackstac.mosaic`. 31 | 32 | Yields 33 | ------ 34 | dataarray : xarray.DataArray 35 | An :py:class:`xarray.DataArray` that has been mosaicked with e.g. 36 | dimensions ("band", "y", "x"). 37 | 38 | Raises 39 | ------ 40 | ModuleNotFoundError 41 | If ``stackstac`` is not installed. See 42 | :doc:`install instructions for stackstac `, (e.g. via 43 | ``pip install stackstac``) before using this class. 44 | 45 | Example 46 | ------- 47 | >>> import pytest 48 | >>> import xarray as xr 49 | >>> pystac = pytest.importorskip("pystac") 50 | >>> stackstac = pytest.importorskip("stackstac") 51 | ... 52 | >>> from torchdata.datapipes.iter import IterableWrapper 53 | >>> from zen3geo.datapipes import StackSTACMosaicker 54 | ... 55 | >>> # Get list of ALOS DEM tiles to mosaic together later 56 | >>> item_urls = [ 57 | ... "https://planetarycomputer.microsoft.com/api/stac/v1/collections/alos-dem/items/ALPSMLC30_N022E113_DSM", 58 | ... "https://planetarycomputer.microsoft.com/api/stac/v1/collections/alos-dem/items/ALPSMLC30_N022E114_DSM", 59 | ... 
] 60 | >>> stac_items = [pystac.Item.from_file(href=url) for url in item_urls] 61 | >>> dataarray = stackstac.stack(items=stac_items) 62 | >>> assert dataarray.sizes == {'time': 2, 'band': 1, 'y': 3600, 'x': 7200} 63 | ... 64 | >>> # Mosaic different tiles in an xarray.DataArray using DataPipe 65 | >>> dp = IterableWrapper(iterable=[dataarray]) 66 | >>> dp_mosaic = dp.mosaic_dataarray() 67 | ... 68 | >>> # Loop or iterate over the DataPipe stream 69 | >>> it = iter(dp_mosaic) 70 | >>> dataarray = next(it) 71 | >>> print(dataarray.sizes) 72 | Frozen({'band': 1, 'y': 3600, 'x': 7200}) 73 | >>> print(dataarray.coords) 74 | Coordinates: 75 | * band (band) >> print(dataarray.attrs["spec"]) 80 | RasterSpec(epsg=4326, bounds=(113.0, 22.0, 115.0, 23.0), resolutions_xy=(0.0002777777777777778, 0.0002777777777777778)) 81 | """ 82 | 83 | def __init__( 84 | self, 85 | source_datapipe: IterDataPipe[xr.DataArray], 86 | **kwargs: Optional[Dict[str, Any]] 87 | ) -> None: 88 | if stackstac is None: 89 | raise ModuleNotFoundError( 90 | "Package `stackstac` is required to be installed to use this datapipe. " 91 | "Please use `pip install stackstac` or " 92 | "`conda install -c conda-forge stackstac` " 93 | "to install the package" 94 | ) 95 | self.source_datapipe: IterDataPipe = source_datapipe 96 | self.kwargs = kwargs 97 | 98 | def __iter__(self) -> Iterator[xr.DataArray]: 99 | for dataarray in self.source_datapipe: 100 | yield stackstac.mosaic(arr=dataarray, **self.kwargs) 101 | 102 | def __len__(self) -> int: 103 | return len(self.source_datapipe) 104 | 105 | 106 | @functional_datapipe("stack_stac_items") 107 | class StackSTACStackerIterDataPipe(IterDataPipe[xr.DataArray]): 108 | """ 109 | Takes :py:class:`pystac.Item` objects, reprojects them to the same grid 110 | and stacks them along time, to yield :py:class:`xarray.DataArray` objects 111 | (functional name: ``stack_stac_items``). 
112 | 113 | Parameters 114 | ---------- 115 | source_datapipe : IterDataPipe[pystac.Item] 116 | A DataPipe that contains :py:class:`pystac.Item` objects. 117 | 118 | kwargs : Optional 119 | Extra keyword arguments to pass to :py:func:`stackstac.stack`. 120 | 121 | Yields 122 | ------ 123 | datacube : xarray.DataArray 124 | An :py:class:`xarray.DataArray` backed by a 125 | :py:class:`dask.array.Array` containing the time-series datacube. The 126 | dimensions will be ("time", "band", "y", "x"). 127 | 128 | Raises 129 | ------ 130 | ModuleNotFoundError 131 | If ``stackstac`` is not installed. See 132 | :doc:`install instructions for stackstac `, (e.g. via 133 | ``pip install stackstac``) before using this class. 134 | 135 | Example 136 | ------- 137 | >>> import pytest 138 | >>> pystac = pytest.importorskip("pystac") 139 | >>> stacstac = pytest.importorskip("stackstac") 140 | ... 141 | >>> from torchdata.datapipes.iter import IterableWrapper 142 | >>> from zen3geo.datapipes import StackSTACStacker 143 | ... 144 | >>> # Stack different bands in a STAC Item using DataPipe 145 | >>> item_url: str = "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd/items/S1A_IW_GRDH_1SDV_20220914T093226_20220914T093252_044999_056053" 146 | >>> stac_item = pystac.Item.from_file(href=item_url) 147 | >>> dp = IterableWrapper(iterable=[stac_item]) 148 | >>> dp_stackstac = dp.stack_stac_items( 149 | ... assets=["vh", "vv"], epsg=32652, resolution=10 150 | ... ) 151 | ... 152 | >>> # Loop or iterate over the DataPipe stream 153 | >>> it = iter(dp_stackstac) 154 | >>> dataarray = next(it) 155 | >>> print(dataarray.sizes) 156 | Frozen({'time': 1, 'band': 2, 'y': 20686, 'x': 28043}) 157 | >>> print(dataarray.coords) 158 | Coordinates: 159 | * time (time) datetime64[ns] 2022-09-14T0... 
160 | id (time) >> print(dataarray.attrs["spec"]) 166 | RasterSpec(epsg=32652, bounds=(135370, 4098080, 415800, 4304940), resolutions_xy=(10, 10)) 167 | """ 168 | 169 | def __init__( 170 | self, source_datapipe: IterDataPipe, **kwargs: Optional[Dict[str, Any]] 171 | ) -> None: 172 | if stackstac is None: 173 | raise ModuleNotFoundError( 174 | "Package `stackstac` is required to be installed to use this datapipe. " 175 | "Please use `pip install stackstac` or " 176 | "`conda install -c conda-forge stackstac` " 177 | "to install the package" 178 | ) 179 | self.source_datapipe: IterDataPipe = source_datapipe 180 | self.kwargs = kwargs 181 | 182 | def __iter__(self) -> Iterator[xr.DataArray]: 183 | for stac_items in self.source_datapipe: 184 | yield stackstac.stack(items=stac_items, **self.kwargs) 185 | 186 | def __len__(self) -> int: 187 | return len(self.source_datapipe) 188 | -------------------------------------------------------------------------------- /zen3geo/datapipes/xbatcher.py: -------------------------------------------------------------------------------- 1 | """ 2 | DataPipes for :doc:`xbatcher `. 3 | """ 4 | from typing import Any, Dict, Hashable, Iterator, Optional, Tuple, Union 5 | 6 | import xarray as xr 7 | 8 | try: 9 | import xbatcher 10 | except ImportError: 11 | xbatcher = None 12 | from torchdata.datapipes import functional_datapipe 13 | from torchdata.datapipes.iter import IterDataPipe 14 | 15 | 16 | @functional_datapipe("slice_with_xbatcher") 17 | class XbatcherSlicerIterDataPipe(IterDataPipe[Union[xr.DataArray, xr.Dataset]]): 18 | """ 19 | Takes an :py:class:`xarray.DataArray` or :py:class:`xarray.Dataset` 20 | and creates a sliced window view (also known as a chip or tile) of the 21 | n-dimensional array (functional name: ``slice_with_xbatcher``). 22 | 23 | Parameters 24 | ---------- 25 | source_datapipe : IterDataPipe[xarray.DataArray] 26 | A DataPipe that contains :py:class:`xarray.DataArray` or 27 | :py:class:`xarray.Dataset` objects. 
28 | 29 | input_dims : dict 30 | A dictionary specifying the size of the inputs in each dimension to 31 | slice along, e.g. ``{'lon': 64, 'lat': 64}``. These are the dimensions 32 | the machine learning library will see. All other dimensions will be 33 | stacked into one dimension called ``batch``. 34 | 35 | kwargs : Optional 36 | Extra keyword arguments to pass to :py:class:`xbatcher.BatchGenerator`. 37 | 38 | Yields 39 | ------ 40 | chip : xarray.DataArray 41 | An :py:class:`xarray.DataArray` or :py:class:`xarray.Dataset` object 42 | containing the sliced raster data, with the size/shape defined by the 43 | ``input_dims`` parameter. 44 | 45 | Raises 46 | ------ 47 | ModuleNotFoundError 48 | If ``xbatcher`` is not installed. Follow 49 | :doc:`install instructions for xbatcher ` 50 | before using this class. 51 | 52 | Example 53 | ------- 54 | >>> import pytest 55 | >>> import numpy as np 56 | >>> import xarray as xr 57 | >>> xbatcher = pytest.importorskip("xbatcher") 58 | ... 59 | >>> from torchdata.datapipes.iter import IterableWrapper 60 | >>> from zen3geo.datapipes import XbatcherSlicer 61 | ... 62 | >>> # Sliced window view of xarray.DataArray using DataPipe 63 | >>> dataarray: xr.DataArray = xr.DataArray( 64 | ... data=np.ones(shape=(3, 64, 64)), 65 | ... name="foo", 66 | ... dims=["band", "y", "x"] 67 | ... ) 68 | >>> dp = IterableWrapper(iterable=[dataarray]) 69 | >>> dp_xbatcher = dp.slice_with_xbatcher(input_dims={"y": 2, "x": 2}) 70 | ... 
71 | >>> # Loop or iterate over the DataPipe stream 72 | >>> it = iter(dp_xbatcher) 73 | >>> dataarray_chip = next(it) 74 | >>> dataarray_chip 75 | 76 | array([[[1., 1.], 77 | [1., 1.]], 78 | 79 | [[1., 1.], 80 | [1., 1.]], 81 | 82 | [[1., 1.], 83 | [1., 1.]]]) 84 | Dimensions without coordinates: band, y, x 85 | """ 86 | 87 | def __init__( 88 | self, 89 | source_datapipe: IterDataPipe[Union[xr.DataArray, xr.Dataset]], 90 | input_dims: Dict[Hashable, int], 91 | **kwargs: Optional[Dict[str, Any]], 92 | ) -> None: 93 | if xbatcher is None: 94 | raise ModuleNotFoundError( 95 | "Package `xbatcher` is required to be installed to use this datapipe. " 96 | "Please use `pip install xbatcher` " 97 | "to install the package" 98 | ) 99 | self.source_datapipe: IterDataPipe[ 100 | Union[xr.DataArray, xr.Dataset] 101 | ] = source_datapipe 102 | self.input_dims: Dict[Hashable, int] = input_dims 103 | self.kwargs = kwargs 104 | 105 | def __iter__(self) -> Iterator[Union[xr.DataArray, xr.Dataset]]: 106 | for dataarray in self.source_datapipe: 107 | for chip in dataarray.batch.generator( 108 | input_dims=self.input_dims, **self.kwargs 109 | ): 110 | yield chip 111 | 112 | def __len__(self) -> int: 113 | return sum( 114 | len(dataarray.batch.generator(input_dims=self.input_dims, **self.kwargs)) 115 | for dataarray in self.source_datapipe 116 | ) 117 | -------------------------------------------------------------------------------- /zen3geo/datapipes/xpystac.py: -------------------------------------------------------------------------------- 1 | """ 2 | DataPipes for `xpystac `__. 
3 | """ 4 | from typing import Any, Dict, Iterator, Optional 5 | 6 | import xarray as xr 7 | 8 | try: 9 | import pystac 10 | import xpystac 11 | except ImportError: 12 | pystac = None 13 | xpystac = None 14 | from torchdata.datapipes import functional_datapipe 15 | from torchdata.datapipes.iter import IterDataPipe 16 | from torchdata.datapipes.utils import StreamWrapper 17 | 18 | 19 | @functional_datapipe("read_from_xpystac") 20 | class XpySTACAssetReaderIterDataPipe(IterDataPipe[StreamWrapper]): 21 | """ 22 | Takes a :py:class:`pystac.Asset` object containing n-dimensional data (e.g. 23 | :doc:`Zarr `, 24 | `NetCDF `__, 25 | `Cloud-Optimized GeoTIFF `__, etc) from local disk 26 | or URLs (as long as they can be read by xpystac) and yields 27 | :py:class:`xarray.Dataset` objects (functional name: 28 | ``read_from_xpystac``). 29 | 30 | Based on 31 | https://github.com/pytorch/data/blob/v0.5.1/torchdata/datapipes/iter/load/iopath.py#L42-L97 32 | 33 | Parameters 34 | ---------- 35 | source_datapipe : IterDataPipe[pystac.Asset] 36 | A DataPipe that contains :py:class:`pystac.Asset` objects to 37 | n-dimensional files such as :doc:`Zarr `, 38 | `NetCDF `__, 39 | `Cloud-Optimized GeoTIFF `__, etc. 40 | 41 | engine : str or xarray.backends.BackendEntrypoint 42 | Engine to use when reading files. If not provided, the default engine 43 | will be the "stac" backend from ``xpystac``. Alternatively, set 44 | ``engine=None`` to let ``xarray`` choose the default engine based on 45 | available dependencies, with a preference for "netcdf4". See also 46 | :py:func:`xarray.open_dataset` for details about other engine options. 47 | 48 | kwargs : Optional 49 | Extra keyword arguments to pass to :py:func:`xarray.open_dataset`. 50 | 51 | Yields 52 | ------ 53 | stream_obj : xarray.Dataset 54 | An :py:class:`xarray.Dataset` object containing the n-dimensional data. 55 | 56 | Raises 57 | ------ 58 | ModuleNotFoundError 59 | If ``xpystac`` is not installed. 
See 60 | `install instructions for xpystac 61 | `__, 62 | (e.g. via ``pip install xpystac``) before using this class. 63 | 64 | Example 65 | ------- 66 | >>> import pytest 67 | >>> pystac = pytest.importorskip("pystac") 68 | >>> xpystac = pytest.importorskip("xpystac") 69 | >>> zarr = pytest.importorskip("zarr") 70 | ... 71 | >>> from torchdata.datapipes.iter import IterableWrapper 72 | >>> from zen3geo.datapipes import XpySTACAssetReader 73 | ... 74 | >>> # Read in STAC Asset using DataPipe 75 | >>> collection_url: str = "https://planetarycomputer.microsoft.com/api/stac/v1/collections/nasa-nex-gddp-cmip6" 76 | >>> asset: pystac.Asset = pystac.Collection.from_file(href=collection_url).assets[ 77 | ... "ACCESS-CM2.historical" 78 | ... ] 79 | >>> dp = IterableWrapper(iterable=[asset]) 80 | >>> dp_xpystac = dp.read_from_xpystac() 81 | ... 82 | >>> # Loop or iterate over the DataPipe stream 83 | >>> it = iter(dp_xpystac) 84 | >>> dataset = next(it) 85 | >>> dataset.sizes 86 | Frozen({'time': 23741, 'lat': 600, 'lon': 1440}) 87 | >>> print(dataset.data_vars) 88 | Data variables: 89 | hurs (time, lat, lon) float32 ... 90 | huss (time, lat, lon) float32 ... 91 | pr (time, lat, lon) float32 ... 92 | rlds (time, lat, lon) float32 ... 93 | rsds (time, lat, lon) float32 ... 94 | sfcWind (time, lat, lon) float32 ... 95 | tas (time, lat, lon) float32 ... 96 | tasmax (time, lat, lon) float32 ... 97 | tasmin (time, lat, lon) float32 ... 98 | >>> dataset.attrs # doctest: +NORMALIZE_WHITESPACE 99 | {'Conventions': 'CF-1.7', 100 | 'activity': 'NEX-GDDP-CMIP6', 101 | 'cmip6_institution_id': 'CSIRO-ARCCSS', 102 | 'cmip6_license': 'CC-BY-SA 4.0', 103 | 'cmip6_source_id': 'ACCESS-CM2', 104 | ... 105 | 'history': '2021-10-04T13:59:21.654137+00:00: install global attributes', 106 | 'institution': 'NASA Earth Exchange, NASA Ames Research Center, ... 107 | 'product': 'output', 108 | 'realm': 'atmos', 109 | 'references': 'BCSD method: Thrasher et al., 2012, ... 
110 | 'resolution_id': '0.25 degree', 111 | 'scenario': 'historical', 112 | 'source': 'BCSD', 113 | 'title': 'ACCESS-CM2, r1i1p1f1, historical, global downscaled CMIP6 ... 114 | 'tracking_id': '16d27564-470f-41ea-8077-f4cc3efa5bfe', 115 | 'variant_label': 'r1i1p1f1', 116 | 'version': '1.0'} 117 | """ 118 | 119 | def __init__( 120 | self, 121 | source_datapipe: IterDataPipe, 122 | engine: str = "stac", 123 | **kwargs: Optional[Dict[str, Any]] 124 | ) -> None: 125 | if xpystac is None and engine == "stac": 126 | raise ModuleNotFoundError( 127 | "Package `xpystac` is required to be installed to use this datapipe. " 128 | "Please use `pip install xpystac` " 129 | "to install the package" 130 | ) 131 | self.source_datapipe: IterDataPipe = source_datapipe 132 | self.engine: str = engine 133 | self.kwargs = kwargs 134 | 135 | def __iter__(self) -> Iterator[StreamWrapper]: 136 | for asset in self.source_datapipe: 137 | yield StreamWrapper( 138 | xr.open_dataset(asset, engine=self.engine, **self.kwargs) 139 | ) 140 | 141 | def __len__(self) -> int: 142 | return len(self.source_datapipe) 143 | -------------------------------------------------------------------------------- /zen3geo/tests/test_datapipes_datashader.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for datashader datapipes. 3 | """ 4 | import numpy as np 5 | import pytest 6 | import xarray as xr 7 | from torchdata.datapipes.iter import IterableWrapper 8 | 9 | from zen3geo.datapipes import DatashaderRasterizer, XarrayCanvas 10 | 11 | datashader = pytest.importorskip("datashader") 12 | 13 | 14 | # %% 15 | @pytest.fixture(scope="function", name="canvas") 16 | def fixture_canvas(): 17 | """ 18 | The blank datashader.Canvas to use in the tests. 
19 | """ 20 | canvas = datashader.Canvas( 21 | plot_width=14, plot_height=10, x_range=(1, 8), y_range=(0, 5) 22 | ) 23 | canvas.crs = "OGC:CRS84" 24 | return canvas 25 | 26 | 27 | @pytest.fixture(scope="module", name="geodataframe") 28 | def fixture_geodataframe(): 29 | """ 30 | A geopandas.GeoDataFrame containing a collection of shapely.geometry 31 | objects to use in the tests. 32 | """ 33 | gpd = pytest.importorskip("geopandas") 34 | shapely = pytest.importorskip("shapely") 35 | 36 | geometries: list = [ 37 | shapely.geometry.MultiPoint([(4.5, 4.5), (3.5, 1), (6, 3.5)]), 38 | shapely.geometry.LineString([(3, 5), (5, 3), (3, 2), (5, 0)]), 39 | shapely.geometry.Polygon([(6, 5), (3.5, 2.5), (6, 0), (6, 2.5), (5, 2.5)]), 40 | ] 41 | geodataframe = gpd.GeoDataFrame(data={"geometry": geometries}) 42 | geodataframe = geodataframe.set_crs(crs="OGC:CRS84") 43 | 44 | return geodataframe 45 | 46 | 47 | # %% 48 | def test_datashader_canvas_dataset(): 49 | """ 50 | Ensure that XarrayCanvas works to create a blank datashader.Canvas object 51 | from an xarray.Dataset. 
52 | """ 53 | dataset: xr.Dataset = xr.Dataset( 54 | data_vars={"temperature": (["y", "x"], 15 * np.ones(shape=(12, 8)))}, 55 | coords={ 56 | "y": (["y"], np.linspace(start=6, stop=0, num=12)), 57 | "x": (["x"], np.linspace(start=0, stop=4, num=8)), 58 | }, 59 | ) 60 | dp = IterableWrapper(iterable=[dataset]) 61 | 62 | # Using class constructors 63 | dp_canvas = XarrayCanvas(source_datapipe=dp) 64 | # Using functional form (recommended) 65 | dp_canvas = dp.canvas_from_xarray() 66 | 67 | assert len(dp_canvas) == 1 68 | it = iter(dp_canvas) 69 | canvas = next(it) 70 | 71 | assert canvas.plot_height == 12 72 | assert canvas.plot_width == 8 73 | assert hasattr(canvas, "crs") 74 | assert hasattr(canvas, "raster") 75 | 76 | 77 | @pytest.mark.parametrize( 78 | ("geom_type", "sum_val"), [("Point", 3), ("Line", 13), ("Polygon", 15)] 79 | ) 80 | def test_datashader_rasterize_vector_geometry(canvas, geodataframe, geom_type, sum_val): 81 | """ 82 | Ensure that DatashaderRasterizer works to rasterize a 83 | geopandas.GeoDataFrame of point, line or polygon type into an 84 | xarray.DataArray grid. 
85 | """ 86 | dp = IterableWrapper(iterable=[canvas, canvas]) 87 | 88 | vector = geodataframe[geodataframe.type.str.contains(geom_type)] 89 | dp_vector = IterableWrapper(iterable=[vector]) 90 | 91 | # Using class constructors 92 | dp_datashader = DatashaderRasterizer(source_datapipe=dp, vector_datapipe=dp_vector) 93 | # Using functional form (recommended) 94 | dp_datashader = dp.rasterize_with_datashader(vector_datapipe=dp_vector) 95 | 96 | assert len(dp_datashader) == 2 97 | it = iter(dp_datashader) 98 | dataarray = next(it) 99 | 100 | assert dataarray.data.sum() == sum_val 101 | assert dataarray.dims == ("y", "x") 102 | assert dataarray.rio.crs == "OGC:CRS84" 103 | assert dataarray.rio.shape == (10, 14) 104 | assert dataarray.rio.transform().e == -0.5 105 | 106 | 107 | def test_datashader_rasterize_canvas_missing_crs(canvas, geodataframe): 108 | """ 109 | Ensure that DatashaderRasterizer raises an AttributeError when the 110 | input datashader.Canvas has no crs attribute. 111 | """ 112 | canvas.crs = None 113 | dp_canvas = IterableWrapper(iterable=[canvas]) 114 | dp_vector = IterableWrapper(iterable=[geodataframe.geometry]) 115 | dp_datashader = dp_canvas.rasterize_with_datashader(vector_datapipe=dp_vector) 116 | 117 | assert len(dp_datashader) == 1 118 | it = iter(dp_datashader) 119 | with pytest.raises( 120 | AttributeError, match="Missing crs information for datashader.Canvas" 121 | ): 122 | raster = next(it) 123 | 124 | 125 | def test_datashader_rasterize_vector_missing_crs(canvas, geodataframe): 126 | """ 127 | Ensure that DatashaderRasterizer raises an AttributeError when the 128 | input geopandas.GeoSeries has no crs attribute. 
129 | """ 130 | vector = geodataframe.geometry 131 | vector.crs = None 132 | dp_canvas = IterableWrapper(iterable=[canvas]) 133 | dp_vector = IterableWrapper(iterable=[vector]) 134 | dp_datashader = dp_canvas.rasterize_with_datashader(vector_datapipe=dp_vector) 135 | 136 | assert len(dp_datashader) == 1 137 | it = iter(dp_datashader) 138 | with pytest.raises(AttributeError, match="Missing crs information for input"): 139 | raster = next(it) 140 | 141 | 142 | def test_datashader_rasterize_unmatched_lengths(canvas, geodataframe): 143 | """ 144 | Ensure that DatashaderRasterizer raises a ValueError when the length of the 145 | canvas datapipe is unmatched with the length of the vector datapipe. 146 | """ 147 | # Canvas:Vector ratio of 3:2 148 | dp_canvas = IterableWrapper(iterable=[canvas, canvas, canvas]) 149 | dp_vector = IterableWrapper(iterable=[geodataframe, geodataframe]) 150 | 151 | with pytest.raises(ValueError, match="Unmatched lengths for the"): 152 | dp_datashader = dp_canvas.rasterize_with_datashader(vector_datapipe=dp_vector) 153 | 154 | 155 | def test_datashader_rasterize_vector_geometrycollection(canvas, geodataframe): 156 | """ 157 | Ensure that DatashaderRasterizer raises a NotImplementedError when an 158 | unsupported vector type like GeometryCollection is used. 
159 | """ 160 | gpd = pytest.importorskip("geopandas") 161 | 162 | # Merge points, lines and polygons into a single GeometryCollection 163 | geocollection = gpd.GeoSeries(data=geodataframe.unary_union) 164 | geocollection = geocollection.set_crs(crs="OGC:CRS84") 165 | 166 | dp = IterableWrapper(iterable=[canvas]) 167 | dp_vector = IterableWrapper(iterable=[geocollection]) 168 | dp_datashader = dp.rasterize_with_datashader(vector_datapipe=dp_vector) 169 | 170 | assert len(dp_datashader) == 1 171 | it = iter(dp_datashader) 172 | with pytest.raises(NotImplementedError, match="Unsupported geometry type"): 173 | raster = next(it) 174 | 175 | 176 | def test_datashader_rasterize_invalid_vector(canvas, geodataframe): 177 | """ 178 | Ensure that DatashaderRasterizer raises a ValueError when an invalid 179 | geopandas.GeoDataFrame without a geometry is passed in as input. 180 | 181 | Regression test for https://github.com/weiji14/zen3geo/pull/104. 182 | """ 183 | # GeoDataFrame with empty data 184 | gdf_none = geodataframe.loc[5:] 185 | gdf_none = gdf_none.set_crs(crs="OGC:CRS84") 186 | 187 | dp = IterableWrapper(iterable=[canvas]) 188 | dp_vector = IterableWrapper(iterable=[gdf_none]) 189 | dp_datashader = dp.rasterize_with_datashader(vector_datapipe=dp_vector) 190 | 191 | assert len(dp_datashader) == 1 192 | it = iter(dp_datashader) 193 | with pytest.raises(ValueError, match="Cannot infer spatialpandas geometry type"): 194 | raster = next(it) 195 | -------------------------------------------------------------------------------- /zen3geo/tests/test_datapipes_geopandas.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for geopandas datapipes. 
3 | """ 4 | import numpy as np 5 | import pytest 6 | import xarray as xr 7 | from torchdata.datapipes.iter import IterableWrapper 8 | 9 | from zen3geo.datapipes import GeoPandasRectangleClipper 10 | 11 | gpd = pytest.importorskip("geopandas") 12 | shapely = pytest.importorskip("shapely") 13 | 14 | # %% 15 | @pytest.fixture(scope="module", name="geodataframe") 16 | def fixture_geodataframe(): 17 | """ 18 | A geopandas.GeoDataFrame containing a collection of shapely.geometry 19 | objects to use in the tests. 20 | """ 21 | geometries: list = [ 22 | shapely.geometry.box(minx=0.0, miny=0.0, maxx=2.0, maxy=2.0), 23 | shapely.geometry.box(minx=2.0, miny=2.0, maxx=4.0, maxy=4.0), 24 | ] 25 | geodataframe = gpd.GeoDataFrame(data={"geometry": geometries}) 26 | geodataframe = geodataframe.set_crs(crs="OGC:CRS84") 27 | 28 | return geodataframe 29 | 30 | 31 | @pytest.fixture(scope="function", name="dataset") 32 | def fixture_dataset(): 33 | """ 34 | The sample xarray.Dataset to use in the tests. 35 | """ 36 | dataarray = xr.DataArray( 37 | data=np.ones(shape=(1, 5, 7)), 38 | coords=dict( 39 | band=[0], 40 | y=np.linspace(start=4.0, stop=0.0, num=5), 41 | x=np.linspace(start=-1.0, stop=5, num=7), 42 | ), 43 | dims=("band", "y", "x"), 44 | name="foo", 45 | ) 46 | dataset: xr.Dataset = dataarray.to_dataset() 47 | dataset: xr.Dataset = dataset.rio.write_crs(input_crs="OGC:CRS84") 48 | 49 | return dataset 50 | 51 | 52 | # %% 53 | def test_geopandas_rectangle_clipper_geoseries_dataset(geodataframe, dataset): 54 | """ 55 | Ensure that GeoPandasRectangleClipper works to clip a geopandas.GeoSeries 56 | vector with xarray.Dataset rasters and outputs a tuple made up of a 57 | spatially subsetted geopandas.GeoSeries and an xarray.Dataset raster mask. 
58 | """ 59 | dp_vector = IterableWrapper(iterable=[geodataframe.geometry]) 60 | dp_raster = IterableWrapper( 61 | iterable=[ 62 | dataset.rio.clip_box(minx=-1, miny=0, maxx=1, maxy=1), 63 | dataset.rio.clip_box(minx=3, miny=3, maxx=5, maxy=4), 64 | ] 65 | ) 66 | 67 | # Using class constructors 68 | dp_clipped = GeoPandasRectangleClipper( 69 | source_datapipe=dp_vector, mask_datapipe=dp_raster 70 | ) 71 | # Using functional form (recommended) 72 | dp_clipped = dp_vector.clip_vector_with_rectangle(mask_datapipe=dp_raster) 73 | 74 | assert len(dp_clipped) == 2 75 | it = iter(dp_clipped) 76 | 77 | clipped_geoseries, raster_chip = next(it) 78 | assert clipped_geoseries.crs == "OGC:CRS84" 79 | assert all(clipped_geoseries.geom_type == "Polygon") 80 | assert clipped_geoseries.shape == (1,) 81 | assert clipped_geoseries[0].bounds == (0.0, 0.0, 1.5, 1.5) 82 | assert raster_chip.dims == {"band": 1, "y": 2, "x": 3} 83 | assert raster_chip.rio.bounds() == (-1.5, -0.5, 1.5, 1.5) 84 | 85 | clipped_geoseries, raster_chip = next(it) 86 | assert clipped_geoseries.shape == (1,) 87 | assert clipped_geoseries[1].bounds == (2.5, 2.5, 4.0, 4.0) 88 | assert raster_chip.dims == {"band": 1, "y": 2, "x": 3} 89 | assert raster_chip.rio.bounds() == (2.5, 2.5, 5.5, 4.5) 90 | assert raster_chip.rio.crs == "OGC:CRS84" 91 | 92 | 93 | def test_geopandas_rectangle_clipper_different_crs(geodataframe, dataset): 94 | """ 95 | Ensure that GeoPandasRectangleClipper works to clip a geopandas.GeoSeries 96 | vector with xarray.Dataset rasters which have different coordinate 97 | reference systems, and outputs a tuple made up of a spatially subsetted 98 | geopandas.GeoSeries and an xarray.Dataset raster mask that both have the 99 | same coordinate reference system. 
100 | """ 101 | dp_vector = IterableWrapper(iterable=[geodataframe.geometry]) 102 | 103 | dataset_3857 = dataset.rio.clip_box(minx=-1, miny=0, maxx=1, maxy=1).rio.reproject( 104 | "EPSG:3857" 105 | ) 106 | dataset_32631 = dataset.rio.clip_box(minx=3, miny=3, maxx=5, maxy=4).rio.reproject( 107 | "EPSG:32631" 108 | ) 109 | dp_raster = IterableWrapper(iterable=[dataset_3857, dataset_32631]) 110 | 111 | # Using class constructors 112 | dp_clipped = GeoPandasRectangleClipper( 113 | source_datapipe=dp_vector, mask_datapipe=dp_raster 114 | ) 115 | # Using functional form (recommended) 116 | dp_clipped = dp_vector.clip_vector_with_rectangle(mask_datapipe=dp_raster) 117 | 118 | assert len(dp_clipped) == 2 119 | it = iter(dp_clipped) 120 | 121 | clipped_geoseries, raster_chip = next(it) 122 | assert clipped_geoseries.crs == "EPSG:3857" 123 | assert all(clipped_geoseries.geom_type == "Polygon") 124 | assert clipped_geoseries.shape == (1,) 125 | assert clipped_geoseries[0].bounds == ( 126 | 0.0, 127 | 0.0, 128 | 166988.3675623712, 129 | 166998.31375292226, 130 | ) 131 | assert raster_chip.dims == {"band": 1, "y": 2, "x": 3} 132 | assert raster_chip.rio.bounds() == ( 133 | -166979.23618991036, 134 | -55646.75541526544, 135 | 166988.3675623712, 136 | 166998.31375292226, 137 | ) 138 | assert raster_chip.rio.crs == "EPSG:3857" 139 | 140 | clipped_geoseries, raster_chip = next(it) 141 | assert clipped_geoseries.crs == "EPSG:32631" 142 | assert clipped_geoseries.shape == (1,) 143 | assert clipped_geoseries[1].bounds == ( 144 | 444414.4114896285, 145 | 276009.81064532325, 146 | 611163.137304327, 147 | 442194.9725083875, 148 | ) 149 | assert raster_chip.dims == {"band": 1, "y": 2, "x": 3} 150 | assert raster_chip.rio.bounds() == ( 151 | 444414.4114896285, 152 | 276009.81064532325, 153 | 777205.5384580799, 154 | 497870.56195762416, 155 | ) 156 | assert raster_chip.rio.crs == "EPSG:32631" 157 | 158 | 159 | def test_geopandas_rectangle_clipper_incorrect_length(geodataframe, dataset): 160 
| """ 161 | Ensure that GeoPandasRectangleClipper raises a NotImplementedError when the 162 | length of the vector datapipe is not equal to 1. 163 | """ 164 | dp_vector = IterableWrapper(iterable=[geodataframe, geodataframe]) 165 | dp_raster = IterableWrapper(iterable=[dataset, dataset, dataset]) 166 | 167 | with pytest.raises(NotImplementedError, match="The vector datapipe's length can"): 168 | dp_clipped = dp_vector.clip_vector_with_rectangle(mask_datapipe=dp_raster) 169 | -------------------------------------------------------------------------------- /zen3geo/tests/test_datapipes_pyogrio.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for pyogrio datapipes. 3 | """ 4 | import pytest 5 | from torchdata.datapipes.iter import IterableWrapper 6 | 7 | from zen3geo.datapipes import PyogrioReader 8 | 9 | pyogrio = pytest.importorskip("pyogrio") 10 | 11 | # %% 12 | def test_pyogrio_reader(): 13 | """ 14 | Ensure that PyogrioReader works to read in a GeoPackage file and outputs a 15 | geopandas.GeoDataFrame object. 16 | """ 17 | file_url: str = "https://github.com/geopandas/pyogrio/raw/v0.4.0/pyogrio/tests/fixtures/test_gpkg_nulls.gpkg" 18 | dp = IterableWrapper(iterable=[file_url]) 19 | 20 | # Using class constructors 21 | dp_pyogrio = PyogrioReader(source_datapipe=dp) 22 | # Using functional form (recommended) 23 | dp_pyogrio = dp.read_from_pyogrio() 24 | 25 | assert len(dp_pyogrio) == 1 26 | it = iter(dp_pyogrio) 27 | geodataframe = next(it) 28 | 29 | assert geodataframe.shape == (4, 12) 30 | assert any(geodataframe.isna()) 31 | assert all(geodataframe.geom_type == "Point") 32 | -------------------------------------------------------------------------------- /zen3geo/tests/test_datapipes_pystac.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for pystac datapipes. 
3 | """ 4 | import pytest 5 | from torchdata.datapipes.iter import IterableWrapper 6 | 7 | from zen3geo.datapipes import PySTACItemReader 8 | 9 | pystac = pytest.importorskip("pystac") 10 | 11 | # %% 12 | def test_pystac_item_reader(): 13 | """ 14 | Ensure that PySTACItemReader works to read in a JSON STAC item and outputs 15 | to a pystac.Item object. 16 | """ 17 | item_url: str = "https://github.com/stac-utils/pystac/raw/v1.6.1/tests/data-files/item/sample-item.json" 18 | dp = IterableWrapper(iterable=[item_url]) 19 | 20 | # Using class constructors 21 | dp_pystac = PySTACItemReader(source_datapipe=dp) 22 | # Using functional form (recommended) 23 | dp_pystac = dp.read_to_pystac_item() 24 | 25 | assert len(dp_pystac) == 1 26 | it = iter(dp_pystac) 27 | stac_item = next(it) 28 | 29 | assert stac_item.bbox == [-122.59750209, 37.48803556, -122.2880486, 37.613537207] 30 | assert stac_item.datetime.isoformat() == "2016-05-03T13:22:30.040000+00:00" 31 | assert stac_item.geometry["type"] == "Polygon" 32 | assert stac_item.properties == { 33 | "datetime": "2016-05-03T13:22:30.040000Z", 34 | "title": "A CS3 item", 35 | "license": "PDDL-1.0", 36 | "providers": [ 37 | { 38 | "name": "CoolSat", 39 | "roles": ["producer", "licensor"], 40 | "url": "https://cool-sat.com/", 41 | } 42 | ], 43 | } 44 | assert ( 45 | stac_item.assets["analytic"].extra_fields["product"] 46 | == "http://cool-sat.com/catalog/products/analytic.json" 47 | ) 48 | -------------------------------------------------------------------------------- /zen3geo/tests/test_datapipes_pystac_client.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for pystac-client datapipes. 
3 | """ 4 | import pytest 5 | from torchdata.datapipes.iter import IterableWrapper 6 | 7 | from zen3geo.datapipes import PySTACAPIItemLister, PySTACAPISearcher 8 | 9 | pystac_client = pytest.importorskip("pystac_client") 10 | 11 | 12 | # %% 13 | def test_pystac_client_item_search(): 14 | """ 15 | Ensure that PySTACAPISearcher works to query a STAC API /search/ endpoint 16 | and outputs a pystac_client.ItemSearch object. 17 | """ 18 | query: dict = dict( 19 | bbox=[150.9, -34.36, 151.3, -33.46], 20 | datetime=["2000-01-01T00:00:00Z", "2020-12-31T00:00:00Z"], 21 | collections=["nidem"], 22 | ) 23 | dp = IterableWrapper(iterable=[query]) 24 | 25 | # Using class constructors 26 | dp_pystac_client = PySTACAPISearcher( 27 | source_datapipe=dp, catalog_url="https://explorer.sandbox.dea.ga.gov.au/stac/" 28 | ) 29 | # Using functional form (recommended) 30 | dp_pystac_client = dp.search_for_pystac_item( 31 | catalog_url="https://explorer.sandbox.dea.ga.gov.au/stac/" 32 | ) 33 | 34 | assert len(dp_pystac_client) == 1 35 | it = iter(dp_pystac_client) 36 | stac_item_search = next(it) 37 | assert stac_item_search.client.title == "AWS Explorer" 38 | assert stac_item_search.matched() == 2 39 | 40 | stac_items = list(stac_item_search.items()) 41 | stac_item = stac_items[0] 42 | 43 | assert stac_item.bbox == [ 44 | 149.965907628116, 45 | -35.199398016548116, 46 | 152.1053101683708, 47 | -32.97280658665687, 48 | ] 49 | assert stac_item.datetime.isoformat() == "2001-07-02T00:00:00+00:00" 50 | assert stac_item.geometry["type"] == "Polygon" 51 | assert stac_item.properties == { 52 | "title": "NIDEM_104_151.29_-34.22", 53 | "created": "2018-10-15T10:00:00Z", 54 | "proj:epsg": 4326, 55 | "datetime": "2001-07-02T00:00:00Z", 56 | "cubedash:region_code": None, 57 | } 58 | assert stac_item.assets["nidem"].extra_fields["eo:bands"] == [{"name": "nidem"}] 59 | 60 | 61 | def test_pystac_client_item_search_open_headers(): 62 | """ 63 | Ensure that PySTACAPISearcher works to query a STAC API 
/search/ endpoint 64 | with headers passed to pystac_client.Client.open. 65 | """ 66 | query: dict = dict( 67 | bbox=[150.9, -34.36, 151.3, -33.46], 68 | datetime=["2020-01-01T00:00:00Z", "2022-12-31T00:00:00Z"], 69 | collections=["HLSS30.v2.0"], 70 | ) 71 | dp = IterableWrapper(iterable=[query]) 72 | 73 | # Using class constructors 74 | dp_pystac_client = PySTACAPISearcher( 75 | source_datapipe=dp, 76 | catalog_url="https://cmr.earthdata.nasa.gov/cloudstac/LPCLOUD", 77 | headers={"Authorization": "Bearer "}, 78 | ) 79 | # Using functional form (recommended) 80 | dp_pystac_client = dp.search_for_pystac_item( 81 | catalog_url="https://cmr.earthdata.nasa.gov/cloudstac/LPCLOUD", 82 | headers={"Authorization": "Bearer "}, 83 | ) 84 | 85 | assert len(dp_pystac_client) == 1 86 | it = iter(dp_pystac_client) 87 | stac_item_search = next(it) 88 | assert stac_item_search.client.title == "LPCLOUD" 89 | assert stac_item_search.client.description == "Root catalog for LPCLOUD" 90 | 91 | 92 | def test_pystac_client_item_lister(): 93 | """ 94 | Ensure that PySTACAPIItemLister works to yield pystac.Item instances for 95 | each item matching the given search parameters in a 96 | pystac_client.ItemSearch query. 
97 | """ 98 | catalog = pystac_client.Client.open( 99 | url="https://earth-search.aws.element84.com/v1/" 100 | ) 101 | search = catalog.search( 102 | bbox=[134.2, 6.9, 134.8, 8.5], 103 | datetime=["2023-01-01T00:00:00Z", "2023-01-31T00:00:00Z"], 104 | collections=["sentinel-2-l1c"], 105 | ) 106 | dp = IterableWrapper(iterable=[search]) 107 | 108 | # Using class constructors 109 | dp_pystac_item_list = PySTACAPIItemLister(source_datapipe=dp) 110 | # Using functional form (recommended) 111 | dp_pystac_item_list = dp.list_pystac_items_by_search() 112 | 113 | assert len(dp_pystac_item_list) == 14 114 | it = iter(dp_pystac_item_list) 115 | stac_item = next(it) 116 | assert stac_item.bbox == [ 117 | 134.093840347073, 118 | 6.2442879900058115, 119 | 135.08840137750929, 120 | 7.237809826458827, 121 | ] 122 | assert stac_item.datetime.isoformat() == "2023-01-29T01:35:24.640000+00:00" 123 | assert stac_item.geometry["type"] == "Polygon" 124 | assert stac_item.properties == { 125 | "created": "2023-01-29T06:01:33.679Z", 126 | "platform": "sentinel-2b", 127 | "constellation": "sentinel-2", 128 | "instruments": ["msi"], 129 | "eo:cloud_cover": 92.7676417582305, 130 | "proj:epsg": 32653, 131 | "mgrs:utm_zone": 53, 132 | "mgrs:latitude_band": "N", 133 | "mgrs:grid_square": "MH", 134 | "grid:code": "MGRS-53NMH", 135 | "view:sun_azimuth": 135.719785438016, 136 | "view:sun_elevation": 55.1713941690268, 137 | "s2:degraded_msi_data_percentage": 0.2816, 138 | "s2:product_type": "S2MSI1C", 139 | "s2:processing_baseline": "05.09", 140 | "s2:product_uri": "S2B_MSIL1C_20230129T013449_N0509_R031_T53NMH_20230129T025811.SAFE", 141 | "s2:generation_time": "2023-01-29T02:58:11.000000Z", 142 | "s2:datatake_id": "GS2B_20230129T013449_030802_N05.09", 143 | "s2:datatake_type": "INS-NOBS", 144 | "s2:datastrip_id": "S2B_OPER_MSI_L1C_DS_2BPS_20230129T025811_S20230129T013450_N05.09", 145 | "s2:granule_id": "S2B_OPER_MSI_L1C_TL_2BPS_20230129T025811_A030802_T53NMH_N05.09", 146 | 
"s2:reflectance_conversion_factor": 1.03193080888673, 147 | "datetime": "2023-01-29T01:35:24.640000Z", 148 | "s2:sequence": "0", 149 | "earthsearch:s3_path": "s3://earthsearch-data/sentinel-2-l1c/53/N/MH/2023/1/S2B_53NMH_20230129_0_L1C", 150 | "earthsearch:payload_id": "roda-sentinel2/workflow-sentinel2-to-stac/15626e44fb54c2182e5ed5d3aec4a209", 151 | "processing:software": {"sentinel2-to-stac": "0.1.0"}, 152 | "updated": "2023-01-29T06:01:33.679Z", 153 | } 154 | assert stac_item.assets["visual"].extra_fields["eo:bands"] == [ 155 | { 156 | "name": "red", 157 | "common_name": "red", 158 | "description": "Red (band 4)", 159 | "center_wavelength": 0.665, 160 | "full_width_half_max": 0.038, 161 | }, 162 | { 163 | "name": "green", 164 | "common_name": "green", 165 | "description": "Green (band 3)", 166 | "center_wavelength": 0.56, 167 | "full_width_half_max": 0.045, 168 | }, 169 | { 170 | "name": "blue", 171 | "common_name": "blue", 172 | "description": "Blue (band 2)", 173 | "center_wavelength": 0.49, 174 | "full_width_half_max": 0.098, 175 | }, 176 | ] 177 | -------------------------------------------------------------------------------- /zen3geo/tests/test_datapipes_rioxarray.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for rioxarray datapipes. 3 | """ 4 | from torchdata.datapipes.iter import IterableWrapper 5 | 6 | from zen3geo.datapipes import RioXarrayReader 7 | 8 | 9 | # %% 10 | def test_rioxarray_reader(): 11 | """ 12 | Ensure that RioXarrayReader works to read in a GeoTIFF file and outputs an 13 | xarray.DataArray object. 
14 | """ 15 | file_url: str = "https://github.com/GenericMappingTools/gmtserver-admin/raw/master/cache/earth_day_HD.tif" 16 | dp = IterableWrapper(iterable=[file_url]) 17 | 18 | # Using class constructors 19 | dp_rioxarray = RioXarrayReader(source_datapipe=dp) 20 | # Using functional form (recommended) 21 | dp_rioxarray = dp.read_from_rioxarray() 22 | 23 | assert len(dp_rioxarray) == 1 24 | it = iter(dp_rioxarray) 25 | dataarray = next(it) 26 | 27 | assert dataarray.shape == (1, 960, 1920) 28 | assert dataarray.dims == ("band", "y", "x") 29 | -------------------------------------------------------------------------------- /zen3geo/tests/test_datapipes_stackstac.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for stackstac datapipes. 3 | """ 4 | import numpy as np 5 | import pytest 6 | import xarray as xr 7 | from torchdata.datapipes.iter import IterableWrapper 8 | 9 | from zen3geo.datapipes import StackSTACStacker 10 | 11 | pystac = pytest.importorskip("pystac") 12 | stackstac = pytest.importorskip("stackstac") 13 | 14 | # %% 15 | def test_stackstac_mosaicker(): 16 | """ 17 | Ensure that StackSTACMosaicker works to mosaic tiles within a 4D 18 | xarray.DataArray to a 3D xarray.DataArray. 19 | """ 20 | datacube: xr.DataArray = xr.DataArray( 21 | data=np.ones(shape=(3, 1, 32, 32)), dims=["tile", "band", "y", "x"] 22 | ) 23 | dataarray = stackstac.mosaic(arr=datacube, dim="tile") 24 | assert dataarray.sizes == {"band": 1, "y": 32, "x": 32} 25 | assert dataarray.sum() == 1 * 32 * 32 26 | 27 | 28 | def test_stackstac_stacker(): 29 | """ 30 | Ensure that StackSTACStacker works to stack multiple bands within a STAC 31 | item and outputs an xarray.DataArray object. 
32 | """ 33 | item_url: str = "https://github.com/stac-utils/pystac/raw/v1.6.1/tests/data-files/raster/raster-sentinel2-example.json" 34 | stac_item = pystac.Item.from_file(href=item_url) 35 | dp = IterableWrapper(iterable=[stac_item]) 36 | 37 | # Using class constructors 38 | dp_stackstac = StackSTACStacker(source_datapipe=dp, assets=["B02", "B03", "B04"]) 39 | # Using functional form (recommended) 40 | dp_stackstac = dp.stack_stac_items(assets=["B02", "B03", "B04"]) 41 | 42 | assert len(dp_stackstac) == 1 43 | it = iter(dp_stackstac) 44 | dataarray = next(it) 45 | 46 | assert dataarray.shape == (1, 3, 10980, 10980) 47 | assert dataarray.dims == ("time", "band", "y", "x") 48 | assert dataarray.rio.bounds() == (399955.0, 4090205.0, 509755.0, 4200005.0) 49 | assert dataarray.rio.resolution() == (10.0, -10.0) 50 | assert dataarray.rio.crs == "EPSG:32633" 51 | -------------------------------------------------------------------------------- /zen3geo/tests/test_datapipes_xbatcher.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for xbatcher datapipes. 3 | """ 4 | import numpy as np 5 | import pytest 6 | import xarray as xr 7 | from torchdata.datapipes.iter import IterableWrapper 8 | 9 | from zen3geo.datapipes import XbatcherSlicer 10 | 11 | xbatcher = pytest.importorskip("xbatcher") 12 | 13 | 14 | # %% 15 | def test_xbatcher_slicer_dataarray(): 16 | """ 17 | Ensure that XbatcherSlicer works to slice an xarray.DataArray object and 18 | outputs a smaller xarray.DataArray chip. 
19 |     """
20 | 
21 |     dataarray: xr.DataArray = xr.DataArray(
22 |         data=np.ones(shape=(3, 128, 128)), dims=["band", "y", "x"]
23 |     ).chunk({"band": 1})
24 |     dp = IterableWrapper(iterable=[dataarray])
25 | 
26 |     # Using class constructors
27 |     dp_xbatcher = XbatcherSlicer(source_datapipe=dp, input_dims={"y": 64, "x": 64})
28 |     # Using functional form (recommended)
29 |     dp_xbatcher = dp.slice_with_xbatcher(input_dims={"y": 64, "x": 64})
30 | 
31 |     assert len(dp_xbatcher) == 4
32 |     it = iter(dp_xbatcher)
33 |     dataarray_chip = next(it)
34 | 
35 |     assert dataarray_chip.sizes == {"band": 3, "y": 64, "x": 64}
36 |     assert dataarray_chip.sum() == 3 * 64 * 64
37 | 
38 | 
39 | def test_xbatcher_slicer_dataset():
40 |     """
41 |     Ensure that XbatcherSlicer works to slice an xarray.Dataset object and
42 |     outputs a smaller xarray.Dataset chip.
43 |     """
44 | 
45 |     dataset: xr.Dataset = xr.Dataset(
46 |         data_vars={"temperature": (["x", "y"], 15 * np.ones(shape=(32, 32)))},
47 |         coords={
48 |             "lon": (["x"], np.linspace(start=0, stop=32, num=32)),
49 |             "lat": (["y"], np.linspace(start=64, stop=32, num=32)),
50 |         },
51 |     )
52 |     dp = IterableWrapper(iterable=[dataset])
53 | 
54 |     # Using class constructors
55 |     dp_xbatcher = XbatcherSlicer(source_datapipe=dp, input_dims={"y": 16, "x": 16})
56 |     # Using functional form (recommended)
57 |     dp_xbatcher = dp.slice_with_xbatcher(input_dims={"y": 16, "x": 16})
58 | 
59 |     assert len(dp_xbatcher) == 4
60 |     it = iter(dp_xbatcher)
61 |     dataset_chip = next(it)
62 | 
63 |     assert dataset_chip.temperature.sizes == {"y": 16, "x": 16}
64 |     assert dataset_chip.temperature.sum() == 15 * 16 * 16
65 | 
--------------------------------------------------------------------------------
/zen3geo/tests/test_datapipes_xpystac.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for xpystac datapipes.
3 | """ 4 | import pytest 5 | from torchdata.datapipes.iter import IterableWrapper 6 | 7 | from zen3geo.datapipes import XpySTACAssetReader 8 | 9 | 10 | # %% 11 | def test_xpystac_asset_reader_cog(): 12 | """ 13 | Ensure that XpySTACAssetReader works to read in a pystac.Asset object 14 | stored as a Cloud-Optimized GeoTIFF and output to an xarray.Dataset object. 15 | """ 16 | pystac = pytest.importorskip("pystac") 17 | xpystac = pytest.importorskip("xpystac") 18 | 19 | item_url: str = "https://github.com/stac-utils/pystac/raw/v1.7.1/tests/data-files/raster/raster-sentinel2-example.json" 20 | asset: pystac.Asset = pystac.Item.from_file(href=item_url).assets["overview"] 21 | assert asset.media_type == pystac.MediaType.COG 22 | 23 | dp = IterableWrapper(iterable=[asset]) 24 | 25 | # Using class constructors 26 | dp_xpystac = XpySTACAssetReader(source_datapipe=dp) 27 | # Using functional form (recommended) 28 | dp_xpystac = dp.read_from_xpystac() 29 | 30 | assert len(dp_xpystac) == 1 31 | it = iter(dp_xpystac) 32 | dataset = next(it) 33 | 34 | assert dataset.sizes == {"band": 3, "x": 343, "y": 343} 35 | assert dataset.band_data.dtype == "float32" 36 | assert dataset.rio.bounds() == (399960.0, 4090240.0, 509720.0, 4200000.0) 37 | assert dataset.rio.resolution() == (320.0, -320.0) 38 | assert dataset.rio.crs == "EPSG:32633" 39 | 40 | 41 | def test_xpystac_asset_reader_zarr(): 42 | """ 43 | Ensure that XpySTACAssetReader works to read in a pystac.Asset object 44 | stored as a Zarr file and output to an xarray.Dataset object. 
45 | """ 46 | pystac = pytest.importorskip("pystac") 47 | xpystac = pytest.importorskip("xpystac") 48 | 49 | collection_url: str = "https://planetarycomputer.microsoft.com/api/stac/v1/collections/daymet-daily-hi" 50 | asset: pystac.Asset = pystac.Collection.from_file(href=collection_url).assets[ 51 | "zarr-https" 52 | ] 53 | assert asset.media_type == "application/vnd+zarr" 54 | 55 | dp = IterableWrapper(iterable=[asset]) 56 | 57 | # Using class constructors 58 | dp_xpystac = XpySTACAssetReader(source_datapipe=dp) 59 | # Using functional form (recommended) 60 | dp_xpystac = dp.read_from_xpystac() 61 | 62 | assert len(dp_xpystac) == 1 63 | it = iter(dp_xpystac) 64 | dataset = next(it) 65 | 66 | assert dataset.sizes == {"time": 14965, "y": 584, "x": 284, "nv": 2} 67 | assert dataset.prcp.dtype == "float32" 68 | assert dataset.rio.bounds() == (-5802750.0, -622500.0, -5518750.0, -38500.0) 69 | assert dataset.rio.resolution() == (1000.0, -1000.0) 70 | assert dataset.rio.grid_mapping == "lambert_conformal_conic" 71 | 72 | 73 | def test_xpystac_asset_reader_geotiff_without_xpystac(): 74 | """ 75 | Ensure that XpySTACAssetReader works to read in a GeoTIFF file and output 76 | to an xarray.Dataset object, even when xpystac is not installed. 77 | 78 | Note that `engine="rasterio"` has been removed in xarray v2023.04.0, see 79 | https://github.com/pydata/xarray/pull/7671. So, this test will need to be 80 | updated once we change to require an xarray verson greater than 2023.04.0. 81 | Only included this test to check an alternative to `engine="stac"` that 82 | did not require installing extra required dependencies like `netcdf4` or 83 | `h5netcdf`. 
84 | """ 85 | tif_url: str = "https://github.com/corteva/rioxarray/raw/0.14.1/test/test_data/input/cint16.tif" 86 | 87 | dp = IterableWrapper(iterable=[tif_url]) 88 | 89 | # Using class constructors 90 | dp_xpystac = XpySTACAssetReader(source_datapipe=dp, engine="rasterio") 91 | # Using functional form (recommended) 92 | dp_xpystac = dp.read_from_xpystac(engine="rasterio") 93 | 94 | assert len(dp_xpystac) == 1 95 | it = iter(dp_xpystac) 96 | dataset = next(it) 97 | 98 | assert dataset.sizes == {"band": 1, "x": 100, "y": 100} 99 | assert dataset.band_data.dtype == "complex64" 100 | assert dataset.rio.bounds() == (0.0, 100.0, 100.0, 0.0) 101 | assert dataset.rio.resolution() == (1.0, 1.0) 102 | assert dataset.rio.crs == "EPSG:4326" 103 | -------------------------------------------------------------------------------- /zen3geo/tests/test_zen3geo.py: -------------------------------------------------------------------------------- 1 | from packaging.version import Version 2 | 3 | from zen3geo import __version__ 4 | 5 | 6 | def test_version(): 7 | assert Version(version=__version__) >= Version(version="0.0.0") 8 | --------------------------------------------------------------------------------