├── .github
├── ISSUE_TEMPLATE
│ └── bug_report.md
├── release-template.yml
└── workflows
│ ├── ci-tests.yml
│ ├── publish-to-pypi.yml
│ └── release-drafter.yml
├── .gitignore
├── LICENSE.md
├── README.md
├── docs
├── .readthedocs.yaml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── _config.yml
├── _toc.yml
├── api.md
├── changelog.md
├── chipping.md
├── index.md
├── multi-resolution.md
├── object-detection-boxes.md
├── stacking.md
├── vector-segmentation-masks.md
└── walkthrough.md
├── poetry.lock
├── pyproject.toml
└── zen3geo
├── __init__.py
├── datapipes
├── __init__.py
├── datashader.py
├── geopandas.py
├── pyogrio.py
├── pystac.py
├── pystac_client.py
├── rioxarray.py
├── stackstac.py
├── xbatcher.py
└── xpystac.py
└── tests
├── test_datapipes_datashader.py
├── test_datapipes_geopandas.py
├── test_datapipes_pyogrio.py
├── test_datapipes_pystac.py
├── test_datapipes_pystac_client.py
├── test_datapipes_rioxarray.py
├── test_datapipes_stackstac.py
├── test_datapipes_xbatcher.py
├── test_datapipes_xpystac.py
└── test_zen3geo.py
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: bug
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Run the following code
17 |
18 | ```python
19 | # Insert your code here
20 | ```
21 |
22 | 3. See error `...`
23 |
24 | **Expected behavior**
25 | A clear and concise description of what you expected to happen.
26 |
27 | **System details (please complete the following information):**
28 | - OS: [e.g. Linux, macOS, Windows]
29 | - Python Version: [e.g. 3.11]
30 |
31 | **Additional context**
32 | Add any other context about the problem here.
33 |
--------------------------------------------------------------------------------
/.github/release-template.yml:
--------------------------------------------------------------------------------
1 | name-template: 'v$RESOLVED_VERSION 🌈'
2 | tag-template: 'v$RESOLVED_VERSION'
3 | categories:
4 | - title: '🚀 Features'
5 | label: 'feature'
6 | - title: '🐛 Bug Fixes'
7 | label: 'bug'
8 | - title: '📖 Documentation'
9 | label: 'documentation'
10 | - title: '🧰 Maintenance'
11 | label: 'maintenance'
12 | version-resolver:
13 | minor:
14 | labels:
15 | - 'feature'
16 | default: patch
17 | exclude-labels:
18 | - 'skip-changelog'
19 | category-template: '### $TITLE'
20 | change-template: '* $TITLE ([#$NUMBER]($URL))'
21 | template: |
22 | ## Release v$RESOLVED_VERSION (20YY/MM/DD)
23 |
24 | ### 💫 Highlights
25 |
26 | *
27 |
28 | $CHANGES
29 |
30 | ### 🧑🤝🧑 Contributors
31 |
32 | $CONTRIBUTORS
33 |
--------------------------------------------------------------------------------
/.github/workflows/ci-tests.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
3 |
4 | name: Tests
5 |
6 | on:
7 | push:
8 | branches: [ "main" ]
9 | pull_request:
10 | types: [opened, ready_for_review, reopened, synchronize]
11 | branches: [ "main" ]
12 |
13 | permissions:
14 | contents: read
15 |
16 | jobs:
17 | test:
18 | name: ${{ matrix.os }} - Python ${{ matrix.python-version }}
19 | runs-on: ${{ matrix.os }}
20 | strategy:
21 | fail-fast: false
22 | matrix:
23 | python-version: ["3.8", "3.10", "3.11.8"]
24 | os: [ubuntu-22.04]
25 | # Is it a draft Pull Request (true or false)?
26 | isDraft:
27 | - ${{ github.event.pull_request.draft }}
28 | # Exclude Ubuntu + Python 3.8 and 3.11 jobs for draft PRs
29 | exclude:
30 | - python-version: '3.8'
31 | isDraft: true
32 | - python-version: '3.11.8'
33 | isDraft: true
34 | # Only install optional packages on Ubuntu-22.04/Python 3.10 and 3.11
35 | include:
36 | - os: 'ubuntu-22.04'
37 | python-version: '3.10'
38 | extra-packages: '--extras "raster spatial stac vector"'
39 | - os: 'ubuntu-22.04'
40 | python-version: '3.11.8'
41 | extra-packages: '--extras "raster spatial stac vector"'
42 |
43 | steps:
44 | # Checkout current git repository
45 | - name: Checkout
46 | uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
47 |
48 | # Install Python
49 | - name: Set up Python ${{ matrix.python-version }}
50 | uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
51 | with:
52 | python-version: ${{ matrix.python-version }}
53 |
54 | # Install poetry package manager and dependencies from poetry.lock
55 | - name: Install Poetry python dependencies
56 | run: |
57 | pip install poetry==1.6.1
58 | poetry install ${{ matrix.extra-packages }}
59 | poetry self add poetry-dynamic-versioning[plugin]
60 | poetry show
61 |
62 | # Run the unit tests and doctests
63 | - name: Test with pytest
64 | run: poetry run pytest --verbose --doctest-modules zen3geo/
65 |
--------------------------------------------------------------------------------
/.github/workflows/publish-to-pypi.yml:
--------------------------------------------------------------------------------
1 | # Publish archives to PyPI and TestPyPI using GitHub Actions
2 | # https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/
3 |
4 | name: Publish to PyPI
5 |
6 | # Only run for pushes to the main branch and releases.
7 | on:
8 | push:
9 | branches:
10 | - main
11 | release:
12 | types:
13 | - published
14 | # Runs for pull requests should be disabled other than for testing purposes
15 | #pull_request:
16 | # branches:
17 | # - main
18 |
19 | permissions:
20 | contents: read
21 |
22 | jobs:
23 | publish-pypi:
24 | name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI
25 | runs-on: ubuntu-22.04
26 | permissions:
27 | # This permission is mandatory for OIDC publishing
28 | id-token: write
29 | if: github.repository == 'weiji14/zen3geo'
30 |
31 | steps:
32 | - name: Checkout
33 | uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
34 | with:
35 | # fetch all history so that poetry-dynamic-versioning works
36 | fetch-depth: 0
37 |
38 | - name: Set up Python 3.11
39 | uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
40 | with:
41 | python-version: '3.11.8'
42 |
43 | - name: Install Poetry and dynamic-versioning plugin
44 | run: |
45 | pip install poetry==1.6.1
46 | poetry self add poetry-dynamic-versioning[plugin]
47 | poetry show
48 |
49 | - name: Fix up version string for TestPyPI and PyPI
50 | run: |
51 | # Change poetry-dynamic-versioning to use metadata=false so that the
52 | # local part of the version isn't included, making the version string
53 | # compatible with PyPI.
54 | sed --in-place "s/metadata = true/metadata = false/g" pyproject.toml
55 |
56 | - name: Build a binary wheel and a source tarball
57 | run: |
58 | poetry build -vvv
59 | echo ""
60 | echo "Generated files:"
61 | ls -lh dist/
62 |
63 | - name: Publish distribution 📦 to Test PyPI
64 | uses: pypa/gh-action-pypi-publish@a56da0b891b3dc519c7ee3284aff1fad93cc8598 # v1.8.6
65 | with:
66 | repository-url: https://test.pypi.org/legacy/
67 | skip-existing: true
68 |
69 | - name: Publish distribution 📦 to PyPI
70 | if: startsWith(github.ref, 'refs/tags')
71 | uses: pypa/gh-action-pypi-publish@a56da0b891b3dc519c7ee3284aff1fad93cc8598 # v1.8.6
72 |
--------------------------------------------------------------------------------
/.github/workflows/release-drafter.yml:
--------------------------------------------------------------------------------
1 | name: Release Drafter
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 |
8 | permissions:
9 | contents: read
10 |
11 | jobs:
12 | update_release_draft:
13 | permissions:
14 | contents: write # for release-drafter/release-drafter to create a github release
15 | runs-on: ubuntu-22.04
16 | steps:
17 | # Drafts your next Release notes as Pull Requests are merged into "main"
18 | - uses: release-drafter/release-drafter@569eb7ee3a85817ab916c8f8ff03a5bd96c9c83e # v5.23.0
19 | with:
20 | # (Optional) specify config name to use, relative to .github/. Default: release-drafter.yml
21 | config-name: release-template.yml
22 | env:
23 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
24 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 |
5 | # Distribution / packaging
6 | build/
7 | dist/
8 | *.egg
9 | *.egg-info/
10 | .eggs/
11 | MANIFEST
12 |
13 | # Unit test / coverage reports
14 | .pytest_cache/
15 |
16 | # Jupyter Book
17 | /docs/_build/
18 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 |
2 | GNU LESSER GENERAL PUBLIC LICENSE
3 | Version 3, 29 June 2007
4 |
5 | Copyright (C) 2007 Free Software Foundation, Inc.
6 | Everyone is permitted to copy and distribute verbatim copies
7 | of this license document, but changing it is not allowed.
8 |
9 |
10 | This version of the GNU Lesser General Public License incorporates
11 | the terms and conditions of version 3 of the GNU General Public
12 | License, supplemented by the additional permissions listed below.
13 |
14 | 0. Additional Definitions.
15 |
16 | As used herein, "this License" refers to version 3 of the GNU Lesser
17 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
18 | General Public License.
19 |
20 | "The Library" refers to a covered work governed by this License,
21 | other than an Application or a Combined Work as defined below.
22 |
23 | An "Application" is any work that makes use of an interface provided
24 | by the Library, but which is not otherwise based on the Library.
25 | Defining a subclass of a class defined by the Library is deemed a mode
26 | of using an interface provided by the Library.
27 |
28 | A "Combined Work" is a work produced by combining or linking an
29 | Application with the Library. The particular version of the Library
30 | with which the Combined Work was made is also called the "Linked
31 | Version".
32 |
33 | The "Minimal Corresponding Source" for a Combined Work means the
34 | Corresponding Source for the Combined Work, excluding any source code
35 | for portions of the Combined Work that, considered in isolation, are
36 | based on the Application, and not on the Linked Version.
37 |
38 | The "Corresponding Application Code" for a Combined Work means the
39 | object code and/or source code for the Application, including any data
40 | and utility programs needed for reproducing the Combined Work from the
41 | Application, but excluding the System Libraries of the Combined Work.
42 |
43 | 1. Exception to Section 3 of the GNU GPL.
44 |
45 | You may convey a covered work under sections 3 and 4 of this License
46 | without being bound by section 3 of the GNU GPL.
47 |
48 | 2. Conveying Modified Versions.
49 |
50 | If you modify a copy of the Library, and, in your modifications, a
51 | facility refers to a function or data to be supplied by an Application
52 | that uses the facility (other than as an argument passed when the
53 | facility is invoked), then you may convey a copy of the modified
54 | version:
55 |
56 | a) under this License, provided that you make a good faith effort to
57 | ensure that, in the event an Application does not supply the
58 | function or data, the facility still operates, and performs
59 | whatever part of its purpose remains meaningful, or
60 |
61 | b) under the GNU GPL, with none of the additional permissions of
62 | this License applicable to that copy.
63 |
64 | 3. Object Code Incorporating Material from Library Header Files.
65 |
66 | The object code form of an Application may incorporate material from
67 | a header file that is part of the Library. You may convey such object
68 | code under terms of your choice, provided that, if the incorporated
69 | material is not limited to numerical parameters, data structure
70 | layouts and accessors, or small macros, inline functions and templates
71 | (ten or fewer lines in length), you do both of the following:
72 |
73 | a) Give prominent notice with each copy of the object code that the
74 | Library is used in it and that the Library and its use are
75 | covered by this License.
76 |
77 | b) Accompany the object code with a copy of the GNU GPL and this license
78 | document.
79 |
80 | 4. Combined Works.
81 |
82 | You may convey a Combined Work under terms of your choice that,
83 | taken together, effectively do not restrict modification of the
84 | portions of the Library contained in the Combined Work and reverse
85 | engineering for debugging such modifications, if you also do each of
86 | the following:
87 |
88 | a) Give prominent notice with each copy of the Combined Work that
89 | the Library is used in it and that the Library and its use are
90 | covered by this License.
91 |
92 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
93 | document.
94 |
95 | c) For a Combined Work that displays copyright notices during
96 | execution, include the copyright notice for the Library among
97 | these notices, as well as a reference directing the user to the
98 | copies of the GNU GPL and this license document.
99 |
100 | d) Do one of the following:
101 |
102 | 0) Convey the Minimal Corresponding Source under the terms of this
103 | License, and the Corresponding Application Code in a form
104 | suitable for, and under terms that permit, the user to
105 | recombine or relink the Application with a modified version of
106 | the Linked Version to produce a modified Combined Work, in the
107 | manner specified by section 6 of the GNU GPL for conveying
108 | Corresponding Source.
109 |
110 | 1) Use a suitable shared library mechanism for linking with the
111 | Library. A suitable mechanism is one that (a) uses at run time
112 | a copy of the Library already present on the user's computer
113 | system, and (b) will operate properly with a modified version
114 | of the Library that is interface-compatible with the Linked
115 | Version.
116 |
117 | e) Provide Installation Information, but only if you would otherwise
118 | be required to provide such information under section 6 of the
119 | GNU GPL, and only to the extent that such information is
120 | necessary to install and execute a modified version of the
121 | Combined Work produced by recombining or relinking the
122 | Application with a modified version of the Linked Version. (If
123 | you use option 4d0, the Installation Information must accompany
124 | the Minimal Corresponding Source and Corresponding Application
125 | Code. If you use option 4d1, you must provide the Installation
126 | Information in the manner specified by section 6 of the GNU GPL
127 | for conveying Corresponding Source.)
128 |
129 | 5. Combined Libraries.
130 |
131 | You may place library facilities that are a work based on the
132 | Library side by side in a single library together with other library
133 | facilities that are not Applications and are not covered by this
134 | License, and convey such a combined library under terms of your
135 | choice, if you do both of the following:
136 |
137 | a) Accompany the combined library with a copy of the same work based
138 | on the Library, uncombined with any other library facilities,
139 | conveyed under the terms of this License.
140 |
141 | b) Give prominent notice with the combined library that part of it
142 | is a work based on the Library, and explaining where to find the
143 | accompanying uncombined form of the same work.
144 |
145 | 6. Revised Versions of the GNU Lesser General Public License.
146 |
147 | The Free Software Foundation may publish revised and/or new versions
148 | of the GNU Lesser General Public License from time to time. Such new
149 | versions will be similar in spirit to the present version, but may
150 | differ in detail to address new problems or concerns.
151 |
152 | Each version is given a distinguishing version number. If the
153 | Library as you received it specifies that a certain numbered version
154 | of the GNU Lesser General Public License "or any later version"
155 | applies to it, you have the option of following the terms and
156 | conditions either of that published version or of any later version
157 | published by the Free Software Foundation. If the Library as you
158 | received it does not specify a version number of the GNU Lesser
159 | General Public License, you may choose any version of the GNU Lesser
160 | General Public License ever published by the Free Software Foundation.
161 |
162 | If the Library as you received it specifies that a proxy can decide
163 | whether future versions of the GNU Lesser General Public License shall
164 | apply, that proxy's public statement of acceptance of any version is
165 | permanent authorization for you to choose that version for the
166 | Library.
167 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # zen3geo
2 |
3 | The 🌏 data science library you've been waiting for~
4 |
5 | > 君の前前前世から僕は 君を探しはじめたよ
6 | >
7 | > Since your past life, I have been searching for you
8 |
9 | ## 公案
10 |
11 | ```
12 | Geography is difficult, but easy it can also be
13 | Deep Learning, you hope, has an answer to all
14 | Too this, too that, where to though, where to?
15 | Look out, sense within, and now you must know
16 | ```
17 |
18 | ## Installation
19 |
20 | To install the development version from GitHub, do:
21 |
22 | pip install git+https://github.com/weiji14/zen3geo.git
23 |
24 | Or the stable version from [PyPI](https://pypi.org/project/zen3geo):
25 |
26 | pip install zen3geo
27 |
28 | If instead, [conda-forge](https://anaconda.org/conda-forge/zen3geo) you desire:
29 |
30 | mamba install --channel conda-forge zen3geo
31 |
32 | For other instructions, see https://zen3geo.readthedocs.io/en/latest/#installation
33 |
--------------------------------------------------------------------------------
/docs/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # Read the Docs configuration file for Sphinx projects
2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
3 |
4 | # Required
5 | version: 2
6 |
7 | # Set the OS, Python version and other tools you might need
8 | build:
9 | os: ubuntu-22.04
10 | tools:
11 | python: "3.11"
12 | apt_packages:
13 | - graphviz
14 | jobs:
15 | pre_build:
16 | # Generate the Sphinx configuration for this Jupyter Book so it builds.
17 | # https://jupyterbook.org/en/stable/publish/readthedocs.html
18 | - "jupyter-book config sphinx docs/"
19 | post_install:
20 | # Install stackstac=0.4.4 instead of 0.5.0 to prevent
21 | # TypeError: Unsupported data type float16
22 | # because stackstac casts to float16 at read-in instead of post-read
23 | # see https://github.com/gjoseph92/stackstac/pull/208
24 | # Need to wait for rasterio/GDAL to support float16
25 | # see https://gdal.org/api/raster_c_api.html#_CPPv412GDALDataType
26 | # Install dask<2024.3.0 to prevent
27 | # ModuleNotFoundError: No module named 'dask_expr'
28 | # ImportError: Dask dataframe requirements are not installed
29 | # https://github.com/holoviz/datashader/issues/1319
30 | - "pip install stackstac==0.4.4 dask==2024.2.1"
31 |
32 | # Optional but recommended, declare the Python requirements required
33 | # to build your documentation
34 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
35 | python:
36 | install:
37 | - method: pip
38 | path: .
39 | extra_requirements:
40 | - docs
41 |
42 | sphinx:
43 | builder: html
44 | fail_on_warning: true
45 |
--------------------------------------------------------------------------------
/docs/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | We as members, contributors, and leaders pledge to make participation in our
6 | community a harassment-free experience for everyone, regardless of age, body
7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
8 | identity and expression, level of experience, education, socio-economic status,
9 | nationality, personal appearance, race, caste, color, religion, or sexual
10 | identity and orientation.
11 |
12 | We pledge to act and interact in ways that contribute to an open, welcoming,
13 | diverse, inclusive, and healthy community.
14 |
15 | ## Our Standards
16 |
17 | Examples of behavior that contributes to a positive environment for our
18 | community include:
19 |
20 | * Demonstrating empathy and kindness toward other people
21 | * Being respectful of differing opinions, viewpoints, and experiences
22 | * Giving and gracefully accepting constructive feedback
23 | * Accepting responsibility and apologizing to those affected by our mistakes,
24 | and learning from the experience
25 | * Focusing on what is best not just for us as individuals, but for the overall
26 | community
27 |
28 | Examples of unacceptable behavior include:
29 |
30 | * The use of sexualized language or imagery, and sexual attention or advances of
31 | any kind
32 | * Trolling, insulting or derogatory comments, and personal or political attacks
33 | * Public or private harassment
34 | * Publishing others' private information, such as a physical or email address,
35 | without their explicit permission
36 | * Other conduct which could reasonably be considered inappropriate in a
37 | professional setting
38 |
39 | ## Enforcement Responsibilities
40 |
41 | Community leaders are responsible for clarifying and enforcing our standards of
42 | acceptable behavior and will take appropriate and fair corrective action in
43 | response to any behavior that they deem inappropriate, threatening, offensive,
44 | or harmful.
45 |
46 | Community leaders have the right and responsibility to remove, edit, or reject
47 | comments, commits, code, wiki edits, issues, and other contributions that are
48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
49 | decisions when appropriate.
50 |
51 | ## Scope
52 |
53 | This Code of Conduct applies within all community spaces, and also applies when
54 | an individual is officially representing the community in public spaces.
55 | Examples of representing our community include using an official e-mail address,
56 | posting via an official social media account, or acting as an appointed
57 | representative at an online or offline event.
58 |
59 | ## Enforcement
60 |
61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
62 | reported to the community leaders responsible for enforcement at
63 | te6s3z67 at duck dot com.
64 | All complaints will be reviewed and investigated promptly and fairly.
65 |
66 | All community leaders are obligated to respect the privacy and security of the
67 | reporter of any incident.
68 |
69 | ## Enforcement Guidelines
70 |
71 | Community leaders will follow these Community Impact Guidelines in determining
72 | the consequences for any action they deem in violation of this Code of Conduct:
73 |
74 | ### 1. Correction
75 |
76 | **Community Impact**: Use of inappropriate language or other behavior deemed
77 | unprofessional or unwelcome in the community.
78 |
79 | **Consequence**: A private, written warning from community leaders, providing
80 | clarity around the nature of the violation and an explanation of why the
81 | behavior was inappropriate. A public apology may be requested.
82 |
83 | ### 2. Warning
84 |
85 | **Community Impact**: A violation through a single incident or series of
86 | actions.
87 |
88 | **Consequence**: A warning with consequences for continued behavior. No
89 | interaction with the people involved, including unsolicited interaction with
90 | those enforcing the Code of Conduct, for a specified period of time. This
91 | includes avoiding interactions in community spaces as well as external channels
92 | like social media. Violating these terms may lead to a temporary or permanent
93 | ban.
94 |
95 | ### 3. Temporary Ban
96 |
97 | **Community Impact**: A serious violation of community standards, including
98 | sustained inappropriate behavior.
99 |
100 | **Consequence**: A temporary ban from any sort of interaction or public
101 | communication with the community for a specified period of time. No public or
102 | private interaction with the people involved, including unsolicited interaction
103 | with those enforcing the Code of Conduct, is allowed during this period.
104 | Violating these terms may lead to a permanent ban.
105 |
106 | ### 4. Permanent Ban
107 |
108 | **Community Impact**: Demonstrating a pattern of violation of community
109 | standards, including sustained inappropriate behavior, harassment of an
110 | individual, or aggression toward or disparagement of classes of individuals.
111 |
112 | **Consequence**: A permanent ban from any sort of public interaction within the
113 | community.
114 |
115 | ## Attribution
116 |
117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
118 | version 2.1, available at
119 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
120 |
121 | Community Impact Guidelines were inspired by
122 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC].
123 |
124 | For answers to common questions about this code of conduct, see the FAQ at
125 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
126 | [https://www.contributor-covenant.org/translations][translations].
127 |
128 | [homepage]: https://www.contributor-covenant.org
129 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
130 | [Mozilla CoC]: https://github.com/mozilla/diversity
131 | [FAQ]: https://www.contributor-covenant.org/faq
132 | [translations]: https://www.contributor-covenant.org/translations
133 |
--------------------------------------------------------------------------------
/docs/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing Guidelines
2 |
3 | We accept different types of contributions,
4 | including some that don't require you to write a single line of code.
5 |
6 | ## 📝 Types of contributions
7 |
8 | ### Discussions 🎉
9 |
10 | Discussions are where we have conversations.
11 |
12 | If you have a great new idea, or want to share something amazing with the community,
13 | join us in [discussions](https://github.com/weiji14/zen3geo/discussions).
14 |
15 | ### Issues 🐞
16 |
17 | [Issues](https://docs.github.com/en/github/managing-your-work-on-github/about-issues)
18 | are used to track tasks that contributors can help with.
19 |
20 | If you've found something in the content or the website that should be updated,
21 | search open issues to see if someone else has reported the same thing. If it's
22 | something new, [open an issue](https://github.com/weiji14/zen3geo/issues/new/choose)!
23 | We'll use the issue to have a conversation about the problem you want to fix.
24 |
25 | ### Pull requests 🛠️
26 |
27 | A [pull request](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests)
28 | is a way to suggest changes in our repository.
29 |
30 | When we merge those changes, they should be deployed to the live site within a few minutes.
31 | To learn more about opening a pull request in this repo,
32 | see [Opening a pull request](#opening-a-pull-request) below.
33 |
34 | ### Translations 🌏
35 |
36 | 人虽有南北之分,但佛性本无南北。
37 |
38 | Yes, the source content in this repository is mostly written in English,
39 | but we welcome folks from across the world! Please reach out if you have experience in translations and are interested in contributing!
40 |
41 | ---
42 |
43 | ## 👐 Opening a Pull Request
44 |
45 | 1. [Login](https://github.com/login) to your GitHub account,
46 | or sign up for a new one at https://github.com/signup.
47 |
48 | 2. Navigate to the file you want to modify, e.g. the
49 | [API docs file](https://github.com/weiji14/zen3geo/blob/main/docs/api.md).
50 |
51 | 3. Click on the pen 🖊️ icon on the top right corner that says "Edit this file"
52 |
53 | 4. This should bring you to a page similar to
54 | https://github.com/weiji14/zen3geo/edit/main/docs/api.md
55 | where you can make edits to the text using a web-based editor.
56 | Feel free to switch between the "Edit file" and "Preview changes" tabs as
57 | you modify the content to make sure things look ok.
58 |
59 | 5. Once you're happy with your changes, scroll down to the bottom where it says
60 | **Commit changes**. This is where you will add a short summary of the
61 | changes you have made.
62 |
63 | 
64 |
65 | Specifically, in the first box, you will need to give a short title (e.g.
66 | "Fixed typo in api.md file") that describes the changes you've made.
67 | Optionally, you can write a few extra sentences in the second box to explain
68 | things in more detail.
69 |
70 | 6. Select the "Create a new branch for this commit and start a pull request"
71 | option and provide a new branch name (e.g. "fix-api-typo"). What this
72 | does is to ensure your changes are made in an independent manner or 'branch'
73 | away from the main trunk, and those changes will have the opportunity to be
74 | double checked and openly reviewed by other people.
75 |
76 | 7. Click on the green 'Propose changes' button. This will bring you to a new
77 | page.
78 |
79 | 8. Almost there! This "Open a pull request" page is where you can finalize
80 | things for the 'pull request' (a request to make changes) you will be
81 | opening soon. Again you will need to provide a title (e.g. 'Minor changes to
82 | the API markdown file') and a description.
83 |
84 | 
85 |
86 | Be sure to provide any context on **why** you are making the changes, and
87 | **how** you are doing so. This will make it easier for other people to
88 | know what is happening when they review your changes.
89 |
90 | 9. Ready? Click on the green 'Create pull request' button! This will make your
91 | changes available for everyone to see and review publicly. The maintainers
92 | will be notified about your great new addition and will get back to you on
93 | the next steps.
94 |
95 | ---
96 |
97 | (contributing:running:locally)=
98 | ## 🏠 Running things locally
99 |
100 | This project uses [``poetry``](https://python-poetry.org/docs/master/) for
101 | installing Python dependencies required in ``zen3geo``, as well as the
102 | development and documentation-related dependencies.
103 |
104 | ### Cloning the repository ♊
105 |
106 | ```
107 | git clone git@github.com:weiji14/zen3geo.git
108 | cd zen3geo
109 | ```
110 |
111 | ### Setup virtual environment ☁️
112 |
113 | ```
114 | mamba create --name zen3geo python=3.11
115 | mamba activate zen3geo
116 |
117 | pip install poetry==1.6.1
118 | poetry install --extras "raster spatial stac vector"
119 | ```
120 |
121 | ### Building documentation 📖
122 |
123 | ```
124 | poetry install --extras=docs # or `pip install .[docs]`
125 | sudo apt install graphviz # if rendering graphviz plots
126 | jupyter-book build docs/
127 | ```
128 |
129 | Then open ``docs/_build/html/index.html`` in your browser to see the docs.
130 |
131 | ---
132 |
133 | ## 🥳 And that's it!
134 |
135 | You're now part of the zen3geo community ✨
136 |
137 | ```{admonition} Credits
138 | :class: seealso
139 | *This contributing guide was adapted from*
140 | [GitHub docs](https://github.com/github/docs/blob/main/contributing/types-of-contributions.md)
141 | and the [APECS-Earth-Observation/Polar-EO-Database](https://github.com/APECS-Earth-Observation/Polar-EO-Database/blob/main/CONTRIBUTING.md) project.
142 | ```
143 |
--------------------------------------------------------------------------------
/docs/_config.yml:
--------------------------------------------------------------------------------
1 | # Book settings
2 | # Learn more at https://jupyterbook.org/customize/config.html
3 |
4 | title: zen3geo
5 | author: The zen3geo Team
6 |
7 | # Cache execution outputs of notebooks on each build.
8 | # See https://jupyterbook.org/content/execute.html
9 | execute:
10 | execute_notebooks: cache
11 | # https://jupyterbook.org/en/latest/content/execute.html#setting-execution-timeout
12 | timeout: 300
13 |
14 | # Define the name of the latex output file for PDF builds
15 | latex:
16 | latex_documents:
17 | targetname: zen3geo.tex
18 |
19 | # Information about where the book exists on the web
20 | repository:
21 | url: https://github.com/weiji14/zen3geo # Online location of your book
22 | path_to_book: docs # Optional path to your book, relative to the repository root
23 | branch: main # Which branch of the repository should be used when creating links (optional)
24 |
25 | # Add GitHub buttons to your book
26 | # See https://jupyterbook.org/customize/config.html#add-a-link-to-your-repository
27 | html:
28 | use_edit_page_button: true
29 | use_issues_button: true
30 | use_repository_button: true
31 |
32 | sphinx:
33 | config:
34 | autodoc_typehints: 'description'
35 | myst_all_links_external: true
36 | nb_execution_show_tb: true
37 | html_show_copyright: false
38 | html_theme_options:
39 | # https://sphinx-book-theme.readthedocs.io/en/stable/customize/sidebar-secondary.html
40 | show_toc_level: 3
41 | intersphinx_mapping:
42 | contextily:
43 | - 'https://contextily.readthedocs.io/en/latest/'
44 | - null
45 | dask:
46 | - 'https://docs.dask.org/en/latest/'
47 | - null
48 | datashader:
49 | - 'https://datashader.org/'
50 | - null
51 | datatree:
52 | - 'https://xarray-datatree.readthedocs.io/en/latest/'
53 | - null
54 | geopandas:
55 | - 'https://geopandas.org/en/latest/'
56 | - null
57 | mmdetection:
58 | - 'https://mmdetection.readthedocs.io/zh_CN/latest/'
59 | - null
60 | numpy:
61 | - 'https://numpy.org/doc/stable/'
62 | - null
63 | pyogrio:
64 | - 'https://pyogrio.readthedocs.io/en/latest/'
65 | - null
66 | pystac:
67 | - 'https://pystac.readthedocs.io/en/latest/'
68 | - null
69 | pystac_client:
70 | - 'https://pystac-client.readthedocs.io/en/latest/'
71 | - null
72 | python:
73 | - 'https://docs.python.org/3/'
74 | - null
75 | rasterio:
76 | - 'https://rasterio.readthedocs.io/en/stable/'
77 | - null
78 | rioxarray:
79 | - 'https://corteva.github.io/rioxarray/stable/'
80 | - null
81 | shapely:
82 | - 'https://shapely.readthedocs.io/en/latest/'
83 | - null
84 | stackstac:
85 | - 'https://stackstac.readthedocs.io/en/latest/'
86 | - null
87 | torch:
88 | - 'https://pytorch.org/docs/stable/'
89 | - null
90 | torchdata:
91 | - 'https://pytorch.org/data/main/'
92 | - null
93 | torchvision:
94 | - 'https://pytorch.org/vision/main/'
95 | - null
96 | xarray:
97 | - 'https://docs.xarray.dev/en/stable/'
98 | - null
99 | xbatcher:
100 | - 'https://xbatcher.readthedocs.io/en/latest/'
101 | - null
102 | zarr:
103 | - 'https://zarr.readthedocs.io/en/latest/'
104 | - null
105 | extra_extensions:
106 | - 'sphinx.ext.autodoc'
107 | - 'sphinx.ext.intersphinx'
108 | - 'sphinx.ext.napoleon'
109 | - 'sphinx.ext.viewcode'
110 |
--------------------------------------------------------------------------------
/docs/_toc.yml:
--------------------------------------------------------------------------------
1 | # Table of contents
2 | # Learn more at https://jupyterbook.org/customize/toc.html
3 |
4 | format: jb-book
5 | root: index
6 | chapters:
7 | - title: 🦮 Walkthrough
8 | file: walkthrough
9 | sections:
10 | - title: 🀄 Chipping and Batching
11 | file: chipping
12 | - title: 🫧 Vector Segmentation Masks
13 | file: vector-segmentation-masks
14 | - title: 🥡 Object Detection Boxes
15 | file: object-detection-boxes
16 | - title: 🏳️🌈 Stacking layers
17 | file: stacking
18 | - title: 📶 Multi-resolution
19 | file: multi-resolution
20 | - title: 📖 API Reference
21 | file: api
22 | - title: 📆 Changelog
23 | file: changelog
24 | - title: 🫶 Code of Conduct
25 | file: CODE_OF_CONDUCT
26 | - title: 🧑🤝🧑 Contributing
27 | file: CONTRIBUTING
28 |
--------------------------------------------------------------------------------
/docs/api.md:
--------------------------------------------------------------------------------
1 | # API Reference
2 |
3 | ## DataPipes
4 |
5 | ```{eval-rst}
6 | .. automodule:: zen3geo.datapipes
7 | :members:
8 | ```
9 |
10 | ### Datashader
11 |
12 | ```{eval-rst}
13 | .. automodule:: zen3geo.datapipes.datashader
14 | .. autoclass:: zen3geo.datapipes.DatashaderRasterizer
15 | .. autoclass:: zen3geo.datapipes.datashader.DatashaderRasterizerIterDataPipe
16 | .. autoclass:: zen3geo.datapipes.XarrayCanvas
17 | .. autoclass:: zen3geo.datapipes.datashader.XarrayCanvasIterDataPipe
18 | :show-inheritance:
19 | ```
20 |
21 | ### Geopandas
22 |
23 | ```{eval-rst}
24 | .. automodule:: zen3geo.datapipes.geopandas
25 | .. autoclass:: zen3geo.datapipes.GeoPandasRectangleClipper
26 | .. autoclass:: zen3geo.datapipes.geopandas.GeoPandasRectangleClipperIterDataPipe
27 | :show-inheritance:
28 | ```
29 |
30 | ### Pyogrio
31 |
32 | ```{eval-rst}
33 | .. automodule:: zen3geo.datapipes.pyogrio
34 | .. autoclass:: zen3geo.datapipes.PyogrioReader
35 | .. autoclass:: zen3geo.datapipes.pyogrio.PyogrioReaderIterDataPipe
36 | :show-inheritance:
37 | ```
38 |
39 | ### PySTAC
40 |
41 | ```{eval-rst}
42 | .. automodule:: zen3geo.datapipes.pystac
43 | .. autoclass:: zen3geo.datapipes.PySTACItemReader
44 | .. autoclass:: zen3geo.datapipes.pystac.PySTACItemReaderIterDataPipe
45 | :show-inheritance:
46 | ```
47 |
48 | ### PySTAC Client
49 |
50 | ```{eval-rst}
51 | .. automodule:: zen3geo.datapipes.pystac_client
52 | .. autoclass:: zen3geo.datapipes.PySTACAPISearcher
53 | .. autoclass:: zen3geo.datapipes.pystac_client.PySTACAPISearcherIterDataPipe
54 | .. autoclass:: zen3geo.datapipes.PySTACAPIItemLister
55 | .. autoclass:: zen3geo.datapipes.pystac_client.PySTACAPIItemListerIterDataPipe
56 | :show-inheritance:
57 | ```
58 |
59 | ### Rioxarray
60 |
61 | ```{eval-rst}
62 | .. automodule:: zen3geo.datapipes.rioxarray
63 | .. autoclass:: zen3geo.datapipes.RioXarrayReader
64 | .. autoclass:: zen3geo.datapipes.rioxarray.RioXarrayReaderIterDataPipe
65 | :show-inheritance:
66 | ```
67 |
68 | ### Stackstac
69 |
70 | ```{eval-rst}
71 | .. automodule:: zen3geo.datapipes.stackstac
72 | .. autoclass:: zen3geo.datapipes.StackSTACMosaicker
73 | .. autoclass:: zen3geo.datapipes.stackstac.StackSTACMosaickerIterDataPipe
74 | .. autoclass:: zen3geo.datapipes.StackSTACStacker
75 | .. autoclass:: zen3geo.datapipes.stackstac.StackSTACStackerIterDataPipe
76 | :show-inheritance:
77 | ```
78 |
79 | ### Xbatcher
80 |
81 | ```{eval-rst}
82 | .. automodule:: zen3geo.datapipes.xbatcher
83 | .. autoclass:: zen3geo.datapipes.XbatcherSlicer
84 | .. autoclass:: zen3geo.datapipes.xbatcher.XbatcherSlicerIterDataPipe
85 | :show-inheritance:
86 | ```
87 |
88 | ### XpySTAC
89 |
90 | ```{eval-rst}
91 | .. automodule:: zen3geo.datapipes.xpystac
92 | .. autoclass:: zen3geo.datapipes.XpySTACAssetReader
93 | .. autoclass:: zen3geo.datapipes.xpystac.XpySTACAssetReaderIterDataPipe
94 | :show-inheritance:
95 | ```
96 |
--------------------------------------------------------------------------------
/docs/changelog.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | ## Release v0.6.2 (2023/06/29)
4 |
5 | ### 💫 Highlights
6 |
7 | * 🎉 **Patch release for zen3geo** 🎉
8 | * 🚀 Quick addition of a new PySTACAPIItemLister DataPipe
9 |
10 | ### 🚀 Features
11 |
12 | * ✨ PySTACAPIItemLister to list STAC Items matching STAC API search ([#111](https://github.com/weiji14/zen3geo/pull/111))
13 |
14 | ### 🧰 Maintenance
15 |
16 | * ⬆️ Bump poetry from 1.4.2 to 1.5.1 ([#110](https://github.com/weiji14/zen3geo/pull/110))
17 |
18 | ### 🧑🤝🧑 Contributors
19 |
20 | [@dependabot[bot]](https://github.com/apps/dependabot) and [@weiji14](https://github.com/weiji14)
21 |
22 | ---
23 |
24 | ## Release v0.6.1 (2023/05/31)
25 |
26 | ### 💫 Highlights
27 |
28 | * 🎉 **Patch release for zen3geo** 🎉
29 | * 😎 Full Python 3.11 support and a couple of bug fixes for DatashaderRasterizer
30 |
31 | ### 🚀 Features
32 |
33 | * 🥚 Allow using XpySTACAssetReader without xpystac when engine!=stac ([#100](https://github.com/weiji14/zen3geo/pull/100))
34 |
35 | ### 🐛 Bug Fixes
36 |
37 | * 🐛 Fix DatashaderRasterizer for GeoDataFrame wrapped in StreamWrapper ([#104](https://github.com/weiji14/zen3geo/pull/104))
38 | * 🐛 Fix DatashaderRasterizer to allow N:1 instead of just 1:1 ([#98](https://github.com/weiji14/zen3geo/pull/98))
39 |
40 | ### 📖 Documentation
41 |
42 | * 👽️ Handle ms-buildings 20230425 update in Object Detection tutorial ([#106](https://github.com/weiji14/zen3geo/pull/106))
43 |
44 | ### 🧰 Maintenance
45 |
46 | * 👷 NEP29: Run CI and Docs build on Python 3.11 ([#103](https://github.com/weiji14/zen3geo/pull/103))
47 | * ⬆️ Bump poetry from 1.3.0 to 1.4.2 ([#99](https://github.com/weiji14/zen3geo/pull/99))
48 |
49 | ### 🧑🤝🧑 Contributors
50 |
51 | [@dependabot[bot]](https://github.com/apps/dependabot) and [@weiji14](https://github.com/weiji14)
52 |
53 | ---
54 |
55 | ## Release v0.6.0 (2023/04/18)
56 |
57 | ### 💫 Highlights
58 |
59 | * 🎉 **Sixth release of zen3geo** 🎉
60 | * 🚸 Walkthrough on handling multi-resolution climate data ([#91](https://github.com/weiji14/zen3geo/pull/91))
61 |
62 | ### 🚀 Features
63 |
64 | * ✨ XpySTACAssetReader for reading COG, NetCDF & Zarr STAC assets ([#87](https://github.com/weiji14/zen3geo/pull/87))
65 | * ✨ Implement len function for XbatcherSlicerIterDataPipe ([#75](https://github.com/weiji14/zen3geo/pull/75))
66 |
67 | ### 📖 Documentation
68 |
69 | * ♻️ Use xarray.merge with join="override" in collate functions ([#72](https://github.com/weiji14/zen3geo/pull/72))
70 |
71 | ### 🧰 Maintenance
72 |
73 | * ⬆️ Bump jupyter-book from 0.14.0 to 0.15.1 ([#94](https://github.com/weiji14/zen3geo/pull/94))
74 | * 📦️ Publish to TestPyPI and PyPI via OpenID Connect token ([#90](https://github.com/weiji14/zen3geo/pull/90))
75 | * 👷 NEP29: Run Continuous Integration on Python 3.11 ([#89](https://github.com/weiji14/zen3geo/pull/89))
76 | * ⬆️ Bump jupyter-book from 0.13.0 to 0.14.0 ([#85](https://github.com/weiji14/zen3geo/pull/85))
77 | * 📌 Pin maximum python version to <4.0 ([#78](https://github.com/weiji14/zen3geo/pull/78))
78 | * ⬆️ Bump poetry from 1.2.0 to 1.3.0 ([#77](https://github.com/weiji14/zen3geo/pull/77))
79 | * 📌 Pin minimum xbatcher version to 0.2.0 ([#73](https://github.com/weiji14/zen3geo/pull/73))
80 |
81 | ### 🧑🤝🧑 Contributors
82 |
83 | [@dependabot[bot]](https://github.com/apps/dependabot) and [@weiji14](https://github.com/weiji14)
84 |
85 | ---
86 |
87 | ## Release v0.5.0 (2022/09/26)
88 |
89 | ### 💫 Highlights
90 |
91 | * 🎉 **Fifth release of zen3geo** 🎉
92 | * 🚸 Walkthrough on stacking time-series earth observation data ([#62](https://github.com/weiji14/zen3geo/pull/62))
93 |
94 | ### 🚀 Features
95 |
96 | * ✨ StackSTACMosaicIterDataPipe to mosaic tiles into one piece ([#63](https://github.com/weiji14/zen3geo/pull/63))
97 | * ✨ StackSTACStackerIterDataPipe for stacking STAC items ([#61](https://github.com/weiji14/zen3geo/pull/61))
98 | * ✨ PySTACAPISearchIterDataPipe to query dynamic STAC Catalogs ([#59](https://github.com/weiji14/zen3geo/pull/59))
99 | * ✨ PySTACItemReaderIterDataPipe for reading STAC Items ([#46](https://github.com/weiji14/zen3geo/pull/46))
100 |
101 | ### 📖 Documentation
102 |
103 | * 🚚 Rename to PySTACAPISearcher and StackSTACMosaicker ([#64](https://github.com/weiji14/zen3geo/pull/64))
104 |
105 | ### 🧰 Maintenance
106 |
107 | * 📌 Pin min pystac-client and stackstac to v0.4.0, pystac to 1.4.0 ([#66](https://github.com/weiji14/zen3geo/pull/66))
108 | * 📦️ Exclude tests from source distribution and binary wheel ([#58](https://github.com/weiji14/zen3geo/pull/58))
109 |
110 | ### 🧑🤝🧑 Contributors
111 |
112 | [@dependabot[bot]](https://github.com/apps/dependabot) and [@weiji14](https://github.com/weiji14)
113 |
114 | ---
115 |
116 | ## Release v0.4.0 (2022/09/08)
117 |
118 | ### 💫 Highlights
119 |
120 | * 🎉 **Fourth release of zen3geo** 🎉
121 | * 🚸 Walkthrough on object detection with bounding boxes ([#49](https://github.com/weiji14/zen3geo/pull/49))
122 |
123 | ### 🚀 Features
124 |
125 | * ✨ GeoPandasRectangleClipper for spatially subsetting vectors ([#52](https://github.com/weiji14/zen3geo/pull/52))
126 |
127 | ### 📖 Documentation
128 |
129 | * 📝 Add install from conda-forge instructions ([#55](https://github.com/weiji14/zen3geo/pull/55))
130 | * ✏️ Edit docs to use OGC:CRS84 lon/lat instead of EPSG:4326 ([#45](https://github.com/weiji14/zen3geo/pull/45))
131 | * 💡 Warn about overlapping strides if followed by train/val split ([#43](https://github.com/weiji14/zen3geo/pull/43))
132 |
133 | ### 🧰 Maintenance
134 |
135 | * ⬆️ Bump poetry from 1.2.0rc1 to 1.2.0 ([#47](https://github.com/weiji14/zen3geo/pull/47))
136 | * ⬆️ Bump poetry from 1.2.0b3 to 1.2.0rc1 ([#44](https://github.com/weiji14/zen3geo/pull/44))
137 |
138 | ### 🧑🤝🧑 Contributors
139 |
140 | [@dependabot[bot]](https://github.com/apps/dependabot) and [@weiji14](https://github.com/weiji14)
141 |
142 | ---
143 |
144 | ## Release v0.3.0 (2022/08/19)
145 |
146 | ### 💫 Highlights
147 |
148 | * 🎉 **Third release of zen3geo** 🎉
149 | * 🚸 Walkthrough on rasterizing vector polygons into label masks ([#31](https://github.com/weiji14/zen3geo/pull/31))
150 |
151 | ### 🚀 Features
152 |
153 | * ✨ DatashaderRasterizer for burning vector shapes to xarray grids ([#35](https://github.com/weiji14/zen3geo/pull/35))
154 | * ✨ XarrayCanvasIterDataPipe for creating blank datashader canvas ([#34](https://github.com/weiji14/zen3geo/pull/34))
155 | * ♻️ Let PyogrioReader return geodataframe only instead of tuple ([#33](https://github.com/weiji14/zen3geo/pull/33))
156 |
157 | ### 🐛 Bug Fixes
158 |
159 | * ♻️ Refactor DatashaderRasterizer to be up front about datapipe lengths ([#39](https://github.com/weiji14/zen3geo/pull/39))
160 | * 🩹 Raise ModuleNotFoundError when xbatcher not installed ([#37](https://github.com/weiji14/zen3geo/pull/37))
161 |
162 | ### 📖 Documentation
163 |
164 | * 📝 Improve pip install zen3geo instructions with extras dependencies ([#40](https://github.com/weiji14/zen3geo/pull/40))
165 | * 🔍 Show more levels for the in-page table of contents ([#36](https://github.com/weiji14/zen3geo/pull/36))
166 |
167 | ### 🧑🤝🧑 Contributors
168 |
169 | [@weiji14](https://github.com/weiji14)
170 |
171 | ---
172 |
173 | ## Release v0.2.0 (2022/07/17)
174 |
175 | ### 💫 Highlights
176 |
177 | * 🎉 **Second release of zen3geo** 🎉
178 | * 🚸 Walkthrough on creating batches of data chips ([#20](https://github.com/weiji14/zen3geo/pull/20))
179 |
180 | ### 🚀 Features
181 |
182 | * ♻️ Let RioXarrayReader return dataarray only instead of tuple ([#24](https://github.com/weiji14/zen3geo/pull/24))
183 | * ✨ XbatcherSlicerIterDataPipe for slicing xarray.DataArray ([#22](https://github.com/weiji14/zen3geo/pull/22))
184 | * ✨ PyogrioReaderIterDataPipe for reading vector OGR files ([#19](https://github.com/weiji14/zen3geo/pull/19))
185 |
186 | ### 📖 Documentation
187 |
188 | * 🎨 Extra subsection for rioxarray datapipes ([#18](https://github.com/weiji14/zen3geo/pull/18))
189 |
190 | ### 🧰 Maintenance
191 |
192 | * 👷 NEP29: Run CI and Docs build on Python 3.10 ([#29](https://github.com/weiji14/zen3geo/pull/29))
193 | * ⬆️ Bump poetry from 1.2.0b2 to 1.2.0b3 ([#28](https://github.com/weiji14/zen3geo/pull/28))
194 | * 📌 Pin minimum torchdata version to 0.4.0 ([#25](https://github.com/weiji14/zen3geo/pull/25))
195 | * 📌 Pin minimum pyogrio version to 0.4.0 ([#21](https://github.com/weiji14/zen3geo/pull/21))
196 |
197 | ### 🧑🤝🧑 Contributors
198 |
199 | [@weiji14](https://github.com/weiji14)
200 |
201 | ---
202 |
203 | ## Release v0.1.0 (2022/06/08)
204 |
205 | ### 💫 Highlights
206 |
207 | * 🎉 **First release of zen3geo** 🎉
208 | * 🚸 Walkthrough on using RioXarray IterDataPipes at https://zen3geo.readthedocs.io/en/latest/walkthrough.html ([#8](https://github.com/weiji14/zen3geo/pull/8))
209 |
210 | ### 🚀 Features
211 |
212 | * ✨ Introducing RioXarrayReaderIterDataPipe for reading GeoTIFFs ([#6](https://github.com/weiji14/zen3geo/pull/6))
213 |
214 | ### 📖 Documentation
215 |
216 | * 🔧 Configure readthedocs documentation build ([#13](https://github.com/weiji14/zen3geo/pull/13))
217 | * 💬 Show how to convert xarray.DataArray to torch.Tensor ([#9](https://github.com/weiji14/zen3geo/pull/9))
218 | * 📝 Add basic installation instructions ([#7](https://github.com/weiji14/zen3geo/pull/7))
219 | * 👥 Healthy community standards ([#4](https://github.com/weiji14/zen3geo/pull/4))
220 |
221 | ### 🧰 Maintenance
222 |
223 | * 📦 Publish to TestPyPI and PyPI using GitHub Actions ([#14](https://github.com/weiji14/zen3geo/pull/14))
224 | * 🧑💻 Draft changelog with Release Drafter GitHub Actions ([#11](https://github.com/weiji14/zen3geo/pull/11))
225 | * 👷 Setup GitHub Actions Continuous Integration tests ([#2](https://github.com/weiji14/zen3geo/pull/2))
226 | * 🌱 Initialize pyproject.toml file ([#1](https://github.com/weiji14/zen3geo/pull/1))
227 |
228 | ### 🧑🤝🧑 Contributors
229 |
230 | [@weiji14](https://github.com/weiji14)
231 |
--------------------------------------------------------------------------------
/docs/chipping.md:
--------------------------------------------------------------------------------
1 | ---
2 | jupytext:
3 | formats: md:myst
4 | text_representation:
5 | extension: .md
6 | format_name: myst
7 | kernelspec:
8 | display_name: Python 3
9 | language: python
10 | name: python3
11 | ---
12 |
13 | # Chipping and batching data
14 |
15 | > What is separation?
16 | >
17 | > What isn't?
18 |
19 | Following on from the previous tutorial,
20 | let's 🧑🎓 learn more about creating a more complicated 🌈 raster data pipeline.
21 | Specifically, we'll go through the following:
22 | - Loading Cloud-Optimized GeoTIFFs (COGs) from different geographic regions 🌏
23 | - Cut up each large GeoTIFF into several 512 x 512 pixel chips 🥨
24 | - Create batches of chips/tensors to feed into a DataLoader 🏋️
25 |
26 | Some terminology 📜 disambiguation:
27 | - scene - the big image (e.g. 10000x10000 pixels) from a satellite 🛰️ (e.g. a GeoTIFF)
28 | - chip - the small image (e.g. 512x512 pixels) cut ✂️ out from a satellite scene to be loaded as a tensor
29 |
30 | See also:
31 | - https://github.com/microsoft/torchgeo/wiki/Design-Decisions#chip-vs-tile-vs-region
32 | - https://github.com/cogeotiff/cog-spec/blob/master/spec.md
33 |
34 | ## 🎉 **Getting started**
35 |
36 | Load up them libraries!
37 |
38 | ```{code-cell}
39 | import pystac
40 | import planetary_computer
41 | import rioxarray
42 |
43 | import torch
44 | import torchdata
45 | import zen3geo
46 | ```
47 |
48 | ## 0️⃣ Find [Cloud-Optimized GeoTIFFs](https://www.cogeo.org) ☁️
49 |
50 | Synthetic-Aperture Radar (SAR) from a [STAC](https://stacspec.org) catalog!
51 | We'll get some Sentinel-1 Ground-Range Detected (GRD) data over Osaka and Tokyo
52 | in Japan 🇯🇵.
53 |
54 | 🔗 Links:
55 | - [Official Sentinel-1 description page at ESA](https://sentinel.esa.int/web/sentinel/missions/sentinel-1)
56 | - [Microsoft Planetary Computer STAC Explorer](https://planetarycomputer.microsoft.com/explore?c=137.4907%2C35.0014&z=7.94&v=2&d=sentinel-1-grd&s=false%3A%3A100%3A%3Atrue&ae=0&m=cql%3A08211c0dd907a5066c41422c75629d5f&r=VV%2C+VH+False-color+composite)
57 | - [AWS Sentinel-1 Cloud-Optimized GeoTIFFs](https://registry.opendata.aws/sentinel-1)
58 |
59 |
60 | ```{code-cell}
61 | item_urls = [
62 | # Osaka
63 | "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd/items/S1A_IW_GRDH_1SDV_20220614T210034_20220614T210059_043664_05368A",
64 | # Tokyo
65 | "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd/items/S1A_IW_GRDH_1SDV_20220616T204349_20220616T204414_043693_053764",
66 | ]
67 |
68 | # Load each STAC item's metadata and sign the assets
69 | items = [pystac.Item.from_file(item_url) for item_url in item_urls]
70 | signed_items = [planetary_computer.sign(item) for item in items]
71 | signed_items
72 | ```
73 |
74 | ### Inspect one of the data assets 🍱
75 |
76 | The Sentinel-1 STAC item contains several assets.
77 | These include different 〰️ polarizations (e.g. 'VH', 'VV').
78 | Let's just take the 'thumbnail' product for now which is an RGB preview, with
79 | the red 🟥 channel (R) representing the co-polarization (VV or HH), the green
80 | 🟩 channel (G) representing the cross-polarization (VH or HV) and the blue 🟦
81 | channel (B) representing the ratio of the cross and co-polarizations.
82 |
83 | ```{code-cell}
84 | url: str = signed_items[0].assets["thumbnail"].href
85 | da = rioxarray.open_rasterio(filename=url)
86 | da
87 | ```
88 |
89 | This is how the Sentinel-1 radar image looks like over Osaka on 14 June 2022.
90 |
91 | 
92 |
93 | ## 1️⃣ Creating 512x512 chips from large satellite scenes 🪟
94 |
95 | Unless you have a lot of RAM, it is common to cut ✂️ a large satellite scene
96 | into multiple smaller chips (or patches, tiles 🀄, etc) first.
97 | This is typically done in a rolling or sliding window 🪟 fashion,
98 | via a nested loop through the y-dimension and x-dimension in strides of say,
99 | 512 pixels x 512 pixels.
100 |
101 | Let's begin by setting up the first part of the DataPipe,
102 | which is to read the satellite scene 🖼️ using `rioxarray`.
103 |
104 | ```{code-cell}
105 | # Just get the VV polarization for now from Sentinel-1
106 | urls = [item.assets["vv"].href for item in signed_items]
107 | dp = torchdata.datapipes.iter.IterableWrapper(iterable=urls)
108 | dp_rioxarray = dp.read_from_rioxarray(overview_level=3)
109 | dp_rioxarray
110 | ```
111 |
112 | ### Slicing with XbatcherSlicer 🍕
113 |
114 | To create the chips, we'll be using ``xbatcher`` which allows slicing 🔪 of an
115 | n-dimensional datacube along any dimension (e.g. longitude, latitude, time 🕛).
116 | This ``xbatcher`` library is integrated into ☯ ``zen3geo`` as a DataPipe called
117 | {py:class}`zen3geo.datapipes.XbatcherSlicer` (functional name:
118 | `slice_with_xbatcher`), which can be used as follows:
119 |
120 | ```{code-cell}
121 | dp_xbatcher = dp_rioxarray.slice_with_xbatcher(input_dims={"y": 512, "x": 512})
122 | dp_xbatcher
123 | ```
124 |
125 | This should give us about 12 chips in total, 6 from each of the 2 Sentinel-1
126 | images that were passed in.
127 |
128 | ```{code-cell}
129 | print(f"Number of chips: {len(dp_xbatcher)}")
130 | ```
131 |
132 | Now, if you want to customize the sliding window (e.g. do overlapping strides),
133 | pass in extra parameters to ``slice_with_xbatcher``, and it will be handled by
134 | {py:class}`xbatcher.BatchGenerator`.
135 |
136 | ```{code-cell}
137 | dp_xbatcher = dp_rioxarray.slice_with_xbatcher(
138 | input_dims={"y": 512, "x": 512}, input_overlap={"y": 256, "x": 256}
139 | )
140 | dp_xbatcher
141 | ```
142 |
143 | Great, and this overlapping stride method should give us more 512x512 chips 🧮
144 | than before.
145 |
146 | ```{code-cell}
147 | print(f"Number of chips: {len(dp_xbatcher)}")
148 | ```
149 |
150 | Double-check that single chips are of the correct dimensions
151 | (band: 1, y: 512, x: 512).
152 |
153 | ```{code-cell}
154 | chips = list(dp_xbatcher)
155 | sample = chips[0]
156 | sample
157 | ```
158 |
159 | ```{danger}
160 | Please do not use overlapping strides (i.e. a non-zero `input_overlap`) if
161 | you will be 🪓 splitting your chips into training, validation and test sets
162 | later! If you have say 60 overlapping chips and then go on to divide those 🍪
163 | chips randomly into train/val/test sets of 30/20/10, you will have information
164 | leakage 🚰 between the 30 training chips and 20 validation plus 10 test chips,
165 | so your model's reported validation and test metrics 📈 will be overestimating
166 | the actual performance 😲!
167 |
168 | Ideally, your train/val/test chips should be situated independently within
169 | spatially contiguous blocks 🧱. See these links for more information on why:
170 |
171 | - Kattenborn, T., Schiefer, F., Frey, J., Feilhauer, H., Mahecha, M. D., &
172 | Dormann, C. F. (2022). Spatially autocorrelated training and validation
173 | samples inflate performance assessment of convolutional neural networks.
174 | ISPRS Open Journal of Photogrammetry and Remote Sensing, 5, 100018.
175 | https://doi.org/10.1016/j.ophoto.2022.100018
176 | - https://github.com/pangeo-data/xbatcher/discussions/78#discussioncomment-3387295
177 |
178 | Yes, spatial statistics 🧮 matter, geography is special 🤓.
179 | ```
180 |
181 |
182 | ## 2️⃣ Pool chips into mini-batches ⚙️
183 |
184 | In total, we now have a set of 30 🍪 chips of size 512 x 512 pixels each.
185 | These chips can be divided into batches that are of a reasonable size.
186 | Let's use {py:class}`torchdata.datapipes.iter.Batcher`
187 | (functional name: `batch`) to do so.
188 |
189 | ```{code-cell}
190 | dp_batch = dp_xbatcher.batch(batch_size=10)
191 | print(f"Number of items in first batch: {len(list(dp_batch)[0])}")
192 | ```
193 |
194 | Now each batch will have 10 chips of size 512 x 512, with
195 | each chip being an {py:class}``xarray.DataArray``.
196 |
197 | ```{note}
198 | Notice how no mosaicking nor reprojection was done for the two satellite
199 | scenes. This is the beauty of zen3geo - full flexibility of combining
200 | geospatial datasets 😎. Respect the native coordinate system and let the data
201 | flow directly into your models!
202 |
203 | Oh, and to be super clear, of the 3 batches of 10 chips each:
204 | - The first batch has 10 chips are from the 1st satellite scene over Osaka
205 | - The second batch has 5 chips over Osaka, and 5 chips over Tokyo
206 | - The third batch has 10 chips from the 2nd satellite scene over Tokyo
207 | ```
208 |
209 | ### Stack many chips in mini-batches into a single tensor 🥞
210 |
211 | Let's now stack all these chips into a single tensor per batch, with a
212 | (number, channel, height, width) shape like (10, 1, 512, 512). We'll need a
213 | custom 🪄 collate function to do the conversion
214 | (from {py:class}``xarray.DataArray`` to {py:class}``torch.Tensor``) and
215 | stacking.
216 |
217 | ```{code-cell}
def xr_collate_fn(samples) -> torch.Tensor:
    """
    Convert each xarray.DataArray in ``samples`` to a ``torch.Tensor`` of
    int16 dtype, and stack them all along a new leading dimension into a
    single ``torch.Tensor``.
    """
    chip_tensors = []
    for sample in samples:
        chip_tensors.append(torch.as_tensor(sample.data.astype("int16")))
    return torch.stack(chip_tensors)
227 | ```
228 |
229 | Then, pass this collate function to
230 | {py:class}`torchdata.datapipes.iter.Collator` (functional name: `collate`).
231 |
232 | ```{code-cell}
233 | dp_collate = dp_batch.collate(collate_fn=xr_collate_fn)
234 | print(f"Number of mini-batches: {len(dp_collate)}")
235 | print(f"Mini-batch tensor shape: {list(dp_collate)[0].shape}")
236 | ```
237 |
238 | ### Into a DataLoader 🏋️
239 |
240 | One more thing 🍎, throw the DataPipe into
241 | {py:class}`torch.utils.data.DataLoader`!
242 | Set `batch_size` to `None`, since we've handled the batching manually in the
243 | above sections already.
244 |
245 | ```{code-cell}
246 | dataloader = torch.utils.data.DataLoader(dataset=dp_collate, batch_size=None)
247 | for i, batch in enumerate(dataloader):
248 | tensor = batch
249 | print(f"Batch {i}: {tensor.shape}")
250 | ```
251 |
252 | Lights, camera, action 💥
253 |
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | # ☯ *zen3geo* - The 🌏 data science library you've been waiting for
2 |
3 | ## Installation
4 |
5 | Get what you need, not more, not less:
6 |
7 | | Command | Dependencies |
8 | |:-------------------------------|---------------|
9 | | `pip install zen3geo` | rioxarray, torchdata |
10 | | `pip install zen3geo[raster]` | rioxarray, torchdata, xbatcher, zarr |
11 | | `pip install zen3geo[spatial]` | rioxarray, torchdata, datashader, spatialpandas |
12 | | `pip install zen3geo[stac]` | rioxarray, torchdata, pystac, pystac-client, stackstac, xpystac |
13 | | `pip install zen3geo[vector]` | rioxarray, torchdata, pyogrio[geopandas] |
14 |
15 | Retrieve more ['extras'](https://github.com/weiji14/zen3geo/blob/main/pyproject.toml) using
16 |
17 | pip install zen3geo[raster,spatial,stac,vector]
18 |
19 | To install the development version from [TestPyPI](https://test.pypi.org/project/zen3geo), do:
20 |
21 | pip install --pre --extra-index-url https://test.pypi.org/simple/ zen3geo
22 |
23 | May [conda-forge](https://anaconda.org/conda-forge/zen3geo) be with you,
24 | though optional dependencies it has not.
25 |
26 | mamba install --channel conda-forge zen3geo
27 |
28 | For the eager ones, {ref}`contributing <contributing:running:locally>` will take you further.
29 |
--------------------------------------------------------------------------------
/docs/multi-resolution.md:
--------------------------------------------------------------------------------
1 | ---
2 | jupytext:
3 | formats: md:myst
4 | text_representation:
5 | extension: .md
6 | format_name: myst
7 | kernelspec:
8 | display_name: Python 3
9 | language: python
10 | name: python3
11 | ---
12 |
13 | # Multi-resolution
14 |
15 | > On top of a hundred foot pole you linger
16 | >
17 | > Clinging to the first mark of the scale
18 | >
19 | > How do you proceed higher?
20 | >
21 | > It will take more than a leap of faith
22 |
23 | Earth Observation 🛰️ and climate projection 🌡️ data can be captured at
24 | different levels of detail. In this lesson, we'll work with a multitude of
25 | spatial resolutions 📏, learning to respect the ground sampling distance or
26 | native resolution 🔬 of the physical variable being measured, while 🪶
27 | minimizing memory usage. By the end of the lesson, you should be able to:
28 |
29 | - Find 🔍 low and high spatial resolution climate datasets and load them from
30 | {doc}`Zarr ` stores
31 | - Stack 🥞 and subset time-series datasets with different spatial resolutions
32 | stored in a hierarchical {py:class}`datatree.DataTree` structure
33 | - Slice 🔪 the multi-resolution dataset along the time-axis into monthly bins
34 |
35 | 🔗 Links:
36 | - https://carbonplan.org/research/cmip6-downscaling-explainer
37 | - https://github.com/carbonplan/cmip6-downscaling/blob/1.0/notebooks/accessing_data_example.ipynb
38 | - https://github.com/xarray-contrib/xbatcher/issues/93
39 |
40 |
41 | ## 🎉 **Getting started**
42 |
43 | These are the tools 🛠️ you'll need.
44 |
45 | ```{code-cell}
46 | import matplotlib.pyplot as plt
47 | import pandas as pd
48 | import torchdata.dataloader2
49 | import xarray as xr
50 | import xpystac
51 | import zen3geo
52 |
53 | from datatree import DataTree
54 | ```
55 |
56 | ## 0️⃣ Find climate model datasets 🪸
57 |
58 | The two datasets we'll be working with are 🌐 gridded climate projections, one
59 | that is in its original low 🔅 spatial resolution, and another one of a
60 | higher 🔆 spatial resolution. Specifically, we'll be looking at the maximum
61 | temperature 🌡️ (tasmax) variable from one of the Coupled Model Intercomparison
62 | Project Phase 6 (CMIP6) global coupled ocean-atmosphere general circulation
63 | model (GCM) 💨 outputs that is of low-resolution (67.5 arcminute), and a
64 | super-resolution product from DeepSD 🤔 that is of a higher resolution (15
65 | arcminute).
66 |
67 | ```{note}
68 | The following tutorial will mostly use the term super-resolution 🔭 from
69 | Computer Vision instead of downscaling ⏬. It's just that the term
70 | downscaling ⏬ (going from low to high resolution) can get confused with
71 | downsampling 🙃 (going from high to low resolution), whereas
72 | super-resolution 🔭 is unambiguously about going from low 🔅 to high 🔆
73 | resolution.
74 | ```
75 |
76 | 🔖 References:
77 | - https://carbonplan.org/research/cmip6-downscaling
78 | - https://github.com/tjvandal/deepsd
79 | - https://tutorial.xarray.dev/intermediate/cmip6-cloud.html
80 |
81 | ```{code-cell}
82 | lowres_raw = "https://cpdataeuwest.blob.core.windows.net/cp-cmip/cmip6/ScenarioMIP/MRI/MRI-ESM2-0/ssp585/r1i1p1f1/Amon/tasmax/gn/v20191108"
83 | highres_deepsd = "https://cpdataeuwest.blob.core.windows.net/cp-cmip/version1/data/DeepSD/ScenarioMIP.MRI.MRI-ESM2-0.ssp585.r1i1p1f1.month.DeepSD.tasmax.zarr"
84 | ```
85 |
86 | This is how the projected maximum temperature 🥵 for August 2089 looks like over
87 | South Asia 🪷 for a low-resolution 🔅 Global Climate Model (left) and a
88 | high-resolution 🔆 downscaled product (right).
89 |
90 | ```{code-cell}
91 | :tags: [hide-input]
92 | # Zarr datasets from https://github.com/carbonplan/research/blob/d05d148fd716ba6304e3833d765069dd890eaf4a/articles/cmip6-downscaling-explainer/components/downscaled-data.js#L97-L122
93 | ds_gcm = xr.open_dataset(
94 | filename_or_obj="https://cmip6downscaling.blob.core.windows.net/vis/article/fig1/regions/india/gcm-tasmax.zarr"
95 | )
96 | ds_gcm -= 273.15 # convert from Kelvin to Celsius
97 | ds_downscaled = xr.open_dataset(
98 | filename_or_obj="https://cmip6downscaling.blob.core.windows.net/vis/article/fig1/regions/india/downscaled-tasmax.zarr"
99 | )
100 | ds_downscaled -= 273.15 # convert from Kelvin to Celsius
101 |
102 | # Plot projected maximum temperature over South Asia from GCM and GARD-MV
103 | fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 3), sharey=True)
104 |
105 | img1 = ds_gcm.tasmax.plot.imshow(
106 | ax=ax[0], cmap="inferno", vmin=16, vmax=48, add_colorbar=False
107 | )
108 | ax[0].set_title("Global Climate Model (67.5 arcminute)")
109 |
110 | img2 = ds_downscaled.tasmax.plot.imshow(
111 | ax=ax[1], cmap="inferno", vmin=16, vmax=48, add_colorbar=False
112 | )
113 | ax[1].set_title("Downscaled result (15 arcminute)")
114 |
115 | cbar = fig.colorbar(mappable=img1, ax=ax.ravel().tolist(), extend="both")
116 | cbar.set_label(label="Daily Max Near-Surface Air\nTemperature in Aug 2089 (°C)")
117 |
118 | plt.show()
119 | ```
120 |
121 | ### Load Zarr stores 📦
122 |
123 | The {doc}`Zarr ` stores 🧊 can be loaded into an
124 | {py:class}`xarray.Dataset` via {py:class}`zen3geo.datapipes.XpySTACAssetReader`
125 | (functional name: ``read_from_xpystac``) with the `engine="zarr"` keyword
126 | argument.
127 |
128 | ```{code-cell}
129 | dp_lowres = torchdata.datapipes.iter.IterableWrapper(iterable=[lowres_raw])
130 | dp_highres = torchdata.datapipes.iter.IterableWrapper(iterable=[highres_deepsd])
131 |
132 | dp_lowres_dataset = dp_lowres.read_from_xpystac(engine="zarr", chunks="auto")
133 | dp_highres_dataset = dp_highres.read_from_xpystac(engine="zarr", chunks="auto")
134 | ```
135 |
136 | ### Inspect the climate datasets 🔥
137 |
138 | Let's now preview 👀 the low-resolution 🔅 and high-resolution 🔆 temperature
139 | datasets.
140 |
141 | ```{code-cell}
142 | it = iter(dp_lowres_dataset)
143 | ds_lowres = next(it)
144 | ds_lowres
145 | ```
146 |
147 | ```{code-cell}
148 | it = iter(dp_highres_dataset)
149 | ds_highres = next(it)
150 | ds_highres
151 | ```
152 |
153 | Notice that the low-resolution 🔅 dataset has lon/lat pixels of shape
154 | (320, 160), whereas the high-resolution 🔆 dataset is of shape (1440, 720). So
155 | there has been a 4.5x increase 📈 in spatial resolution going from the raw GCM
156 | 🌐 grid to the super-resolution 🔭 DeepSD grid.
157 |
158 | ### Shift from 0-360 to -180-180 🌐
159 |
160 | A sharp eye 👁️ would have noticed that the longitudinal range of the
161 | low-resolution 🔅 and high-resolution 🔆 dataset are offset ↔️ by 180°, going
162 | from 0° to 360° and -180° to +180° respectively. Let's shift the coordinates 📍
163 | of the low-resolution grid 🌍 from 0-360 to -180-180 using a custom
164 | {py:class}`torchdata.datapipes.iter.Mapper` (functional name: `map`) function.
165 |
166 | 🔖 References:
167 | - https://discourse.pangeo.io/t/handling-slicing-with-circular-longitude-coordinates-in-xarray/1608/3
168 | - https://gis.stackexchange.com/questions/416091/converting-a-netcdf-from-0-to-360-to-180-to-180-via-xarray
169 |
170 | ```{code-cell}
171 | def shift_longitude_360_to_180(ds: xr.Dataset) -> xr.Dataset:  # Shift longitude coordinates from the 0-360 range to the -180-180 range
172 |     ds = ds.assign_coords(lon=(((ds.lon + 180) % 360) - 180))  # remap each longitude value into [-180, 180)
173 |     ds = ds.roll(lon=int(len(ds.lon) / 2), roll_coords=True)  # rotate by half the grid width so lon is monotonic again; assumes the grid spans exactly 0-360 starting at 0 — TODO confirm
174 |     return ds
175 | ```
176 |
177 | ```{code-cell}
178 | dp_lowres_dataset_180 = dp_lowres_dataset.map(fn=shift_longitude_360_to_180)
179 | dp_lowres_dataset_180
180 | ```
181 |
182 | Double check that the low-resolution 🔅 grid's longitude coordinates 🔢 are now
183 | in the -180° to +180° range.
184 |
185 | ```{code-cell}
186 | it = iter(dp_lowres_dataset_180)
187 | ds_lowres_180 = next(it)
188 | ds_lowres_180
189 | ```
190 |
191 |
192 | ## Spatiotemporal stack and subset 🍱
193 |
194 | Following on from {doc}`./stacking` where multiple 🥞 layers with the **same**
195 | spatial resolution were stacked together into an {py:class}`xarray.DataArray`
196 | object, this section will teach 🧑🏫 you about stacking datasets with
197 | **different** spatial resolutions 📶 into a {py:class}`datatree.DataTree`
198 | object that has a nested/hierarchical structure. That
199 | {py:class}`datatree.DataTree` can then be subsetted 🥮 to the desired spatial
200 | and temporal extent in one go 😎.
201 |
202 | ### Stack multi-resolution datasets 📚
203 |
204 | First, we'll need to combine 🪢 the low-resolution GCM and high-resolution
205 | DeepSD {py:class}`xarray.Dataset` objects into a tuple 🎵 using
206 | {py:class}`torchdata.datapipes.iter.Zipper` (functional name: `zip`).
207 |
208 | ```{code-cell}
209 | dp_lowres_highres = dp_lowres_dataset_180.zip(dp_highres_dataset)
210 | dp_lowres_highres
211 | ```
212 |
213 | Next, use {py:class}`torchdata.datapipes.iter.Collator` (functional name:
214 | `collate`) to convert 🤸 the tuple of {py:class}`xarray.Dataset` objects into
215 | an {py:class}`datatree.DataTree` 🎋, similar to what was done in
216 | {doc}`./stacking`. Note that we'll only take the 'tasmax' ♨️ (Daily Maximum
217 | Near-Surface Air Temperature) {py:class}`xarray.DataArray` variable from each
218 | of the {py:class}`xarray.Dataset` objects.
219 |
220 | ```{code-cell}
221 | def multires_collate_fn(lowres_and_highres: tuple[xr.Dataset, xr.Dataset]) -> DataTree:
222 |     """
223 |     Combine a pair of xarray.Dataset (lowres, highres) inputs into a
224 |     datatree.DataTree with groups named 'lowres' and 'highres'.
225 |     """
226 |     # Turn 2 xr.Dataset objects into 1 xr.DataTree with multiple groups
227 |     ds_lowres, ds_highres = lowres_and_highres
228 | 
229 |     # Create DataTree with lowres and highres groups
230 |     datatree: DataTree = DataTree.from_dict(
231 |         d={"lowres": ds_lowres.tasmax, "highres": ds_highres.tasmax}  # keep only the 'tasmax' DataArray from each Dataset
232 |     )
233 | 
234 |     return datatree
235 | ```
236 |
237 | ```{code-cell}
238 | dp_datatree = dp_lowres_highres.collate(collate_fn=multires_collate_fn)
239 | dp_datatree
240 | ```
241 |
242 | See the nested 🪆 structure of the {py:class}`datatree.DataTree`. The
243 | low-resolution 🔅 GCM and high-resolution 🔆 DeepSD outputs have been placed in
244 | separate groups 🖖.
245 |
246 | ```{code-cell}
247 | it = iter(dp_datatree)
248 | datatree = next(it)
249 | datatree
250 | ```
251 |
252 | ### Subset multi-resolution layers 🥮
253 |
254 | The climate model outputs above are global 🗺️ ones covering a timespan from
255 | January 2015 to December 2100 📅. If you're only interested in a particular
256 | region 🌏 or timespan ⌚, then the {py:class}`datatree.DataTree` will need to
257 | be trimmed 💇 down. Let's use {py:meth}`datatree.DataTree.sel` to subset the
258 | multi-resolution data to just the Philippines 🇵🇭 for the period 2015 to 2030.
259 |
260 | ```{code-cell}
261 | def spatiotemporal_subset(dt: DataTree) -> DataTree:  # Trim the DataTree down to the Philippines for the 2015-2030 period
262 |     dt_subset = dt.sel(
263 |         lon=slice(116.4375, 126.5625),  # longitude range covering the Philippines
264 |         lat=slice(5.607445, 19.065325),  # latitude range, low to high; assumes lat coordinates are ascending — TODO confirm
265 |         time=slice("2015-01-01", "2030-12-31"),  # 16-year timespan
266 |     )
267 |     return dt_subset
269 |
270 | ```{code-cell}
271 | dp_datatree_subset = dp_datatree.map(fn=spatiotemporal_subset)
272 | dp_datatree_subset
273 | ```
274 |
275 | Inspect the subsetted climate dataset 🕵️
276 |
277 | ```{code-cell}
278 | it = iter(dp_datatree_subset)
279 | datatree_subset = next(it)
280 | datatree_subset
281 | ```
282 |
283 | Let's plot the projected temperature 🌡️ for Dec 2030 over the Philippine
284 | Archipelago to ensure things look ok.
285 |
286 | ```{code-cell}
287 | ds_lowres = (
288 | datatree_subset["lowres/tasmax"]
289 | .sel(time=slice("2030-12-01", "2030-12-31"))
290 | .squeeze()
291 | )
292 | ds_lowres -= 273.15 # convert from Kelvin to Celsius
293 | ds_highres = (
294 | datatree_subset["highres/tasmax"]
295 | .sel(time=slice("2030-12-01", "2030-12-31"))
296 | .squeeze()
297 | )
298 | ds_highres -= 273.15 # convert from Kelvin to Celsius
299 |
300 | # Plot projected maximum temperature over the Philippines from GCM and DeepSD
301 | fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 8), sharey=True)
302 |
303 | img1 = ds_lowres.plot.imshow(
304 | ax=ax[0], cmap="inferno", vmin=22, vmax=33, add_colorbar=False
305 | )
306 | ax[0].set_title("Global Climate Model (67.5 arcminute)")
307 |
308 | img2 = ds_highres.plot.imshow(
309 | ax=ax[1], cmap="inferno", vmin=22, vmax=33, add_colorbar=False
310 | )
311 | ax[1].set_title("DeepSD output (15 arcminute)")
312 |
313 | cbar = fig.colorbar(mappable=img1, ax=ax.ravel().tolist(), extend="max")
314 | cbar.set_label(label="Daily Max Near-Surface Air\nTemperature in Dec 2030 (°C)")
315 |
316 | plt.show()
317 | ```
318 |
319 | ```{important}
320 | When slicing ✂️ different spatial resolution grids, put some 🧠 thought into the
321 | process. Do some 🧮 math to ensure the coordinates of the bounding box (min/max
322 | lon/lat) cut through the pixels exactly at the 📐 pixel boundaries whenever
323 | possible.
324 |
325 | If your multi-resolution 📶 layers have spatial resolutions that are
326 | round multiples ✖️ of each other (e.g. 10m, 20m, 60m), it is advisable to align
327 | 🎯 the pixel corners, such that the high-resolution 🔆 pixels fit within the
328 | low-resolution 🔅 pixels (e.g. one 20m pixel should contain four 10m pixels).
329 | This can be done by resampling 🖌️ or interpolating the grid (typically the
330 | higher resolution one) onto a new reference frame 🖼️.
331 |
332 | For datasets ℹ️ that come from different sources and need to be reprojected 🔁,
333 | you can do the reprojection and pixel alignment in a single step 🔂. Be extra
334 | careful about resampling, as certain datasets (e.g. complex SAR 📡 data that
335 | has been collected off-nadir) may require special 🌷 treatment.
336 | ```
337 |
338 |
339 | ## Time to slice again ⌛
340 |
341 | So, we now have a {py:class}`datatree.DataTree` with two 💕 groups/nodes called
342 | 'lowres' and 'highres' that have tensor shapes `(lat: 12, lon: 9, time: 192)`
343 | and `(lat: 54, lon: 40, time: 192)` respectively. While the time dimension ⏱️
344 | is of the same length, the timestamp values between the low-resolution 🔅 GCM
345 | and high-resolution 🔆 DeepSD output are different. Specifically, the GCM
346 | output dates at the middle of the month 📅, while the DeepSD output has dates
347 | at the start of the month. Let's see how this can be handled 🫖.
348 |
349 | ### Slicing by month 🗓️
350 |
351 | Assuming that the roughly two week offset ↔️ between the monthly resolution GCM
352 | and DeepSD time-series is negligible 🤏, we can split the dataset on the time
353 | dimension at the start/end of each month 📆. Let's write a function and use
354 | {py:class}`torchdata.datapipes.iter.FlatMapper` (functional name: `flatmap`)
355 | for this.
356 |
357 | ```{code-cell}
358 | def split_on_month(dt: DataTree, node: str = "highres/tasmax") -> "Iterator[DataTree]":
359 |     """
360 |     Return a slice of data for every month in a datatree.DataTree time-series.
361 |     """
362 |     for t in dt[node].time.to_pandas():  # one timestamp per month, taken from the `node` group's time axis
363 |         dt_slice = dt.sel(
364 |             time=slice(t + pd.offsets.MonthBegin(0), t + pd.offsets.MonthEnd(0))  # n=0 offsets snap t to the first/last day of its month; assumes `node` timestamps fall on month starts — TODO confirm
365 |         )
366 |         yield dt_slice.squeeze(dim="time")  # drop the now length-1 time dimension
368 |
369 | ```{code-cell}
370 | dp_datatree_timeslices = dp_datatree_subset.flatmap(fn=split_on_month)
371 | dp_datatree_timeslices
372 | ```
373 |
374 | The datapipe should yield a {py:class}`datatree.DataTree` with just one
375 | month's 📅 worth of temperature 🌡️ data per iteration.
376 |
377 | ```{code-cell}
378 | it = iter(dp_datatree_timeslices)
379 | datatree_timeslice = next(it)
380 | datatree_timeslice
381 | ```
382 |
383 | ```{seealso}
384 | Those interested in slicing multi-resolution arrays spatially can keep an eye
385 | on the 🚧 ongoing implementation at
386 | https://github.com/xarray-contrib/xbatcher/pull/171 and the discussion at
387 | https://github.com/xarray-contrib/xbatcher/issues/93. This 🧑🏫 tutorial will be
388 | updated ♻️ once there's a clean way to generate multi-resolution
389 | {py:class}`datatree.DataTree` slices in a newer release of
390 | {doc}`xbatcher ` 😉
391 | ```
392 |
393 | Visualize the final DataPipe graph ⛓️.
394 |
395 | ```{code-cell}
396 | torchdata.datapipes.utils.to_graph(dp=dp_datatree_timeslices)
397 | ```
398 |
399 | ### Into a DataLoader 🏋️
400 |
401 | Ready to populate the {py:class}`torchdata.dataloader2.DataLoader2` 🏭!
402 |
403 | ```{code-cell}
404 | dataloader = torchdata.dataloader2.DataLoader2(datapipe=dp_datatree_timeslices)
405 | for i, batch in enumerate(dataloader):
406 | ds_lowres = batch["lowres/tasmax"]
407 | ds_highres = batch["highres/tasmax"]
408 | print(f"Batch {i} - lowres: {ds_lowres.shape}, highres: {ds_highres.shape}")
409 | if i > 8:
410 | break
411 | ```
412 |
413 | Do super-resolution, but make no illusion 🧚
414 |
415 | ```{seealso}
416 | Credits to [CarbonPlan](https://github.com/carbonplan) for making the code and
417 | data for their
418 | [CMIP6 downscaling](https://github.com/carbonplan/cmip6-downscaling) work
419 | openly available. Find out more at
420 | https://docs.carbonplan.org/cmip6-downscaling!
421 | ```
422 |
--------------------------------------------------------------------------------
/docs/object-detection-boxes.md:
--------------------------------------------------------------------------------
1 | ---
2 | jupytext:
3 | formats: md:myst
4 | text_representation:
5 | extension: .md
6 | format_name: myst
7 | kernelspec:
8 | display_name: Python 3
9 | language: python
10 | name: python3
11 | ---
12 |
13 | # Object detection boxes
14 |
15 | > *You shouldn't set up limits in boundless openness,
16 | > but if you set up limitlessness as boundless openness,
17 | > you've trapped yourself*
18 |
19 | Boxes are quick to draw ✏️, but finicky to train a neural network with.
20 | This time, we'll show you a geospatial object detection 🕵️ problem, where the
21 | objects are defined by a bounding box 🔲 with a specific class.
22 | By the end of this lesson, you should be able to:
23 |
24 | - Read OGR supported vector files and obtain the bounding boxes 🟨 of each
25 | geometry
26 | - Convert bounding boxes from geographic coordinates to 🖼️ image coordinates
27 | while clipping to the image extent
28 | - Use an affine transform to convert boxes in image coordinates to 🌐
29 | geographic coordinates
30 |
31 | 🔗 Links:
32 | - https://planetarycomputer.microsoft.com/dataset/ms-buildings#Example-Notebook
33 | - https://github.com/microsoft/GlobalMLBuildingFootprints/
34 | - https://mlhub.earth/datasets?tags=object+detection
35 |
36 | ## 🎉 **Getting started**
37 |
38 | These are the tools 🛠️ you'll need.
39 |
40 | ```{code-cell}
41 | import contextily
42 | import numpy as np
43 | import geopandas as gpd
44 | import matplotlib.patches
45 | import matplotlib.pyplot as plt
46 | import pandas as pd
47 | import planetary_computer
48 | import pystac_client
49 | import rioxarray
50 | import shapely.affinity
51 | import shapely.geometry
52 | import torch
53 | import torchdata
54 | import torchdata.dataloader2
55 | import xarray as xr
56 | import zen3geo
57 | ```
58 |
59 | ## 0️⃣ Find high-resolution imagery and building footprints 🌇
60 |
61 | Let's take a look at buildings over
62 | [Kampong Ayer](https://en.wikipedia.org/wiki/Kampong_Ayer), Brunei 🇧🇳! We'll
63 | use {py:func}`contextily.bounds2img` to get some 4-band RGBA
64 | 🌈 [optical imagery](https://www.arcgis.com/home/item.html?id=10df2279f9684e4a9f6a7f08febac2a9)
65 | in a {py:class}`numpy.ndarray` format.
66 |
67 | ```{code-cell}
68 | image, extent = contextily.bounds2img(
69 | w=114.94,
70 | s=4.88,
71 | e=114.95,
72 | n=4.89,
73 | ll=True,
74 | source=contextily.providers.Esri.WorldImagery,
75 | )
76 | print(f"Spatial extent in EPSG:3857: {extent}")
77 | print(f"Image dimensions (height, width, channels): {image.shape}")
78 | ```
79 |
80 | This is how Brunei's 🚣 Venice of the East looks like from above.
81 |
82 | ```{code-cell}
83 | fig, ax = plt.subplots(nrows=1, figsize=(9, 9))
84 | plt.imshow(X=image, extent=extent)
85 | ```
86 |
87 | ```{tip}
88 | For more raster basemaps, check out:
89 | - https://xyzservices.readthedocs.io/en/stable/introduction.html#overview-of-built-in-providers
90 | - https://leaflet-extras.github.io/leaflet-providers/preview/
91 | ```
92 |
93 | ### Georeference image using rioxarray 🌐
94 |
95 | To enable slicing 🔪 with xbatcher later, we'll need to turn the
96 | {py:class}`numpy.ndarray` image 🖼️ into an {py:class}`xarray.DataArray` grid
97 | with coordinates 🖼️. If you already have a georeferenced grid (e.g. from
98 | {py:class}`zen3geo.datapipes.RioXarrayReader`), this step can be skipped ⏭️.
99 |
100 |
101 | ```{code-cell}
102 | # Turn RGBA image from channel-last to channel-first and get 3-band RGB only
103 | _image = image.transpose(2, 0, 1) # Change image from (H, W, C) to (C, H, W)
104 | rgb_image = _image[0:3, :, :] # Get just RGB by dropping RGBA's alpha channel
105 | print(f"RGB image shape: {rgb_image.shape}")
106 | ```
107 |
108 | Georeferencing is done by putting the 🚦 RGB image into an
109 | {py:class}`xarray.DataArray` object with (band, y, x) coordinates, and then
110 | setting a coordinate reference system 📐 using
111 | {py:meth}`rioxarray.rioxarray.XRasterBase.write_crs`.
112 |
113 | ```{code-cell}
114 | left, right, bottom, top = extent # xmin, xmax, ymin, ymax
115 | dataarray = xr.DataArray(
116 | data=rgb_image,
117 | coords=dict(
118 | band=[0, 1, 2], # Red, Green, Blue
119 | y=np.linspace(start=top, stop=bottom, num=rgb_image.shape[1]),
120 | x=np.linspace(start=left, stop=right, num=rgb_image.shape[2]),
121 | ),
122 | dims=("band", "y", "x"),
123 | )
124 | dataarray = dataarray.rio.write_crs(input_crs="EPSG:3857")
125 | dataarray
126 | ```
127 |
128 | ### Load cloud-native vector files 💠
129 |
130 | Now to pull in some building footprints 🛖. Let's make a STAC API query to get
131 | a [GeoParquet](https://github.com/opengeospatial/geoparquet) file (a
132 | cloud-native columnar 🀤 geospatial vector file format) that intersects our
133 | study area.
134 |
135 | ```{code-cell}
136 | catalog = pystac_client.Client.open(
137 | url="https://planetarycomputer.microsoft.com/api/stac/v1",
138 | modifier=planetary_computer.sign_inplace,
139 | )
140 | search = catalog.search(
141 | collections=["ms-buildings"],
142 | query={"msbuildings:region": {"eq": "Brunei"}},
143 | intersects=shapely.geometry.box(minx=114.94, miny=4.88, maxx=114.95, maxy=4.89),
144 | )
145 | item = next(search.items())
146 | item
147 | ```
148 |
149 | ```{note}
150 | Accessing the building footprint STAC Assets from Planetary Computer will
151 | require signing 🔏 the URL. This can be done with a `modifier` function in the
152 | {py:meth}`pystac_client.Client.open` call. See also 'Automatically modifying
153 | results' under {doc}`PySTAC-Client Usage `.
154 | ```
155 |
156 | Next, we'll load ⤵️ the GeoParquet file using
157 | {py:func}`geopandas.read_parquet`.
158 |
159 | ```{code-cell}
160 | asset = item.assets["data"]
161 |
162 | geodataframe = gpd.read_parquet(
163 | path=asset.href, storage_options=asset.extra_fields["table:storage_options"]
164 | )
165 | geodataframe
166 | ```
167 |
168 | This {py:class}`geopandas.GeoDataFrame` contains building outlines across
169 | Brunei 🇧🇳 that intersects and extends beyond our study area. Let's do a spatial
170 | subset ✂️ to just the Kampong Ayer study area using
171 | {py:attr}`geopandas.GeoDataFrame.cx`, and reproject the polygon coordinates
172 | using {py:meth}`geopandas.GeoDataFrame.to_crs` to match the coordinate
173 | reference system of the optical image.
174 |
175 | ```{code-cell}
176 | _gdf_kpgayer = geodataframe.cx[114.94:114.95, 4.88:4.89]
177 | gdf_kpgayer = _gdf_kpgayer.to_crs(crs="EPSG:3857")
178 | gdf_kpgayer
179 | ```
180 |
181 | Preview 👀 the building footprints to check that things are in the right place.
182 |
183 | ```{code-cell}
184 | ax = gdf_kpgayer.plot(figsize=(9, 9))
185 | contextily.add_basemap(
186 | ax=ax,
187 | source=contextily.providers.CartoDB.Voyager,
188 | crs=gdf_kpgayer.crs.to_string(),
189 | )
190 | ax
191 | ```
192 |
193 | Cool, we see that there are some buildings on water as expected 😁.
194 |
195 |
196 | ## 1️⃣ Pair image chips with bounding boxes 🧑🤝🧑
197 |
198 | Here comes the fun 🛝 part! This section is all about generating 128x128 chips
199 | 🫶 paired with bounding boxes. Let's go 🚲!
200 |
201 | ### Create 128x128 raster chips and clip vector geometries with it ✂️
202 |
203 | From the large 1280x1280 scene 🖽️, we will first slice out a hundred 128x128
204 | chips 🍕 using {py:class}`zen3geo.datapipes.XbatcherSlicer` (functional name:
205 | `slice_with_xbatcher`).
206 |
207 | ```{code-cell}
208 | dp_raster = torchdata.datapipes.iter.IterableWrapper(iterable=[dataarray])
209 | dp_xbatcher = dp_raster.slice_with_xbatcher(input_dims={"y": 128, "x": 128})
210 | dp_xbatcher
211 | ```
212 |
213 | For each 128x128 chip 🍕, we'll then find the vector geometries 🌙 that fit
214 | within the chip's spatial extent. This will be 🤸 done using
215 | {py:class}`zen3geo.datapipes.GeoPandasRectangleClipper` (functional name:
216 | `clip_vector_with_rectangle`).
217 |
218 | ```{code-cell}
219 | dp_vector = torchdata.datapipes.iter.IterableWrapper(iterable=[gdf_kpgayer])
220 | dp_clipped = dp_vector.clip_vector_with_rectangle(mask_datapipe=dp_xbatcher)
221 | dp_clipped
222 | ```
223 |
224 | ```{important}
225 | When using {py:class}`zen3geo.datapipes.GeoPandasRectangleClipper` 💇, there
226 | should only be one 'global' 🌐 vector {py:class}`geopandas.GeoSeries` or
227 | {py:class}`geopandas.GeoDataFrame`.
228 |
229 | If your raster DataPipe has chips 🍕 with different coordinate reference
230 | systems (e.g. multiple UTM Zones 🌏🌍🌎),
231 | {py:class}`zen3geo.datapipes.GeoPandasRectangleClipper` will actually reproject
232 | 🔄 the 'global' vector to the coordinate reference system of each chip, and
233 | clip ✂️ the geometries accordingly to the chip's bounding box extent 😎.
234 | ```
235 |
236 | This ``dp_clipped`` DataPipe will yield 🤲 a tuple of ``(vector, raster)``
237 | objects for each 128x128 chip. Let's inspect 🧐 one to see how they look like.
238 |
239 | ```{code-cell}
240 | # Get one chip with over 10 building footprint geometries
241 | for vector, raster in dp_clipped:
242 | if len(vector) > 10:
243 | break
244 | ```
245 |
246 | These are the spatially subsetted vector geometries 🌙 in one 128x128 chip.
247 |
248 | ```{code-cell}
249 | vector
250 | ```
251 |
252 | This is the raster chip/mask 🤿 used to clip the vector.
253 |
254 | ```{code-cell}
255 | raster
256 | ```
257 |
258 | And here's a side by side visualization of the 🌈 RGB chip image (left) and
259 | 🔷 vector building footprint polygons (right).
260 |
261 | ```{code-cell}
262 | fig, ax = plt.subplots(ncols=2, figsize=(18, 9), sharex=True, sharey=True)
263 | raster.plot.imshow(ax=ax[0])
264 | vector.plot(ax=ax[1])
265 | ```
266 |
267 | Cool, these buildings are part of the 🏬
268 | [Yayasan Shopping Complex](https://web.archive.org/web/20220906020248/http://www.yayasancomplex.com)
269 | in Bandar Seri Begawan 🌆. We can see that the raster image 🖼️ on the left
270 | aligns ok with the vector polygons 💠 on the right.
271 |
272 | ```{note}
273 | The optical 🛰️ imagery shown here is **not** the imagery used to digitize the
274 | [building footprints](https://planetarycomputer.microsoft.com/dataset/ms-buildings)
275 | 🏢! This is an example tutorial using two different data sources, that we just
276 | so happened to have plotted in the same geographic space 😝.
277 | ```
278 |
279 | ### From polygons in geographic coordinates to boxes in image coordinates ↕️
280 |
281 | Up to this point, we still have the actual 🛖 building footprint polygons. In
282 | this step 📶, we'll convert these polygons into a format suitable for 'basic'
283 | object detection 🥅 models in computer vision. Specifically:
284 |
285 | 1. The polygons 🌙 (with multiple vertices) will be simplified to a horizontal
286 | bounding box 🔲 with 4 corner vertices only.
287 | 2. The 🌐 geographic coordinates of the box which use lower left corner and
288 | upper right corner (i.e. y increases from South to North ⬆️) will be
289 | converted to 🖼️ image coordinates (0-128) which use the top left corner and
290 | bottom right corner (i.e y increases from Top to Bottom ⬇️).
291 |
292 | Let's start by using {py:attr}`geopandas.GeoSeries.bounds` to get the
293 | geographic bounds 🗺️ of each building footprint geometry 📐 in each 128x128
294 | chip.
295 |
296 | ```{code-cell}
297 | def polygon_to_bbox(geom_and_chip: tuple[gpd.GeoDataFrame, xr.DataArray]) -> tuple[pd.DataFrame, xr.DataArray]:
298 |     """
299 |     Get bounding box (minx, miny, maxx, maxy) coordinates for each geometry in
300 |     a geopandas.GeoDataFrame.
301 | 
302 |                   (maxx,maxy)
303 |          ul-------ur
304 |          ^        |
305 |          |  geo   |    y increases going up, x increases going right
306 |          y        |
307 |          ll-------lr
308 |     (minx,miny)   x-->
309 | 
310 |     """
311 |     gdf, chip = geom_and_chip
312 |     bounds: pd.DataFrame = gdf.bounds  # GeoDataFrame.bounds returns a plain pandas.DataFrame of per-geometry bounds
313 |     assert tuple(bounds.columns) == ("minx", "miny", "maxx", "maxy")  # column order relied upon downstream
314 | 
315 |     return bounds, chip
316 | ```
317 |
318 | ```{code-cell}
319 | dp_bbox = dp_clipped.map(fn=polygon_to_bbox)
320 | ```
321 |
322 | Next, the geographic 🗺️ bounding box coordinates (in EPSG:3857) will be
323 | converted to image 🖼️ or pixel coordinates (0-128 scale). The y-direction will
324 | be flipped 🤸 upside down, and we'll be using the spatial bounds (or corner
325 | coordinates) of the 128x128 image chip as a reference 📍.
326 |
327 | ```{code-cell}
328 | def geobox_to_imgbox(bbox_and_chip: tuple[pd.DataFrame, xr.DataArray]) -> tuple[pd.DataFrame, xr.DataArray]:
329 |     """
330 |     Convert bounding boxes in a pandas.DataFrame from geographic coordinates
331 |     (minx, miny, maxx, maxy) to image coordinates (x1, y1, x2, y2) based on the
332 |     spatial extent of a raster image chip.
333 | 
334 |     (x1,y1)
335 |          ul-------ur
336 |        y |        |
337 |        | |  img   |    y increases going down, x increases going right
338 |        v |        |
339 |          ll-------lr
340 |          x-->  (x2,y2)
341 | 
342 |     """
343 |     geobox, chip = bbox_and_chip
344 | 
345 |     x_res, y_res = chip.rio.resolution()
346 |     assert y_res < 0  # north-up raster: y-resolution is negative (y decreases going down the rows)
347 | 
348 |     left, bottom, right, top = chip.rio.bounds()
349 |     assert top > bottom
350 | 
351 |     imgbox = pd.DataFrame()
352 |     imgbox["x1"] = (geobox.minx - left) / x_res  # left
353 |     imgbox["y1"] = (top - geobox.maxy) / -y_res  # top
354 |     imgbox["x2"] = (geobox.maxx - left) / x_res  # right
355 |     imgbox["y2"] = (top - geobox.miny) / -y_res  # bottom
356 | 
357 |     assert all(imgbox.x2 > imgbox.x1)  # every box must have positive width in image space
358 |     assert all(imgbox.y2 > imgbox.y1)  # every box must have positive height in image space
359 | 
360 |     return imgbox, chip
361 | ```
362 |
363 | ```{code-cell}
364 | dp_ibox = dp_bbox.map(fn=geobox_to_imgbox)
365 | ```
366 |
367 | Now to plot 🎨 and double check that the boxes are positioned correctly in
368 | 0-128 image space 🌌.
369 |
370 | ```{code-cell}
371 | # Get one chip with over 10 building footprint geometries
372 | for ibox, ichip in dp_ibox:
373 | if len(ibox) > 10:
374 | break
375 | ibox
376 | ```
377 |
378 | ```{code-cell}
379 | fig, ax = plt.subplots(ncols=2, figsize=(18, 9), sharex=True, sharey=True)
380 | ax[0].imshow(X=ichip.transpose("y", "x", "band"))
381 | for i, row in ibox.iterrows():
382 | rectangle = matplotlib.patches.Rectangle(
383 | xy=(row.x1, row.y1),
384 | width=row.x2 - row.x1,
385 | height=row.y2 - row.y1,
386 | edgecolor="blue",
387 | linewidth=1,
388 | facecolor="none",
389 | )
390 | ax[1].add_patch(rectangle)
391 | ```
392 |
393 | Cool, the 🟦 bounding boxes on the right subplot are correctly positioned 🧭
394 | (compare it with the figure in the previous subsection).
395 |
396 | ```{hint}
397 | Instead of a bounding box 🥡 object detection task, you can also use the
398 | building polygons 🏘️ for a segmentation task 🧑🎨 following
399 | {doc}`./vector-segmentation-masks`.
400 |
401 | If you still prefer doing object detection 🕵️, but want a different box format
402 | (see options in {py:func}`torchvision.ops.box_convert`),
403 | like 🎌 centre-based coordinates with width and height (`cxcywh`), or
404 | 📨 oriented/rotated bounding box coordinates, feel free to implement your own
405 | function and DataPipe for it 🤗!
406 | ```
407 |
408 |
409 | ## 2️⃣ There and back again 🧙
410 |
411 | What follows on from here requires focus 🤫. To start, we'll pool the hundred
412 | 💯 128x128 chips into 10 batches (10 chips per batch) using
413 | {py:class}`torchdata.datapipes.iter.Batcher` (functional name: `batch`).
414 |
415 | ```{code-cell}
416 | dp_batch = dp_ibox.batch(batch_size=10)
417 | print(f"Number of items in first batch: {len(list(dp_batch)[0])}")
418 | ```
419 |
420 | ### Batch boxes with variable lengths 📏
421 |
422 | Next, we'll stack 🥞 all the image chips into a single tensor (recall
423 | {doc}`./chipping`), and concatenate 📚 the bounding boxes into a list of
424 | tensors using {py:class}`torchdata.datapipes.iter.Collator` (functional name:
425 | `collate`).
426 |
427 | ```{code-cell}
428 | def boximg_collate_fn(samples: list[tuple[pd.DataFrame, xr.DataArray]]) -> tuple[list[torch.Tensor], torch.Tensor, list[dict]]:
429 |     """
430 |     Converts bounding boxes and raster images to tensor objects and keeps
431 |     geographic metadata (spatial extent, coordinate reference system and
432 |     spatial resolution).
433 | 
434 |     Specifically, the bounding boxes in pandas.DataFrame format are each
435 |     converted to a torch.Tensor and collated into a list, while the raster
436 |     images in xarray.DataArray format are converted to a torch.Tensor (int16
437 |     dtype) and stacked into a single torch.Tensor.
438 |     """
439 |     box_tensors: list[torch.Tensor] = [  # boxes per chip vary in count, so keep a list rather than stacking
440 |         torch.as_tensor(sample[0].to_numpy(dtype=np.float32)) for sample in samples
441 |     ]
442 | 
443 |     tensors: list[torch.Tensor] = [
444 |         torch.as_tensor(data=sample[1].data.astype(dtype="int16")) for sample in samples
445 |     ]
446 |     img_tensors = torch.stack(tensors=tensors)  # chips share the same shape, so stacking into one batch tensor works
447 | 
448 |     metadata: list[dict] = [  # keep georeferencing info so predicted boxes can be mapped back to geographic space
449 |         {
450 |             "bbox": sample[1].rio.bounds(),
451 |             "crs": sample[1].rio.crs,
452 |             "resolution": sample[1].rio.resolution(),
453 |         }
454 |         for sample in samples
455 |     ]
456 | 
457 |     return box_tensors, img_tensors, metadata
458 | ```
459 |
460 | ```{code-cell}
461 | dp_collate = dp_batch.collate(collate_fn=boximg_collate_fn)
462 | print(f"Number of mini-batches: {len(dp_collate)}")
463 | mini_batch_box, mini_batch_img, mini_batch_metadata = list(dp_collate)[1]
464 | print(f"Mini-batch image tensor shape: {mini_batch_img.shape}")
465 | print(f"Mini-batch box tensors: {mini_batch_box}")
466 | print(f"Mini-batch metadata: {mini_batch_metadata}")
467 | ```
468 |
469 | The DataPipe is complete 🙌, let's visualize the entire data pipeline graph.
470 |
471 | ```{code-cell}
472 | torchdata.datapipes.utils.to_graph(dp=dp_collate)
473 | ```
474 |
475 | ### Into a DataLoader 🏋️
476 |
477 | Loop over the DataPipe using {py:class}`torch.utils.data.DataLoader` ⚙️!
478 |
479 | ```{code-cell}
480 | dataloader = torchdata.dataloader2.DataLoader2(datapipe=dp_collate)
481 | for i, batch in enumerate(dataloader):
482 | box, img, metadata = batch
483 | print(f"Batch {i} - img: {img.shape}, box sizes: {[len(b) for b in box]}")
484 | ```
485 |
486 | There's probably hundreds of models you can 🍜 feed this data into, from
487 | mmdetection's {doc}`mmdetection:model_zoo` 🐼 to torchvision's
488 | {doc}`torchvision:models`. But are we out of the woods yet?
489 |
490 | ### Georeference image boxes 📍
491 |
492 | To turn the model's predicted bounding boxes in image space 🌌 back to
493 | geographic coordinates 🌐, you'll need to use an
494 | [affine transform](https://web.archive.org/web/20210506173651/https://www.perrygeo.com/python-affine-transforms.html).
495 | Assuming you've kept your 🏷️ metadata intact, here's an example on how to do
496 | the georeferencing:
497 |
498 | ```{code-cell}
499 | for batch in dataloader:
500 | pred_boxes, images, metadata = batch
501 |
502 | objs: list = []
503 | for idx in range(0, len(images)):
504 | left, bottom, right, top = metadata[idx]["bbox"]
505 | crs = metadata[idx]["crs"]
506 | x_res, y_res = metadata[idx]["resolution"]
507 |
508 | gdf = gpd.GeoDataFrame(
509 | geometry=[
510 | shapely.affinity.affine_transform(
511 | geom=shapely.geometry.box(*coords),
512 | matrix=[x_res, 0, 0, y_res, left, top],
513 | )
514 | for coords in pred_boxes[idx]
515 | ],
516 | crs=crs,
517 | )
518 | objs.append(gdf.to_crs(crs=crs))
519 |
520 | geodataframe: gpd.GeoDataFrame = pd.concat(objs=objs, ignore_index=True)
521 | geodataframe.set_crs(crs=crs, inplace=True)
522 | break
523 |
524 | geodataframe
525 | ```
526 |
527 | Back at square one, or are we?
528 |
--------------------------------------------------------------------------------
/docs/vector-segmentation-masks.md:
--------------------------------------------------------------------------------
1 | ---
2 | jupytext:
3 | formats: md:myst
4 | text_representation:
5 | extension: .md
6 | format_name: myst
7 | kernelspec:
8 | display_name: Python 3
9 | language: python
10 | name: python3
11 | ---
12 |
13 | # Vector segmentation masks
14 |
15 | > *Clouds float by, water flows on;
16 | > in movement there is no grasping, in Chan there is no settling*
17 |
18 | For 🧑🏫 supervised machine learning, labels 🏷️ are needed in addition to the
19 | input image 🖼️. Here, we'll step through an example workflow on matching vector
20 | 🚏 label data (points, lines, polygons) to 🛰️ Earth Observation data inputs.
21 | Specifically, this tutorial will cover:
22 |
23 | - Reading shapefiles 📁 directly from the web via {doc}`pyogrio `
24 | - Rasterizing vector polygons from a {py:class}`geopandas.GeoDataFrame` to an {py:class}`xarray.DataArray`
25 | - Pairing 🛰️ satellite images with the rasterized label masks and feeding them into a DataLoader
26 |
27 |
28 | ## 🎉 **Getting started**
29 |
30 | These are the tools 🛠️ you'll need.
31 |
32 | ```{code-cell}
33 | import matplotlib.pyplot as plt
34 | import numpy as np
35 | import planetary_computer
36 | import pyogrio
37 | import pystac
38 | import torch
39 | import torchdata
40 | import xarray as xr
41 | import zen3geo
42 | ```
43 |
44 | ## 0️⃣ Find cloud-hosted raster and vector data ⛳
45 |
46 | In this case study, we'll look at the flood water extent over the Narathiwat Province
47 | in Thailand 🇹🇭 and the Northern Kelantan State in Malaysia 🇲🇾 on 04 Jan 2017 that were
48 | digitized by 🇺🇳 UNITAR-UNOSAT's rapid mapping service over Synthetic Aperture Radar
49 | (SAR) 🛰️ images. Specifically, we'll be using the 🇪🇺 Sentinel-1 Ground Range Detected
50 | (GRD) product's VV polarization channel.
51 |
52 | 🔗 Links:
53 | - https://www.unitar.org/maps
54 | - https://unitar.org/maps/all-maps
55 | - [Microsoft Planetary Computer STAC Explorer](https://planetarycomputer.microsoft.com/explore?c=102.7555%2C5.7222&z=7.92&v=2&d=sentinel-1-grd&m=cql%3Afdba821238c1a390e7c75d7ced805b2e&r=VV%2C+VH+False-color+composite&s=false%3A%3A100%3A%3Atrue&sr=desc&ae=0)
56 |
57 | To start, let's get the 🛰️ satellite scene we'll be using for this tutorial.
58 |
59 | ```{code-cell}
60 | item_url = "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd/items/S1A_IW_GRDH_1SDV_20170104T225443_20170104T225512_014688_017E5D"
61 |
62 | # Load the individual item metadata and sign the assets
63 | item = pystac.Item.from_file(item_url)
64 | signed_item = planetary_computer.sign(item)
65 | signed_item
66 | ```
67 |
68 | This is how the Sentinel-1 🩻 image looks like over Southern Thailand / Northern
69 | Peninsular Malaysia on 04 Jan 2017.
70 |
71 | 
72 |
73 | ### Load and reproject image data 🔄
74 |
75 | To keep things simple, we'll load just the VV channel into a DataPipe via
76 | {py:class}`zen3geo.datapipes.RioXarrayReader` (functional name:
77 | `read_from_rioxarray`) 😀.
78 |
79 | ```{code-cell}
80 | url = signed_item.assets["vv"].href
81 | dp = torchdata.datapipes.iter.IterableWrapper(iterable=[url])
82 | # Reading lower resolution grid using overview_level=3
83 | dp_rioxarray = dp.read_from_rioxarray(overview_level=3)
84 | dp_rioxarray
85 | ```
86 |
87 | The Sentinel-1 image from Planetary Computer comes in longitude/latitude 🌐
88 | geographic coordinates by default (OGC:CRS84). To make the pixels more equal 🔲
89 | area, we can project it to a 🌏 local projected coordinate system instead.
90 |
91 | ```{code-cell}
def reproject_to_local_utm(dataarray: xr.DataArray, resolution: float = 80.0) -> xr.DataArray:
    """
    Reproject an xarray.DataArray grid from OGC:CRS84 to a local UTM coordinate
    reference system.

    Parameters
    ----------
    dataarray : xr.DataArray
        Input raster grid in OGC:CRS84 (longitude/latitude) coordinates.
    resolution : float
        Spatial resolution (in metres) of the output UTM grid. Default is 80.0.

    Returns
    -------
    xr.DataArray
        The input grid reprojected to the estimated local UTM coordinate
        reference system.
    """
    # Estimate the UTM coordinate reference system from a single pixel only
    # (the original code computed `pixel` but then reprojected the *entire*
    # grid just to estimate the CRS, leaving `pixel` unused).
    # NOTE(review): for a scene straddling a UTM zone boundary, the zone
    # estimated from this corner pixel could differ from one estimated from
    # the scene centroid — confirm this is acceptable for the data used here.
    pixel = dataarray.isel(y=slice(0, 1), x=slice(0, 1))
    new_crs = pixel.rio.reproject(dst_crs="OGC:CRS84").rio.estimate_utm_crs()

    return dataarray.rio.reproject(dst_crs=new_crs, resolution=resolution)
102 | ```
103 |
104 | ```{code-cell}
105 | dp_reprojected = dp_rioxarray.map(fn=reproject_to_local_utm)
106 | ```
107 |
108 | ```{note}
109 | Universal Transverse Mercator (UTM) isn't actually an equal-area projection
110 | system. However, Sentinel-1 🛰️ satellite scenes from Copernicus are usually
111 | distributed in a UTM coordinate reference system, and UTM is typically a close
112 | enough 🤏 approximation to the local geographic area, or at least it won't
113 | matter much when we're looking at spatial resolutions over several 10s of
114 | metres 🙂.
115 | ```
116 |
117 | ```{hint}
118 | For those wondering what `OGC:CRS84` is, it is the longitude/latitude version
119 | of [`EPSG:4326`](https://epsg.io/4326) 🌐 (latitude/longitude). I.e., it's a
120 | matter of axis order, with `OGC:CRS84` being x/y and `EPSG:4326` being y/x.
121 |
122 | 🔖 References:
123 | - https://gis.stackexchange.com/questions/54073/what-is-crs84-projection
124 | - https://github.com/opengeospatial/geoparquet/issues/52
125 | ```
126 |
127 | ### Transform and visualize raster data 🔎
128 |
129 | Let's visualize 👀 the Sentinel-1 image, but before that, we'll transform 🔄
130 | the VV data from linear to [decibel](https://en.wikipedia.org/wiki/Decibel)
131 | scale.
132 |
133 | ```{code-cell}
def linear_to_decibel(dataarray: xr.DataArray) -> xr.DataArray:
    """
    Convert the input xarray.DataArray's VV or VH backscatter values from
    linear power to decibel scale, i.e. ``10 * log_10(x)``.
    """
    # Zero-valued pixels become NaN first, keeping np.log10 well defined
    masked = dataarray.where(cond=dataarray != 0)
    return 10 * np.log10(masked)
143 | ```
144 |
145 | ```{code-cell}
146 | dp_decibel = dp_reprojected.map(fn=linear_to_decibel)
147 | dp_decibel
148 | ```
149 |
150 | As an aside, we'll be using the Sentinel-1 image datapipe twice later, once as
151 | a template to create a blank canvas 🎞️, and another time by itself 🪞. This
152 | requires forking 🍴 the DataPipe into two branches, which can be achieved using
153 | {py:class}`torchdata.datapipes.iter.Forker` (functional name: `fork`).
154 |
155 | ```{code-cell}
156 | dp_decibel_canvas, dp_decibel_image = dp_decibel.fork(num_instances=2)
157 | dp_decibel_canvas, dp_decibel_image
158 | ```
159 |
160 | Now to visualize the transformed Sentinel-1 image 🖼️. Let's zoom in 🔭 to one
161 | of the analysis extent areas we'll be working on later.
162 |
163 | ```{code-cell}
164 | it = iter(dp_decibel_image)
165 | dataarray = next(it)
166 |
167 | da_clip = dataarray.rio.clip_box(minx=125718, miny=523574, maxx=326665, maxy=722189)
168 | da_clip.isel(band=0).plot.imshow(figsize=(11.5, 9), cmap="Blues_r", vmin=18, vmax=26)
169 | ```
170 |
171 | Notice how the darker blue areas 🔵 tend to correlate more with water features
172 | like the meandering rivers and the 🐚 sea on the NorthEast. This is because the
173 | SAR 🛰️ signal which is side looking reflects off flat water bodies like a
174 | mirror 🪞, with little energy getting reflected 🙅 back directly to the sensor
175 | (hence why it looks darker ⚫).
176 |
177 | ### Load and visualize cloud-hosted vector files 💠
178 |
179 | Let's now load some vector data from the web 🕸️. These are polygons of the
180 | segmented 🌊 water extent digitized by UNOSAT's AI Based Rapid Mapping Service.
181 | We'll be converting these vector polygons to 🌈 raster masks later.
182 |
183 | 🔗 Links:
184 | - https://github.com/UNITAR-UNOSAT/UNOSAT-AI-Based-Rapid-Mapping-Service
185 | - [UNOSAT link to polygon dataset](https://unosat.org/products/2460)
186 | - [Disaster Risk Monitoring Using Satellite Imagery online course](https://courses.nvidia.com/courses/course-v1:DLI+S-ES-01+V1)
187 |
188 | ```{code-cell}
189 | # https://gdal.org/user/virtual_file_systems.html#vsizip-zip-archives
190 | shape_url = "/vsizip/vsicurl/https://web.archive.org/web/20240411214446/https://unosat.org/static/unosat_filesystem/2460/FL20170106THA_SHP.zip/ST20170104_SatelliteDetectedWaterAndSaturatedSoil.shp"
191 | ```
192 |
193 | This is a shapefile containing 🔷 polygons of the mapped water extent. Let's
194 | put it into a DataPipe called {py:class}`zen3geo.datapipes.PyogrioReader`
195 | (functional name: ``read_from_pyogrio``).
196 |
197 | ```{code-cell}
198 | dp_shapes = torchdata.datapipes.iter.IterableWrapper(iterable=[shape_url])
199 | dp_pyogrio = dp_shapes.read_from_pyogrio()
200 | dp_pyogrio
201 | ```
202 |
203 | This will take care of loading the shapefile into a
204 | {py:class}`geopandas.GeoDataFrame` object. Let's take a look at the data table
205 | 📊 to see what attributes are inside.
206 |
207 | ```{code-cell}
208 | it = iter(dp_pyogrio)
209 | geodataframe = next(it)
210 | geodataframe.dropna(axis="columns")
211 | ```
212 |
213 | Cool, and we can also visualize the polygons 🔷 on a 2D map. To align the
214 | coordinates with the 🛰️ Sentinel-1 image above, we'll first use
215 | {py:meth}`geopandas.GeoDataFrame.to_crs` to reproject the vector from 🌐
216 | EPSG:9707 (WGS 84 + EGM96 height, latitude/longitude) to 🌏 EPSG:32648 (UTM
217 | Zone 48N).
218 |
219 | ```{code-cell}
220 | print(f"Original bounds in EPSG:9707:\n{geodataframe.bounds}")
221 | gdf = geodataframe.to_crs(crs="EPSG:32648")
222 | print(f"New bounds in EPSG:32648:\n{gdf.bounds}")
223 | ```
224 |
225 | Plot it with {py:meth}`geopandas.GeoDataFrame.plot`. This vector map 🗺️ should
226 | correspond to the zoomed in Sentinel-1 image plotted earlier above.
227 |
228 | ```{code-cell}
229 | gdf.plot(figsize=(11.5, 9))
230 | ```
231 |
232 | ```{tip}
233 | Make sure to understand your raster and vector datasets well first! Open the
234 | files up in your favourite 🌐 Geographic Information System (GIS) tool, see how
235 | they actually look like spatially. Then you'll have a better idea to decide on
236 | how to create your data pipeline. The zen3geo way puts you as the Master 🧙 in
237 | control.
238 | ```
239 |
240 |
241 | ## 1️⃣ Create a canvas to paint on 🎨
242 |
243 | In this section, we'll work on converting the flood water 🌊 polygons above
244 | from a 🚩 vector to a 🌈 raster format, i.e. rasterization. This will be done
245 | in two steps 📶:
246 |
247 | 1. Defining a blank canvas 🎞️
248 | 2. Paint the polygons onto this blank canvas 🧑🎨
249 |
250 | For this, we'll be using tools from {py:meth}`zen3geo.datapipes.datashader`.
251 | Let's see how this can be done.
252 |
253 | ### Blank canvas from template raster 🖼️
254 |
255 | A canvas represents a 2D area with a height and a width 📏. For us, we'll be
256 | using a {py:class}`datashader.Canvas`, which also defines the range of y-values
257 | (ymin to ymax) and x-values (xmin to xmax), essentially coordinates for
258 | every unit 🇾 height and 🇽 width.
259 |
260 | Since we already have a Sentinel-1 🛰️ raster grid with defined height/width
261 | and y/x coordinates, let's use it as a 📄 template to define our canvas. This
262 | is done via {py:class}`zen3geo.datapipes.XarrayCanvas` (functional name:
263 | ``canvas_from_xarray``).
264 |
265 | ```{code-cell}
266 | dp_canvas = dp_decibel_canvas.canvas_from_xarray()
267 | dp_canvas
268 | ```
269 |
270 | Cool, and here's a quick inspection 👀 of the canvas dimensions and metadata.
271 |
272 | ```{code-cell}
273 | it = iter(dp_canvas)
274 | canvas = next(it)
275 | print(f"Canvas height: {canvas.plot_height}, width: {canvas.plot_width}")
276 | print(f"Y-range: {canvas.y_range}")
277 | print(f"X-range: {canvas.x_range}")
278 | print(f"Coordinate reference system: {canvas.crs}")
279 | ```
280 |
281 | This information should match the template Sentinel-1 dataarray 🏁.
282 |
283 | ```{code-cell}
284 | print(f"Dimensions: {dict(dataarray.sizes)}")
285 | print(f"Affine transform: {dataarray.rio.transform()}")
286 | print(f"Bounding box: {dataarray.rio.bounds()}")
287 | print(f"Coordinate reference system: {dataarray.rio.crs}")
288 | ```
289 |
290 | ### Rasterize vector polygons onto canvas 🖌️
291 |
292 | Now's the time to paint or rasterize the
293 | vector {py:class}`geopandas.GeoDataFrame` polygons 🔷 onto the blank
294 | {py:class}`datashader.Canvas`! This would enable us to have a direct pixel-wise
295 | X -> Y mapping ↔️ between the Sentinel-1 image (X) and target flood label (Y).
296 |
297 | The vector polygons can be rasterized or painted 🖌️ onto the template canvas
298 | using {py:class}`zen3geo.datapipes.DatashaderRasterizer` (functional name:
299 | ``rasterize_with_datashader``).
300 |
301 | ```{code-cell}
302 | dp_datashader = dp_canvas.rasterize_with_datashader(vector_datapipe=dp_pyogrio)
303 | dp_datashader
304 | ```
305 |
306 | This will turn the vector {py:class}`geopandas.GeoDataFrame` into a
307 | raster {py:class}`xarray.DataArray` grid, with the spatial coordinates and
308 | bounds matching exactly with the template Sentinel-1 image 😎.
309 |
310 | ```{note}
311 | Since we have just one Sentinel-1 🛰️ image and one raster 💧 flood
312 | mask, we have an easy 1:1 mapping. There are two other scenarios supported by
313 | {py:class}`zen3geo.datapipes.DatashaderRasterizer`:
314 |
315 | 1. N:1 - Many {py:class}`datashader.Canvas` objects to one vector
316 | {py:class}`geopandas.GeoDataFrame`. The single vector geodataframe will be
317 | broadcasted to match the length of the canvas list. This is useful for
318 | situations when you have a 🌐 'global' vector database that you want to pair
319 | with multiple 🛰️ satellite images.
320 | 2. N:N - Many {py:class}`datashader.Canvas` objects to many vector
321 | {py:class}`geopandas.GeoDataFrame` objects. In this case, the list of grids
322 | **must** ❗ have the same length as the list of vector geodataframes. E.g.
323 | if you have 5 grids, there must also be 5 vector files. This is so that a
324 | 1:1 pairing can be done, useful when each raster tile 🖽 has its own
325 | associated vector annotation.
326 | ```
327 |
328 | ```{seealso}
329 | For more details on how rasterization of polygons work behind the scenes 🎦,
330 | check out {doc}`Datashader `'s documentation on:
331 |
332 | - {doc}`The datashader pipeline `
333 | (especially the section on Aggregation).
334 | - {doc}`Rendering large collections of polygons `
335 | ```
336 |
337 |
338 | ## 2️⃣ Combine and conquer ⚔️
339 |
340 | So far, we've got two datapipes that should be 🧑🤝🧑 paired up in an X -> Y
341 | manner:
342 |
343 | 1. The pre-processed Sentinel-1 🌈 raster image in ``dp_decibel_image``
344 | 2. The rasterized 💧 flood segmentation masks in ``dp_datashader``
345 |
346 | One way to get these two pieces in a Machine Learning ready chip format is via
347 | a stack, slice and split ™️ approach. Think of it like a sandwich 🥪, we first
348 | stack the bread 🍞 and lettuce 🥬, and then slice the pieces 🍕 through the
349 | layers once. Ok, that was a bad analogy, let's just stick with tensors 🤪.
350 |
351 | ### Stacking the raster layers 🥞
352 |
353 | Each of our 🌈 raster inputs are {py:class}`xarray.DataArray` objects with the
354 | same spatial resolution and extent 🪟, so these can be stacked into an
355 | {py:class}`xarray.Dataset` with multiple data variables. First, we'll zip 🤐
356 | the two datapipes together using {py:class}`torchdata.datapipes.iter.Zipper`
357 | (functional name: ``zip``)
358 |
359 | ```{code-cell}
360 | dp_zip = dp_decibel_image.zip(dp_datashader)
361 | dp_zip
362 | ```
363 |
364 | This will result in a DataPipe where each item is a tuple of (X, Y) pairs 🧑🤝🧑.
365 | Just to illustrate what we've done so far, we can use
366 | {py:class}`torchdata.datapipes.utils.to_graph` to visualize the data pipeline
367 | ⛓️.
368 |
369 | ```{code-cell}
370 | torchdata.datapipes.utils.to_graph(dp=dp_zip)
371 | ```
372 |
373 | Next, let's combine 🖇️ the two (X, Y) {py:class}`xarray.DataArray` objects in
374 | the tuple into an {py:class}`xarray.Dataset` using
375 | {py:class}`torchdata.datapipes.iter.Collator` (functional name: `collate`).
376 | We'll also ✂️ clip the dataset to a bounding box area where the target water
377 | mask has no 0 or NaN values.
378 |
379 | ```{code-cell}
def xr_collate_fn(image_and_mask: tuple) -> xr.Dataset:
    """
    Merge a pair of xarray.DataArray (image, mask) inputs into one
    xarray.Dataset with two data variables named 'image' and 'mask'.
    """
    image, mask = image_and_mask

    # Combine the two DataArrays into a single Dataset with named variables
    merged: xr.Dataset = xr.merge(
        objects=[image.isel(band=0).rename("image"), mask.rename("mask")],
        join="override",
    )

    # Restrict the dataset to the bounding box containing labelled pixels,
    # i.e. where the target water mask equals 1 (no 0 or NaN values)
    label_extent: tuple = mask.where(cond=mask == 1, drop=True).rio.bounds()
    clipped: xr.Dataset = merged.rio.clip_box(*label_extent)

    return clipped
397 | ```
398 |
399 | ```{code-cell}
400 | dp_dataset = dp_zip.collate(collate_fn=xr_collate_fn)
401 | dp_dataset
402 | ```
403 |
404 | Double check to see that resulting {py:class}`xarray.Dataset`'s image and mask
405 | looks ok 🙆♂️.
406 |
407 | ```{code-cell}
408 | it = iter(dp_dataset)
409 | dataset = next(it)
410 |
411 | # Create subplot with VV image on the left and Water mask on the right
412 | fig, axs = plt.subplots(ncols=2, figsize=(11.5, 4.5), sharey=True)
413 | dataset.image.plot.imshow(ax=axs[0], cmap="Blues_r")
414 | axs[0].set_title("Sentinel-1 VV channel")
415 | dataset.mask.plot.imshow(ax=axs[1], cmap="Blues")
416 | axs[1].set_title("Water mask")
417 | plt.show()
418 | ```
419 |
420 | ### Slice into chips and turn into tensors 🗡️
421 |
422 | To cut 🔪 the {py:class}`xarray.Dataset` into 512x512 sized chips, we'll use
423 | {py:class}`zen3geo.datapipes.XbatcherSlicer` (functional name:
424 | `slice_with_xbatcher`). Refer to {doc}`./chipping` if you need a 🧑🎓 refresher.
425 |
426 | ```{code-cell}
427 | dp_xbatcher = dp_dataset.slice_with_xbatcher(input_dims={"y": 512, "x": 512})
428 | dp_xbatcher
429 | ```
430 |
431 | Next step is to convert the 512x512 chips into a {py:class}`torch.Tensor` via
432 | {py:class}`torchdata.datapipes.iter.Mapper` (functional name: `map`). The 🛰️
433 | Sentinel-1 image and 💧 water mask will be split out at this point too.
434 |
435 | ```{code-cell}
def dataset_to_tensors(chip: xr.Dataset) -> tuple[torch.Tensor, torch.Tensor]:
    """
    Converts an xarray.Dataset into two torch.Tensor objects, the first one
    being the satellite image, and the second one being the target mask.
    """
    # Image keeps its native dtype; the mask is cast to uint8 since it holds
    # binary (water / not-water) labels
    image: torch.Tensor = torch.as_tensor(chip.image.data)
    mask: torch.Tensor = torch.as_tensor(chip.mask.data.astype("uint8"))

    return image, mask
445 | ```
446 |
447 | ```{code-cell}
448 | dp_map = dp_xbatcher.map(fn=dataset_to_tensors)
449 | dp_map
450 | ```
451 |
452 | At this point, we could do some batching and collating, but we'll point you
453 | again to {doc}`./chipping` to figure it out 😝. Let's take a look at a graph
454 | of the complete data pipeline.
455 |
456 | ```{code-cell}
457 | torchdata.datapipes.utils.to_graph(dp=dp_map)
458 | ```
459 |
460 | Sweet, time for the final step ⏩.
461 |
462 | ### Into a DataLoader 🏋️
463 |
464 | Pass the DataPipe into {py:class}`torch.utils.data.DataLoader` 🤾!
465 |
466 | ```{code-cell}
467 | dataloader = torch.utils.data.DataLoader(dataset=dp_map)
468 | for i, batch in enumerate(dataloader):
469 | image, mask = batch
470 | print(f"Batch {i} - image: {image.shape}, mask: {mask.shape}")
471 | ```
472 |
473 | Now go train some flood water detection models 🌊🌊🌊
474 |
475 | ```{seealso}
476 | To learn more about AI-based flood mapping with SAR, check out these resources:
477 |
478 | - [UNOSAT/NVIDIA Disaster Risk Monitoring Using Satellite Imagery online course](https://event.unitar.org/full-catalog/disaster-risk-monitoring-using-satellite-imagery)
479 | - [Code to train a Convolutional Neural Network for flood segmentation](https://github.com/UNITAR-UNOSAT/UNOSAT-AI-Based-Rapid-Mapping-Service/blob/master/Fastai%20training.ipynb)
480 | ```
481 |
--------------------------------------------------------------------------------
/docs/walkthrough.md:
--------------------------------------------------------------------------------
1 | ---
2 | jupytext:
3 | formats: md:myst
4 | text_representation:
5 | extension: .md
6 | format_name: myst
7 | kernelspec:
8 | display_name: Python 3
9 | language: python
10 | name: python3
11 | ---
12 |
13 | # Walkthrough
14 |
15 | > *To get it, you first see it, and then let it go*
16 |
17 | In this tutorial 🧑🏫, we'll step through an Earth Observation 🛰️ data pipeline
18 | using ``torchdata`` and by the end of this lesson, you should be able to:
19 | - Find Cloud-Optimized GeoTIFFs (COGs) from STAC catalogs 🥞
20 | - Construct a DataPipe that iteratively reads several COGs in a stream 🌊
21 | - Loop through batches of images in a DataPipe with a DataLoader 🏋️
22 |
23 | ## 🎉 **Getting started**
24 |
25 | These are the tools 🛠️ you'll need.
26 |
27 | ```{code-cell}
28 | # Geospatial libraries
29 | import pystac
30 | import planetary_computer
31 | import rioxarray
32 | # Deep Learning libraries
33 | import torch
34 | import torchdata
35 | import zen3geo
36 | ```
37 |
38 | Just to make sure we’re on the same page 📃,
39 | let’s check that we’ve got compatible versions installed.
40 |
41 | ```{code-cell}
42 | print(f"pystac version: {pystac.__version__}")
43 | print(f"planetary-computer version: {planetary_computer.__version__}")
44 | print(f"torch version: {torch.__version__}")
45 |
46 | print(f"torchdata version: {torchdata.__version__}")
47 | print(f"zen3geo version: {zen3geo.__version__}")
48 | rioxarray.show_versions()
49 | ```
50 |
51 | ## 0️⃣ Find [Cloud-Optimized GeoTIFFs](https://www.cogeo.org) 🗺️
52 |
53 | Let's get some optical satellite data using [STAC](https://stacspec.org)!
54 | How about Sentinel-2 L2A data over Singapore 🇸🇬?
55 |
56 | 🔗 Links:
57 | - [Official Sentinel-2 description page at ESA](https://sentinel.esa.int/web/sentinel/missions/sentinel-2)
58 | - [Microsoft Planetary Computer STAC Explorer](https://planetarycomputer.microsoft.com/explore?c=103.8152%2C1.3338&z=10.08&v=2&d=sentinel-2-l2a&s=false%3A%3A100%3A%3Atrue&ae=0&m=cql%3A2ff1401acb50731fa0a6d1e2a46f3064&r=Natural+color)
59 | - [AWS Sentinel-2 Cloud-Optimized GeoTIFFs](https://registry.opendata.aws/sentinel-2-l2a-cogs)
60 |
61 |
62 | ```{code-cell}
63 | item_url = "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-2-l2a/items/S2A_MSIL2A_20220115T032101_R118_T48NUG_20220115T170435"
64 |
65 | # Load the individual item metadata and sign the assets
66 | item = pystac.Item.from_file(item_url)
67 | signed_item = planetary_computer.sign(item)
68 | signed_item
69 | ```
70 |
71 | ### Inspect one of the data assets 🍱
72 |
73 | The Sentinel-2 STAC item contains several assets.
74 | These include different 🌈 bands (e.g. 'B02', 'B03', 'B04').
75 | Let's just use the 'visual' product for now which includes the RGB bands.
76 |
77 | ```{code-cell}
78 | url: str = signed_item.assets["visual"].href
79 | da = rioxarray.open_rasterio(filename=url)
80 | da
81 | ```
82 |
83 | This is how the Sentinel-2 image looks like over Singapore on 15 Jan 2022.
84 |
85 | 
86 |
87 | ## 1️⃣ Construct [DataPipe](https://github.com/pytorch/data/tree/v0.6.1#what-are-datapipes) 📡
88 |
89 | A torch `DataPipe` is a way of composing data (rather than inheriting data).
90 | Yes, I don't know what it really means either, so here's some extra reading.
91 |
92 | 🔖 References:
93 | - https://pytorch.org/blog/pytorch-1.11-released/#introducing-torchdata
94 | - https://github.com/pytorch/data/tree/v0.6.1#what-are-datapipes
95 | - https://realpython.com/inheritance-composition-python
96 |
97 | ### Create an Iterable 📏
98 |
99 | Start by wrapping a list of URLs to the Cloud-Optimized GeoTIFF files.
100 | We only have 1 item so we'll use ``[url]``, but if you have more, you can do
101 | ``[url1, url2, url3]``, etc. Pass this iterable list into
102 | {py:class}`torchdata.datapipes.iter.IterableWrapper`:
103 |
104 | ```{code-cell}
105 | dp = torchdata.datapipes.iter.IterableWrapper(iterable=[url])
106 | dp
107 | ```
108 |
109 | The ``dp`` variable is the DataPipe!
110 | Now to apply some more transformations/functions on it.
111 |
112 | ### Read using RioXarrayReader 🌐
113 |
114 | This is where ☯ ``zen3geo`` comes in. We'll be using the
115 | {py:class}`zen3geo.datapipes.rioxarray.RioXarrayReaderIterDataPipe` class, or
116 | rather, the short alias {py:class}`zen3geo.datapipes.RioXarrayReader`.
117 |
118 | Confusingly, there are two ways or forms of applying ``RioXarrayReader``,
119 | a class-based method and a functional method.
120 |
121 | ```{code-cell}
122 | # Using class constructors
123 | dp_rioxarray = zen3geo.datapipes.RioXarrayReader(source_datapipe=dp)
124 | dp_rioxarray
125 | ```
126 |
127 | ```{code-cell}
128 | # Using functional form (recommended)
129 | dp_rioxarray = dp.read_from_rioxarray()
130 | dp_rioxarray
131 | ```
132 |
133 | Note that both ways are equivalent (they produce the same IterDataPipe output),
134 | but the latter (functional) form is preferred, see also
135 | https://pytorch.org/data/0.4/tutorial.html#registering-datapipes-with-the-functional-api
136 |
137 | What if you don't want the whole Sentinel-2 scene at the full 10m resolution?
138 | Since we're using Cloud-Optimized GeoTIFFs, you could set an ``overview_level``
139 | (following https://corteva.github.io/rioxarray/stable/examples/COG.html).
140 |
141 | ```{code-cell}
142 | dp_rioxarray_zoom3 = dp.read_from_rioxarray(overview_level=3)
143 | dp_rioxarray_zoom3
144 | ```
145 |
146 | Extra keyword arguments will be handled by {py:func}`rioxarray.open_rasterio`
147 | or {py:func}`rasterio.open`.
148 |
149 | ```{note}
150 | Other DataPipe classes/functions can be stacked or joined to this basic GeoTIFF
151 | reader. For example, clipping by bounding box or reprojecting to a certain
152 | Coordinate Reference System. If you would like to implement this, check out the
153 | [Contributing Guidelines](./CONTRIBUTING) to get started!
154 | ```
155 |
156 | ## 2️⃣ Loop through DataPipe ⚙️
157 |
158 | A DataPipe describes a flow of information.
159 | Through a series of steps it goes,
160 | as one piece comes in, another might follow.
161 |
162 | ### Basic iteration ♻️
163 |
164 | At the most basic level, you could iterate through the DataPipe like so:
165 |
166 | ```{code-cell}
167 | it = iter(dp_rioxarray_zoom3)
168 | dataarray = next(it)
169 | dataarray
170 | ```
171 |
172 | Or if you're more familiar with a for-loop, here it is:
173 |
174 | ```{code-cell}
175 | for dataarray in dp_rioxarray_zoom3:
176 | print(dataarray)
177 | # Run model on this data batch
178 | ```
179 |
180 | ### Into a DataLoader 🏋️
181 |
182 | For the deep learning folks, you might need one extra step.
183 | The {py:class}``xarray.DataArray`` needs to be converted to a tensor.
184 | In the Pytorch world, that can happen via {py:func}``torch.as_tensor``.
185 |
186 | ```{code-cell}
def fn(da):
    """Convert a dataarray's underlying array into a torch.Tensor."""
    tensor = torch.as_tensor(da.data)
    return tensor
189 | ```
190 |
191 | Using {py:class}`torchdata.datapipes.iter.Mapper` (functional name: `map`),
192 | we'll apply the tensor conversion function to each dataarray in the DataPipe.
193 |
194 | ```{code-cell}
195 | dp_tensor = dp_rioxarray_zoom3.map(fn=fn)
196 | dp_tensor
197 | ```
198 |
199 | Finally, let's put our DataPipe into a {py:class}`torch.utils.data.DataLoader`!
200 |
201 | ```{code-cell}
202 | dataloader = torch.utils.data.DataLoader(dataset=dp_tensor)
203 | for batch in dataloader:
204 | tensor = batch
205 | print(tensor)
206 | ```
207 |
208 | And so it begins 🌄
209 |
210 | ---
211 |
212 | That’s all 🎉! For more information on how to use DataPipes, check out:
213 |
214 | - {doc}`TorchData DataPipe Tutorial `
215 | - {doc}`TorchData Usage Examples `
216 |
217 | If you have any questions 🙋, feel free to ask us anything at
218 | https://github.com/weiji14/zen3geo/discussions or visit the Pytorch forums at
219 | https://discuss.pytorch.org/c/data/37.
220 |
221 | Cheers!
222 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "zen3geo"
3 | version = "0.6.2"
4 | description = "The 🌏 data science library you've been waiting for~"
5 | authors = ["Wei Ji <23487320+weiji14@users.noreply.github.com>"]
6 | license = "LGPL-3.0-or-later"
7 | readme = "README.md"
8 | classifiers = [
9 | "Development Status :: 4 - Beta",
10 | "Intended Audience :: Science/Research",
11 | "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)",
12 | "Topic :: Scientific/Engineering",
13 | "Topic :: Scientific/Engineering :: GIS",
14 | "Topic :: Scientific/Engineering :: Image Processing",
15 | "Topic :: Software Development :: Libraries",
16 | "Programming Language :: Python :: 3.8",
17 | "Programming Language :: Python :: 3.9",
18 | "Programming Language :: Python :: 3.10",
19 | "Programming Language :: Python :: 3.11",
20 | ]
21 | exclude = ["zen3geo/tests"]
22 |
23 | [tool.poetry.urls]
24 | "Homepage" = "https://github.com/weiji14/zen3geo/discussions"
25 | "Changelog" = "https://zen3geo.readthedocs.io/en/latest/changelog.html"
26 | "Documentation" = "https://zen3geo.readthedocs.io"
27 | "Download" = "https://anaconda.org/conda-forge/zen3geo"
28 | "Source Code" = "https://github.com/weiji14/zen3geo"
29 | "Sponsor" = "https://github.com/sponsors/weiji14"
30 |
31 | [tool.poetry.dependencies]
32 | # Required
33 | python = ">=3.8, <4.0"
34 | rioxarray = ">=0.10.0"
35 | torchdata = ">=0.4.0"
36 | # Optional
37 | datashader = {version = ">=0.14.0", optional = true}
38 | pyogrio = {version = ">=0.4.0", extras = ["geopandas"], optional = true}
39 | pystac = {version=">=1.4.0", optional=true}
40 | pystac-client = {version = ">=0.4.0", optional = true}
41 | spatialpandas = {version = ">=0.4.0", optional = true}
42 | stackstac = {version = ">=0.4.0", optional = true}
43 | xbatcher = {version = ">=0.2.0", optional = true}
44 | xpystac = {version = ">=0.0.1", optional = true}
45 | zarr = {version = ">=2.13.0", optional = true}
46 | # Docs
47 | adlfs = {version = "*", optional = true}
48 | contextily = {version = "*", optional = true}
49 | graphviz = {version = "*", optional = true}
50 | jupyter-book = {version="*", optional=true}
51 | matplotlib = {version = "*", optional = true}
52 | planetary-computer = {version="*", optional=true}
53 | xarray-datatree = {version="*", optional=true}
54 |
55 | [tool.poetry.group.dev.dependencies]
56 | aiohttp = "*"
57 | black = "*"
58 | pytest = "*"
59 |
60 | [tool.poetry.extras]
61 | docs = [
62 | "adlfs",
63 | "contextily",
64 | "datashader",
65 | "graphviz",
66 | "jupyter-book",
67 | "matplotlib",
68 | "planetary-computer",
69 | "pyogrio",
70 | "pystac",
71 | "pystac_client",
72 | "spatialpandas",
73 | "stackstac",
74 | "xarray-datatree",
75 | "xbatcher",
76 | "xpystac",
77 | "zarr"
78 | ]
79 | raster = [
80 | "xbatcher",
81 | "zarr"
82 | ]
83 | spatial = [
84 | "datashader",
85 | "spatialpandas"
86 | ]
87 | stac = [
88 | "pystac",
89 | "pystac_client",
90 | "stackstac",
91 | "xpystac"
92 | ]
93 | vector = ["pyogrio"]
94 |
95 | [tool.poetry-dynamic-versioning]
96 | bump = true
97 | enable = true
98 | metadata = true
99 | style = "pep440"
100 |
101 | [build-system]
102 | requires = ["poetry-core>=1.7.0", "poetry-dynamic-versioning"]
103 | build-backend = "poetry.core.masonry.api"
104 |
--------------------------------------------------------------------------------
/zen3geo/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | zen3geo - The 🌏 data science library you've been waiting for~
3 | """
4 |
5 | from importlib.metadata import version
6 |
7 | from zen3geo import datapipes
8 |
9 | __version__ = version("zen3geo") # e.g. 0.1.2.dev3+g0ab3cd78
10 |
--------------------------------------------------------------------------------
/zen3geo/datapipes/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Iterable-style DataPipes for geospatial raster 🌈 and vector 🚏 data.
3 | """
4 |
5 | from zen3geo.datapipes.datashader import (
6 | DatashaderRasterizerIterDataPipe as DatashaderRasterizer,
7 | XarrayCanvasIterDataPipe as XarrayCanvas,
8 | )
9 | from zen3geo.datapipes.geopandas import (
10 | GeoPandasRectangleClipperIterDataPipe as GeoPandasRectangleClipper,
11 | )
12 | from zen3geo.datapipes.pyogrio import PyogrioReaderIterDataPipe as PyogrioReader
13 | from zen3geo.datapipes.pystac import PySTACItemReaderIterDataPipe as PySTACItemReader
14 | from zen3geo.datapipes.pystac_client import (
15 | PySTACAPIItemListerIterDataPipe as PySTACAPIItemLister,
16 | PySTACAPISearcherIterDataPipe as PySTACAPISearcher,
17 | )
18 | from zen3geo.datapipes.rioxarray import RioXarrayReaderIterDataPipe as RioXarrayReader
19 | from zen3geo.datapipes.stackstac import (
20 | StackSTACMosaickerIterDataPipe as StackSTACMosaicker,
21 | StackSTACStackerIterDataPipe as StackSTACStacker,
22 | )
23 | from zen3geo.datapipes.xbatcher import XbatcherSlicerIterDataPipe as XbatcherSlicer
24 | from zen3geo.datapipes.xpystac import (
25 | XpySTACAssetReaderIterDataPipe as XpySTACAssetReader,
26 | )
27 |
--------------------------------------------------------------------------------
/zen3geo/datapipes/datashader.py:
--------------------------------------------------------------------------------
1 | """
2 | DataPipes for :doc:`datashader `.
3 | """
4 | from typing import Any, Dict, Iterator, Optional, Union
5 |
6 | try:
7 | import datashader
8 | except ImportError:
9 | datashader = None
10 | try:
11 | import spatialpandas
12 | from spatialpandas.geometry import (
13 | LineDtype,
14 | MultiLineDtype,
15 | MultiPointDtype,
16 | MultiPolygonDtype,
17 | PointDtype,
18 | PolygonDtype,
19 | )
20 | except ImportError:
21 | spatialpandas = None
22 |
23 | import xarray as xr
24 | from torchdata.datapipes import functional_datapipe
25 | from torchdata.datapipes.iter import IterDataPipe
26 |
27 |
@functional_datapipe("rasterize_with_datashader")
class DatashaderRasterizerIterDataPipe(IterDataPipe):
    """
    Takes vector :py:class:`geopandas.GeoSeries` or
    :py:class:`geopandas.GeoDataFrame` geometries and rasterizes them using
    :py:class:`datashader.Canvas` to yield an :py:class:`xarray.DataArray`
    raster with the input geometries aggregated into a fixed-sized grid
    (functional name: ``rasterize_with_datashader``).

    Parameters
    ----------
    source_datapipe : IterDataPipe[datashader.Canvas]
        A DataPipe that contains :py:class:`datashader.Canvas` objects with a
        ``.crs`` attribute. This will be the template defining the output
        raster's spatial extent and x/y range.

    vector_datapipe : IterDataPipe[geopandas.GeoDataFrame]
        A DataPipe that contains :py:class:`geopandas.GeoSeries` or
        :py:class:`geopandas.GeoDataFrame` vector geometries with a
        :py:attr:`.crs ` property.

    agg : Optional[datashader.reductions.Reduction]
        Reduction operation to compute. Default depends on the input vector
        type:

        - For points, default is :py:class:`datashader.reductions.count`
        - For lines, default is :py:class:`datashader.reductions.any`
        - For polygons, default is :py:class:`datashader.reductions.any`

        For more information, refer to the section on Aggregation under
        datashader's :doc:`datashader:getting_started/Pipeline` docs.

    kwargs : Optional
        Extra keyword arguments to pass to the :py:class:`datashader.Canvas`
        class's aggregation methods such as ``datashader.Canvas.points``.

    Yields
    ------
    raster : xarray.DataArray
        An :py:class:`xarray.DataArray` object containing the raster data. This
        raster will have a :py:attr:`rioxarray.rioxarray.XRasterBase.crs`
        property and a proper affine transform viewable with
        :py:meth:`rioxarray.rioxarray.XRasterBase.transform`.

    Raises
    ------
    ModuleNotFoundError
        If ``spatialpandas`` is not installed. Please install it (e.g. via
        ``pip install spatialpandas``) before using this class.

    ValueError
        If either the length of the ``vector_datapipe`` is not 1, or if the
        length of the ``vector_datapipe`` is not equal to the length of the
        ``source_datapipe``. I.e. the ratio of vector:canvas must be 1:N or
        be exactly N:N.

    AttributeError
        If either the canvas in ``source_datapipe`` or vector geometry in
        ``vector_datapipe`` is missing a ``.crs`` attribute. Please set the
        coordinate reference system (e.g. using ``canvas.crs = 'OGC:CRS84'``
        for the :py:class:`datashader.Canvas` input or
        ``vector = vector.set_crs(crs='OGC:CRS84')`` for the
        :py:class:`geopandas.GeoSeries` or :py:class:`geopandas.GeoDataFrame`
        input) before passing them into the datapipe.

    NotImplementedError
        If the input vector geometry type to ``vector_datapipe`` is not
        supported, typically when a
        :py:class:`shapely.geometry.GeometryCollection` is used. Supported
        types include `Point`, `LineString`, and `Polygon`, plus their
        multipart equivalents `MultiPoint`, `MultiLineString`, and
        `MultiPolygon`.

    Example
    -------
    >>> import pytest
    >>> datashader = pytest.importorskip("datashader")
    >>> pyogrio = pytest.importorskip("pyogrio")
    >>> spatialpandas = pytest.importorskip("spatialpandas")
    ...
    >>> from torchdata.datapipes.iter import IterableWrapper
    >>> from zen3geo.datapipes import DatashaderRasterizer
    ...
    >>> # Read in a vector point data source
    >>> geodataframe = pyogrio.read_dataframe(
    ...     "https://github.com/geopandas/pyogrio/raw/v0.4.0/pyogrio/tests/fixtures/test_gpkg_nulls.gpkg",
    ...     read_geometry=True,
    ... )
    >>> assert geodataframe.crs == "EPSG:4326"  # latitude/longitude coords
    >>> dp_vector = IterableWrapper(iterable=[geodataframe])
    ...
    >>> # Setup blank raster canvas where we will burn vector geometries onto
    >>> canvas = datashader.Canvas(
    ...     plot_width=5,
    ...     plot_height=6,
    ...     x_range=(160000.0, 620000.0),
    ...     y_range=(0.0, 450000.0),
    ... )
    >>> canvas.crs = "EPSG:32631"  # UTM Zone 31N, North of Gulf of Guinea
    >>> dp_canvas = IterableWrapper(iterable=[canvas])
    ...
    >>> # Rasterize vector point geometries onto blank canvas
    >>> dp_datashader = dp_canvas.rasterize_with_datashader(
    ...     vector_datapipe=dp_vector
    ... )
    ...
    >>> # Loop or iterate over the DataPipe stream
    >>> it = iter(dp_datashader)
    >>> dataarray = next(it)
    >>> dataarray

    array([[0, 0, 0, 0, 1],
           [0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0],
           [0, 0, 1, 0, 0],
           [0, 1, 0, 0, 0],
           [1, 0, 0, 0, 0]], dtype=uint32)
    Coordinates:
      * x            (x) float64 2.094e+05 3.083e+05 4.072e+05 5.06e+05 6.049e+05
      * y            (y) float64 4.157e+05 3.47e+05 2.783e+05 ... 1.41e+05 7.237e+04
        spatial_ref  int64 0
    ...
    >>> dataarray.rio.crs
    CRS.from_epsg(32631)
    >>> dataarray.rio.transform()
    Affine(98871.00388807665, 0.0, 160000.0,
           0.0, -68660.4193667199, 450000.0)
    """

    def __init__(
        self,
        source_datapipe: IterDataPipe,
        vector_datapipe: IterDataPipe,
        agg: Optional[Any] = None,
        **kwargs: Optional[Dict[str, Any]],
    ) -> None:
        if spatialpandas is None:
            raise ModuleNotFoundError(
                "Package `spatialpandas` is required to be installed to use this datapipe. "
                "Please use `pip install spatialpandas` or "
                "`conda install -c conda-forge spatialpandas` "
                "to install the package"
            )
        self.source_datapipe: IterDataPipe = source_datapipe  # datashader.Canvas
        self.vector_datapipe: IterDataPipe = vector_datapipe  # geopandas.GeoDataFrame
        self.agg: Optional[Any] = agg  # Datashader Aggregation/Reduction function
        self.kwargs = kwargs

        # Fail fast if the vector:canvas ratio is neither 1:N (broadcast)
        # nor N:N (pairwise), since __iter__ relies on this invariant.
        len_vector_datapipe: int = len(self.vector_datapipe)
        len_canvas_datapipe: int = len(self.source_datapipe)
        if len_vector_datapipe != 1 and len_vector_datapipe != len_canvas_datapipe:
            raise ValueError(
                f"Unmatched lengths for the canvas datapipe ({self.source_datapipe}) "
                f"and vector datapipe ({self.vector_datapipe}). \n"
                f"The vector datapipe's length ({len_vector_datapipe}) should either "
                f"be (1) to allow for broadcasting, or match the canvas datapipe's "
                f"length of ({len_canvas_datapipe})."
            )

    def __iter__(self) -> Iterator[xr.DataArray]:
        # Broadcast vector iterator to match length of raster iterator
        for canvas, vector in self.source_datapipe.zip_longest(
            self.vector_datapipe, fill_value=list(self.vector_datapipe).pop()
        ):
            # If canvas has no CRS attribute, set one to prevent AttributeError
            canvas.crs = getattr(canvas, "crs", None)
            if canvas.crs is None:
                raise AttributeError(
                    "Missing crs information for datashader.Canvas with "
                    f"x_range: {canvas.x_range} and y_range: {canvas.y_range}. "
                    "Please set crs using e.g. `canvas.crs = 'OGC:CRS84'`."
                )

            # Reproject vector geometries to coordinate reference system
            # of the raster canvas if both are different
            try:
                if vector.crs != canvas.crs:
                    vector = vector.to_crs(crs=canvas.crs)
            except (AttributeError, ValueError) as e:
                raise AttributeError(
                    f"Missing crs information for input {vector.__class__} object "
                    f"with the following bounds: \n {vector.bounds} \n"
                    f"Please set crs using e.g. `vector = vector.set_crs(crs='OGC:CRS84')`."
                ) from e

            # Convert vector to spatialpandas format to allow datashader's
            # rasterization methods to work
            try:
                _vector = spatialpandas.GeoDataFrame(data=vector.geometry)
            except ValueError as e:
                if str(e) == "Unable to convert data argument to a GeometryList array":
                    raise NotImplementedError(
                        f"Unsupported geometry type(s) {set(vector.geom_type)} detected, "
                        "only point, line or polygon vector geometry types "
                        "(or their multi- equivalents) are supported."
                    ) from e
                else:
                    raise e

            # Determine geometry type to know which rasterization method to use
            vector_dtype: spatialpandas.geometry.GeometryDtype = _vector.geometry.dtype

            if isinstance(vector_dtype, (PointDtype, MultiPointDtype)):
                raster: xr.DataArray = canvas.points(
                    source=_vector, agg=self.agg, geometry="geometry", **self.kwargs
                )
            elif isinstance(vector_dtype, (LineDtype, MultiLineDtype)):
                raster: xr.DataArray = canvas.line(
                    source=_vector, agg=self.agg, geometry="geometry", **self.kwargs
                )
            elif isinstance(vector_dtype, (PolygonDtype, MultiPolygonDtype)):
                raster: xr.DataArray = canvas.polygons(
                    source=_vector, agg=self.agg, geometry="geometry", **self.kwargs
                )
            else:
                # Defensive guard: without this branch, an unrecognized dtype
                # would leave `raster` unbound and raise a confusing NameError
                # below instead of the documented NotImplementedError.
                raise NotImplementedError(
                    f"Unsupported geometry dtype {vector_dtype} detected, "
                    "only point, line or polygon vector geometry types "
                    "(or their multi- equivalents) are supported."
                )

            # Convert boolean dtype rasters to uint8 to enable reprojection
            if raster.dtype == "bool":
                raster: xr.DataArray = raster.astype(dtype="uint8")
            # Set coordinate transform for raster and ensure affine
            # transform is correct (the y-coordinate goes from North to South)
            raster: xr.DataArray = raster.rio.set_crs(input_crs=canvas.crs)
            # assert raster.rio.transform().e > 0  # y goes South to North
            _raster: xr.DataArray = raster.rio.reproject(
                dst_crs=canvas.crs, shape=raster.rio.shape
            )
            # assert _raster.rio.transform().e < 0  # y goes North to South

            yield _raster

    def __len__(self) -> int:
        return len(self.source_datapipe)
260 |
261 |
@functional_datapipe("canvas_from_xarray")
class XarrayCanvasIterDataPipe(IterDataPipe[Union[xr.DataArray, xr.Dataset]]):
    """
    Creates a blank :py:class:`datashader.Canvas` matching the spatial extent
    and grid coordinates of each input :py:class:`xarray.DataArray` or
    :py:class:`xarray.Dataset` (functional name: ``canvas_from_xarray``).

    Parameters
    ----------
    source_datapipe : IterDataPipe[xarray.DataArray]
        A DataPipe that contains :py:class:`xarray.DataArray` or
        :py:class:`xarray.Dataset` objects. These data objects need to have
        both a ``.rio.x_dim`` and ``.rio.y_dim`` attribute, which is present
        if the original dataset was opened using
        :py:func:`rioxarray.open_rasterio`, or by setting it manually using
        :py:meth:`rioxarray.rioxarray.XRasterBase.set_spatial_dims`.

    kwargs : Optional
        Extra keyword arguments to pass to :py:class:`datashader.Canvas`.

    Yields
    ------
    canvas : datashader.Canvas
        A :py:class:`datashader.Canvas` object covering the same spatial
        extent and x/y coordinates as the input raster grid. The canvas also
        carries a ``.crs`` attribute copied from the input xarray object's
        :py:attr:`rioxarray.rioxarray.XRasterBase.crs` property, preserving
        the original Coordinate Reference System.

    Raises
    ------
    ModuleNotFoundError
        If ``datashader`` is not installed. Follow
        :doc:`install instructions for datashader `
        before using this class.

    Example
    -------
    >>> import pytest
    >>> import numpy as np
    >>> import xarray as xr
    >>> datashader = pytest.importorskip("datashader")
    ...
    >>> from torchdata.datapipes.iter import IterableWrapper
    >>> from zen3geo.datapipes import XarrayCanvas
    ...
    >>> # Create blank canvas from xarray.DataArray using DataPipe
    >>> y = np.arange(0, -3, step=-1)
    >>> x = np.arange(0, 6)
    >>> dataarray: xr.DataArray = xr.DataArray(
    ...     data=np.zeros(shape=(1, 3, 6)),
    ...     coords=dict(band=[1], y=y, x=x),
    ... )
    >>> dataarray = dataarray.rio.set_spatial_dims(x_dim="x", y_dim="y")
    >>> dp = IterableWrapper(iterable=[dataarray])
    >>> dp_canvas = dp.canvas_from_xarray()
    ...
    >>> # Loop or iterate over the DataPipe stream
    >>> it = iter(dp_canvas)
    >>> canvas = next(it)
    >>> print(canvas.raster(source=dataarray))

    array([[[0., 0., 0., 0., 0., 0.],
            [0., 0., 0., 0., 0., 0.],
            [0., 0., 0., 0., 0., 0.]]])
    Coordinates:
      * x        (x) int64 0 1 2 3 4 5
      * y        (y) int64 0 -1 -2
      * band     (band) int64 1
    ...
    """

    def __init__(
        self,
        source_datapipe: IterDataPipe[Union[xr.DataArray, xr.Dataset]],
        **kwargs: Optional[Dict[str, Any]],
    ) -> None:
        if datashader is None:
            raise ModuleNotFoundError(
                "Package `datashader` is required to be installed to use this datapipe. "
                "Please use `pip install datashader` or "
                "`conda install -c conda-forge datashader` "
                "to install the package"
            )
        self.source_datapipe: IterDataPipe[
            Union[xr.DataArray, xr.Dataset]
        ] = source_datapipe
        self.kwargs = kwargs

    def __iter__(self) -> Iterator:
        for raster in self.source_datapipe:
            # Spatial dimension names, e.g. "x"/"y" or "longitude"/"latitude"
            dim_x: str = raster.rio.x_dim
            dim_y: str = raster.rio.y_dim
            # Bounding box of the raster grid in its own coordinate system
            left, bottom, right, top = raster.rio.bounds()

            canvas = datashader.Canvas(
                plot_width=len(raster[dim_x]),
                plot_height=len(raster[dim_y]),
                x_range=(left, right),
                y_range=(bottom, top),
                **self.kwargs,
            )
            # Tag the canvas with the source CRS so that downstream
            # rasterization steps can georeference their output
            canvas.crs = raster.rio.crs
            yield canvas

    def __len__(self) -> int:
        return len(self.source_datapipe)
372 |
--------------------------------------------------------------------------------
/zen3geo/datapipes/geopandas.py:
--------------------------------------------------------------------------------
1 | """
2 | DataPipes for :doc:`geopandas `.
3 | """
4 | from typing import Any, Dict, Iterator, Optional, Union
5 |
6 | try:
7 | import geopandas as gpd
8 | except ImportError:
9 | gpd = None
10 | import xarray as xr
11 | from torchdata.datapipes import functional_datapipe
12 | from torchdata.datapipes.iter import IterDataPipe
13 |
14 |
@functional_datapipe("clip_vector_with_rectangle")
class GeoPandasRectangleClipperIterDataPipe(IterDataPipe):
    """
    Takes vector :py:class:`geopandas.GeoSeries` or
    :py:class:`geopandas.GeoDataFrame` geometries and clips them with the
    rectangular extent of an :py:class:`xarray.DataArray` or
    :py:class:`xarray.Dataset` grid to yield tuples of spatially subsetted
    :py:class:`geopandas.GeoSeries` or :py:class:`geopandas.GeoDataFrame`
    vectors and the corresponding :py:class:`xarray.DataArray` or
    :py:class:`xarray.Dataset` raster object used as the clip mask (functional
    name: ``clip_vector_with_rectangle``).

    Uses the rectangular clip algorithm of :py:func:`geopandas.clip`, with the
    bounding box rectangle (minx, miny, maxx, maxy) derived from input raster
    mask's bounding box extent.

    Note
    ----
    If the input vector's coordinate reference system (``crs``) is different to
    the raster mask's coordinate reference system (``rio.crs``), the vector
    will be reprojected using :py:meth:`geopandas.GeoDataFrame.to_crs` to match
    the raster's coordinate reference system.

    Parameters
    ----------
    source_datapipe : IterDataPipe[geopandas.GeoDataFrame]
        A DataPipe that contains :py:class:`geopandas.GeoSeries` or
        :py:class:`geopandas.GeoDataFrame` vector geometries with a
        :py:attr:`.crs ` property.

    mask_datapipe : IterDataPipe[xarray.DataArray]
        A DataPipe that contains :py:class:`xarray.DataArray` or
        :py:class:`xarray.Dataset` objects with a
        :py:attr:`.rio.crs ` property and
        :py:meth:`.rio.bounds ` method.

    kwargs : Optional
        Extra keyword arguments to pass to :py:func:`geopandas.clip`.

    Yields
    ------
    paired_obj : Tuple[geopandas.GeoDataFrame, xarray.DataArray]
        A tuple consisting of the spatially subsetted
        :py:class:`geopandas.GeoSeries` or :py:class:`geopandas.GeoDataFrame`
        vector, and the corresponding :py:class:`xarray.DataArray` or
        :py:class:`xarray.Dataset` raster used as the clip mask.

    Raises
    ------
    ModuleNotFoundError
        If ``geopandas`` is not installed. See
        :doc:`install instructions for geopandas `
        (e.g. via ``pip install geopandas``) before using this class.

    NotImplementedError
        If the length of the vector ``source_datapipe`` is not 1. Currently,
        all of the vector geometries have to be merged into a single
        :py:class:`geopandas.GeoSeries` or :py:class:`geopandas.GeoDataFrame`.
        Refer to the section on Appending under geopandas'
        :doc:`geopandas:docs/user_guide/mergingdata` docs.

    Example
    -------
    >>> import pytest
    >>> import rioxarray
    >>> gpd = pytest.importorskip("geopandas")
    ...
    >>> from torchdata.datapipes.iter import IterableWrapper
    >>> from zen3geo.datapipes import GeoPandasRectangleClipper
    ...
    >>> # Read in a vector polygon data source
    >>> geodataframe = gpd.read_file(
    ...     filename="https://github.com/geopandas/geopandas/raw/v0.11.1/geopandas/tests/data/overlay/polys/df1.geojson",
    ... )
    >>> assert geodataframe.crs == "EPSG:4326"  # latitude/longitude coords
    >>> dp_vector = IterableWrapper(iterable=[geodataframe])
    ...
    >>> # Get list of raster grids to cut up the vector polygon later
    >>> dataarray = rioxarray.open_rasterio(
    ...     filename="https://github.com/rasterio/rasterio/raw/1.3.2/tests/data/world.byte.tif"
    ... )
    >>> assert dataarray.rio.crs == "EPSG:4326"  # latitude/longitude coords
    >>> dp_raster = IterableWrapper(
    ...     iterable=[
    ...         dataarray.sel(x=slice(0, 2)),  # longitude 0 to 2 degrees
    ...         dataarray.sel(x=slice(2, 4)),  # longitude 2 to 4 degrees
    ...     ]
    ... )
    ...
    >>> # Clip vector point geometries based on raster masks
    >>> dp_clipped = dp_vector.clip_vector_with_rectangle(
    ...     mask_datapipe=dp_raster
    ... )
    ...
    >>> # Loop or iterate over the DataPipe stream
    >>> it = iter(dp_clipped)
    >>> geodataframe0, raster0 = next(it)
    >>> geodataframe0
       col1                                           geometry
    0     1  POLYGON ((0.00000 0.00000, 0.00000 2.00000, 2....
    >>> raster0

    array([[[0, 0, ..., 0, 0],
            [0, 0, ..., 0, 0],
            ...,
            [1, 1, ..., 1, 1],
            [1, 1, ..., 1, 1]]], dtype=uint8)
    Coordinates:
      * band         (band) int64 1
      * x            (x) float64 0.0625 0.1875 0.3125 0.4375 ... 1.688 1.812 1.938
      * y            (y) float64 74.94 74.81 74.69 74.56 ... -74.69 -74.81 -74.94
        spatial_ref  int64 0
    ...
    >>> geodataframe1, raster1 = next(it)
    >>> geodataframe1
       col1                                           geometry
    1     2  POLYGON ((2.00000 2.00000, 2.00000 4.00000, 4....
    """

    def __init__(
        self,
        source_datapipe: IterDataPipe,
        mask_datapipe: IterDataPipe[Union[xr.DataArray, xr.Dataset]],
        **kwargs: Optional[Dict[str, Any]],
    ) -> None:
        if gpd is None:
            raise ModuleNotFoundError(
                "Package `geopandas` is required to be installed to use this datapipe. "
                "Please use `pip install geopandas` or "
                "`conda install -c conda-forge geopandas` "
                "to install the package"
            )
        self.source_datapipe: IterDataPipe = source_datapipe
        self.mask_datapipe: IterDataPipe[xr.DataArray] = mask_datapipe
        self.kwargs = kwargs

        # Only a single (broadcastable) vector geodataframe is supported
        len_vector_datapipe: int = len(self.source_datapipe)
        if len_vector_datapipe != 1:
            raise NotImplementedError(
                f"The vector datapipe's length can only be (1) for now, but got "
                f"({len_vector_datapipe}) instead. Consider merging your vector data "
                f"into a single `geopandas.GeoSeries` or `geopandas.GeoDataFrame`, "
                f"e.g. using `geodataframe0.append(geodataframe2)`."
            )

    def __iter__(self) -> Iterator:
        # The single vector geodataframe is broadcast across all raster masks
        geodataframe = list(self.source_datapipe).pop()

        for raster in self.mask_datapipe:
            # Rectangular clip extent (minx, miny, maxx, maxy) of the raster
            mask = raster.rio.bounds()

            # Reproject vector to the raster's CRS if they differ. An explicit
            # `if` comparison is used instead of a try/assert pattern because
            # `assert` statements are stripped under `python -O`, which would
            # have silently skipped the reprojection.
            if geodataframe.crs != raster.rio.crs:
                _geodataframe = geodataframe.to_crs(crs=raster.rio.crs)
            else:
                _geodataframe = geodataframe

            clipped_geodataframe = _geodataframe.clip(mask=mask, **self.kwargs)

            yield clipped_geodataframe, raster

    def __len__(self) -> int:
        return len(self.mask_datapipe)
178 |
--------------------------------------------------------------------------------
/zen3geo/datapipes/pyogrio.py:
--------------------------------------------------------------------------------
1 | """
2 | DataPipes for :doc:`pyogrio `.
3 | """
4 | from typing import Any, Dict, Iterator, Optional
5 |
6 | try:
7 | import pyogrio
8 | except ImportError:
9 | pyogrio = None
10 | from torchdata.datapipes import functional_datapipe
11 | from torchdata.datapipes.iter import IterDataPipe
12 | from torchdata.datapipes.utils import StreamWrapper
13 |
14 |
@functional_datapipe("read_from_pyogrio")
class PyogrioReaderIterDataPipe(IterDataPipe[StreamWrapper]):
    """
    Reads vector files (e.g. FlatGeoBuf, GeoPackage, GeoJSON) from local disk
    or URLs (anything pyogrio can handle) and yields
    :py:class:`geopandas.GeoDataFrame` objects (functional name:
    ``read_from_pyogrio``).

    Based on
    https://github.com/pytorch/data/blob/v0.4.0/torchdata/datapipes/iter/load/iopath.py#L42-L97

    Parameters
    ----------
    source_datapipe : IterDataPipe[str]
        A DataPipe that contains filepaths or URL links to vector files such as
        FlatGeoBuf, GeoPackage, GeoJSON, etc.

    kwargs : Optional
        Extra keyword arguments to pass to :py:func:`pyogrio.read_dataframe`.

    Yields
    ------
    stream_obj : geopandas.GeoDataFrame
        A :py:class:`geopandas.GeoDataFrame` object containing the vector data.

    Raises
    ------
    ModuleNotFoundError
        If ``pyogrio`` is not installed. See
        :doc:`install instructions for pyogrio `, and ensure
        that ``geopandas`` is installed too (e.g. via
        ``pip install pyogrio[geopandas]``) before using this class.

    Example
    -------
    >>> import pytest
    >>> pyogrio = pytest.importorskip("pyogrio")
    ...
    >>> from torchdata.datapipes.iter import IterableWrapper
    >>> from zen3geo.datapipes import PyogrioReader
    ...
    >>> # Read in GeoPackage data using DataPipe
    >>> file_url: str = "https://github.com/geopandas/pyogrio/raw/v0.4.0/pyogrio/tests/fixtures/test_gpkg_nulls.gpkg"
    >>> dp = IterableWrapper(iterable=[file_url])
    >>> dp_pyogrio = dp.read_from_pyogrio()
    ...
    >>> # Loop or iterate over the DataPipe stream
    >>> it = iter(dp_pyogrio)
    >>> geodataframe = next(it)
    >>> geodataframe
    StreamWrapper<   col_bool  col_int8  ...  col_float64                 geometry
    0       1.0       1.0  ...          1.5  POINT (0.00000 0.00000)
    1       0.0       2.0  ...          2.5  POINT (1.00000 1.00000)
    2       1.0       3.0  ...          3.5  POINT (2.00000 2.00000)
    3       NaN       NaN  ...          NaN  POINT (4.00000 4.00000)

    [4 rows x 12 columns]>
    """

    def __init__(
        self, source_datapipe: IterDataPipe[str], **kwargs: Optional[Dict[str, Any]]
    ) -> None:
        if pyogrio is None:
            raise ModuleNotFoundError(
                "Package `pyogrio` is required to be installed to use this datapipe. "
                "Please use `pip install pyogrio[geopandas]` or "
                "`conda install -c conda-forge pyogrio` "
                "to install the package"
            )
        self.source_datapipe: IterDataPipe[str] = source_datapipe
        self.kwargs = kwargs

    def __iter__(self) -> Iterator[StreamWrapper]:
        # Lazily load each vector file into a GeoDataFrame, wrapped in a
        # StreamWrapper so that torchdata can manage the resource
        yield from (
            StreamWrapper(pyogrio.read_dataframe(href, **self.kwargs))
            for href in self.source_datapipe
        )

    def __len__(self) -> int:
        return len(self.source_datapipe)
93 |
--------------------------------------------------------------------------------
/zen3geo/datapipes/pystac.py:
--------------------------------------------------------------------------------
1 | """
2 | DataPipes for :doc:`pystac `.
3 | """
4 | from typing import Any, Dict, Iterator, Optional
5 |
6 | try:
7 | import pystac
8 | except ImportError:
9 | pystac = None
10 | from torchdata.datapipes import functional_datapipe
11 | from torchdata.datapipes.iter import IterDataPipe
12 |
13 |
@functional_datapipe("read_to_pystac_item")
class PySTACItemReaderIterDataPipe(IterDataPipe):
    """
    Reads files from local disk or URLs (anything pystac can handle) and
    yields :py:class:`pystac.Item` objects (functional name:
    ``read_to_pystac_item``).

    Parameters
    ----------
    source_datapipe : IterDataPipe[str]
        A DataPipe that contains filepaths or URL links to STAC items.

    kwargs : Optional
        Extra keyword arguments to pass to :py:meth:`pystac.Item.from_file`.

    Yields
    ------
    stac_item : pystac.Item
        A :py:class:`pystac.Item` object containing the specific
        :py:class:`pystac.STACObject` implementation class represented in a
        JSON format.

    Raises
    ------
    ModuleNotFoundError
        If ``pystac`` is not installed. See
        :doc:`install instructions for pystac `, (e.g. via
        ``pip install pystac``) before using this class.

    Example
    -------
    >>> import pytest
    >>> pystac = pytest.importorskip("pystac")
    ...
    >>> from torchdata.datapipes.iter import IterableWrapper
    >>> from zen3geo.datapipes import PySTACItemReader
    ...
    >>> # Read in STAC Item using DataPipe
    >>> item_url: str = "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-2-l2a/items/S2A_MSIL2A_20220115T032101_R118_T48NUG_20220115T170435"
    >>> dp = IterableWrapper(iterable=[item_url])
    >>> dp_pystac = dp.read_to_pystac_item()
    ...
    >>> # Loop or iterate over the DataPipe stream
    >>> it = iter(dp_pystac)
    >>> stac_item = next(it)
    >>> stac_item.bbox
    [103.20205689, 0.81602476, 104.18934086, 1.8096362]
    >>> stac_item.properties  # doctest: +NORMALIZE_WHITESPACE
    {'datetime': '2022-01-15T03:21:01.024000Z',
     'platform': 'Sentinel-2A',
     'proj:epsg': 32648,
     'instruments': ['msi'],
     's2:mgrs_tile': '48NUG',
     'constellation': 'Sentinel 2',
     's2:granule_id': 'S2A_OPER_MSI_L2A_TL_ESRI_20220115T170436_A034292_T48NUG_N03.00',
     'eo:cloud_cover': 17.352597,
     's2:datatake_id': 'GS2A_20220115T032101_034292_N03.00',
     's2:product_uri': 'S2A_MSIL2A_20220115T032101_N0300_R118_T48NUG_20220115T170435.SAFE',
     's2:datastrip_id': 'S2A_OPER_MSI_L2A_DS_ESRI_20220115T170436_S20220115T033502_N03.00',
     's2:product_type': 'S2MSI2A',
     'sat:orbit_state': 'descending',
    ...
    """

    def __init__(
        self, source_datapipe: IterDataPipe[str], **kwargs: Optional[Dict[str, Any]]
    ) -> None:
        if pystac is None:
            raise ModuleNotFoundError(
                "Package `pystac` is required to be installed to use this datapipe. "
                "Please use `pip install pystac` or "
                "`conda install -c conda-forge pystac` "
                "to install the package"
            )
        self.source_datapipe: IterDataPipe[str] = source_datapipe
        self.kwargs = kwargs

    def __iter__(self) -> Iterator:
        # Fetch and parse each STAC Item lazily, one href at a time
        for url in self.source_datapipe:
            yield pystac.Item.from_file(href=url, **self.kwargs)

    def __len__(self) -> int:
        return len(self.source_datapipe)
97 |
--------------------------------------------------------------------------------
/zen3geo/datapipes/pystac_client.py:
--------------------------------------------------------------------------------
1 | """
2 | DataPipes for :doc:`pystac-client `.
3 | """
4 | from typing import Any, Dict, Iterator, Optional
5 |
6 | try:
7 | import pystac_client
8 | except ImportError:
9 | pystac_client = None
10 | from torchdata.datapipes import functional_datapipe
11 | from torchdata.datapipes.iter import IterDataPipe
12 |
13 |
14 | @functional_datapipe("search_for_pystac_item")
15 | class PySTACAPISearcherIterDataPipe(IterDataPipe):
16 | """
17 | Takes dictionaries containing a STAC API query (as long as the parameters
18 | are understood by :py:meth:`pystac_client.Client.search`) and yields
19 | :py:class:`pystac_client.ItemSearch` objects (functional name:
20 | ``search_for_pystac_item``).
21 |
22 | Parameters
23 | ----------
24 | source_datapipe : IterDataPipe[dict]
25 | A DataPipe that contains STAC API query parameters in the form of a
26 | Python dictionary to pass to :py:meth:`pystac_client.Client.search`.
27 | For example:
28 |
29 | - **bbox** - A list, tuple, or iterator representing a bounding box of
30 | 2D or 3D coordinates. Results will be filtered to only those
31 | intersecting the bounding box.
32 | - **datetime** - Either a single datetime or datetime range used to
33 | filter results. You may express a single datetime using a
34 | :py:class:`datetime.datetime` instance, a
35 | `RFC 3339-compliant `_
36 | timestamp, or a simple date string.
37 | - **collections** - List of one or more Collection IDs or
38 | :py:class:`pystac.Collection` instances. Only Items in one of the
39 | provided Collections will be searched.
40 |
41 | catalog_url : str
42 | The URL of a STAC Catalog.
43 |
44 | kwargs : Optional
45 | Extra keyword arguments to pass to
46 | :py:meth:`pystac_client.Client.open`. For example:
47 |
48 | - **headers** - A dictionary of additional headers to use in all
49 | requests made to any part of this Catalog/API.
50 | - **parameters** - Optional dictionary of query string parameters to
51 | include in all requests.
52 | - **modifier** - A callable that modifies the children collection and
53 | items returned by this Client. This can be useful for injecting
54 | authentication parameters into child assets to access data from
55 | non-public sources.
56 |
57 | Yields
58 | ------
59 | item_search : pystac_client.ItemSearch
60 | A :py:class:`pystac_client.ItemSearch` object instance that represents
61 | a deferred query to a STAC search endpoint as described in the
62 | `STAC API - Item Search spec `_.
63 |
64 | Raises
65 | ------
66 | ModuleNotFoundError
67 | If ``pystac_client`` is not installed. See
68 | :doc:`install instructions for pystac-client `,
69 | (e.g. via ``pip install pystac-client``) before using this class.
70 |
71 | Example
72 | -------
73 | >>> import pytest
74 | >>> pystac_client = pytest.importorskip("pystac_client")
75 | ...
76 | >>> from torchdata.datapipes.iter import IterableWrapper
77 | >>> from zen3geo.datapipes import PySTACAPISearcher
78 | ...
79 | >>> # Peform STAC API query using DataPipe
80 | >>> query = dict(
81 | ... bbox=[174.5, -41.37, 174.9, -41.19], # xmin, ymin, xmax, ymax
82 | ... datetime=["2012-02-20T00:00:00Z", "2022-12-22T00:00:00Z"],
83 | ... collections=["cop-dem-glo-30"],
84 | ... )
85 | >>> dp = IterableWrapper(iterable=[query])
86 | >>> dp_pystac_client = dp.search_for_pystac_item(
87 | ... catalog_url="https://planetarycomputer.microsoft.com/api/stac/v1",
88 | ... # modifier=planetary_computer.sign_inplace,
89 | ... )
90 | >>> # Loop or iterate over the DataPipe stream
91 | >>> it = iter(dp_pystac_client)
92 | >>> stac_item_search = next(it)
93 | >>> stac_items = list(stac_item_search.items())
94 | >>> stac_items
95 | [- ]
96 | >>> stac_items[0].properties # doctest: +NORMALIZE_WHITESPACE
97 | {'gsd': 30,
98 | 'datetime': '2021-04-22T00:00:00Z',
99 | 'platform': 'TanDEM-X',
100 | 'proj:epsg': 4326,
101 | 'proj:shape': [3600, 3600],
102 | 'proj:transform': [0.0002777777777777778,
103 | 0.0,
104 | 173.9998611111111,
105 | 0.0,
106 | -0.0002777777777777778,
107 | -40.99986111111111]}
108 | """
109 |
110 | def __init__(
111 | self,
112 | source_datapipe: IterDataPipe[dict],
113 | catalog_url: str,
114 | **kwargs: Optional[Dict[str, Any]]
115 | ) -> None:
116 | if pystac_client is None:
117 | raise ModuleNotFoundError(
118 | "Package `pystac_client` is required to be installed to use this datapipe. "
119 | "Please use `pip install pystac-client` or "
120 | "`conda install -c conda-forge pystac-client` "
121 | "to install the package"
122 | )
123 | self.source_datapipe: IterDataPipe[dict] = source_datapipe
124 | self.catalog_url: str = catalog_url
125 | self.kwargs = kwargs
126 |
127 | def __iter__(self) -> Iterator:
128 | catalog = pystac_client.Client.open(url=self.catalog_url, **self.kwargs)
129 |
130 | for query in self.source_datapipe:
131 | search = catalog.search(**query)
132 | yield search
133 |
134 | def __len__(self) -> int:
135 | return len(self.source_datapipe)
136 |
137 |
138 | @functional_datapipe("list_pystac_items_by_search")
139 | class PySTACAPIItemListerIterDataPipe(IterDataPipe):
140 | """
141 | Lists the :py:class:`pystac.Item` objects that match the provided STAC API
142 | search parameters (functional name: ``list_pystac_items_by_search``).
143 |
144 | Parameters
145 | ----------
146 | source_datapipe : IterDataPipe[pystac_client.ItemSearch]
147 | A DataPipe that contains :py:class:`pystac_client.ItemSearch` object
148 | instances that represents
149 | a deferred query to a STAC search endpoint as described in the
150 | `STAC API - Item Search spec `_.
151 |
152 | Yields
153 | ------
154 | stac_item : pystac.Item
155 | A :py:class:`pystac.Item` object containing the specific
156 | :py:class:`pystac.STACObject` implementation class represented in a
157 | JSON format.
158 |
159 | Raises
160 | ------
161 | ModuleNotFoundError
162 | If ``pystac_client`` is not installed. See
163 | :doc:`install instructions for pystac-client `,
164 | (e.g. via ``pip install pystac-client``) before using this class.
165 |
166 | Example
167 | -------
168 | >>> import pytest
169 | >>> pystac_client = pytest.importorskip("pystac_client")
170 | ...
171 | >>> from torchdata.datapipes.iter import IterableWrapper
172 | >>> from zen3geo.datapipes import PySTACAPIItemLister
173 | ...
174 | >>> # List STAC Items from a STAC API query
175 | >>> catalog = pystac_client.Client.open(
176 | ... url="https://explorer.digitalearth.africa/stac/"
177 | ... )
178 | >>> search = catalog.search(
179 | ... bbox=[57.2, -20.6, 57.9, -19.9], # xmin, ymin, xmax, ymax
180 | ... datetime=["2023-01-01T00:00:00Z", "2023-01-31T00:00:00Z"],
181 | ... collections=["s2_l2a"],
182 | ... )
183 | >>> dp = IterableWrapper(iterable=[search])
184 | >>> dp_pystac_item_list = dp.list_pystac_items_by_search()
185 | ...
186 | >>> # Loop or iterate over the DataPipe stream
187 | >>> it = iter(dp_pystac_item_list)
188 | >>> stac_item = next(it)
189 | >>> stac_item
190 |
-
191 | >>> stac_item.properties # doctest: +NORMALIZE_WHITESPACE
192 | {'title': 'S2B_MSIL2A_20230103T062449_N0509_R091_T40KED_20230103T075000',
193 | 'gsd': 10,
194 | 'proj:epsg': 32740,
195 | 'platform': 'sentinel-2b',
196 | 'view:off_nadir': 0,
197 | 'instruments': ['msi'],
198 | 'eo:cloud_cover': 0.02,
199 | 'odc:file_format': 'GeoTIFF',
200 | 'odc:region_code': '40KED',
201 | 'constellation': 'sentinel-2',
202 | 'sentinel:sequence': '0',
203 | 'sentinel:utm_zone': 40,
204 | 'sentinel:product_id': 'S2B_MSIL2A_20230103T062449_N0509_R091_T40KED_20230103T075000',
205 | 'sentinel:grid_square': 'ED',
206 | 'sentinel:data_coverage': 28.61,
207 | 'sentinel:latitude_band': 'K',
208 | 'created': '2023-01-03T06:24:53Z',
209 | 'sentinel:valid_cloud_cover': True,
210 | 'sentinel:boa_offset_applied': True,
211 | 'sentinel:processing_baseline': '05.09',
212 | 'proj:shape': [10980, 10980],
213 | 'proj:transform': [10.0, 0.0, 499980.0, 0.0, -10.0, 7900000.0, 0.0, 0.0, 1.0],
214 | 'cubedash:region_code': '40KED',
215 | 'datetime': '2023-01-03T06:24:53Z'}
216 | """
217 |
218 | def __init__(self, source_datapipe):
219 | if pystac_client is None:
220 | raise ModuleNotFoundError(
221 | "Package `pystac_client` is required to be installed to use this datapipe. "
222 | "Please use `pip install pystac-client` or "
223 | "`conda install -c conda-forge pystac-client` "
224 | "to install the package"
225 | )
226 | self.source_datapipe = source_datapipe
227 |
228 | def __iter__(self):
229 | for item_search in self.source_datapipe:
230 | yield from item_search.items()
231 |
232 | def __len__(self):
233 | return sum(item_search.matched() for item_search in self.source_datapipe)
234 |
--------------------------------------------------------------------------------
/zen3geo/datapipes/rioxarray.py:
--------------------------------------------------------------------------------
1 | """
2 | DataPipes for :doc:`rioxarray `.
3 | """
4 | from typing import Any, Dict, Iterator, Optional
5 |
6 | import rioxarray
7 | from torchdata.datapipes import functional_datapipe
8 | from torchdata.datapipes.iter import IterDataPipe
9 | from torchdata.datapipes.utils import StreamWrapper
10 |
11 |
12 | @functional_datapipe("read_from_rioxarray")
13 | class RioXarrayReaderIterDataPipe(IterDataPipe[StreamWrapper]):
14 | """
15 | Takes raster files (e.g. GeoTIFFs) from local disk or URLs
16 | (as long as they can be read by rioxarray and/or rasterio)
17 | and yields :py:class:`xarray.DataArray` objects (functional name:
18 | ``read_from_rioxarray``).
19 |
20 | Based on
21 | https://github.com/pytorch/data/blob/v0.4.0/torchdata/datapipes/iter/load/online.py#L55-L96
22 |
23 | Parameters
24 | ----------
25 | source_datapipe : IterDataPipe[str]
26 | A DataPipe that contains filepaths or URL links to raster files such as
27 | GeoTIFFs.
28 |
29 | kwargs : Optional
30 | Extra keyword arguments to pass to :py:func:`rioxarray.open_rasterio`
31 | and/or :py:func:`rasterio.open`.
32 |
33 | Yields
34 | ------
35 | stream_obj : xarray.DataArray
36 | An :py:class:`xarray.DataArray` object containing the raster data.
37 |
38 | Example
39 | -------
40 | >>> from torchdata.datapipes.iter import IterableWrapper
41 | >>> from zen3geo.datapipes import RioXarrayReader
42 | ...
43 | >>> # Read in GeoTIFF data using DataPipe
44 | >>> file_url: str = "https://github.com/GenericMappingTools/gmtserver-admin/raw/master/cache/earth_day_HD.tif"
45 | >>> dp = IterableWrapper(iterable=[file_url])
46 | >>> dp_rioxarray = dp.read_from_rioxarray()
47 | ...
48 | >>> # Loop or iterate over the DataPipe stream
49 | >>> it = iter(dp_rioxarray)
50 | >>> dataarray = next(it)
51 | >>> dataarray.encoding["source"]
52 | 'https://github.com/GenericMappingTools/gmtserver-admin/raw/master/cache/earth_day_HD.tif'
53 | >>> dataarray
54 | StreamWrapper<
55 | [1843200 values with dtype=uint8]
56 | Coordinates:
57 | * band (band) int64 1
58 | * x (x) float64 -179.9 -179.7 -179.5 -179.3 ... 179.5 179.7 179.9
59 | * y (y) float64 89.91 89.72 89.53 89.34 ... -89.53 -89.72 -89.91
60 | spatial_ref int64 0
61 | ...
62 | """
63 |
64 | def __init__(
65 | self, source_datapipe: IterDataPipe[str], **kwargs: Optional[Dict[str, Any]]
66 | ) -> None:
67 | self.source_datapipe: IterDataPipe[str] = source_datapipe
68 | self.kwargs = kwargs
69 |
70 | def __iter__(self) -> Iterator[StreamWrapper]:
71 | for filename in self.source_datapipe:
72 | yield StreamWrapper(
73 | rioxarray.open_rasterio(filename=filename, **self.kwargs)
74 | )
75 |
76 | def __len__(self) -> int:
77 | return len(self.source_datapipe)
78 |
--------------------------------------------------------------------------------
/zen3geo/datapipes/stackstac.py:
--------------------------------------------------------------------------------
1 | """
2 | DataPipes for :doc:`stackstac `.
3 | """
4 | from typing import Any, Dict, Iterator, Optional
5 |
6 | import xarray as xr
7 |
8 | try:
9 | import stackstac
10 | except ImportError:
11 | stackstac = None
12 | from torchdata.datapipes import functional_datapipe
13 | from torchdata.datapipes.iter import IterDataPipe
14 |
15 |
16 | @functional_datapipe("mosaic_dataarray")
17 | class StackSTACMosaickerIterDataPipe(IterDataPipe[xr.DataArray]):
18 | """
19 | Takes :py:class:`xarray.DataArray` objects, flattens a dimension by picking
20 | the first valid pixel, to yield mosaicked :py:class:`xarray.DataArray`
21 | objects (functional name: ``mosaic_dataarray``).
22 |
23 | Parameters
24 | ----------
25 | source_datapipe : IterDataPipe[xarray.DataArray]
26 | A DataPipe that contains :py:class:`xarray.DataArray` objects, with
27 | e.g. dimensions ("time", "band", "y", "x").
28 |
29 | kwargs : Optional
30 | Extra keyword arguments to pass to :py:func:`stackstac.mosaic`.
31 |
32 | Yields
33 | ------
34 | dataarray : xarray.DataArray
35 | An :py:class:`xarray.DataArray` that has been mosaicked with e.g.
36 | dimensions ("band", "y", "x").
37 |
38 | Raises
39 | ------
40 | ModuleNotFoundError
41 | If ``stackstac`` is not installed. See
42 | :doc:`install instructions for stackstac `, (e.g. via
43 | ``pip install stackstac``) before using this class.
44 |
45 | Example
46 | -------
47 | >>> import pytest
48 | >>> import xarray as xr
49 | >>> pystac = pytest.importorskip("pystac")
50 | >>> stackstac = pytest.importorskip("stackstac")
51 | ...
52 | >>> from torchdata.datapipes.iter import IterableWrapper
53 | >>> from zen3geo.datapipes import StackSTACMosaicker
54 | ...
55 | >>> # Get list of ALOS DEM tiles to mosaic together later
56 | >>> item_urls = [
57 | ... "https://planetarycomputer.microsoft.com/api/stac/v1/collections/alos-dem/items/ALPSMLC30_N022E113_DSM",
58 | ... "https://planetarycomputer.microsoft.com/api/stac/v1/collections/alos-dem/items/ALPSMLC30_N022E114_DSM",
59 | ... ]
60 | >>> stac_items = [pystac.Item.from_file(href=url) for url in item_urls]
61 | >>> dataarray = stackstac.stack(items=stac_items)
62 | >>> assert dataarray.sizes == {'time': 2, 'band': 1, 'y': 3600, 'x': 7200}
63 | ...
64 | >>> # Mosaic different tiles in an xarray.DataArray using DataPipe
65 | >>> dp = IterableWrapper(iterable=[dataarray])
66 | >>> dp_mosaic = dp.mosaic_dataarray()
67 | ...
68 | >>> # Loop or iterate over the DataPipe stream
69 | >>> it = iter(dp_mosaic)
70 | >>> dataarray = next(it)
71 | >>> print(dataarray.sizes)
72 | Frozen({'band': 1, 'y': 3600, 'x': 7200})
73 | >>> print(dataarray.coords)
74 | Coordinates:
75 | * band (band) >> print(dataarray.attrs["spec"])
80 | RasterSpec(epsg=4326, bounds=(113.0, 22.0, 115.0, 23.0), resolutions_xy=(0.0002777777777777778, 0.0002777777777777778))
81 | """
82 |
83 | def __init__(
84 | self,
85 | source_datapipe: IterDataPipe[xr.DataArray],
86 | **kwargs: Optional[Dict[str, Any]]
87 | ) -> None:
88 | if stackstac is None:
89 | raise ModuleNotFoundError(
90 | "Package `stackstac` is required to be installed to use this datapipe. "
91 | "Please use `pip install stackstac` or "
92 | "`conda install -c conda-forge stackstac` "
93 | "to install the package"
94 | )
95 | self.source_datapipe: IterDataPipe = source_datapipe
96 | self.kwargs = kwargs
97 |
98 | def __iter__(self) -> Iterator[xr.DataArray]:
99 | for dataarray in self.source_datapipe:
100 | yield stackstac.mosaic(arr=dataarray, **self.kwargs)
101 |
102 | def __len__(self) -> int:
103 | return len(self.source_datapipe)
104 |
105 |
106 | @functional_datapipe("stack_stac_items")
107 | class StackSTACStackerIterDataPipe(IterDataPipe[xr.DataArray]):
108 | """
109 | Takes :py:class:`pystac.Item` objects, reprojects them to the same grid
110 | and stacks them along time, to yield :py:class:`xarray.DataArray` objects
111 | (functional name: ``stack_stac_items``).
112 |
113 | Parameters
114 | ----------
115 | source_datapipe : IterDataPipe[pystac.Item]
116 | A DataPipe that contains :py:class:`pystac.Item` objects.
117 |
118 | kwargs : Optional
119 | Extra keyword arguments to pass to :py:func:`stackstac.stack`.
120 |
121 | Yields
122 | ------
123 | datacube : xarray.DataArray
124 | An :py:class:`xarray.DataArray` backed by a
125 | :py:class:`dask.array.Array` containing the time-series datacube. The
126 | dimensions will be ("time", "band", "y", "x").
127 |
128 | Raises
129 | ------
130 | ModuleNotFoundError
131 | If ``stackstac`` is not installed. See
132 | :doc:`install instructions for stackstac `, (e.g. via
133 | ``pip install stackstac``) before using this class.
134 |
135 | Example
136 | -------
137 | >>> import pytest
138 | >>> pystac = pytest.importorskip("pystac")
139 | >>> stacstac = pytest.importorskip("stackstac")
140 | ...
141 | >>> from torchdata.datapipes.iter import IterableWrapper
142 | >>> from zen3geo.datapipes import StackSTACStacker
143 | ...
144 | >>> # Stack different bands in a STAC Item using DataPipe
145 | >>> item_url: str = "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd/items/S1A_IW_GRDH_1SDV_20220914T093226_20220914T093252_044999_056053"
146 | >>> stac_item = pystac.Item.from_file(href=item_url)
147 | >>> dp = IterableWrapper(iterable=[stac_item])
148 | >>> dp_stackstac = dp.stack_stac_items(
149 | ... assets=["vh", "vv"], epsg=32652, resolution=10
150 | ... )
151 | ...
152 | >>> # Loop or iterate over the DataPipe stream
153 | >>> it = iter(dp_stackstac)
154 | >>> dataarray = next(it)
155 | >>> print(dataarray.sizes)
156 | Frozen({'time': 1, 'band': 2, 'y': 20686, 'x': 28043})
157 | >>> print(dataarray.coords)
158 | Coordinates:
159 | * time (time) datetime64[ns] 2022-09-14T0...
160 | id (time) >> print(dataarray.attrs["spec"])
166 | RasterSpec(epsg=32652, bounds=(135370, 4098080, 415800, 4304940), resolutions_xy=(10, 10))
167 | """
168 |
169 | def __init__(
170 | self, source_datapipe: IterDataPipe, **kwargs: Optional[Dict[str, Any]]
171 | ) -> None:
172 | if stackstac is None:
173 | raise ModuleNotFoundError(
174 | "Package `stackstac` is required to be installed to use this datapipe. "
175 | "Please use `pip install stackstac` or "
176 | "`conda install -c conda-forge stackstac` "
177 | "to install the package"
178 | )
179 | self.source_datapipe: IterDataPipe = source_datapipe
180 | self.kwargs = kwargs
181 |
182 | def __iter__(self) -> Iterator[xr.DataArray]:
183 | for stac_items in self.source_datapipe:
184 | yield stackstac.stack(items=stac_items, **self.kwargs)
185 |
186 | def __len__(self) -> int:
187 | return len(self.source_datapipe)
188 |
--------------------------------------------------------------------------------
/zen3geo/datapipes/xbatcher.py:
--------------------------------------------------------------------------------
1 | """
2 | DataPipes for :doc:`xbatcher `.
3 | """
4 | from typing import Any, Dict, Hashable, Iterator, Optional, Tuple, Union
5 |
6 | import xarray as xr
7 |
8 | try:
9 | import xbatcher
10 | except ImportError:
11 | xbatcher = None
12 | from torchdata.datapipes import functional_datapipe
13 | from torchdata.datapipes.iter import IterDataPipe
14 |
15 |
16 | @functional_datapipe("slice_with_xbatcher")
17 | class XbatcherSlicerIterDataPipe(IterDataPipe[Union[xr.DataArray, xr.Dataset]]):
18 | """
19 | Takes an :py:class:`xarray.DataArray` or :py:class:`xarray.Dataset`
20 | and creates a sliced window view (also known as a chip or tile) of the
21 | n-dimensional array (functional name: ``slice_with_xbatcher``).
22 |
23 | Parameters
24 | ----------
25 | source_datapipe : IterDataPipe[xarray.DataArray]
26 | A DataPipe that contains :py:class:`xarray.DataArray` or
27 | :py:class:`xarray.Dataset` objects.
28 |
29 | input_dims : dict
30 | A dictionary specifying the size of the inputs in each dimension to
31 | slice along, e.g. ``{'lon': 64, 'lat': 64}``. These are the dimensions
32 | the machine learning library will see. All other dimensions will be
33 | stacked into one dimension called ``batch``.
34 |
35 | kwargs : Optional
36 | Extra keyword arguments to pass to :py:class:`xbatcher.BatchGenerator`.
37 |
38 | Yields
39 | ------
40 | chip : xarray.DataArray
41 | An :py:class:`xarray.DataArray` or :py:class:`xarray.Dataset` object
42 | containing the sliced raster data, with the size/shape defined by the
43 | ``input_dims`` parameter.
44 |
45 | Raises
46 | ------
47 | ModuleNotFoundError
48 | If ``xbatcher`` is not installed. Follow
49 | :doc:`install instructions for xbatcher `
50 | before using this class.
51 |
52 | Example
53 | -------
54 | >>> import pytest
55 | >>> import numpy as np
56 | >>> import xarray as xr
57 | >>> xbatcher = pytest.importorskip("xbatcher")
58 | ...
59 | >>> from torchdata.datapipes.iter import IterableWrapper
60 | >>> from zen3geo.datapipes import XbatcherSlicer
61 | ...
62 | >>> # Sliced window view of xarray.DataArray using DataPipe
63 | >>> dataarray: xr.DataArray = xr.DataArray(
64 | ... data=np.ones(shape=(3, 64, 64)),
65 | ... name="foo",
66 | ... dims=["band", "y", "x"]
67 | ... )
68 | >>> dp = IterableWrapper(iterable=[dataarray])
69 | >>> dp_xbatcher = dp.slice_with_xbatcher(input_dims={"y": 2, "x": 2})
70 | ...
71 | >>> # Loop or iterate over the DataPipe stream
72 | >>> it = iter(dp_xbatcher)
73 | >>> dataarray_chip = next(it)
74 | >>> dataarray_chip
75 |
76 | array([[[1., 1.],
77 | [1., 1.]],
78 |
79 | [[1., 1.],
80 | [1., 1.]],
81 |
82 | [[1., 1.],
83 | [1., 1.]]])
84 | Dimensions without coordinates: band, y, x
85 | """
86 |
87 | def __init__(
88 | self,
89 | source_datapipe: IterDataPipe[Union[xr.DataArray, xr.Dataset]],
90 | input_dims: Dict[Hashable, int],
91 | **kwargs: Optional[Dict[str, Any]],
92 | ) -> None:
93 | if xbatcher is None:
94 | raise ModuleNotFoundError(
95 | "Package `xbatcher` is required to be installed to use this datapipe. "
96 | "Please use `pip install xbatcher` "
97 | "to install the package"
98 | )
99 | self.source_datapipe: IterDataPipe[
100 | Union[xr.DataArray, xr.Dataset]
101 | ] = source_datapipe
102 | self.input_dims: Dict[Hashable, int] = input_dims
103 | self.kwargs = kwargs
104 |
105 | def __iter__(self) -> Iterator[Union[xr.DataArray, xr.Dataset]]:
106 | for dataarray in self.source_datapipe:
107 | for chip in dataarray.batch.generator(
108 | input_dims=self.input_dims, **self.kwargs
109 | ):
110 | yield chip
111 |
112 | def __len__(self) -> int:
113 | return sum(
114 | len(dataarray.batch.generator(input_dims=self.input_dims, **self.kwargs))
115 | for dataarray in self.source_datapipe
116 | )
117 |
--------------------------------------------------------------------------------
/zen3geo/datapipes/xpystac.py:
--------------------------------------------------------------------------------
1 | """
2 | DataPipes for `xpystac `__.
3 | """
4 | from typing import Any, Dict, Iterator, Optional
5 |
6 | import xarray as xr
7 |
8 | try:
9 | import pystac
10 | import xpystac
11 | except ImportError:
12 | pystac = None
13 | xpystac = None
14 | from torchdata.datapipes import functional_datapipe
15 | from torchdata.datapipes.iter import IterDataPipe
16 | from torchdata.datapipes.utils import StreamWrapper
17 |
18 |
19 | @functional_datapipe("read_from_xpystac")
20 | class XpySTACAssetReaderIterDataPipe(IterDataPipe[StreamWrapper]):
21 | """
22 | Takes a :py:class:`pystac.Asset` object containing n-dimensional data (e.g.
23 | :doc:`Zarr `,
24 | `NetCDF `__,
25 | `Cloud-Optimized GeoTIFF `__, etc) from local disk
26 | or URLs (as long as they can be read by xpystac) and yields
27 | :py:class:`xarray.Dataset` objects (functional name:
28 | ``read_from_xpystac``).
29 |
30 | Based on
31 | https://github.com/pytorch/data/blob/v0.5.1/torchdata/datapipes/iter/load/iopath.py#L42-L97
32 |
33 | Parameters
34 | ----------
35 | source_datapipe : IterDataPipe[pystac.Asset]
36 | A DataPipe that contains :py:class:`pystac.Asset` objects to
37 | n-dimensional files such as :doc:`Zarr `,
38 | `NetCDF `__,
39 | `Cloud-Optimized GeoTIFF `__, etc.
40 |
41 | engine : str or xarray.backends.BackendEntrypoint
42 | Engine to use when reading files. If not provided, the default engine
43 | will be the "stac" backend from ``xpystac``. Alternatively, set
44 | ``engine=None`` to let ``xarray`` choose the default engine based on
45 | available dependencies, with a preference for "netcdf4". See also
46 | :py:func:`xarray.open_dataset` for details about other engine options.
47 |
48 | kwargs : Optional
49 | Extra keyword arguments to pass to :py:func:`xarray.open_dataset`.
50 |
51 | Yields
52 | ------
53 | stream_obj : xarray.Dataset
54 | An :py:class:`xarray.Dataset` object containing the n-dimensional data.
55 |
56 | Raises
57 | ------
58 | ModuleNotFoundError
59 | If ``xpystac`` is not installed. See
60 | `install instructions for xpystac
61 | `__,
62 | (e.g. via ``pip install xpystac``) before using this class.
63 |
64 | Example
65 | -------
66 | >>> import pytest
67 | >>> pystac = pytest.importorskip("pystac")
68 | >>> xpystac = pytest.importorskip("xpystac")
69 | >>> zarr = pytest.importorskip("zarr")
70 | ...
71 | >>> from torchdata.datapipes.iter import IterableWrapper
72 | >>> from zen3geo.datapipes import XpySTACAssetReader
73 | ...
74 | >>> # Read in STAC Asset using DataPipe
75 | >>> collection_url: str = "https://planetarycomputer.microsoft.com/api/stac/v1/collections/nasa-nex-gddp-cmip6"
76 | >>> asset: pystac.Asset = pystac.Collection.from_file(href=collection_url).assets[
77 | ... "ACCESS-CM2.historical"
78 | ... ]
79 | >>> dp = IterableWrapper(iterable=[asset])
80 | >>> dp_xpystac = dp.read_from_xpystac()
81 | ...
82 | >>> # Loop or iterate over the DataPipe stream
83 | >>> it = iter(dp_xpystac)
84 | >>> dataset = next(it)
85 | >>> dataset.sizes
86 | Frozen({'time': 23741, 'lat': 600, 'lon': 1440})
87 | >>> print(dataset.data_vars)
88 | Data variables:
89 | hurs (time, lat, lon) float32 ...
90 | huss (time, lat, lon) float32 ...
91 | pr (time, lat, lon) float32 ...
92 | rlds (time, lat, lon) float32 ...
93 | rsds (time, lat, lon) float32 ...
94 | sfcWind (time, lat, lon) float32 ...
95 | tas (time, lat, lon) float32 ...
96 | tasmax (time, lat, lon) float32 ...
97 | tasmin (time, lat, lon) float32 ...
98 | >>> dataset.attrs # doctest: +NORMALIZE_WHITESPACE
99 | {'Conventions': 'CF-1.7',
100 | 'activity': 'NEX-GDDP-CMIP6',
101 | 'cmip6_institution_id': 'CSIRO-ARCCSS',
102 | 'cmip6_license': 'CC-BY-SA 4.0',
103 | 'cmip6_source_id': 'ACCESS-CM2',
104 | ...
105 | 'history': '2021-10-04T13:59:21.654137+00:00: install global attributes',
106 | 'institution': 'NASA Earth Exchange, NASA Ames Research Center, ...
107 | 'product': 'output',
108 | 'realm': 'atmos',
109 | 'references': 'BCSD method: Thrasher et al., 2012, ...
110 | 'resolution_id': '0.25 degree',
111 | 'scenario': 'historical',
112 | 'source': 'BCSD',
113 | 'title': 'ACCESS-CM2, r1i1p1f1, historical, global downscaled CMIP6 ...
114 | 'tracking_id': '16d27564-470f-41ea-8077-f4cc3efa5bfe',
115 | 'variant_label': 'r1i1p1f1',
116 | 'version': '1.0'}
117 | """
118 |
119 | def __init__(
120 | self,
121 | source_datapipe: IterDataPipe,
122 | engine: str = "stac",
123 | **kwargs: Optional[Dict[str, Any]]
124 | ) -> None:
125 | if xpystac is None and engine == "stac":
126 | raise ModuleNotFoundError(
127 | "Package `xpystac` is required to be installed to use this datapipe. "
128 | "Please use `pip install xpystac` "
129 | "to install the package"
130 | )
131 | self.source_datapipe: IterDataPipe = source_datapipe
132 | self.engine: str = engine
133 | self.kwargs = kwargs
134 |
135 | def __iter__(self) -> Iterator[StreamWrapper]:
136 | for asset in self.source_datapipe:
137 | yield StreamWrapper(
138 | xr.open_dataset(asset, engine=self.engine, **self.kwargs)
139 | )
140 |
141 | def __len__(self) -> int:
142 | return len(self.source_datapipe)
143 |
--------------------------------------------------------------------------------
/zen3geo/tests/test_datapipes_datashader.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for datashader datapipes.
3 | """
4 | import numpy as np
5 | import pytest
6 | import xarray as xr
7 | from torchdata.datapipes.iter import IterableWrapper
8 |
9 | from zen3geo.datapipes import DatashaderRasterizer, XarrayCanvas
10 |
11 | datashader = pytest.importorskip("datashader")
12 |
13 |
14 | # %%
@pytest.fixture(scope="function", name="canvas")
def fixture_canvas():
    """
    Provide a blank 14x10 datashader.Canvas, with a crs attribute attached,
    for each test to rasterize onto.
    """
    blank_canvas = datashader.Canvas(
        plot_width=14, plot_height=10, x_range=(1, 8), y_range=(0, 5)
    )
    blank_canvas.crs = "OGC:CRS84"
    return blank_canvas
25 |
26 |
@pytest.fixture(scope="module", name="geodataframe")
def fixture_geodataframe():
    """
    Provide a geopandas.GeoDataFrame holding one multipoint, one linestring
    and one polygon shapely.geometry object for the tests to rasterize.
    """
    gpd = pytest.importorskip("geopandas")
    shapely = pytest.importorskip("shapely")

    multipoint = shapely.geometry.MultiPoint([(4.5, 4.5), (3.5, 1), (6, 3.5)])
    linestring = shapely.geometry.LineString([(3, 5), (5, 3), (3, 2), (5, 0)])
    polygon = shapely.geometry.Polygon(
        [(6, 5), (3.5, 2.5), (6, 0), (6, 2.5), (5, 2.5)]
    )
    gdf = gpd.GeoDataFrame(data={"geometry": [multipoint, linestring, polygon]})
    return gdf.set_crs(crs="OGC:CRS84")
45 |
46 |
47 | # %%
def test_datashader_canvas_dataset():
    """
    Ensure that XarrayCanvas works to create a blank datashader.Canvas object
    from an xarray.Dataset.
    """
    y_coords = np.linspace(start=6, stop=0, num=12)
    x_coords = np.linspace(start=0, stop=4, num=8)
    dataset: xr.Dataset = xr.Dataset(
        data_vars={"temperature": (["y", "x"], 15 * np.ones(shape=(12, 8)))},
        coords={"y": (["y"], y_coords), "x": (["x"], x_coords)},
    )
    dp = IterableWrapper(iterable=[dataset])

    # Using class constructors
    dp_canvas = XarrayCanvas(source_datapipe=dp)
    # Using functional form (recommended)
    dp_canvas = dp.canvas_from_xarray()

    assert len(dp_canvas) == 1
    canvas = next(iter(dp_canvas))

    assert canvas.plot_width == 8
    assert canvas.plot_height == 12
    assert hasattr(canvas, "crs")
    assert hasattr(canvas, "raster")
75 |
76 |
@pytest.mark.parametrize(
    ("geom_type", "sum_val"), [("Point", 3), ("Line", 13), ("Polygon", 15)]
)
def test_datashader_rasterize_vector_geometry(canvas, geodataframe, geom_type, sum_val):
    """
    Ensure that DatashaderRasterizer works to rasterize a
    geopandas.GeoDataFrame of point, line or polygon type into an
    xarray.DataArray grid.
    """
    dp = IterableWrapper(iterable=[canvas, canvas])

    # Keep only the geometry rows matching the parametrized type
    subset = geodataframe[geodataframe.type.str.contains(geom_type)]
    dp_vector = IterableWrapper(iterable=[subset])

    # Using class constructors
    dp_datashader = DatashaderRasterizer(source_datapipe=dp, vector_datapipe=dp_vector)
    # Using functional form (recommended)
    dp_datashader = dp.rasterize_with_datashader(vector_datapipe=dp_vector)

    assert len(dp_datashader) == 2
    dataarray = next(iter(dp_datashader))

    assert dataarray.data.sum() == sum_val
    assert dataarray.dims == ("y", "x")
    assert dataarray.rio.crs == "OGC:CRS84"
    assert dataarray.rio.shape == (10, 14)
    assert dataarray.rio.transform().e == -0.5
105 |
106 |
def test_datashader_rasterize_canvas_missing_crs(canvas, geodataframe):
    """
    Ensure that DatashaderRasterizer raises an AttributeError when the
    input datashader.Canvas has no crs attribute.
    """
    canvas.crs = None  # wipe the coordinate reference system
    dp_datashader = IterableWrapper(iterable=[canvas]).rasterize_with_datashader(
        vector_datapipe=IterableWrapper(iterable=[geodataframe.geometry])
    )

    assert len(dp_datashader) == 1
    with pytest.raises(
        AttributeError, match="Missing crs information for datashader.Canvas"
    ):
        next(iter(dp_datashader))
123 |
124 |
def test_datashader_rasterize_vector_missing_crs(canvas, geodataframe):
    """
    Ensure that DatashaderRasterizer raises an AttributeError when the
    input geopandas.GeoSeries has no crs attribute.
    """
    geoseries = geodataframe.geometry
    geoseries.crs = None  # wipe the coordinate reference system
    dp_canvas = IterableWrapper(iterable=[canvas])
    dp_vector = IterableWrapper(iterable=[geoseries])
    dp_datashader = dp_canvas.rasterize_with_datashader(vector_datapipe=dp_vector)

    assert len(dp_datashader) == 1
    with pytest.raises(AttributeError, match="Missing crs information for input"):
        next(iter(dp_datashader))
140 |
141 |
def test_datashader_rasterize_unmatched_lengths(canvas, geodataframe):
    """
    Check that DatashaderRasterizer raises a ValueError when the canvas
    datapipe's length cannot be matched with the vector datapipe's length.
    """
    # Three canvases against two vectors (ratio 3:2) cannot be paired up
    dp_canvas = IterableWrapper(iterable=[canvas] * 3)
    dp_vector = IterableWrapper(iterable=[geodataframe] * 2)

    with pytest.raises(ValueError, match="Unmatched lengths for the"):
        dp_canvas.rasterize_with_datashader(vector_datapipe=dp_vector)
153 |
154 |
def test_datashader_rasterize_vector_geometrycollection(canvas, geodataframe):
    """
    Check that DatashaderRasterizer raises a NotImplementedError for an
    unsupported vector type such as GeometryCollection.
    """
    gpd = pytest.importorskip("geopandas")

    # Combine the point, line and polygon rows into one GeometryCollection
    geocollection = gpd.GeoSeries(data=geodataframe.unary_union).set_crs(
        crs="OGC:CRS84"
    )

    dp_canvas = IterableWrapper(iterable=[canvas])
    dp_datashader = dp_canvas.rasterize_with_datashader(
        vector_datapipe=IterableWrapper(iterable=[geocollection])
    )

    assert len(dp_datashader) == 1
    with pytest.raises(NotImplementedError, match="Unsupported geometry type"):
        next(iter(dp_datashader))
174 |
175 |
def test_datashader_rasterize_invalid_vector(canvas, geodataframe):
    """
    Check that DatashaderRasterizer raises a ValueError when given an
    invalid geopandas.GeoDataFrame holding no geometry.

    Regression test for https://github.com/weiji14/zen3geo/pull/104.
    """
    # Slicing past the last row yields a GeoDataFrame with zero rows
    empty_gdf = geodataframe.loc[5:].set_crs(crs="OGC:CRS84")

    dp_canvas = IterableWrapper(iterable=[canvas])
    dp_datashader = dp_canvas.rasterize_with_datashader(
        vector_datapipe=IterableWrapper(iterable=[empty_gdf])
    )

    assert len(dp_datashader) == 1
    with pytest.raises(ValueError, match="Cannot infer spatialpandas geometry type"):
        next(iter(dp_datashader))
195 |
--------------------------------------------------------------------------------
/zen3geo/tests/test_datapipes_geopandas.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for geopandas datapipes.
3 | """
4 | import numpy as np
5 | import pytest
6 | import xarray as xr
7 | from torchdata.datapipes.iter import IterableWrapper
8 |
9 | from zen3geo.datapipes import GeoPandasRectangleClipper
10 |
11 | gpd = pytest.importorskip("geopandas")
12 | shapely = pytest.importorskip("shapely")
13 |
14 | # %%
@pytest.fixture(scope="module", name="geodataframe")
def fixture_geodataframe():
    """
    A geopandas.GeoDataFrame with two rectangular shapely.geometry boxes
    for the tests to use, georeferenced to OGC:CRS84.
    """
    boxes: list = [
        shapely.geometry.box(minx=0.0, miny=0.0, maxx=2.0, maxy=2.0),
        shapely.geometry.box(minx=2.0, miny=2.0, maxx=4.0, maxy=4.0),
    ]
    return gpd.GeoDataFrame(data={"geometry": boxes}).set_crs(crs="OGC:CRS84")
29 |
30 |
@pytest.fixture(scope="function", name="dataset")
def fixture_dataset():
    """
    A sample xarray.Dataset of ones on a 1x5x7 (band, y, x) grid,
    georeferenced to OGC:CRS84.
    """
    dataarray = xr.DataArray(
        data=np.ones(shape=(1, 5, 7)),
        coords={
            "band": [0],
            "y": np.linspace(start=4.0, stop=0.0, num=5),
            "x": np.linspace(start=-1.0, stop=5, num=7),
        },
        dims=("band", "y", "x"),
        name="foo",
    )
    return dataarray.to_dataset().rio.write_crs(input_crs="OGC:CRS84")
50 |
51 |
52 | # %%
def test_geopandas_rectangle_clipper_geoseries_dataset(geodataframe, dataset):
    """
    Check that GeoPandasRectangleClipper clips a geopandas.GeoSeries vector
    with xarray.Dataset rasters, yielding tuples of a spatially subsetted
    geopandas.GeoSeries and an xarray.Dataset raster mask.
    """
    dp_vector = IterableWrapper(iterable=[geodataframe.geometry])
    rasters = [
        dataset.rio.clip_box(minx=-1, miny=0, maxx=1, maxy=1),
        dataset.rio.clip_box(minx=3, miny=3, maxx=5, maxy=4),
    ]
    dp_raster = IterableWrapper(iterable=rasters)

    # Class constructor style
    dp_clipped = GeoPandasRectangleClipper(
        source_datapipe=dp_vector, mask_datapipe=dp_raster
    )
    # Functional style (recommended)
    dp_clipped = dp_vector.clip_vector_with_rectangle(mask_datapipe=dp_raster)

    assert len(dp_clipped) == 2
    iterator = iter(dp_clipped)

    # First output pair covers the lower-left raster chip
    clipped_geoseries, raster_chip = next(iterator)
    assert clipped_geoseries.crs == "OGC:CRS84"
    assert all(clipped_geoseries.geom_type == "Polygon")
    assert clipped_geoseries.shape == (1,)
    assert clipped_geoseries[0].bounds == (0.0, 0.0, 1.5, 1.5)
    assert raster_chip.dims == {"band": 1, "y": 2, "x": 3}
    assert raster_chip.rio.bounds() == (-1.5, -0.5, 1.5, 1.5)

    # Second output pair covers the upper-right raster chip
    clipped_geoseries, raster_chip = next(iterator)
    assert clipped_geoseries.shape == (1,)
    assert clipped_geoseries[1].bounds == (2.5, 2.5, 4.0, 4.0)
    assert raster_chip.dims == {"band": 1, "y": 2, "x": 3}
    assert raster_chip.rio.bounds() == (2.5, 2.5, 5.5, 4.5)
    assert raster_chip.rio.crs == "OGC:CRS84"
91 |
92 |
def test_geopandas_rectangle_clipper_different_crs(geodataframe, dataset):
    """
    Check that GeoPandasRectangleClipper clips a geopandas.GeoSeries vector
    with xarray.Dataset rasters that are in different coordinate reference
    systems, yielding tuples where the spatially subsetted
    geopandas.GeoSeries and the xarray.Dataset raster mask share the same
    coordinate reference system.
    """
    dp_vector = IterableWrapper(iterable=[geodataframe.geometry])

    # Two raster chips, each reprojected to a different projected CRS
    raster_3857 = dataset.rio.clip_box(minx=-1, miny=0, maxx=1, maxy=1).rio.reproject(
        "EPSG:3857"
    )
    raster_32631 = dataset.rio.clip_box(minx=3, miny=3, maxx=5, maxy=4).rio.reproject(
        "EPSG:32631"
    )
    dp_raster = IterableWrapper(iterable=[raster_3857, raster_32631])

    # Class constructor style
    dp_clipped = GeoPandasRectangleClipper(
        source_datapipe=dp_vector, mask_datapipe=dp_raster
    )
    # Functional style (recommended)
    dp_clipped = dp_vector.clip_vector_with_rectangle(mask_datapipe=dp_raster)

    assert len(dp_clipped) == 2
    iterator = iter(dp_clipped)

    # First pair should come out in Web Mercator (EPSG:3857)
    clipped_geoseries, raster_chip = next(iterator)
    assert clipped_geoseries.crs == "EPSG:3857"
    assert all(clipped_geoseries.geom_type == "Polygon")
    assert clipped_geoseries.shape == (1,)
    assert clipped_geoseries[0].bounds == (
        0.0,
        0.0,
        166988.3675623712,
        166998.31375292226,
    )
    assert raster_chip.dims == {"band": 1, "y": 2, "x": 3}
    assert raster_chip.rio.bounds() == (
        -166979.23618991036,
        -55646.75541526544,
        166988.3675623712,
        166998.31375292226,
    )
    assert raster_chip.rio.crs == "EPSG:3857"

    # Second pair should come out in UTM zone 31N (EPSG:32631)
    clipped_geoseries, raster_chip = next(iterator)
    assert clipped_geoseries.crs == "EPSG:32631"
    assert clipped_geoseries.shape == (1,)
    assert clipped_geoseries[1].bounds == (
        444414.4114896285,
        276009.81064532325,
        611163.137304327,
        442194.9725083875,
    )
    assert raster_chip.dims == {"band": 1, "y": 2, "x": 3}
    assert raster_chip.rio.bounds() == (
        444414.4114896285,
        276009.81064532325,
        777205.5384580799,
        497870.56195762416,
    )
    assert raster_chip.rio.crs == "EPSG:32631"
157 |
158 |
def test_geopandas_rectangle_clipper_incorrect_length(geodataframe, dataset):
    """
    Check that GeoPandasRectangleClipper raises a NotImplementedError when
    the vector datapipe has a length other than 1.
    """
    dp_vector = IterableWrapper(iterable=[geodataframe] * 2)
    dp_raster = IterableWrapper(iterable=[dataset] * 3)

    with pytest.raises(NotImplementedError, match="The vector datapipe's length can"):
        dp_vector.clip_vector_with_rectangle(mask_datapipe=dp_raster)
169 |
--------------------------------------------------------------------------------
/zen3geo/tests/test_datapipes_pyogrio.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for pyogrio datapipes.
3 | """
4 | import pytest
5 | from torchdata.datapipes.iter import IterableWrapper
6 |
7 | from zen3geo.datapipes import PyogrioReader
8 |
9 | pyogrio = pytest.importorskip("pyogrio")
10 |
11 | # %%
def test_pyogrio_reader():
    """
    Ensure that PyogrioReader works to read in a GeoPackage file and outputs a
    geopandas.GeoDataFrame object.
    """
    file_url: str = "https://github.com/geopandas/pyogrio/raw/v0.4.0/pyogrio/tests/fixtures/test_gpkg_nulls.gpkg"
    dp = IterableWrapper(iterable=[file_url])

    # Using class constructors
    dp_pyogrio = PyogrioReader(source_datapipe=dp)
    # Using functional form (recommended)
    dp_pyogrio = dp.read_from_pyogrio()

    assert len(dp_pyogrio) == 1
    it = iter(dp_pyogrio)
    geodataframe = next(it)

    assert geodataframe.shape == (4, 12)
    # Bugfix: `any(geodataframe.isna())` iterated over the DataFrame's column
    # labels (non-empty strings, always truthy), so it never actually checked
    # for null values. Reduce over both axes to assert at least one null cell,
    # which the fixture file (test_gpkg_nulls.gpkg) is expected to contain.
    assert geodataframe.isna().any().any()
    assert all(geodataframe.geom_type == "Point")
32 |
--------------------------------------------------------------------------------
/zen3geo/tests/test_datapipes_pystac.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for pystac datapipes.
3 | """
4 | import pytest
5 | from torchdata.datapipes.iter import IterableWrapper
6 |
7 | from zen3geo.datapipes import PySTACItemReader
8 |
9 | pystac = pytest.importorskip("pystac")
10 |
11 | # %%
def test_pystac_item_reader():
    """
    Check that PySTACItemReader reads a JSON STAC item and outputs a
    pystac.Item object.
    """
    item_url: str = "https://github.com/stac-utils/pystac/raw/v1.6.1/tests/data-files/item/sample-item.json"
    dp = IterableWrapper(iterable=[item_url])

    # Class constructor style
    dp_pystac = PySTACItemReader(source_datapipe=dp)
    # Functional style (recommended)
    dp_pystac = dp.read_to_pystac_item()

    assert len(dp_pystac) == 1
    stac_item = next(iter(dp_pystac))

    expected_properties = {
        "datetime": "2016-05-03T13:22:30.040000Z",
        "title": "A CS3 item",
        "license": "PDDL-1.0",
        "providers": [
            {
                "name": "CoolSat",
                "roles": ["producer", "licensor"],
                "url": "https://cool-sat.com/",
            }
        ],
    }
    assert stac_item.bbox == [-122.59750209, 37.48803556, -122.2880486, 37.613537207]
    assert stac_item.datetime.isoformat() == "2016-05-03T13:22:30.040000+00:00"
    assert stac_item.geometry["type"] == "Polygon"
    assert stac_item.properties == expected_properties
    assert (
        stac_item.assets["analytic"].extra_fields["product"]
        == "http://cool-sat.com/catalog/products/analytic.json"
    )
48 |
--------------------------------------------------------------------------------
/zen3geo/tests/test_datapipes_pystac_client.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for pystac-client datapipes.
3 | """
4 | import pytest
5 | from torchdata.datapipes.iter import IterableWrapper
6 |
7 | from zen3geo.datapipes import PySTACAPIItemLister, PySTACAPISearcher
8 |
9 | pystac_client = pytest.importorskip("pystac_client")
10 |
11 |
12 | # %%
def test_pystac_client_item_search():
    """
    Check that PySTACAPISearcher queries a STAC API /search/ endpoint and
    outputs a pystac_client.ItemSearch object.
    """
    catalog_url = "https://explorer.sandbox.dea.ga.gov.au/stac/"
    query: dict = dict(
        bbox=[150.9, -34.36, 151.3, -33.46],
        datetime=["2000-01-01T00:00:00Z", "2020-12-31T00:00:00Z"],
        collections=["nidem"],
    )
    dp = IterableWrapper(iterable=[query])

    # Class constructor style
    dp_pystac_client = PySTACAPISearcher(source_datapipe=dp, catalog_url=catalog_url)
    # Functional style (recommended)
    dp_pystac_client = dp.search_for_pystac_item(catalog_url=catalog_url)

    assert len(dp_pystac_client) == 1
    stac_item_search = next(iter(dp_pystac_client))
    assert stac_item_search.client.title == "AWS Explorer"
    assert stac_item_search.matched() == 2

    # Inspect the first of the matched STAC items
    stac_item = list(stac_item_search.items())[0]

    assert stac_item.bbox == [
        149.965907628116,
        -35.199398016548116,
        152.1053101683708,
        -32.97280658665687,
    ]
    assert stac_item.datetime.isoformat() == "2001-07-02T00:00:00+00:00"
    assert stac_item.geometry["type"] == "Polygon"
    assert stac_item.properties == {
        "title": "NIDEM_104_151.29_-34.22",
        "created": "2018-10-15T10:00:00Z",
        "proj:epsg": 4326,
        "datetime": "2001-07-02T00:00:00Z",
        "cubedash:region_code": None,
    }
    assert stac_item.assets["nidem"].extra_fields["eo:bands"] == [{"name": "nidem"}]
59 |
60 |
def test_pystac_client_item_search_open_headers():
    """
    Check that PySTACAPISearcher queries a STAC API /search/ endpoint while
    passing custom headers through to pystac_client.Client.open.
    """
    catalog_url = "https://cmr.earthdata.nasa.gov/cloudstac/LPCLOUD"
    headers = {"Authorization": "Bearer "}
    query: dict = dict(
        bbox=[150.9, -34.36, 151.3, -33.46],
        datetime=["2020-01-01T00:00:00Z", "2022-12-31T00:00:00Z"],
        collections=["HLSS30.v2.0"],
    )
    dp = IterableWrapper(iterable=[query])

    # Class constructor style
    dp_pystac_client = PySTACAPISearcher(
        source_datapipe=dp, catalog_url=catalog_url, headers=headers
    )
    # Functional style (recommended)
    dp_pystac_client = dp.search_for_pystac_item(
        catalog_url=catalog_url, headers=headers
    )

    assert len(dp_pystac_client) == 1
    stac_item_search = next(iter(dp_pystac_client))
    assert stac_item_search.client.title == "LPCLOUD"
    assert stac_item_search.client.description == "Root catalog for LPCLOUD"
90 |
91 |
def test_pystac_client_item_lister():
    """
    Check that PySTACAPIItemLister yields a pystac.Item instance for each
    result matching the search parameters of a pystac_client.ItemSearch
    query.
    """
    catalog = pystac_client.Client.open(
        url="https://earth-search.aws.element84.com/v1/"
    )
    search = catalog.search(
        bbox=[134.2, 6.9, 134.8, 8.5],
        datetime=["2023-01-01T00:00:00Z", "2023-01-31T00:00:00Z"],
        collections=["sentinel-2-l1c"],
    )
    dp = IterableWrapper(iterable=[search])

    # Class constructor style
    dp_pystac_item_list = PySTACAPIItemLister(source_datapipe=dp)
    # Functional style (recommended)
    dp_pystac_item_list = dp.list_pystac_items_by_search()

    assert len(dp_pystac_item_list) == 14
    stac_item = next(iter(dp_pystac_item_list))

    assert stac_item.bbox == [
        134.093840347073,
        6.2442879900058115,
        135.08840137750929,
        7.237809826458827,
    ]
    assert stac_item.datetime.isoformat() == "2023-01-29T01:35:24.640000+00:00"
    assert stac_item.geometry["type"] == "Polygon"
    assert stac_item.properties == {
        "created": "2023-01-29T06:01:33.679Z",
        "platform": "sentinel-2b",
        "constellation": "sentinel-2",
        "instruments": ["msi"],
        "eo:cloud_cover": 92.7676417582305,
        "proj:epsg": 32653,
        "mgrs:utm_zone": 53,
        "mgrs:latitude_band": "N",
        "mgrs:grid_square": "MH",
        "grid:code": "MGRS-53NMH",
        "view:sun_azimuth": 135.719785438016,
        "view:sun_elevation": 55.1713941690268,
        "s2:degraded_msi_data_percentage": 0.2816,
        "s2:product_type": "S2MSI1C",
        "s2:processing_baseline": "05.09",
        "s2:product_uri": "S2B_MSIL1C_20230129T013449_N0509_R031_T53NMH_20230129T025811.SAFE",
        "s2:generation_time": "2023-01-29T02:58:11.000000Z",
        "s2:datatake_id": "GS2B_20230129T013449_030802_N05.09",
        "s2:datatake_type": "INS-NOBS",
        "s2:datastrip_id": "S2B_OPER_MSI_L1C_DS_2BPS_20230129T025811_S20230129T013450_N05.09",
        "s2:granule_id": "S2B_OPER_MSI_L1C_TL_2BPS_20230129T025811_A030802_T53NMH_N05.09",
        "s2:reflectance_conversion_factor": 1.03193080888673,
        "datetime": "2023-01-29T01:35:24.640000Z",
        "s2:sequence": "0",
        "earthsearch:s3_path": "s3://earthsearch-data/sentinel-2-l1c/53/N/MH/2023/1/S2B_53NMH_20230129_0_L1C",
        "earthsearch:payload_id": "roda-sentinel2/workflow-sentinel2-to-stac/15626e44fb54c2182e5ed5d3aec4a209",
        "processing:software": {"sentinel2-to-stac": "0.1.0"},
        "updated": "2023-01-29T06:01:33.679Z",
    }
    assert stac_item.assets["visual"].extra_fields["eo:bands"] == [
        {
            "name": "red",
            "common_name": "red",
            "description": "Red (band 4)",
            "center_wavelength": 0.665,
            "full_width_half_max": 0.038,
        },
        {
            "name": "green",
            "common_name": "green",
            "description": "Green (band 3)",
            "center_wavelength": 0.56,
            "full_width_half_max": 0.045,
        },
        {
            "name": "blue",
            "common_name": "blue",
            "description": "Blue (band 2)",
            "center_wavelength": 0.49,
            "full_width_half_max": 0.098,
        },
    ]
177 |
--------------------------------------------------------------------------------
/zen3geo/tests/test_datapipes_rioxarray.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for rioxarray datapipes.
3 | """
4 | from torchdata.datapipes.iter import IterableWrapper
5 |
6 | from zen3geo.datapipes import RioXarrayReader
7 |
8 |
9 | # %%
def test_rioxarray_reader():
    """
    Check that RioXarrayReader reads a GeoTIFF file and outputs an
    xarray.DataArray object.
    """
    file_url: str = "https://github.com/GenericMappingTools/gmtserver-admin/raw/master/cache/earth_day_HD.tif"
    dp = IterableWrapper(iterable=[file_url])

    # Class constructor style
    dp_rioxarray = RioXarrayReader(source_datapipe=dp)
    # Functional style (recommended)
    dp_rioxarray = dp.read_from_rioxarray()

    assert len(dp_rioxarray) == 1
    dataarray = next(iter(dp_rioxarray))

    assert dataarray.shape == (1, 960, 1920)
    assert dataarray.dims == ("band", "y", "x")
29 |
--------------------------------------------------------------------------------
/zen3geo/tests/test_datapipes_stackstac.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for stackstac datapipes.
3 | """
4 | import numpy as np
5 | import pytest
6 | import xarray as xr
7 | from torchdata.datapipes.iter import IterableWrapper
8 |
9 | from zen3geo.datapipes import StackSTACStacker
10 |
11 | pystac = pytest.importorskip("pystac")
12 | stackstac = pytest.importorskip("stackstac")
13 |
14 | # %%
def test_stackstac_mosaicker():
    """
    Check that stackstac.mosaic (the function behind StackSTACMosaicker)
    flattens a 4D xarray.DataArray of tiles into a 3D xarray.DataArray.
    """
    tiles: xr.DataArray = xr.DataArray(
        data=np.ones(shape=(3, 1, 32, 32)), dims=["tile", "band", "y", "x"]
    )
    mosaicked = stackstac.mosaic(arr=tiles, dim="tile")
    # Overlapping tiles of ones should collapse to a single layer of ones
    assert mosaicked.sizes == {"band": 1, "y": 32, "x": 32}
    assert mosaicked.sum() == 1 * 32 * 32
26 |
27 |
def test_stackstac_stacker():
    """
    Check that StackSTACStacker stacks multiple bands of a STAC item into an
    xarray.DataArray object.
    """
    item_url: str = "https://github.com/stac-utils/pystac/raw/v1.6.1/tests/data-files/raster/raster-sentinel2-example.json"
    dp = IterableWrapper(iterable=[pystac.Item.from_file(href=item_url)])

    bands = ["B02", "B03", "B04"]
    # Class constructor style
    dp_stackstac = StackSTACStacker(source_datapipe=dp, assets=bands)
    # Functional style (recommended)
    dp_stackstac = dp.stack_stac_items(assets=bands)

    assert len(dp_stackstac) == 1
    dataarray = next(iter(dp_stackstac))

    assert dataarray.shape == (1, 3, 10980, 10980)
    assert dataarray.dims == ("time", "band", "y", "x")
    assert dataarray.rio.bounds() == (399955.0, 4090205.0, 509755.0, 4200005.0)
    assert dataarray.rio.resolution() == (10.0, -10.0)
    assert dataarray.rio.crs == "EPSG:32633"
51 |
--------------------------------------------------------------------------------
/zen3geo/tests/test_datapipes_xbatcher.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for xbatcher datapipes.
3 | """
4 | import numpy as np
5 | import pytest
6 | import xarray as xr
7 | from torchdata.datapipes.iter import IterableWrapper
8 |
9 | from zen3geo.datapipes import XbatcherSlicer
10 |
11 | xbatcher = pytest.importorskip("xbatcher")
12 |
13 |
14 | # %%
def test_xbatcher_slicer_dataarray():
    """
    Check that XbatcherSlicer slices an xarray.DataArray into smaller
    xarray.DataArray chips.
    """
    dataarray: xr.DataArray = xr.DataArray(
        data=np.ones(shape=(3, 128, 128)), dims=["band", "y", "x"]
    ).chunk({"band": 1})
    dp = IterableWrapper(iterable=[dataarray])

    # Class constructor style
    dp_xbatcher = XbatcherSlicer(source_datapipe=dp, input_dims={"y": 64, "x": 64})
    # Functional style (recommended)
    dp_xbatcher = dp.slice_with_xbatcher(input_dims={"y": 64, "x": 64})

    # A 128x128 grid sliced into 64x64 chips yields a 2x2 arrangement
    assert len(dp_xbatcher) == 4
    chip = next(iter(dp_xbatcher))

    assert chip.sizes == {"band": 3, "y": 64, "x": 64}
    assert chip.sum() == 3 * 64 * 64
37 |
38 |
def test_xbatcher_slicer_dataset():
    """
    Check that XbatcherSlicer slices an xarray.Dataset into smaller
    xarray.Dataset chips.
    """
    dataset: xr.Dataset = xr.Dataset(
        data_vars={"temperature": (["x", "y"], 15 * np.ones(shape=(32, 32)))},
        coords={
            "lon": (["x"], np.linspace(start=0, stop=32, num=32)),
            "lat": (["y"], np.linspace(start=64, stop=32, num=32)),
        },
    )
    dp = IterableWrapper(iterable=[dataset])

    # Class constructor style
    dp_xbatcher = XbatcherSlicer(source_datapipe=dp, input_dims={"y": 16, "x": 16})
    # Functional style (recommended)
    dp_xbatcher = dp.slice_with_xbatcher(input_dims={"y": 16, "x": 16})

    # A 32x32 grid sliced into 16x16 chips yields a 2x2 arrangement
    assert len(dp_xbatcher) == 4
    chip = next(iter(dp_xbatcher))

    assert chip.temperature.sizes == {"y": 16, "x": 16}
    assert chip.temperature.sum() == 15 * 16 * 16
65 |
--------------------------------------------------------------------------------
/zen3geo/tests/test_datapipes_xpystac.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for xpystac datapipes.
3 | """
4 | import pytest
5 | from torchdata.datapipes.iter import IterableWrapper
6 |
7 | from zen3geo.datapipes import XpySTACAssetReader
8 |
9 |
10 | # %%
def test_xpystac_asset_reader_cog():
    """
    Check that XpySTACAssetReader reads a pystac.Asset object stored as a
    Cloud-Optimized GeoTIFF and outputs an xarray.Dataset object.
    """
    pystac = pytest.importorskip("pystac")
    xpystac = pytest.importorskip("xpystac")

    item_url: str = "https://github.com/stac-utils/pystac/raw/v1.7.1/tests/data-files/raster/raster-sentinel2-example.json"
    asset: pystac.Asset = pystac.Item.from_file(href=item_url).assets["overview"]
    assert asset.media_type == pystac.MediaType.COG

    dp = IterableWrapper(iterable=[asset])

    # Class constructor style
    dp_xpystac = XpySTACAssetReader(source_datapipe=dp)
    # Functional style (recommended)
    dp_xpystac = dp.read_from_xpystac()

    assert len(dp_xpystac) == 1
    dataset = next(iter(dp_xpystac))

    assert dataset.sizes == {"band": 3, "x": 343, "y": 343}
    assert dataset.band_data.dtype == "float32"
    assert dataset.rio.bounds() == (399960.0, 4090240.0, 509720.0, 4200000.0)
    assert dataset.rio.resolution() == (320.0, -320.0)
    assert dataset.rio.crs == "EPSG:32633"
39 |
40 |
def test_xpystac_asset_reader_zarr():
    """
    Check that XpySTACAssetReader reads a pystac.Asset object stored as a
    Zarr file and outputs an xarray.Dataset object.
    """
    pystac = pytest.importorskip("pystac")
    xpystac = pytest.importorskip("xpystac")

    collection_url: str = "https://planetarycomputer.microsoft.com/api/stac/v1/collections/daymet-daily-hi"
    collection = pystac.Collection.from_file(href=collection_url)
    asset: pystac.Asset = collection.assets["zarr-https"]
    assert asset.media_type == "application/vnd+zarr"

    dp = IterableWrapper(iterable=[asset])

    # Class constructor style
    dp_xpystac = XpySTACAssetReader(source_datapipe=dp)
    # Functional style (recommended)
    dp_xpystac = dp.read_from_xpystac()

    assert len(dp_xpystac) == 1
    dataset = next(iter(dp_xpystac))

    assert dataset.sizes == {"time": 14965, "y": 584, "x": 284, "nv": 2}
    assert dataset.prcp.dtype == "float32"
    assert dataset.rio.bounds() == (-5802750.0, -622500.0, -5518750.0, -38500.0)
    assert dataset.rio.resolution() == (1000.0, -1000.0)
    assert dataset.rio.grid_mapping == "lambert_conformal_conic"
71 |
72 |
def test_xpystac_asset_reader_geotiff_without_xpystac():
    """
    Check that XpySTACAssetReader reads a GeoTIFF file and outputs an
    xarray.Dataset object, even when xpystac is not installed.

    Note that `engine="rasterio"` has been removed in xarray v2023.04.0, see
    https://github.com/pydata/xarray/pull/7671. So, this test will need to be
    updated once we change to require an xarray version greater than
    2023.04.0. This test only exists to check an alternative to
    `engine="stac"` that did not require installing extra required
    dependencies like `netcdf4` or `h5netcdf`.
    """
    tif_url: str = "https://github.com/corteva/rioxarray/raw/0.14.1/test/test_data/input/cint16.tif"

    dp = IterableWrapper(iterable=[tif_url])

    # Class constructor style
    dp_xpystac = XpySTACAssetReader(source_datapipe=dp, engine="rasterio")
    # Functional style (recommended)
    dp_xpystac = dp.read_from_xpystac(engine="rasterio")

    assert len(dp_xpystac) == 1
    dataset = next(iter(dp_xpystac))

    assert dataset.sizes == {"band": 1, "x": 100, "y": 100}
    assert dataset.band_data.dtype == "complex64"
    assert dataset.rio.bounds() == (0.0, 100.0, 100.0, 0.0)
    assert dataset.rio.resolution() == (1.0, 1.0)
    assert dataset.rio.crs == "EPSG:4326"
103 |
--------------------------------------------------------------------------------
/zen3geo/tests/test_zen3geo.py:
--------------------------------------------------------------------------------
1 | from packaging.version import Version
2 |
3 | from zen3geo import __version__
4 |
5 |
def test_version():
    """The package exposes a parseable __version__ of at least 0.0.0."""
    minimum = Version(version="0.0.0")
    assert Version(version=__version__) >= minimum
8 |
--------------------------------------------------------------------------------