├── .github └── workflows │ ├── deploy_mkdocs.yml │ └── tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── LICENSE ├── README.md ├── docs ├── mkdocs.yml └── src │ ├── chipper.md │ ├── index.md │ ├── indexer.md │ ├── naip-tutorial.md │ └── processors.md ├── pyproject.toml ├── stacchip ├── __init__.py ├── chipper.py ├── indexer.py ├── processors │ ├── landsat_processor.py │ ├── linz_processor.py │ ├── modis_processor.py │ ├── naip_processor.py │ ├── prechip.py │ ├── sentinel_1_processor.py │ ├── sentinel_2_processor.py │ └── stats.py └── utils.py └── tests ├── data ├── landsat-c2l2-sr-LC09_L2SR_086107_20240311_20240312_02_T2_SR.json ├── naip_m_4207009_ne_19_060_20211024.json ├── sentinel-2-l2a-S2A_T20HNJ_20240311T140636_L2A.json └── stacchip_test_item.json ├── test_chipper.py └── test_indexer.py /.github/workflows/deploy_mkdocs.yml: -------------------------------------------------------------------------------- 1 | name: Publish docs via GitHub Pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | build: 10 | name: Deploy docs 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout main 14 | uses: actions/checkout@v2 15 | 16 | - name: Set up Python 3.11 17 | uses: actions/setup-python@v2 18 | with: 19 | python-version: 3.11.9 20 | 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | python -m pip install .[docs] 25 | 26 | - name: update API docs 27 | run: | 28 | pdocs as_markdown \ 29 | --output_dir docs/src/api \ 30 | --exclude_source \ 31 | --overwrite \ 32 | stacchip.chipper \ 33 | stacchip.indexer 34 | 35 | - name: Deploy docs 36 | run: mkdocs gh-deploy --force -f docs/mkdocs.yml 37 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: [push] 4 | 5 | jobs: 6 | 7 | tests: 8 | runs-on: ubuntu-latest 9 | strategy: 10 
| matrix: 11 | python-version: ["3.11"] 12 | 13 | steps: 14 | - uses: actions/checkout@v3 15 | - name: Set up Python ${{ matrix.python-version }} 16 | uses: actions/setup-python@v4 17 | with: 18 | python-version: ${{ matrix.python-version }} 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | python -m pip install .[dev] 23 | - name: Lint with ruff 24 | run: | 25 | ruff check . 26 | - name: Test with pytest 27 | run: | 28 | pytest 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | .vscode 163 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/abravalheri/validate-pyproject 3 | rev: v0.12.1 4 | hooks: 5 | - id: validate-pyproject 6 | 7 | - repo: https://github.com/psf/black 8 | rev: 22.12.0 9 | hooks: 10 | - id: black 11 | language_version: python 12 | 13 | - repo: https://github.com/PyCQA/isort 14 | rev: 5.12.0 15 | hooks: 16 | - id: isort 17 | language_version: python 18 | 19 | - repo: https://github.com/charliermarsh/ruff-pre-commit 20 | rev: v0.0.238 21 | hooks: 22 | - id: ruff 23 | args: ["--fix"] 24 | 25 | - repo: https://github.com/pre-commit/mirrors-mypy 26 | rev: v1.3.0 27 | hooks: 28 | - id: mypy 29 | language_version: python 30 | exclude: tests/.* 31 | additional_dependencies: 32 | - types-python-dateutil 33 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Stacchip change log 2 | 3 | ## 0.1.34 4 | 5 | - Add option to manually specify indexer shape. Some STAC items 6 | may not have the property `proj:shape` specified. 7 | 8 | ## 0.1.33 9 | 10 | - Breaking change: `get_chip_bbox` returns shapely polygon instead of wkt 11 | 12 | ## 0.1.32 13 | 14 | - Breacking change: chip iterator returns chip index values, not only image data. 15 | 16 | ## 0.1.31 17 | 18 | - Breaking change: simplify chipper class. Indexer has to be instantiated by the user. -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # stacchip 2 | 3 | Dynamically create image chips for earth observation machine learning 4 | applications using a custom chip index based on STAC items. 5 | 6 | Get a STAC item, index its contents, and create chips dynamically 7 | like so 8 | 9 | ```python 10 | # Get item from an existing STAC catalog 11 | item = stac.search(...) 12 | 13 | # Index all chips that could be derived from the STAC item 14 | index = Indexer(item).create_index() 15 | 16 | # Use the index to get RGB array for a specific chip 17 | chip = Chipper(index).chip(x=23, y=42) 18 | ``` 19 | 20 | ## Installation 21 | 22 | Stacchip is available on pypi 23 | 24 | ```bash 25 | pip install stacchip 26 | ``` 27 | 28 | ## Motivation 29 | 30 | Remote sensing imagery is typically distributed in large files (scenes) 31 | that typically have the order of 10 thousand of pixels in both the x and y 32 | directions. 
This is true for systems like Landsat, Sentinel 1 and 2, and 33 | aerial imagery such as NAIP. 34 | 35 | Machine learning models operate on much smaller image sizes. Many use 36 | 256x256 pixels, and the largest inputs are in the range of 1000 pixels. 37 | 38 | This poses a challenge to modelers, as they have to cut the larger scenes 39 | into pieces before passing them to their models. The smaller image snippets 40 | are typically referred to as "chips". A term we will use throughout this 41 | documentation. 42 | 43 | Creating imagery chips tends to be a tedious and slow process, and it is 44 | specific for each model. Models will have different requirements on image 45 | sizes, datatypes, and the spectral bands to include. A set of chips that 46 | works for one model might be useless for the next. 47 | 48 | Systemizing how chips are tracked, and making the chip creation more dynamic 49 | is a way to work around these difficulties. This is the goal fo stacchip. It 50 | presents an approach that leverages cloud optimized technology to make chipping 51 | simpler, faster, and less static. 52 | 53 | ## Overview 54 | 55 | Stacchip relies on three cloud oriented technologies. Cloud Optimized Geotiffs 56 | (COG), Spatio Temporal Asset Catalogs (STAC), and GeoParquet. Instead of pre-creating millions of files of a fixed size, chips are indexed first in tables, and then created dynamically from the index files when needed. The imagery data itsel is kept in its original format and referenced in STAC items. 57 | 58 | Creating chips with stacchip is composed of two steps: 59 | 60 | 1. Create a stacchip index from a set of STAC 61 | 2. Dynamically create pixel arrays for any chip in the stacchip index 62 | 63 | Indexes can be created separately for different imagery sources, and combined 64 | into larger indexes when needed. 
This makes mixing different imagery sources 65 | simple, and allows for flexibility during the modeling process, as imagery sources 66 | can be added and removed by only updating the combined index. 67 | 68 | The mechanism is purposefully kept as generic as possible. The index creation 69 | is done based on a STAC item alone, no other input is needed. Obtaining image 70 | data for a chip that is registered in a stacchip index only requires a few 71 | lines of code. 72 | 73 | For more information, please consult the [documentation](https://clay-foundation.github.io/stacchip/) 74 | 75 | 76 | ## Build and release 77 | 78 | The following steps to release the latest version 79 | 80 | ```bash 81 | tag=0.1.34 82 | hatch version $tag 83 | git commit -am "Bump version number" 84 | git push 85 | git tag $tag 86 | git push origin $tag 87 | rm -rf dist 88 | python -m build 89 | python3 -m twine upload --repository testpypi dist/* 90 | python3 -m twine upload --repository pypi dist/* 91 | ``` 92 | -------------------------------------------------------------------------------- /docs/mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Stacchip 2 | site_description: Dynamically create image chips for earth observation machine learning applications using a custom chip index based on STAC items. 
3 | 4 | docs_dir: 'src' 5 | site_dir: 'build' 6 | 7 | repo_name: "clay-foundation/stacchip" 8 | repo_url: "https://github.com/clay-foundation/stacchip" 9 | edit_uri: "blob/main/docs/src/" 10 | site_url: "https://clay-foundation.github.io/stacchip/" 11 | 12 | nav: 13 | - Stacchip: "index.md" 14 | - Indexer: "indexer.md" 15 | - Chipper: "chipper.md" 16 | - Processors: "processors.md" 17 | - Tutorial: "naip-tutorial.md" 18 | - API: 19 | - "api/stacchip/chipper.md" 20 | - "api/stacchip/indexer.md" 21 | 22 | plugins: 23 | - search 24 | - mkdocs-jupyter: 25 | include_source: True 26 | 27 | theme: 28 | name: material 29 | palette: 30 | primary: indigo 31 | scheme: default 32 | 33 | markdown_extensions: 34 | - admonition 35 | - attr_list 36 | - codehilite: 37 | guess_lang: false 38 | - def_list 39 | - footnotes 40 | - pymdownx.arithmatex 41 | - pymdownx.betterem 42 | - pymdownx.caret: 43 | insert: false 44 | - pymdownx.details 45 | - pymdownx.emoji 46 | - pymdownx.escapeall: 47 | hardbreak: true 48 | nbsp: true 49 | - pymdownx.magiclink: 50 | hide_protocol: true 51 | repo_url_shortener: true 52 | - pymdownx.smartsymbols 53 | - pymdownx.superfences 54 | - pymdownx.tasklist: 55 | custom_checkbox: true 56 | - pymdownx.tilde 57 | - toc: 58 | permalink: true 59 | -------------------------------------------------------------------------------- /docs/src/chipper.md: -------------------------------------------------------------------------------- 1 | The [Chipper](https://github.com/Clay-foundation/stacchip/blob/main/stacchip/chipper.py) class can be used to create chips based on 2 | an existing stacchip index. 3 | 4 | The chipper class takes as input an Indexer class object. The indexer class can be instantiated using 5 | the `load_indexer_s3` and `load_indexer_local` utils functions for indexes that have been 6 | previously created using stacchip processors. 7 | 8 | For local stacchip indexes, the mountpath can be passed. 
Asset links in the STAC items are then patched 9 | with the local mountpath. 10 | 11 | The chipper also has an `asset_blacklist` argument that allows skipping assets 12 | from the chip retrieval process. This can be used to exclude unnecessary assets 13 | and through that increase loading speed. 14 | 15 | The following code snippet gives an example using a local path. 16 | 17 | ```python 18 | import geoarrow.pyarrow.dataset as gads 19 | 20 | from stacchip.chipper import Chipper 21 | from stacchip.utils import load_indexer_s3 22 | 23 | # Load a stacchip index table 24 | dataset = gads.dataset("/path/to/parquet/index", format="parquet") 25 | table = dataset.to_table() 26 | 27 | # Use util to load indexer using data from a 28 | # remote S3 bucket. 29 | indexer = load_indexer_s3( 30 | bucket="clay-v1-data", 31 | platform=table.column("platform")[row], 32 | item_id = table.column("item")[row], 33 | ) 34 | 35 | # Instantiate chipper 36 | chipper = Chipper(indexer) 37 | 38 | # Get data for a single chip as registered 39 | # in row 42 of the index. 40 | row = 42 41 | chip_index_x = table.column("chip_index_x")[row].as_py() 42 | chip_index_y = table.column("chip_index_y")[row].as_py() 43 | data = chipper.chip(chip_index_x, chip_index_y) 44 | ``` -------------------------------------------------------------------------------- /docs/src/index.md: -------------------------------------------------------------------------------- 1 | # stacchip 2 | 3 | Dynamically create image chips for eath observation machine learning 4 | applications using a custom chip index based on STAC items. 5 | 6 | Get a STAC item, index its contents, and create chips dynamically 7 | like so 8 | 9 | ```python 10 | # Get item from an existing STAC catalog 11 | item = stac.search(...) 
12 | 13 | # Index all chips that could be derived from the STAC item 14 | index = Indexer(item).create_index() 15 | 16 | # Use the index to get RGB array for a specific chip 17 | chip = Chipper(index).chip(x=23, y=42) 18 | ``` 19 | 20 | ## Installation 21 | 22 | Stacchip is available on pypi 23 | 24 | ```bash 25 | pip install stacchip 26 | ``` 27 | 28 | ## Overview 29 | 30 | Stacchip relies on three cloud oriented technologies. Cloud Optimized Geotiffs 31 | (COG), Spatio Temporal Asset Catalogs (STAC), and GeoParquet. Instead of pre-creating millions of files of a fixed size, chips are indexed first in tables, and then created dynamically from the index files when needed. The imagery data itsel is kept in its original format and referenced in STAC items. 32 | 33 | Creating chips with stacchip is composed of two steps: 34 | 35 | 1. Create a stacchip index from a set of STAC 36 | 2. Dynamically create pixel arrays for any chip in the stacchip index 37 | 38 | Indexes can be created separately for different imagery sources, and combined 39 | into larger indexes when needed. This makes mixing different imagery sources 40 | simple, and allows for flexibility during the modeling process, as imagery sources 41 | can be added and removed by only updating the combined index. 42 | 43 | The mechanism is purposefully kept as generic as possible. The index creation 44 | is done based on a STAC item alone, no other input is needed. Obtaining image 45 | data for a chip that is registered in a stacchip index only requires a few 46 | lines of code. 47 | 48 | ## Motivation 49 | 50 | Remote sensing imagery is typically distributed in large files (scenes) 51 | that typically have the order of 10 thousand of pixels in both the x and y 52 | directions. This is true for systems like Landsat, Sentinel 1 and 2, and 53 | aerial imagery such as NAIP. 54 | 55 | Machine learning models operate on much smaller image sizes. 
Many use 56 | 256x256 pixels, and the largest inputs are in the range of 1000 pixels. 57 | 58 | This poses a challenge to modelers, as they have to cut the larger scenes 59 | into pieces before passing them to their models. The smaller image snippets 60 | are typically referred to as "chips". A term we will use throughout this 61 | documentation. 62 | 63 | Creating imagery chips tends to be a tedious and slow process, and it is 64 | specific for each model. Models will have different requirements on image 65 | sizes, datatypes, and the spectral bands to include. A set of chips that 66 | works for one model might be useless for the next. 67 | 68 | Systemizing how chips are tracked, and making the chip creation more dynamic 69 | is a way to work around these difficulties. This is the goal fo stacchip. It 70 | presents an approach that leverages cloud optimized technology to make chipping 71 | simpler, faster, and less static. 72 | 73 | ## License 74 | 75 | This repository is released under an Apache 2.0 license. For more details see 76 | [LICENSE](https://github.com/clay-foundation/stacchip/blob/main/LICENSE) 77 | -------------------------------------------------------------------------------- /docs/src/indexer.md: -------------------------------------------------------------------------------- 1 | The [Indexer](https://github.com/Clay-foundation/stacchip/blob/main/stacchip/indexer.py) class is build to create a chip index for 2 | data registered in a a STAC item. The indexer will calculate the number of available 3 | chips in a STAC item given a chip size. The resulting chip index is stored as a geoparquet table. 
4 | 5 | The following example creates an index the Landsat-9 STAC item from the tests 6 | 7 | ```python 8 | from pystac import Item 9 | from stacchip.indexer import LandsatIndexer 10 | 11 | item = Item.from_file( 12 | "tests/data/landsat-c2l2-sr-LC09_L2SR_086107_20240311_20240312_02_T2_SR.json" 13 | ) 14 | indexer = LandsatIndexer(item) 15 | index = indexer.create_index() 16 | ``` 17 | 18 | ## Nodata and cloud coverage 19 | 20 | Earth observation data often comes in scenes that contain 21 | nodata pixels, and the imagery might contain clouds. Statistics on nodata and cloud cover is relevant information for model training. Typically a model is trained with limited amounts nodata and cloud pixels. 22 | 23 | The indexer therefore needs to be track these two variables so that the modeler can choose how much or how little nodata pixels and cloudy pixels should be passed to the model. However, how this information is stored varies for different image sources. 24 | 25 | The indexer class might need adaption for new data sources. In these cases, 26 | the base class has to be subclassed and the `get_stats` method overridden to produce the right statistics. 27 | 28 | The stacchip library has a generic indexer for sources that have neither nodata or cloudy pixels in them. It has one indexer that takes a nodata mask as input, but assumes that there are no cloudy pixels (useful for sentinel-1). It also contains specific indexers for Landsat and Sentinel-2. For more information consult the reference documentation. 29 | 30 | ## Merging indexes 31 | 32 | Stacchip indexes are geoparquet tables, and as such they can be merged quite 33 | easily in to a single table. The recommendation is to store each stacchip index 34 | for a single STAC item in a subfolder, then the files can be merged and the 35 | STAC item can be tracked using the folder structure using partitioning feature 36 | from pyarrow. 
37 | 38 | The following example assumes that each index file from a single STAC item is 39 | in a subfolder that is named after the STAC item id. 40 | 41 | ```python 42 | from pyarrow import dataset as ds 43 | 44 | part = ds.partitioning(field_names=["item_id"]) 45 | data = ds.dataset( 46 | "/path/to/stacchip/indices", 47 | format="parquet", 48 | partitioning=part, 49 | ) 50 | ds.write_dataset( 51 | data, 52 | "/path/to/combined-index", 53 | format="parquet", 54 | ) 55 | ``` -------------------------------------------------------------------------------- /docs/src/naip-tutorial.md: -------------------------------------------------------------------------------- 1 | The following code example shows how to obtain RGB+NIR chips from 2 | NAIP imagery and plot them. 3 | 4 | ```python 5 | import random 6 | 7 | import pystac_client 8 | from stacchip.indexer import NoStatsChipIndexer 9 | from stacchip.chipper import Chipper 10 | import os 11 | import matplotlib.pyplot as plt 12 | 13 | # Optimize GDAL settings for cloud optimized reading 14 | os.environ["GDAL_DISABLE_READDIR_ON_OPEN"] = "EMPTY_DIR" 15 | os.environ["AWS_REQUEST_PAYER"] = "requester" 16 | 17 | # Query STAC catalog for NAIP data 18 | catalog = pystac_client.Client.open("https://earth-search.aws.element84.com/v1") 19 | 20 | 21 | items = catalog.search( 22 | collections=["naip"], 23 | max_items=100, 24 | ) 25 | 26 | items = items.item_collection() 27 | 28 | items_list = list(items) 29 | random.shuffle(items_list) 30 | 31 | chips = [] 32 | for item in items_list[:10]: 33 | print(f"Working on {item}") 34 | 35 | # Index the chips in the item 36 | indexer = NoStatsChipIndexer(item) 37 | 38 | # Instanciate the chipper 39 | chipper = Chipper(indexer, assets=["image"]) 40 | 41 | # Get first chip for the "image" asset key 42 | for chip_id in random.sample(range(0, len(chipper)), 5): 43 | x_index, y_index, chip = chipper[chip_id] 44 | chips.append(chip["image"]) 45 | 46 | 47 | fig, axs = plt.subplots(5, 10, 
gridspec_kw={'wspace': 0.01, 'hspace': 0.01}, squeeze=True) 48 | 49 | for idx, ax in enumerate(axs.flatten()): 50 | chip = chips[idx] 51 | # Visualize the data 52 | ax.imshow(chip[:3].swapaxes(0, 1).swapaxes(1, 2)) 53 | 54 | plt.tight_layout() 55 | plt.show() 56 | ``` 57 | 58 | Resulting in the following plot 59 | 60 | ![naip-rgb](https://github.com/Clay-foundation/stacchip/assets/901647/86844530-9297-4971-b9e5-dd5c25b28b0e) 61 | -------------------------------------------------------------------------------- /docs/src/processors.md: -------------------------------------------------------------------------------- 1 | To use stacchip for an existing imagery archive, the indexes need to be 2 | created for each scene or STAC item. 3 | 4 | Stacchip comes with [processors](https://github.com/Clay-foundation/stacchip/blob/main/stacchip/processors/) that 5 | can be used to collect and index imagery from multiple data sources. 6 | This will be extended as the package grows. 7 | 8 | Each processor is registered as a command line utility so that it can be 9 | scaled easily. Note that these processors are created to work well with AWS Batch, but are not dependent on it and can be used otherwise too. 10 | 11 | ## Sentinel-2 12 | 13 | The [`stacchip-sentinel-2`](https://github.com/Clay-foundation/stacchip/blob/main/stacchip/processors/sentinel_2_processor.py) 14 | processor CLI command processes Sentinel-2 data. It will process MGRS 15 | tiles from a list of tiles from a layer that can be opened by geopandas. 16 | 17 | Each MGRS tile will be processed by the row index in the source file. 18 | 19 | For each tile it will process the least cloudy image in each quartal 20 | from two random years between 2018 and 2023. 21 | 22 | The script uses environment variables to determine all inputs: 23 | 24 | 1. The index of the MGRS tile to be processed from the source file 25 | 2. The source file for the MGRS tile sample 26 | 3.
A target bucket for writing the assets, stac items, and stacchip index. 27 | 28 | An example set of environment variables to run this script is: 29 | 30 | ```bash 31 | export AWS_BATCH_JOB_ARRAY_INDEX=0 32 | export STACCHIP_MGRS_SOURCE=https://clay-mgrs-samples.s3.amazonaws.com/mgrs_sample_v02.fgb 33 | export STACCHIP_BUCKET=clay-v1-data 34 | ``` 35 | 36 | ## Landsat 37 | 38 | The [`stacchip-landsat`](https://github.com/Clay-foundation/stacchip/blob/main/stacchip/processors/landsat_processor.py) 39 | processor CLI command processes Landsat data. It will process a list 40 | of geometries from a layer that can be opened by geopandas. For each 41 | row, it will use the centroid of the geometry to search for Landsat 42 | scenes. 43 | 44 | For each geometry it will process the least cloudy image in each quartal 45 | from two random years between 2018 and 2023. For one year it will collect 46 | L1 data, and for the other year L2 data. The platform is either Landsat-8 47 | or Landsat-9, depending on availability and cloud cover. 48 | 49 | The script uses environment variables to determine all inputs: 50 | 51 | 1. The index of the geometry to be processed from the source file 52 | 2. The source file for the source sample file 53 | 3. A target bucket for writing the assets, stac items, and stacchip index. 54 | 55 | An example set of environment variables to run this script is: 56 | 57 | ```bash 58 | export AWS_BATCH_JOB_ARRAY_INDEX=0 59 | export STACCHIP_SAMPLE_SOURCE=https://clay-mgrs-samples.s3.amazonaws.com/mgrs_sample_v02.fgb 60 | export STACCHIP_BUCKET=clay-v1-data 61 | ``` 62 | 63 | ## NAIP 64 | 65 | The [`stacchip-naip`](https://github.com/Clay-foundation/stacchip/blob/main/stacchip/processors/naip_processor.py) processor CLI 66 | command processes imagery from the National Agriculture Imagery Program (NAIP). 67 | 68 | The sample locations were created using the [Natural Earth](https://www.naturalearthdata.com) 69 | database as a source.
The sample includes all populated places, protected 70 | areas and parks, airports, and ports. In addition, we sampled one random point 71 | along each river, and one random location within each lake that is registered 72 | in Natural Earth. Finally, we sampled 4000 random points. All data was 73 | filtered to be within the CONUS region. 74 | 75 | Similar to the other processors, the input variables are provided using env vars. 76 | 77 | An example set of environment variables to run this script is: 78 | 79 | ```bash 80 | export AWS_BATCH_JOB_ARRAY_INDEX=0 81 | export STACCHIP_SAMPLE_SOURCE=https://clay-mgrs-samples.s3.amazonaws.com/clay_v1_naip_sample_natural_earth.fgb 82 | export STACCHIP_BUCKET=clay-v1-data 83 | ``` 84 | 85 | ## LINZ 86 | 87 | The [`stacchip-linz`](https://github.com/Clay-foundation/stacchip/blob/main/stacchip/processors/linz_processor.py) processor CLI 88 | processes data from the New Zealand high resolution open aerial imagery. 89 | 90 | As a sample, we randomly select 50% of the scenes, with a minimum of 10 91 | and a maximum of 2000 scenes for each catalog that was included. 92 | We selected the latest imagery for each of the available regions 93 | of New Zealand. The list of catalogs is in the LINZ processor file. 94 | 95 | We also resample all the imagery to 30cm so that the data 96 | is consistent. 97 | 98 | Similar to the other processors, the input variables are provided using env vars. 99 | 100 | An example set of environment variables to run this script is: 101 | 102 | ```bash 103 | export AWS_BATCH_JOB_ARRAY_INDEX=0 104 | export STACCHIP_BUCKET=clay-v1-data 105 | ``` 106 | 107 | ## MODIS 108 | 109 | The [`stacchip-modis`](https://github.com/Clay-foundation/stacchip/blob/main/stacchip/processors/modis_processor.py) processor CLI 110 | processes data from the MODIS archive. The modis scenes are reprojected to 111 | the web mercator projection, and stored in S3. Then the indexer will create 112 | one index table per modis scene.
We use 233 modis SIN grid tiles, with 4 random 113 | dates of 4 years of data for each SIN grid tile. 114 | 115 | Similar to the other processors, the input variables are provided using env vars. 116 | 117 | An example set of environment variables to run this script is: 118 | 119 | ```bash 120 | export AWS_BATCH_JOB_ARRAY_INDEX=0 121 | export STACCHIP_BUCKET=clay-v1-data 122 | ``` 123 | 124 | ## Batch processing 125 | 126 | The following base image can be used for batch processing. Installing the package 127 | will include the command line utilities for each processor. 128 | 129 | ```dockerfile 130 | FROM python:3.11 131 | 132 | RUN pip install stacchip 133 | ``` 134 | 135 | ## Prechip 136 | 137 | In cases where chips need to be computed in advance, the 138 | [`stacchip-prechip`](https://github.com/Clay-foundation/stacchip/blob/main/stacchip/processors/naip_processor.py) cli script 139 | is a helper to create npz files from the chips. 140 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "stacchip" 7 | authors = [ 8 | { name="Daniel Wiesmann", email="danielwiesmann@developmentseed.org" }, 9 | ] 10 | description = "Dynamically create image chips from STAC items" 11 | readme = "README.md" 12 | requires-python = ">=3.10" 13 | dependencies = [ 14 | "pystac>=1.9.0", 15 | "pystac-client>=0.7.5", 16 | "pyarrow>=14.0.1", 17 | "geoarrow-pyarrow>=0.1.2", 18 | "rasterio>=1.3.9", 19 | "numpy>=1.26.0,<2.0", 20 | "boto3>=1.29.0", 21 | "geopandas>=0.14.1", 22 | "rio-stac>=0.9.0", 23 | "planetary-computer>=1.0.0", 24 | ] 25 | dynamic = [ 26 | "version", 27 | ] 28 | 29 | [project.urls] 30 | "Homepage" = "https://github.com/Clay-foundation/stacchip" 31 | "Bug Tracker" = "https://github.com/Clay-foundation/stacchip/issues" 32 | 33 | 
[project.optional-dependencies] 34 | dev = [ 35 | "ruff", 36 | "pytest", 37 | "mock", 38 | "build", 39 | "types-python-dateutil", 40 | ] 41 | docs = [ 42 | "nbconvert", 43 | "mkdocs", 44 | "mkdocs-jupyter", 45 | "mkdocs-material", 46 | "pygments", 47 | "pdocs", 48 | ] 49 | 50 | 51 | [project.scripts] 52 | stacchip-sentinel-1 = "stacchip.processors.sentinel_1_processor:process" 53 | stacchip-sentinel-2 = "stacchip.processors.sentinel_2_processor:process" 54 | stacchip-landsat = "stacchip.processors.landsat_processor:process" 55 | stacchip-naip = "stacchip.processors.naip_processor:process" 56 | stacchip-linz = "stacchip.processors.linz_processor:process" 57 | stacchip-modis = "stacchip.processors.modis_processor:process" 58 | stacchip-prechip = "stacchip.processors.prechip:process" 59 | stacchip-stats = "stacchip.processors.stats:process" 60 | 61 | [tool.hatch.version] 62 | path = "stacchip/__init__.py" 63 | 64 | [tool.isort] 65 | profile = "black" 66 | 67 | [tool.ruff] 68 | select = [ 69 | "D1", # pydocstyle errors 70 | "E", # pycodestyle errors 71 | "W", # pycodestyle warnings 72 | "F", # flake8 73 | "C", # flake8-comprehensions 74 | "B", # flake8-bugbear 75 | ] 76 | ignore = [ 77 | "E501", # line too long, handled by black 78 | "B008", # do not perform function calls in argument defaults 79 | "B905", # ignore zip() without an explicit strict= parameter, only support with python >3.10 80 | "D100", 81 | "D103", 82 | "C901", 83 | ] 84 | 85 | [tool.mypy] 86 | no_implicit_optional = true 87 | strict_optional = true 88 | namespace_packages = true 89 | explicit_package_bases = true -------------------------------------------------------------------------------- /stacchip/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | stacchip 3 | """ 4 | 5 | __version__ = "0.1.38" 6 | -------------------------------------------------------------------------------- /stacchip/chipper.py: 
-------------------------------------------------------------------------------- 1 | import math 2 | from pathlib import Path 3 | from typing import List, Optional 4 | from urllib.parse import urlparse 5 | 6 | import rasterio 7 | from numpy.typing import ArrayLike 8 | from rasterio.enums import Resampling 9 | from rasterio.windows import Window 10 | 11 | from stacchip.indexer import ChipIndexer 12 | 13 | 14 | class Chipper: 15 | """ 16 | Chipper class for managing and processing raster data chips. 17 | """ 18 | 19 | def __init__( 20 | self, 21 | indexer: ChipIndexer, 22 | mountpath: Optional[str] = None, 23 | assets: Optional[List[str]] = None, 24 | asset_blacklist: Optional[List[str]] = None, 25 | ) -> None: 26 | """ 27 | Initializes the Chipper class. 28 | 29 | Args: 30 | indexer (Type[ChipIndexer]): Input data which has to be of type ChipIndexer. 31 | mountpath (Optional[str]): Path to the mount directory for raster indexer. 32 | Defaults to None. 33 | assets (Optional[List[str]]): List of asset names to include for processing. 34 | If not provided, all assets are processed. Defaults to None. 35 | asset_blacklist (Optional[List[str]]): List of asset names to exclude from 36 | processing. Defaults to None. 37 | 38 | """ 39 | self.mountpath = None if mountpath is None else Path(mountpath) 40 | self.assets = assets 41 | self.asset_blacklist = asset_blacklist 42 | self.indexer = indexer 43 | 44 | def __len__(self) -> int: 45 | """ 46 | Returns the number of chips available. 47 | 48 | Returns: 49 | int: Number of chips available based on the indexer size. 50 | """ 51 | return self.indexer.size 52 | 53 | def __getitem__(self, index: int) -> tuple: 54 | """ 55 | Gets the chip by a single index. 56 | 57 | Args: 58 | index (int): Index of the chip to retrieve. 59 | 60 | Returns: 61 | tuple: A tuple containing x index, y index, and the chip data. 
62 | """ 63 | y_index = index // self.indexer.x_size 64 | x_index = index % self.indexer.x_size 65 | return x_index, y_index, self.chip(x_index, y_index) 66 | 67 | def __iter__(self): 68 | """ 69 | Iterates over chips. 70 | 71 | Yields: 72 | tuple: The next chip data in the sequence. 73 | """ 74 | counter = 0 75 | while counter < self.indexer.size: 76 | yield self[counter] 77 | counter += 1 78 | 79 | def get_pixels_for_asset(self, key: str, x: int, y: int) -> ArrayLike: 80 | """ 81 | Extracts chip pixel values for one asset. 82 | 83 | Args: 84 | key (str): The asset key to extract pixels from. 85 | x (int): The x index of the chip. 86 | y (int): The y index of the chip. 87 | 88 | Returns: 89 | ArrayLike: Array of pixel values for the specified asset. 90 | 91 | Raises: 92 | ValueError: If asset dimensions are not multiples of the highest resolution dimensions. 93 | """ 94 | asset = self.indexer.item.assets[key] 95 | 96 | srcpath = asset.href 97 | if self.mountpath: 98 | url = urlparse(srcpath, allow_fragments=False) 99 | srcpath = self.mountpath / Path(url.path.lstrip("/")) 100 | 101 | with rasterio.open(srcpath) as src: 102 | # Currently assume that different assets may be at different 103 | # resolutions, but are aligned and the gsd differs by an integer 104 | # multiplier. 
105 | if self.indexer.shape[0] % src.height: 106 | raise ValueError( 107 | f"Asset height {src.height} is not a multiple of highest resolution height {self.indexer.shape[0]}" # noqa: E501 108 | ) 109 | 110 | if self.indexer.shape[1] % src.width: 111 | raise ValueError( 112 | f"Asset width {src.width} is not a multiple of highest resolution width {self.indexer.shape[1]}" # noqa: E501 113 | ) 114 | 115 | factor = self.indexer.shape[0] / src.height 116 | 117 | chip_window = Window( 118 | math.floor(x * self.indexer.chip_size / factor), 119 | math.floor(y * self.indexer.chip_size / factor), 120 | math.ceil(self.indexer.chip_size / factor), 121 | math.ceil(self.indexer.chip_size / factor), 122 | ) 123 | 124 | return src.read( 125 | window=chip_window, 126 | out_shape=(src.count, self.indexer.chip_size, self.indexer.chip_size), 127 | resampling=Resampling.nearest, 128 | ) 129 | 130 | def chip(self, x: int, y: int) -> dict: 131 | """ 132 | Retrieves chip pixel array for the specified x and y index numbers. 133 | 134 | Args: 135 | x (int): The x index of the chip. 136 | y (int): The y index of the chip. 137 | 138 | Returns: 139 | dict: A dictionary where keys are asset names and values are arrays of pixel values. 
140 | """ 141 | if self.assets is not None: 142 | keys = self.assets 143 | else: 144 | keys = list(self.indexer.item.assets.keys()) 145 | 146 | if self.asset_blacklist is not None: 147 | keys = [key for key in keys if key not in self.asset_blacklist] 148 | 149 | return {key: self.get_pixels_for_asset(key, x, y) for key in keys} 150 | -------------------------------------------------------------------------------- /stacchip/indexer.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from functools import cached_property 3 | from math import floor 4 | from typing import Tuple 5 | 6 | import geoarrow.pyarrow as ga 7 | import numpy as np 8 | import pyarrow as pa 9 | import pyarrow.compute as pc 10 | import pyproj 11 | import rasterio 12 | from numpy.typing import ArrayLike 13 | from pystac import Item 14 | from rasterio.crs import CRS 15 | from rasterio.enums import Resampling 16 | from shapely import GeometryType, Polygon 17 | from shapely.geometry import box 18 | from shapely.ops import transform 19 | 20 | warnings.filterwarnings( 21 | "ignore", 22 | message=( 23 | "The argument 'infer_datetime_format' is deprecated and will" 24 | " be removed in a future version. A strict version of it is now " 25 | "the default, see https://pandas.pydata.org/pdeps/0004-consistent" 26 | "-to-datetime-parsing.html. You can safely remove this argument." 
27 | ), 28 | ) 29 | 30 | 31 | class ChipIndexer: 32 | """ 33 | Indexer base class 34 | """ 35 | 36 | def __init__( 37 | self, 38 | item: Item, 39 | chip_size: int = 256, 40 | chip_max_nodata: float = 0.5, 41 | shape=None, 42 | ) -> None: 43 | """ 44 | Init ChipIndexer 45 | """ 46 | self.item = item 47 | self.chip_size = chip_size 48 | self.chip_max_nodata = chip_max_nodata 49 | self._shape = shape 50 | 51 | assert self.item.ext.has("proj") 52 | 53 | self.assert_units_metre() 54 | self.setup_projector() 55 | 56 | def assert_units_metre(self) -> None: 57 | """ 58 | Ensure input data has meters as units 59 | """ 60 | assert self.crs.linear_units.lower() in ["metre", "meter"] 61 | 62 | @property 63 | def crs(self) -> CRS: 64 | """ 65 | Get coordinate reference system for the assets in this index 66 | """ 67 | if self.item.properties.get("proj:epsg", None): 68 | return CRS.from_epsg(self.item.properties["proj:epsg"]) 69 | elif "proj:wkt2" in self.item.properties: 70 | return CRS.from_string(self.item.properties["proj:wkt2"]) 71 | else: 72 | raise ValueError("Could not identify CRS of source files") 73 | 74 | def setup_projector(self): 75 | """ 76 | Prepare projection function to project geometries into WGS84 77 | """ 78 | wgs84 = pyproj.CRS("EPSG:4326") 79 | self._projector = pyproj.Transformer.from_crs( 80 | self.crs, wgs84, always_xy=True 81 | ).transform 82 | 83 | def reproject(self, geom) -> GeometryType: 84 | """ 85 | Reproject a geometry into WGS84 86 | """ 87 | return transform(self._projector, geom) 88 | 89 | def _get_trsf_or_shape(self, key: str) -> list: 90 | """ 91 | The shape of the hightest resolution band 92 | """ 93 | data = [] 94 | if key in self.item.properties: 95 | data = self.item.properties[key] 96 | else: 97 | for asset in self.item.assets.values(): 98 | if key not in asset.extra_fields: 99 | continue 100 | if not data or data[0] < asset.extra_fields[key][0]: 101 | data = asset.extra_fields[key] 102 | if not data: 103 | raise ValueError("Could not 
determine {key} for this STAC item") 104 | 105 | return data 106 | 107 | @cached_property 108 | def shape(self) -> list: 109 | """ 110 | Shape of the STAC item data 111 | 112 | Obtains the shape of the highest resolution band from 113 | all the available bands. 114 | """ 115 | if self._shape is not None: 116 | return self._shape 117 | else: 118 | return self._get_trsf_or_shape("proj:shape") 119 | 120 | @cached_property 121 | def transform(self) -> list: 122 | """ 123 | The transform property from the STAC item 124 | """ 125 | return self._get_trsf_or_shape("proj:transform") 126 | 127 | @property 128 | def x_size(self) -> int: 129 | """ 130 | Number of tiles vailable in x direction 131 | """ 132 | return floor(self.shape[1] / self.chip_size) 133 | 134 | @property 135 | def y_size(self) -> int: 136 | """ 137 | Number of tiles vailable in y direction 138 | """ 139 | return floor(self.shape[0] / self.chip_size) 140 | 141 | @property 142 | def size(self) -> int: 143 | """ 144 | Number of tiles in this STAC item 145 | """ 146 | return self.x_size * self.y_size 147 | 148 | @property 149 | def bbox(self) -> Tuple[float, float, float, float]: 150 | """ 151 | Bounding box that covers all tiles 152 | 153 | This is different from the bounding box of the STAC item 154 | if the tiles don't fit into the number of pixels perfectly. 
155 | """ 156 | return ( 157 | self.transform[2], 158 | self.transform[5] + self.transform[4] * self.shape[0], 159 | self.transform[2] + self.transform[0] * self.shape[1], 160 | self.transform[5], 161 | ) 162 | 163 | def get_stats(self, x: int, y: int) -> Tuple[float, float]: 164 | """ 165 | A function to write for each indexer that returns nodata and 166 | cloud statistics for a chip 167 | """ 168 | raise NotImplementedError() 169 | 170 | def get_chip_bbox(self, x: int, y: int) -> Polygon: 171 | """ 172 | Bounding box for a chip 173 | """ 174 | chip_box = box( 175 | self.bbox[0] + x * self.transform[0] * self.chip_size, 176 | self.bbox[3] + y * self.transform[4] * self.chip_size, 177 | self.bbox[0] + (x + 1) * self.transform[0] * self.chip_size, 178 | self.bbox[3] + (y + 1) * self.transform[4] * self.chip_size, 179 | ) 180 | 181 | return self.reproject(chip_box) 182 | 183 | def create_index(self) -> pa.Table: 184 | """ 185 | The index for this STAC item 186 | """ 187 | index = { 188 | "chipid": np.empty(self.size, dtype=" Tuple[float, float]: 229 | """ 230 | Cloud and nodata percentage for a chip 231 | """ 232 | return 0.0, 0.0 233 | 234 | 235 | class NoDataMaskChipIndexer(ChipIndexer): 236 | """ 237 | Chip indexer that takes the nodata mask as input and assumes that 238 | there are no clouds in the image 239 | """ 240 | 241 | def __init__( 242 | self, 243 | item: Item, 244 | nodata_mask: ArrayLike, 245 | chip_size: int = 256, 246 | chip_max_nodata: float = 0.5, 247 | ) -> None: 248 | """ 249 | Init NoDataMaskChipIndexer 250 | """ 251 | super().__init__(item, chip_size, chip_max_nodata) 252 | self.nodata_mask = nodata_mask 253 | 254 | def get_stats(self, x: int, y: int) -> Tuple[float, float]: 255 | """ 256 | Cloud and nodata percentage for a chip 257 | 258 | Assumes there are no cloudy pixels and computes nodata from mask 259 | """ 260 | nodata_percentage = np.sum( 261 | self.nodata_mask[ 262 | y * self.chip_size : (y + 1) * self.chip_size, 263 | x * 
self.chip_size : (x + 1) * self.chip_size, 264 | ] 265 | ) / (self.chip_size**2) 266 | 267 | return 0.0, nodata_percentage 268 | 269 | 270 | class LandsatIndexer(ChipIndexer): 271 | """ 272 | Chip indexer for Landsat 8 and 9 STAC items 273 | """ 274 | 275 | @cached_property 276 | def qa(self): 277 | """ 278 | The quality band data for the STAC item 279 | """ 280 | print("Loading qa band") 281 | self.item.assets["qa_pixel"].href = self.item.assets["qa_pixel"].extra_fields[ 282 | "alternate" 283 | ]["s3"]["href"] 284 | with rasterio.open(self.item.assets["qa_pixel"].href) as src: 285 | return src.read(1) 286 | 287 | def get_stats(self, x: int, y: int) -> Tuple[float, float]: 288 | """ 289 | Cloud and nodata percentage for a chip 290 | 291 | Uses the qa band to compute these values. 292 | """ 293 | qa = self.qa[ 294 | y * self.chip_size : (y + 1) * self.chip_size, 295 | x * self.chip_size : (x + 1) * self.chip_size, 296 | ] 297 | 298 | # Bit 1 is dilated cloud, 3 is cloud, 4 is cloud shadow. 
299 | nodata_byte = np.array(1 << 0, dtype=qa.dtype) 300 | dilated_cloud_byte = np.array(1 << 1, dtype=qa.dtype) 301 | cloud_byte = np.array(1 << 3, dtype=qa.dtype) 302 | shadow_byte = np.array(1 << 4, dtype=qa.dtype) 303 | 304 | nodata_mask = np.bitwise_and(qa, nodata_byte) 305 | dilated_cloud = np.bitwise_and(qa, dilated_cloud_byte) 306 | cloud = np.bitwise_and(qa, cloud_byte) 307 | shadow = np.bitwise_and(qa, shadow_byte) 308 | 309 | layer_clouds = (dilated_cloud | cloud | shadow).astype(dtype="bool") 310 | 311 | cloud_percentage = np.sum(layer_clouds) / qa.size 312 | nodata_percentage = np.sum(nodata_mask) / qa.size 313 | 314 | return cloud_percentage, nodata_percentage 315 | 316 | 317 | class Sentinel2Indexer(ChipIndexer): 318 | """ 319 | Indexer for Sentinel-2 STAC items 320 | """ 321 | 322 | scl_filter = [1, 3, 8, 9, 10] 323 | nodata_value = 0 324 | 325 | @cached_property 326 | def scl(self): 327 | """ 328 | The Scene Classification (SCL) band data for the STAC item 329 | """ 330 | print("Loading scl band") 331 | with rasterio.open(self.item.assets["scl"].href) as src: 332 | return src.read(out_shape=(1, *self.shape), resampling=Resampling.nearest)[ 333 | 0 334 | ] 335 | 336 | def get_stats(self, x: int, y: int) -> Tuple[float, float]: 337 | """ 338 | Cloud and nodata percentage for a chip 339 | 340 | Uses the SCL band to compute these values. 
341 | """ 342 | scl = self.scl[ 343 | y * self.chip_size : (y + 1) * self.chip_size, 344 | x * self.chip_size : (x + 1) * self.chip_size, 345 | ] 346 | 347 | cloud_percentage = int(np.isin(scl, self.scl_filter).sum()) / scl.size 348 | 349 | nodata_percentage = np.sum(scl == self.nodata_value) / scl.size 350 | 351 | return cloud_percentage, nodata_percentage 352 | 353 | 354 | class ModisIndexer(ChipIndexer): 355 | """ 356 | Indexer for MODIS STAC items 357 | """ 358 | 359 | @cached_property 360 | def quality(self): 361 | """ 362 | The Quality band data for the STAC item 363 | """ 364 | print("Loading quality band") 365 | with rasterio.open(self.item.assets["sur_refl_qc_500m"].href) as src: 366 | return src.read(out_shape=(1, *self.shape), resampling=Resampling.nearest)[ 367 | 0 368 | ] 369 | 370 | def get_stats(self, x: int, y: int) -> Tuple[float, float]: 371 | """ 372 | Cloud and nodata percentage for a chip 373 | """ 374 | qa = self.quality[ 375 | y * self.chip_size : (y + 1) * self.chip_size, 376 | x * self.chip_size : (x + 1) * self.chip_size, 377 | ] 378 | byte1 = np.array(1 << 0, dtype=qa.dtype) 379 | byte2 = np.array(1 << 1, dtype=qa.dtype) 380 | b1mask = np.bitwise_and(qa, byte1) 381 | b2mask = np.bitwise_and(qa, byte2) 382 | 383 | # Clouds are flagged as 10 in the first two bytes, nodata is flagged 384 | # as 11 in the first two bytes. 
Extracte from table 10 in 385 | # https://lpdaac.usgs.gov/documents/925/MOD09_User_Guide_V61.pdf 386 | cloud_mask = np.logical_and(b1mask, np.logical_not(b2mask)) 387 | nodata_mask = np.logical_and(b1mask, b2mask) 388 | 389 | nodata_percentage = np.sum(nodata_mask) / nodata_mask.size 390 | cloud_percentage = np.sum(cloud_mask) / cloud_mask.size 391 | 392 | return cloud_percentage, nodata_percentage 393 | -------------------------------------------------------------------------------- /stacchip/processors/landsat_processor.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import random 4 | from pathlib import Path 5 | from urllib.parse import urlparse 6 | 7 | import boto3 8 | import geopandas as gp 9 | import pyarrow as pa 10 | import pystac_client 11 | from geoarrow.pyarrow import io 12 | 13 | from stacchip.indexer import LandsatIndexer 14 | 15 | STAC_API = "https://landsatlook.usgs.gov/stac-server" 16 | 17 | LS_ASSETS_L1 = [ 18 | "blue", 19 | "green", 20 | "red", 21 | "nir08", 22 | "swir16", 23 | "lwir", 24 | "lwir_high", 25 | "swir22", 26 | "pan", 27 | "qa_pixel", 28 | ] 29 | LS_ASSETS_L2 = [ 30 | "blue", 31 | "green", 32 | "red", 33 | "nir08", 34 | "swir16", 35 | "swir22", 36 | "qa_pixel", 37 | ] 38 | ABSOLUTE_CLOUD_COVER_FILTER = 75 39 | PLATFORM_NAME_L2 = "landsat-c2l2-sr" 40 | PLATFORM_NAME_L1 = "landsat-c2l1" 41 | quartals = [ 42 | "{year}-01-01/{year}-03-31", 43 | "{year}-04-01/{year}-06-30", 44 | "{year}-07-01/{year}-09-30", 45 | "{year}-10-01/{year}-12-31", 46 | ] 47 | 48 | 49 | def process_landsat_tile(index: int, sample_source: str, bucket: str) -> None: 50 | # Prepare resources for the job 51 | catalog = pystac_client.Client.open(STAC_API) 52 | s3 = boto3.resource("s3") 53 | data = gp.read_file(sample_source) 54 | row = data.iloc[index] 55 | 56 | print("MGRS", row["name"]) 57 | for platform_name in [PLATFORM_NAME_L1, PLATFORM_NAME_L2]: 58 | random.seed(index) 59 | for year in 
random.sample(range(2018, 2024), 1): 60 | print(f"Year {year}") 61 | for quartal in quartals: 62 | print(f"Quartal {quartal.format(year=year)}") 63 | items = catalog.search( 64 | collections=[platform_name], 65 | datetime=quartal.format(year=year), 66 | max_items=1, 67 | intersects=row.geometry.centroid, 68 | sortby="properties.eo:cloud_cover", 69 | query={ 70 | "platform": {"in": ["LANDSAT_8", "LANDSAT_9"]}, 71 | }, 72 | ) 73 | item = items.item_collection()[0] 74 | 75 | if item.properties["eo:cloud_cover"] > ABSOLUTE_CLOUD_COVER_FILTER: 76 | continue 77 | 78 | print( 79 | f"Cloud cover is {item.properties['eo:cloud_cover']} ({item.properties['platform']})" 80 | ) 81 | 82 | for key in list(item.assets.keys()): 83 | if ( 84 | platform_name == PLATFORM_NAME_L1 and key not in LS_ASSETS_L1 85 | ) or (key not in LS_ASSETS_L2): 86 | del item.assets[key] 87 | else: 88 | href = item.assets[key].extra_fields["alternate"]["s3"]["href"] 89 | url = urlparse(href) 90 | copy_source = { 91 | "Bucket": url.netloc, 92 | "Key": url.path.lstrip("/"), 93 | } 94 | print(f"Copying {key} band to {copy_source}") 95 | new_key = f"{platform_name}/{item.id}/{Path(href).name}" 96 | s3.meta.client.copy( 97 | copy_source, 98 | bucket, 99 | new_key, 100 | ExtraArgs={"RequestPayer": "requester"}, 101 | ) 102 | item.assets[key].href = f"s3://{bucket}/{new_key}" 103 | 104 | # Convert Dictionary to JSON String 105 | data_string = json.dumps(item.to_dict()) 106 | 107 | # Upload JSON String to an S3 Object 108 | s3_bucket = s3.Bucket(name=bucket) 109 | s3_bucket.put_object( 110 | Key=f"{platform_name}/{item.id}/stac_item.json", 111 | Body=data_string, 112 | ) 113 | 114 | indexer = LandsatIndexer(item, chip_max_nodata=0) 115 | chip_index = indexer.create_index() 116 | 117 | writer = pa.BufferOutputStream() 118 | io.write_geoparquet_table(chip_index, writer) 119 | body = bytes(writer.getvalue()) 120 | # Centralize the index files to make combining them easier later on 121 | s3_bucket.put_object( 122 | 
Body=body, 123 | Key=f"index/{platform_name}/{item.id}/index_{item.id}.parquet", 124 | ) 125 | 126 | 127 | def process() -> None: 128 | 129 | if "AWS_BATCH_JOB_ARRAY_INDEX" not in os.environ: 130 | raise ValueError("AWS_BATCH_JOB_ARRAY_INDEX env var not set") 131 | if "STACCHIP_SAMPLE_SOURCE" not in os.environ: 132 | raise ValueError("STACCHIP_SAMPLE_SOURCE env var not set") 133 | if "STACCHIP_BUCKET" not in os.environ: 134 | raise ValueError("STACCHIP_BUCKET env var not set") 135 | 136 | index = int(os.environ["AWS_BATCH_JOB_ARRAY_INDEX"]) 137 | sample_source = os.environ["STACCHIP_SAMPLE_SOURCE"] 138 | bucket = os.environ["STACCHIP_BUCKET"] 139 | 140 | process_landsat_tile(index, sample_source, bucket) 141 | -------------------------------------------------------------------------------- /stacchip/processors/linz_processor.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import random 4 | import tempfile 5 | from pathlib import Path 6 | 7 | import boto3 8 | import pyarrow as pa 9 | import rasterio 10 | from dateutil import parser 11 | from geoarrow.pyarrow import io 12 | from pystac import Item 13 | from rasterio.enums import Resampling 14 | from rio_stac import create_stac_item 15 | 16 | from stacchip.indexer import NoDataMaskChipIndexer 17 | 18 | PLATFORM_NAME = "linz" 19 | 20 | TARGET_RESOLUTION = 0.3 21 | 22 | nz_prefixes = [ 23 | "auckland/auckland_2022_0.075m/", 24 | "wellington/wellington_2021_0.075m/", 25 | "wellington/wellington_2021_0.3m/", 26 | "bay-of-plenty/bay-of-plenty_2023_0.1m/", 27 | "bay-of-plenty/tauranga_2022_0.1m/", 28 | "bay-of-plenty/tauranga-winter_2022_0.1m/", 29 | "canterbury/canterbury_2023_0.3m/", 30 | "canterbury/ashburton_2023_0.1m/", 31 | "canterbury/hurunui_2023_0.075m/", 32 | "canterbury/timaru_2022-2023_0.1m/", 33 | "canterbury/selwyn_2022-2023_0.075m/", 34 | "gisborne/gisborne_2023_0.075m/", 35 | "hawkes-bay/hawkes-bay_2022_0.05m/", 36 | 
"hawkes-bay/napier_2017-2018_0.05m/", 37 | "hawkes-bay/wairoa_2014-2015_0.1m/", 38 | "manawatu-whanganui/manawatu-whanganui_2021-2022_0.3m/", 39 | "manawatu-whanganui/palmerston-north_2022_0.125m/", 40 | "manawatu-whanganui/rangitikei_2021_0.125m/", 41 | "manawatu-whanganui/tararua_2024_0.1m/", 42 | "manawatu-whanganui/whanganui_2022_0.075m/", 43 | "marlborough/marlborough_2023_0.075m/", 44 | "nelson/nelson_2022_0.075m/", 45 | "northland/northland_2016_0.1m/", 46 | "otago/queenstown_2021_0.1m/", 47 | "otago/otago_2018_0.1m/", 48 | "otago/dunedin_2018-2019_0.1m/", 49 | "southland/southland_2023_0.1m/", 50 | "southland/invercargill_2022_0.05m/", 51 | "taranaki/taranaki_2022_0.05m/", 52 | "taranaki/new-plymouth_2017_0.1m/", 53 | "tasman/tasman_2023_0.075m/", 54 | "waikato/hamilton_2023_0.05m/", 55 | "waikato/otorohanga_2021_0.1m/", 56 | "waikato/taupo_2023_0.075m/", 57 | "waikato/thames-coromandel_2021_0.1m/", 58 | "waikato/waikato_2021_0.1m/", 59 | "waikato/waipa_2021_0.1m/", 60 | "west-coast/buller_2020_0.2m/", 61 | "west-coast/west-coast_2016_0.1m/", 62 | ] 63 | 64 | 65 | def get_linz_tiffs(prefix) -> list: 66 | 67 | s3_resource = boto3.resource("s3") 68 | s3_bucket = s3_resource.Bucket(name="nz-imagery") 69 | 70 | files = [] 71 | s3_object_iterator = s3_bucket.objects.filter(Prefix=prefix) 72 | 73 | for s3_object in s3_object_iterator: 74 | if s3_object.key.endswith(".tiff"): 75 | files.append(s3_object.key) 76 | 77 | # Sample a percentage of all scenes 78 | sample_size = max(min(int(len(files) / 2), 2000), 10) 79 | print(f"Found {len(files)} scenes for {prefix}, keeping {sample_size}") 80 | random.seed(42) 81 | return random.sample(files, sample_size) 82 | 83 | 84 | def get_original_item(key: str) -> Item: 85 | s3_resource = boto3.resource("s3") 86 | content_object = s3_resource.Object( 87 | "nz-imagery", key.replace(".tiff", "") + ".json" 88 | ) 89 | file_content = content_object.get()["Body"].read().decode("utf-8") 90 | json_content = json.loads(file_content) 
91 | return Item.from_dict(json_content) 92 | 93 | 94 | def process_linz_tile(index, bucket): 95 | 96 | tiffs = get_linz_tiffs(nz_prefixes[index]) 97 | 98 | for key in tiffs: 99 | print(f"Working on {key}") 100 | 101 | href = f"s3://nz-imagery/{key}" 102 | 103 | original_item = get_original_item(key) 104 | 105 | # For now, resample so we have a constant gsd for all images 106 | with rasterio.open(href) as dataset: 107 | 108 | gsd = abs(dataset.transform[0]) 109 | 110 | upscale_factor = gsd / TARGET_RESOLUTION 111 | 112 | data = dataset.read( 113 | out_shape=( 114 | dataset.count, 115 | int(dataset.height * upscale_factor), 116 | int(dataset.width * upscale_factor), 117 | ), 118 | resampling=Resampling.bilinear, 119 | ) 120 | 121 | # Drop alpha band if present 122 | data = data[:3] 123 | 124 | # scale image transform 125 | transform = dataset.transform * dataset.transform.scale( 126 | (dataset.width / data.shape[-1]), (dataset.height / data.shape[-2]) 127 | ) 128 | 129 | new_key = f"{PLATFORM_NAME}/{original_item.id}/{Path(href).name}" 130 | new_href = f"s3://{bucket}/{new_key}" 131 | 132 | meta = dataset.meta.copy() 133 | meta["transform"] = transform 134 | meta["width"] = data.shape[2] 135 | meta["height"] = data.shape[1] 136 | meta["compress"] = "deflate" 137 | meta["count"] = 3 138 | 139 | with tempfile.NamedTemporaryFile(mode="w") as temp_file: 140 | with rasterio.open(temp_file.name, "w", **meta) as dst: 141 | dst.write(data) 142 | 143 | s3_client = boto3.client("s3") 144 | s3_client.upload_file(temp_file.name, bucket, new_key) 145 | 146 | item = create_stac_item(new_href, with_proj=True) 147 | item.datetime = parser.parse(original_item.properties["start_datetime"]) 148 | item.id = original_item.id 149 | 150 | # Convert Dictionary to JSON String 151 | data_string = json.dumps(item.to_dict()) 152 | 153 | # Upload JSON String to an S3 Object 154 | s3 = boto3.resource("s3") 155 | s3_bucket = s3.Bucket(name=bucket) 156 | s3_bucket.put_object( 157 | 
Key=f"{PLATFORM_NAME}/{item.id}/stac_item.json", 158 | Body=data_string, 159 | ) 160 | 161 | indexer = NoDataMaskChipIndexer(item, nodata_mask=data[0] == 0) 162 | index = indexer.create_index() 163 | 164 | writer = pa.BufferOutputStream() 165 | io.write_geoparquet_table(index, writer) 166 | body = bytes(writer.getvalue()) 167 | # Centralize the index files to make combining them easier later on 168 | s3_bucket.put_object( 169 | Body=body, 170 | Key=f"index/{PLATFORM_NAME}/{item.id}/index_{item.id}.parquet", 171 | ) 172 | 173 | 174 | def process() -> None: 175 | 176 | if "AWS_BATCH_JOB_ARRAY_INDEX" not in os.environ: 177 | raise ValueError("AWS_BATCH_JOB_ARRAY_INDEX env var not set") 178 | if "STACCHIP_BUCKET" not in os.environ: 179 | raise ValueError("STACCHIP_BUCKET env var not set") 180 | 181 | index = int(os.environ["AWS_BATCH_JOB_ARRAY_INDEX"]) 182 | bucket = os.environ["STACCHIP_BUCKET"] 183 | 184 | process_linz_tile(index, bucket) 185 | -------------------------------------------------------------------------------- /stacchip/processors/modis_processor.py: -------------------------------------------------------------------------------- 1 | import calendar 2 | import json 3 | import os 4 | import tempfile 5 | from datetime import datetime 6 | from pathlib import Path 7 | 8 | import boto3 9 | import planetary_computer as pc 10 | import pyarrow as pa 11 | import pystac_client 12 | import rasterio 13 | from geoarrow.pyarrow import io 14 | from rasterio.warp import Resampling, calculate_default_transform, reproject 15 | 16 | from stacchip.indexer import ModisIndexer 17 | 18 | STAC_API = "https://planetarycomputer.microsoft.com/api/stac/v1" 19 | COLLECTION = "modis-09A1-061" 20 | BANDS = [ 21 | "sur_refl_b01", 22 | "sur_refl_b02", 23 | "sur_refl_b03", 24 | "sur_refl_b04", 25 | "sur_refl_b05", 26 | "sur_refl_b06", 27 | "sur_refl_b07", 28 | "sur_refl_qc_500m", 29 | ] 30 | # The grid tiles were selected to not have nodata 31 | # in the SIN projection. 
This is to avoid effects 32 | # of the international dateline cutoff. 33 | SIN_GRID_TILES = [ 34 | (2, 12), 35 | (2, 13), 36 | (2, 14), 37 | (2, 15), 38 | (2, 16), 39 | (2, 17), 40 | (2, 18), 41 | (2, 19), 42 | (2, 20), 43 | (2, 21), 44 | (2, 22), 45 | (2, 23), 46 | (3, 9), 47 | (3, 11), 48 | (3, 12), 49 | (3, 13), 50 | (3, 14), 51 | (3, 15), 52 | (3, 17), 53 | (3, 18), 54 | (3, 19), 55 | (3, 20), 56 | (3, 21), 57 | (3, 22), 58 | (3, 23), 59 | (3, 24), 60 | (3, 25), 61 | (3, 26), 62 | (4, 8), 63 | (4, 10), 64 | (4, 11), 65 | (4, 12), 66 | (4, 13), 67 | (4, 14), 68 | (4, 17), 69 | (4, 18), 70 | (4, 19), 71 | (4, 20), 72 | (4, 21), 73 | (4, 22), 74 | (4, 23), 75 | (4, 24), 76 | (4, 25), 77 | (4, 26), 78 | (4, 27), 79 | (4, 28), 80 | (5, 7), 81 | (5, 8), 82 | (5, 9), 83 | (5, 10), 84 | (5, 11), 85 | (5, 12), 86 | (5, 15), 87 | (5, 16), 88 | (5, 17), 89 | (5, 18), 90 | (5, 19), 91 | (5, 20), 92 | (5, 21), 93 | (5, 23), 94 | (5, 24), 95 | (5, 25), 96 | (5, 26), 97 | (5, 27), 98 | (5, 28), 99 | (5, 29), 100 | (5, 30), 101 | (6, 3), 102 | (6, 7), 103 | (6, 8), 104 | (6, 9), 105 | (6, 10), 106 | (6, 11), 107 | (6, 16), 108 | (6, 17), 109 | (6, 18), 110 | (6, 19), 111 | (6, 20), 112 | (6, 21), 113 | (6, 22), 114 | (6, 23), 115 | (6, 24), 116 | (6, 25), 117 | (6, 26), 118 | (6, 27), 119 | (6, 28), 120 | (6, 29), 121 | (6, 30), 122 | (6, 31), 123 | (7, 3), 124 | (7, 7), 125 | (7, 8), 126 | (7, 9), 127 | (7, 10), 128 | (7, 11), 129 | (7, 15), 130 | (7, 16), 131 | (7, 17), 132 | (7, 18), 133 | (7, 19), 134 | (7, 20), 135 | (7, 21), 136 | (7, 22), 137 | (7, 23), 138 | (7, 24), 139 | (7, 25), 140 | (7, 26), 141 | (7, 27), 142 | (7, 28), 143 | (7, 29), 144 | (7, 30), 145 | (7, 31), 146 | (7, 32), 147 | (7, 33), 148 | (8, 1), 149 | (8, 2), 150 | (8, 8), 151 | (8, 9), 152 | (8, 10), 153 | (8, 11), 154 | (8, 12), 155 | (8, 13), 156 | (8, 16), 157 | (8, 18), 158 | (8, 19), 159 | (8, 20), 160 | (8, 21), 161 | (8, 22), 162 | (8, 23), 163 | (8, 25), 164 | (8, 26), 165 | (8, 27), 166 | 
(8, 28), 167 | (8, 29), 168 | (8, 30), 169 | (8, 31), 170 | (8, 32), 171 | (8, 33), 172 | (8, 34), 173 | (9, 1), 174 | (9, 2), 175 | (9, 3), 176 | (9, 4), 177 | (9, 8), 178 | (9, 9), 179 | (9, 10), 180 | (9, 11), 181 | (9, 12), 182 | (9, 13), 183 | (9, 14), 184 | (9, 16), 185 | (9, 19), 186 | (9, 21), 187 | (9, 22), 188 | (9, 23), 189 | (9, 25), 190 | (9, 27), 191 | (9, 28), 192 | (9, 29), 193 | (9, 30), 194 | (9, 31), 195 | (9, 32), 196 | (9, 33), 197 | (9, 34), 198 | (10, 2), 199 | (10, 3), 200 | (10, 4), 201 | (10, 5), 202 | (10, 10), 203 | (10, 11), 204 | (10, 12), 205 | (10, 13), 206 | (10, 14), 207 | (10, 17), 208 | (10, 19), 209 | (10, 20), 210 | (10, 21), 211 | (10, 22), 212 | (10, 23), 213 | (10, 27), 214 | (10, 28), 215 | (10, 29), 216 | (10, 30), 217 | (10, 31), 218 | (10, 32), 219 | (10, 33), 220 | (11, 3), 221 | (11, 4), 222 | (11, 5), 223 | (11, 6), 224 | (11, 8), 225 | (11, 10), 226 | (11, 11), 227 | (11, 12), 228 | (11, 13), 229 | (11, 14), 230 | (11, 15), 231 | (11, 19), 232 | (11, 20), 233 | (11, 21), 234 | (11, 22), 235 | (11, 23), 236 | (11, 27), 237 | (11, 28), 238 | (11, 29), 239 | (11, 30), 240 | (11, 31), 241 | (11, 32), 242 | (12, 11), 243 | (12, 12), 244 | (12, 13), 245 | (12, 16), 246 | (12, 17), 247 | (12, 19), 248 | (12, 20), 249 | (12, 24), 250 | (12, 27), 251 | (12, 28), 252 | (12, 29), 253 | (12, 30), 254 | (13, 12), 255 | (13, 13), 256 | (13, 17), 257 | (13, 20), 258 | (13, 21), 259 | (13, 22), 260 | (13, 28), 261 | (14, 13), 262 | (14, 14), 263 | (14, 15), 264 | (14, 16), 265 | (14, 18), 266 | (14, 22), 267 | ] 268 | PLATFORM_NAME = "modis" 269 | DST_CRS = "EPSG:3857" 270 | 271 | 272 | def process_modis_tile( 273 | index: int, 274 | bucket: str, 275 | ) -> None: 276 | 277 | # Prepare resources for the job 278 | catalog = pystac_client.Client.open(STAC_API, modifier=pc.sign_inplace) 279 | 280 | s3 = boto3.resource("s3") 281 | 282 | i, j = SIN_GRID_TILES[index] 283 | 284 | items_to_process = [] 285 | for year in range(2018, 2024): 
286 | for month in range(1, 13): 287 | # Compute date range for this month 288 | end = calendar.monthrange(year, month)[1] 289 | timerange = ( 290 | f"{year}-{str(month).zfill(2)}-01/" 291 | f"{year}-{str(month).zfill(2)}-{str(end).zfill(2)}" 292 | ) 293 | # Query catalog 294 | items = catalog.search( 295 | collections=[COLLECTION], 296 | datetime=timerange, 297 | query={ 298 | "modis:vertical-tile": { 299 | "eq": i, 300 | }, 301 | "modis:horizontal-tile": { 302 | "eq": j, 303 | }, 304 | }, 305 | max_items=1, 306 | ) 307 | items = list(items.item_collection()) 308 | 309 | if not len(items): 310 | print(f"No items found for timerange {timerange}") 311 | continue 312 | 313 | items_to_process.append(items[0]) 314 | 315 | for item in items_to_process: 316 | for key in list(item.assets.keys()): 317 | if key not in BANDS: 318 | del item.assets[key] 319 | 320 | # Manually set datetime to end date. Modis products are 321 | # composited from a date range. 322 | item.datetime = datetime.strptime( 323 | item.properties["end_datetime"], "%Y-%m-%dT%H:%M:%SZ" 324 | ) 325 | 326 | for key, asset in item.assets.items(): 327 | new_key = f"{PLATFORM_NAME}/{item.id}/{Path(asset.href.split('?')[0]).name}" 328 | new_href = f"s3://{bucket}/{new_key}" 329 | 330 | with rasterio.open(asset.href) as src: 331 | transform, width, height = calculate_default_transform( 332 | src.crs, DST_CRS, src.width, src.height, *src.bounds 333 | ) 334 | kwargs = src.meta.copy() 335 | kwargs.update( 336 | { 337 | "crs": DST_CRS, 338 | "transform": transform, 339 | "width": width, 340 | "height": height, 341 | "compress": "deflate", 342 | } 343 | ) 344 | with tempfile.NamedTemporaryFile(mode="w") as temp_file: 345 | with rasterio.open(temp_file.name, "w", **kwargs) as dst: 346 | for i in range(1, src.count + 1): 347 | reproject( 348 | source=rasterio.band(src, i), 349 | destination=rasterio.band(dst, i), 350 | src_transform=src.transform, 351 | src_crs=src.crs, 352 | dst_transform=transform, 353 | 
dst_crs=DST_CRS, 354 | resampling=Resampling.nearest, 355 | ) 356 | s3_client = boto3.client("s3") 357 | s3_client.upload_file(temp_file.name, bucket, new_key) 358 | 359 | item.assets[key].href = new_href 360 | 361 | # Update proj extension to match new data format 362 | item.properties["proj:shape"] = (height, width) 363 | item.properties["proj:epsg"] = 3857 364 | del item.properties["proj:wkt2"] 365 | item.properties["proj:transform"] = transform 366 | 367 | # Convert Dictionary to JSON String 368 | data_string = json.dumps(item.to_dict()) 369 | 370 | # Upload JSON String to an S3 Object 371 | s3_bucket = s3.Bucket(name=bucket) 372 | s3_bucket.put_object( 373 | Key=f"{PLATFORM_NAME}/{item.id}/stac_item.json", 374 | Body=data_string, 375 | ) 376 | 377 | indexer = ModisIndexer(item) 378 | index = indexer.create_index() 379 | print("Indexer info", indexer.x_size, indexer.y_size, indexer.shape) 380 | 381 | writer = pa.BufferOutputStream() 382 | io.write_geoparquet_table(index, writer) 383 | body = bytes(writer.getvalue()) 384 | # Centralize the index files to make combining them easier later on 385 | s3_bucket.put_object( 386 | Body=body, 387 | Key=f"index/{PLATFORM_NAME}/{item.id}/index_{item.id}.parquet", 388 | ) 389 | 390 | 391 | def process() -> None: 392 | 393 | if "AWS_BATCH_JOB_ARRAY_INDEX" not in os.environ: 394 | raise ValueError("AWS_BATCH_JOB_ARRAY_INDEX env var not set") 395 | if "STACCHIP_BUCKET" not in os.environ: 396 | raise ValueError("STACCHIP_BUCKET env var not set") 397 | 398 | index = int(os.environ["AWS_BATCH_JOB_ARRAY_INDEX"]) 399 | bucket = os.environ["STACCHIP_BUCKET"] 400 | 401 | process_modis_tile(index, bucket) 402 | -------------------------------------------------------------------------------- /stacchip/processors/naip_processor.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import random 4 | from pathlib import Path 5 | from urllib.parse import urlparse 6 | 7 | 
import boto3 8 | import geopandas as gp 9 | import pyarrow as pa 10 | import pystac_client 11 | from botocore.exceptions import ClientError 12 | from geoarrow.pyarrow import io 13 | 14 | from stacchip.indexer import NoStatsChipIndexer 15 | 16 | STAC_API = "https://planetarycomputer.microsoft.com/api/stac/v1" 17 | 18 | AWS_S3_URL = ( 19 | "s3://naip-analytic/{state}/{year}/{resolution}/rgbir_cog/{block}{subblock}/{name}" 20 | ) 21 | PLATFORM_NAME = "naip" 22 | 23 | 24 | def process_naip_tile( 25 | index: int, sample_source: str, bucket: str, latest_only: bool = False 26 | ) -> None: 27 | # Prepare resources for the job 28 | catalog = pystac_client.Client.open(STAC_API) 29 | s3 = boto3.resource("s3") 30 | data = gp.read_file(sample_source) 31 | row = data.iloc[index] 32 | 33 | items = catalog.search( 34 | collections=["naip"], 35 | intersects=row.geometry.centroid, 36 | sortby="properties.naip:year", 37 | max_items=10, 38 | ) 39 | items = list(items.item_collection()) 40 | 41 | if not len(items): 42 | print(f"No items found, skipping index {index}") 43 | return 44 | 45 | latest_item = items.pop() 46 | items_to_process = [latest_item] 47 | if not latest_only: 48 | random.seed(index) 49 | random_item = random.choice(items) 50 | items_to_process.append(random_item) 51 | 52 | for item in items_to_process: 53 | print(f"Processing item {item.id}") 54 | for key in list(item.assets.keys()): 55 | if key != "image": 56 | del item.assets[key] 57 | continue 58 | 59 | new_key = f"{PLATFORM_NAME}/{item.id}/{Path(item.assets[key].href).name}" 60 | try: 61 | href = AWS_S3_URL.format( 62 | year=item.properties["naip:year"], 63 | state=item.properties["naip:state"], 64 | resolution=f"{int(item.properties['gsd'] * 100)}cm", 65 | block=item.id.split("_")[2][:5], 66 | subblock=f"/{item.id.split('_')[2][5:]}", 67 | name=item.assets["image"].href.split("/")[-1], 68 | ) 69 | url = urlparse(href) 70 | copy_source = { 71 | "Bucket": "naip-analytic", 72 | "Key": url.path.lstrip("/"), 73 | } 74 
| print(f"Copying {copy_source}") 75 | s3.Object("naip-analytic", url.path.lstrip("/")).load( 76 | RequestPayer="requester" 77 | ) 78 | s3.meta.client.copy( 79 | copy_source, 80 | bucket, 81 | new_key, 82 | ExtraArgs={"RequestPayer": "requester"}, 83 | ) 84 | except ClientError: 85 | href = AWS_S3_URL.format( 86 | year=item.properties["naip:year"], 87 | state=item.properties["naip:state"], 88 | resolution=f"{int(item.properties['gsd'] * 100)}cm", 89 | block=item.id.split("_")[2][:5], 90 | subblock="", 91 | name=item.assets["image"].href.split("/")[-1], 92 | ) 93 | url = urlparse(href) 94 | copy_source = { 95 | "Bucket": "naip-analytic", 96 | "Key": url.path.lstrip("/"), 97 | } 98 | print(f"Failed, now copying {copy_source}") 99 | s3.Object("naip-analytic", url.path.lstrip("/")).load( 100 | RequestPayer="requester" 101 | ) 102 | s3.meta.client.copy( 103 | copy_source, 104 | bucket, 105 | new_key, 106 | ExtraArgs={"RequestPayer": "requester"}, 107 | ) 108 | 109 | item.assets[key].href = f"s3://{bucket}/{new_key}" 110 | 111 | # Convert Dictionary to JSON String 112 | data_string = json.dumps(item.to_dict()) 113 | 114 | # Upload JSON String to an S3 Object 115 | s3_bucket = s3.Bucket(name=bucket) 116 | s3_bucket.put_object( 117 | Key=f"{PLATFORM_NAME}/{item.id}/stac_item.json", 118 | Body=data_string, 119 | ) 120 | 121 | indexer = NoStatsChipIndexer(item) 122 | index = indexer.create_index() 123 | print("Indexer info", indexer.x_size, indexer.y_size, indexer.shape) 124 | 125 | writer = pa.BufferOutputStream() 126 | io.write_geoparquet_table(index, writer) 127 | body = bytes(writer.getvalue()) 128 | # Centralize the index files to make combining them easier later on 129 | s3_bucket.put_object( 130 | Body=body, 131 | Key=f"index/{PLATFORM_NAME}/{item.id}/index_{item.id}.parquet", 132 | ) 133 | 134 | 135 | def process() -> None: 136 | 137 | if "AWS_BATCH_JOB_ARRAY_INDEX" not in os.environ: 138 | raise ValueError("AWS_BATCH_JOB_ARRAY_INDEX env var not set") 139 | if 
"STACCHIP_SAMPLE_SOURCE" not in os.environ: 140 | raise ValueError("STACCHIP_SAMPLE_SOURCE env var not set") 141 | if "STACCHIP_BUCKET" not in os.environ: 142 | raise ValueError("STACCHIP_BUCKET env var not set") 143 | 144 | index = int(os.environ["AWS_BATCH_JOB_ARRAY_INDEX"]) 145 | sample_source = os.environ["STACCHIP_SAMPLE_SOURCE"] 146 | bucket = os.environ["STACCHIP_BUCKET"] 147 | 148 | process_naip_tile(index, sample_source, bucket) 149 | -------------------------------------------------------------------------------- /stacchip/processors/prechip.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import math 3 | import os 4 | from io import BytesIO 5 | from multiprocessing import Pool 6 | from typing import Union 7 | 8 | import boto3 9 | import numpy as np 10 | import pyarrow as pa 11 | from pyarrow import dataset as da 12 | 13 | from stacchip.chipper import Chipper 14 | from stacchip.utils import load_indexer_s3 15 | 16 | VERSION = "mode_v1_chipper_v2" 17 | 18 | CUBESIZE = 128 19 | 20 | S2_BANDS = [ 21 | "blue", 22 | "green", 23 | "red", 24 | "rededge1", 25 | "rededge2", 26 | "rededge3", 27 | "nir", 28 | "nir08", 29 | "swir16", 30 | "swir22", 31 | ] 32 | LS_BANDS = [ 33 | "red", 34 | "green", 35 | "blue", 36 | "nir08", 37 | "swir16", 38 | "swir22", 39 | ] 40 | NAIP_BANDS = ["red", "green", "blue", "nir"] 41 | LINZ_BANDS = ["red", "green", "blue"] 42 | S1_BANDS = ["vv", "vh"] 43 | MODIS_BANDS = [ 44 | "sur_refl_b01", 45 | "sur_refl_b02", 46 | "sur_refl_b03", 47 | "sur_refl_b04", 48 | "sur_refl_b05", 49 | "sur_refl_b06", 50 | "sur_refl_b07", 51 | ] 52 | 53 | 54 | def normalize_timestamp(date): 55 | 56 | week = date.isocalendar().week * 2 * np.pi / 52 57 | hour = date.hour * 2 * np.pi / 24 58 | 59 | return (math.sin(week), math.cos(week)), (math.sin(hour), math.cos(hour)) 60 | 61 | 62 | def normalize_latlon(bounds): 63 | lon = bounds[0] + (bounds[2] - bounds[0]) / 2 64 | lat = bounds[1] + (bounds[3] - 
bounds[1]) / 2 65 | 66 | lat = lat * np.pi / 180 67 | lon = lon * np.pi / 180 68 | 69 | return (math.sin(lat), math.cos(lat)), (math.sin(lon), math.cos(lon)) 70 | 71 | 72 | def stack_chips(chips: list, cube_id: int, chip_bucket: str, platform: str): 73 | print(f"Writing cube {cube_id}") 74 | 75 | pixels = np.stack([chip["pixels"] for chip in chips], dtype="float32") 76 | lon_norm = np.vstack([chip["lon_norm"] for chip in chips], dtype="float32") 77 | lat_norm = np.vstack([chip["lat_norm"] for chip in chips], dtype="float32") 78 | week_norm = np.vstack([chip["week_norm"] for chip in chips], dtype="float32") 79 | hour_norm = np.vstack([chip["hour_norm"] for chip in chips], dtype="float32") 80 | 81 | key = f"{VERSION}/{platform}/cube_{cube_id}.npz" 82 | 83 | client = boto3.client("s3") 84 | with BytesIO() as bytes: 85 | np.savez_compressed( 86 | file=bytes, 87 | pixels=pixels, 88 | lon_norm=lon_norm, 89 | lat_norm=lat_norm, 90 | week_norm=week_norm, 91 | hour_norm=hour_norm, 92 | ) 93 | bytes.seek(0) 94 | client.upload_fileobj(Fileobj=bytes, Bucket=chip_bucket, Key=key) 95 | 96 | 97 | def get_chip( 98 | data_bucket: str, 99 | row: int, 100 | platform: str, 101 | item_id: str, 102 | date: Union[datetime.date, datetime.datetime], 103 | chip_index_x: int, 104 | chip_index_y: int, 105 | ): 106 | print( 107 | "Getting chip", 108 | data_bucket, 109 | row, 110 | platform, 111 | item_id, 112 | date, 113 | chip_index_x, 114 | chip_index_y, 115 | ) 116 | 117 | indexer = load_indexer_s3( 118 | bucket=data_bucket, 119 | platform=platform, 120 | item_id=item_id, 121 | ) 122 | chipper = Chipper(indexer) 123 | 124 | chip = chipper.chip(chip_index_x, chip_index_y) 125 | 126 | if platform == "naip": 127 | pixels = chip["image"] 128 | bands = NAIP_BANDS 129 | elif platform == "linz": 130 | pixels = chip["asset"] 131 | bands = LINZ_BANDS 132 | elif platform == "sentinel-2-l2a": 133 | pixels = np.vstack([chip[band] for band in S2_BANDS]) 134 | bands = S2_BANDS 135 | elif platform in 
["landsat-c2l2-sr", "landsat-c2l1"]: 136 | pixels = np.vstack([chip[band] for band in LS_BANDS]) 137 | bands = LS_BANDS 138 | elif platform == "sentinel-1-rtc": 139 | if any(band not in chip for band in S1_BANDS): 140 | return 141 | pixels = np.vstack([chip[band] for band in S1_BANDS]) 142 | bands = S1_BANDS 143 | elif platform == "modis": 144 | pixels = np.vstack([chip[band] for band in MODIS_BANDS]) 145 | bands = MODIS_BANDS 146 | 147 | if len(pixels) != len(bands): 148 | raise ValueError( 149 | f"Pixels shape {pixels.shape} is not equal to nr of bands {bands} for item {item_id}" 150 | ) 151 | 152 | if isinstance(date, datetime.date): 153 | # Assume noon for dates without timestamp 154 | date = datetime.datetime(date.year, date.month, date.day, 12) 155 | week_norm, hour_norm = normalize_timestamp(date) 156 | 157 | bounds = chipper.indexer.get_chip_bbox(chip_index_x, chip_index_y).bounds 158 | lon_norm, lat_norm = normalize_latlon(bounds) 159 | 160 | return { 161 | "pixels": pixels, 162 | "lon_norm": lon_norm, 163 | "lat_norm": lat_norm, 164 | "week_norm": week_norm, 165 | "hour_norm": hour_norm, 166 | } 167 | 168 | 169 | def process() -> None: 170 | # GDAL read optimization is recommended 171 | # os.environ["GDAL_DISABLE_READDIR_ON_OPEN"] = "YES" 172 | # os.environ["CPL_VSIL_CURL_ALLOWED_EXTENSIONS"] = ".tif,.png,.jp2,.tiff" 173 | 174 | if "AWS_BATCH_JOB_ARRAY_INDEX" not in os.environ: 175 | raise ValueError("AWS_BATCH_JOB_ARRAY_INDEX env var not set") 176 | if "STACCHIP_DATA_BUCKET" not in os.environ: 177 | raise ValueError("STACCHIP_DATA_BUCKET env var not set") 178 | if "STACCHIP_INDEXPATH" not in os.environ: 179 | raise ValueError("STACCHIP_INDEXPATH env var not set") 180 | if "STACCHIP_CHIP_BUCKET" not in os.environ: 181 | raise ValueError("STACCHIP_TARGETPATH env var not set") 182 | 183 | index = int(os.environ["AWS_BATCH_JOB_ARRAY_INDEX"]) 184 | data_bucket = os.environ["STACCHIP_DATA_BUCKET"] 185 | indexpath = os.environ["STACCHIP_INDEXPATH"] 186 | 
chip_bucket = os.environ["STACCHIP_CHIP_BUCKET"] 187 | platform = os.environ.get("STACCHIP_PLATFORM", "") 188 | cubes_per_job = int(os.environ.get("STACCHIP_CUBES_PER_JOB", 10)) 189 | pool_size = int(os.environ.get("STACCHIP_POOL_SIZE", 10)) 190 | chip_max_nodata = float(os.environ.get("STACCHIP_MAX_NODATA", 0.05)) 191 | 192 | # Open table 193 | table = da.dataset(indexpath, format="parquet").to_table( 194 | columns=[ 195 | "chipid", 196 | "platform", 197 | "item_id", 198 | "date", 199 | "chip_index_x", 200 | "chip_index_y", 201 | "nodata_percentage", 202 | ] 203 | ) 204 | if platform: 205 | table = table.filter(pa.compute.field("platform") == platform) 206 | 207 | initial_count = len(table) 208 | if chip_max_nodata: 209 | table = table.filter(pa.compute.field("nodata_percentage") <= chip_max_nodata) 210 | print( 211 | f"Dropped {initial_count - len(table)} chips due to nodata filter, keeping {len(table)}" 212 | ) 213 | 214 | np.random.seed(42) 215 | random_rows = np.random.randint(0, len(table), len(table)) 216 | 217 | for cube_id in range(index * cubes_per_job, (index + 1) * cubes_per_job): 218 | random_rows_cube = random_rows[cube_id * CUBESIZE : (cube_id + 1) * CUBESIZE] 219 | if len(random_rows_cube) != CUBESIZE: 220 | print("Finishing because of incomplete cubes") 221 | return 222 | 223 | # Extract chips data for this job 224 | all_chips = [] 225 | for row in random_rows_cube: 226 | all_chips.append( 227 | ( 228 | data_bucket, 229 | row, 230 | table.column("platform")[row].as_py(), 231 | table.column("item_id")[row].as_py(), 232 | table.column("date")[row].as_py(), 233 | table.column("chip_index_x")[row].as_py(), 234 | table.column("chip_index_y")[row].as_py(), 235 | ) 236 | ) 237 | 238 | with Pool(pool_size) as pl: 239 | data = pl.starmap( 240 | get_chip, 241 | all_chips, 242 | ) 243 | 244 | if None in data: 245 | print(f"Not all cubes are complete, skipping stacking for cube {cube_id}") 246 | continue 247 | 248 | stack_chips(data, cube_id=cube_id, 
chip_bucket=chip_bucket, platform=platform) 249 | -------------------------------------------------------------------------------- /stacchip/processors/sentinel_1_processor.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import random 4 | from pathlib import Path 5 | from urllib.parse import urlparse 6 | 7 | import boto3 8 | import geopandas as gp 9 | import planetary_computer as pc 10 | import pyarrow as pa 11 | import pystac_client 12 | import rasterio 13 | from geoarrow.pyarrow import io 14 | from rasterio.io import MemoryFile 15 | 16 | from stacchip.indexer import NoDataMaskChipIndexer 17 | 18 | STAC_API = "https://planetarycomputer.microsoft.com/api/stac/v1" 19 | S1_ASSETS = [ 20 | "vv", 21 | "vh", 22 | ] 23 | PLATFORM_NAME = "sentinel-1-rtc" 24 | quartals = [ 25 | "{year}-01-01/{year}-03-31", 26 | "{year}-04-01/{year}-06-30", 27 | "{year}-07-01/{year}-09-30", 28 | "{year}-10-01/{year}-12-31", 29 | ] 30 | 31 | 32 | def process_mgrs_tile(index: int, mgrs_source: str, bucket: str) -> None: 33 | # Prepare resources for the job 34 | s3 = boto3.resource("s3") 35 | data = gp.read_file(mgrs_source) 36 | row = data.iloc[index] 37 | catalog = pystac_client.Client.open(STAC_API, modifier=pc.sign_inplace) 38 | print("MGRS", row["name"]) 39 | random.seed(index) 40 | for year in random.sample(range(2018, 2024), 1): 41 | print(f"Year {year}") 42 | for quartal in random.sample(quartals, 1): 43 | print(f"Quartal {quartal.format(year=year)}") 44 | items = catalog.search( 45 | max_items=1, 46 | filter_lang="cql2-json", 47 | filter={ 48 | "op": "and", 49 | "args": [ 50 | # { 51 | # "op": "s_intersects", 52 | # "args": [ 53 | # {"property": "geometry"}, 54 | # row.geometry.__geo_interface__, 55 | # ], 56 | # }, 57 | { 58 | "op": "anyinteracts", 59 | "args": [ 60 | {"property": "datetime"}, 61 | quartal.format(year=year), 62 | ], 63 | }, 64 | { 65 | "op": "=", 66 | "args": [{"property": "collection"}, 
"sentinel-1-rtc"], 67 | }, 68 | ], 69 | }, 70 | ) 71 | item = items.item_collection()[0] 72 | 73 | nodata_mask = None 74 | for key in list(item.assets.keys()): 75 | if key not in S1_ASSETS: 76 | del item.assets[key] 77 | else: 78 | url = item.assets[key].href 79 | with rasterio.open(url) as rst: 80 | data = rst.read() 81 | meta = rst.meta.copy() 82 | if nodata_mask is None: 83 | nodata_mask = data[0] == rst.nodata 84 | 85 | with MemoryFile() as memfile: 86 | with memfile.open(**meta, compress="deflate") as dst: 87 | dst.write(data) 88 | 89 | memfile.seek(0) 90 | 91 | s3_bucket = s3.Bucket(name=bucket) 92 | new_key = ( 93 | f"{PLATFORM_NAME}/{item.id}/{Path(urlparse(url).path).name}" 94 | ) 95 | print(f"Copying {urlparse(url).path}") 96 | s3_bucket.put_object( 97 | Key=new_key, 98 | Body=memfile.read(), 99 | ) 100 | 101 | item.assets[key].href = f"s3://{bucket}/{new_key}" 102 | 103 | # Convert Dictionary to JSON String 104 | data_string = json.dumps(item.to_dict()) 105 | 106 | # Upload JSON String to an S3 Object 107 | s3_bucket = s3.Bucket(name=bucket) 108 | s3_bucket.put_object( 109 | Key=f"{PLATFORM_NAME}/{item.id}/stac_item.json", 110 | Body=data_string, 111 | ) 112 | indexer = NoDataMaskChipIndexer(item, nodata_mask=nodata_mask) 113 | index = indexer.create_index() 114 | 115 | writer = pa.BufferOutputStream() 116 | io.write_geoparquet_table(index, writer) 117 | body = bytes(writer.getvalue()) 118 | # Centralize the index files to make combining them easier later on 119 | s3_bucket.put_object( 120 | Body=body, 121 | Key=f"index/{PLATFORM_NAME}/{item.id}/index_{item.id}.parquet", 122 | ) 123 | 124 | 125 | def process() -> None: 126 | 127 | if "AWS_BATCH_JOB_ARRAY_INDEX" not in os.environ: 128 | raise ValueError("AWS_BATCH_JOB_ARRAY_INDEX env var not set") 129 | if "STACCHIP_MGRS_SOURCE" not in os.environ: 130 | raise ValueError("STACCHIP_MGRS_SOURCE env var not set") 131 | if "STACCHIP_BUCKET" not in os.environ: 132 | raise ValueError("STACCHIP_BUCKET env var not 
set") 133 | 134 | index = int(os.environ["AWS_BATCH_JOB_ARRAY_INDEX"]) 135 | mgrs_source = os.environ["STACCHIP_MGRS_SOURCE"] 136 | bucket = os.environ["STACCHIP_BUCKET"] 137 | 138 | process_mgrs_tile(index, mgrs_source, bucket) 139 | -------------------------------------------------------------------------------- /stacchip/processors/sentinel_2_processor.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import random 4 | from pathlib import Path 5 | from urllib.parse import urlparse 6 | 7 | import boto3 8 | import geopandas as gp 9 | import pyarrow as pa 10 | import pystac_client 11 | from geoarrow.pyarrow import io 12 | 13 | from stacchip.indexer import Sentinel2Indexer 14 | 15 | STAC_API = "https://earth-search.aws.element84.com/v1" 16 | S2_ASSETS = [ 17 | "blue", 18 | "green", 19 | "nir", 20 | "nir08", 21 | "red", 22 | "rededge1", 23 | "rededge2", 24 | "rededge3", 25 | "scl", 26 | "swir16", 27 | "swir22", 28 | ] 29 | ABSOLUTE_CLOUD_COVER_FILTER = 0.75 30 | PLATFORM_NAME = "sentinel-2-l2a" 31 | SCENE_NODATA_LIMIT = 20 32 | quartals = [ 33 | "{year}-01-01/{year}-03-31", 34 | "{year}-04-01/{year}-06-30", 35 | "{year}-07-01/{year}-09-30", 36 | "{year}-10-01/{year}-12-31", 37 | ] 38 | 39 | 40 | def process_mgrs_tile(index: int, mgrs_source: str, bucket: str) -> None: 41 | # Prepare resources for the job 42 | catalog = pystac_client.Client.open(STAC_API) 43 | s3 = boto3.resource("s3") 44 | data = gp.read_file(mgrs_source) 45 | row = data.iloc[index] 46 | 47 | print("MGRS", row["name"]) 48 | random.seed(index) 49 | for year in random.sample(range(2018, 2024), 2): 50 | print(f"Year {year}") 51 | for quartal in quartals: 52 | print(f"Quartal {quartal.format(year=year)}") 53 | items = catalog.search( 54 | collections=["sentinel-2-l2a"], 55 | datetime=quartal.format(year=year), 56 | max_items=1, 57 | intersects=row.geometry, 58 | sortby="properties.eo:cloud_cover", 59 | query={ 60 | "grid:code": { 61 | 
"eq": f"MGRS-{row['name']}", 62 | }, 63 | "s2:nodata_pixel_percentage": {"lte": SCENE_NODATA_LIMIT}, 64 | }, 65 | ) 66 | item = items.item_collection()[0] 67 | 68 | if item.properties["eo:cloud_cover"] > ABSOLUTE_CLOUD_COVER_FILTER: 69 | continue 70 | 71 | print(f"Cloud cover is {item.properties['eo:cloud_cover']}") 72 | 73 | for key in list(item.assets.keys()): 74 | if key not in S2_ASSETS: 75 | del item.assets[key] 76 | else: 77 | url = urlparse(item.assets[key].href) 78 | copy_source = { 79 | "Bucket": "sentinel-cogs", 80 | "Key": url.path.lstrip("/"), 81 | } 82 | print(f"Copying {copy_source}") 83 | new_key = ( 84 | f"{PLATFORM_NAME}/{item.id}/{Path(item.assets[key].href).name}" 85 | ) 86 | s3.meta.client.copy(copy_source, bucket, new_key) 87 | item.assets[key].href = f"s3://{bucket}/{new_key}" 88 | 89 | # Convert Dictionary to JSON String 90 | data_string = json.dumps(item.to_dict()) 91 | 92 | # Upload JSON String to an S3 Object 93 | s3_bucket = s3.Bucket(name=bucket) 94 | s3_bucket.put_object( 95 | Key=f"{PLATFORM_NAME}/{item.id}/stac_item.json", 96 | Body=data_string, 97 | ) 98 | 99 | indexer = Sentinel2Indexer(item) 100 | index = indexer.create_index() 101 | 102 | writer = pa.BufferOutputStream() 103 | io.write_geoparquet_table(index, writer) 104 | body = bytes(writer.getvalue()) 105 | # Centralize the index files to make combining them easier later on 106 | s3_bucket.put_object( 107 | Body=body, 108 | Key=f"index/{PLATFORM_NAME}/{item.id}/index_{item.id}.parquet", 109 | ) 110 | 111 | 112 | def process() -> None: 113 | 114 | if "AWS_BATCH_JOB_ARRAY_INDEX" not in os.environ: 115 | raise ValueError("AWS_BATCH_JOB_ARRAY_INDEX env var not set") 116 | if "STACCHIP_MGRS_SOURCE" not in os.environ: 117 | raise ValueError("STACCHIP_MGRS_SOURCE env var not set") 118 | if "STACCHIP_BUCKET" not in os.environ: 119 | raise ValueError("STACCHIP_BUCKET env var not set") 120 | 121 | index = int(os.environ["AWS_BATCH_JOB_ARRAY_INDEX"]) 122 | mgrs_source = 
def get_stats_keys(key):
    """Compute per-band pixel statistics for a single data cube on S3.

    Downloads the npz cube stored at ``key`` in the ``clay-v1-data-cubes``
    bucket, masks out platform-specific nodata pixels, and reduces the cube
    to per-band aggregates that can be combined across many cubes.

    Args:
        key: S3 object key of an npz file containing a "pixels" array.
            The array is swapped so the band axis comes first before
            reducing; assumes layout (chip, band, y, x) — TODO confirm.

    Returns:
        Tuple of (pixel_count, pixel_sum, pixel_sqr): masked per-band
        counts, sums, and sums of squares over all remaining axes.
    """
    print(f"Processing {key}")
    # Platform-specific nodata values. This must be a single if/elif chain:
    # with separate if-statements the else branch resets the Sentinel-1
    # value back to 0, masking the wrong pixels.
    if "sentinel-1-rtc" in key:
        nodata = -32768
    elif "modis" in key:
        nodata = -28672
    else:
        nodata = 0

    s3_session = boto3.resource("s3")
    obj = s3_session.Object("clay-v1-data-cubes", key)
    body = obj.get()["Body"].read()
    with io.BytesIO(body) as f:
        f.seek(0)
        data = np.load(f)["pixels"]

    # Band axis first so reductions over the remaining axes are per band.
    data = data.astype("float64").swapaxes(0, 1)

    data = np.ma.array(data, mask=data == nodata)

    pixel_count = np.ma.count(data, axis=(1, 2, 3))
    pixel_sum = np.ma.sum(data, axis=(1, 2, 3))
    pixel_sqr = np.ma.sum(np.ma.power(data, 2), axis=(1, 2, 3))

    return pixel_count, pixel_sum, pixel_sqr
def process():
    """Aggregate per-band mean and standard deviation across data cubes.

    Lists npz cubes for the platform named by the STACCHIP_PLATFORM env var,
    computes per-band pixel statistics for up to STACCHIP_MAX_CUBES cubes in
    a worker pool of STACCHIP_POOL_SIZE processes, and prints the combined
    per-band mean and standard deviation.

    Raises:
        ValueError: If STACCHIP_PLATFORM is unset or not a known platform.
    """
    if "STACCHIP_PLATFORM" not in os.environ:
        raise ValueError("STACCHIP_PLATFORM env var not set")
    pool_size = int(os.environ.get("STACCHIP_POOL_SIZE", 4))
    max_cubes = int(os.environ.get("STACCHIP_MAX_CUBES", 4))

    platform = os.environ.get("STACCHIP_PLATFORM")
    # Map each platform to its band list; both landsat collections share one.
    bands_by_platform = {
        "naip": NAIP_BANDS,
        "linz": LINZ_BANDS,
        "sentinel-2-l2a": S2_BANDS,
        "landsat-c2l2-sr": LS_BANDS,
        "landsat-c2l1": LS_BANDS,
        "sentinel-1-rtc": S1_BANDS,
        "modis": MODIS_BANDS,
    }
    if platform not in bands_by_platform:
        raise ValueError(f"Platform {platform} not found")
    bands = bands_by_platform[platform]

    client = boto3.client("s3")
    paginator = client.get_paginator("list_objects_v2")
    page_iterator = paginator.paginate(
        Bucket="clay-v1-data-cubes", Prefix=f"mode_v1_chipper_v2/{platform}"
    )

    # Collect at most max_cubes object keys from the paginated listing.
    all_keys = []
    for page in page_iterator:
        for entry in page["Contents"]:
            all_keys.append(entry["Key"])
            if len(all_keys) == max_cubes:
                break
        if len(all_keys) == max_cubes:
            break

    with Pool(pool_size) as workers:
        results = workers.map(get_stats_keys, all_keys)

    # Combine the per-cube reductions into global per-band accumulators.
    band_count = len(bands)
    pixel_count = np.zeros(band_count)
    pixel_sum = np.zeros(band_count)
    pixel_sqr = np.zeros(band_count)
    for count, total, sq_total in results:
        pixel_count = np.add(pixel_count, count)
        pixel_sum = np.add(pixel_sum, total)
        pixel_sqr = np.add(pixel_sqr, sq_total)

    # Mean/std from sums and sums of squares:
    # https://stackoverflow.com/questions/1174984/how-to-efficiently-calculate-a-running-standard-deviation
    mean = pixel_sum / pixel_count
    stdev = np.sqrt((pixel_sqr / pixel_count) - (mean * mean))

    print("-- Mean by band")
    for band, val in zip(bands, mean):
        print(f"{band}: {val}")

    print("-- Std by band")
    for band, val in zip(bands, stdev):
        print(f"{band}: {val}")
def load_indexer_local(mountpath: Path, platform: str, item_id: str) -> ChipIndexer:
    """
    Load stacchip index table from local file
    """
    item_path = mountpath / Path(f"{platform}/{item_id}/stac_item.json")
    return ChipIndexer(Item.from_file(item_path))
-66.44760162836175 55 | ], 56 | [ 57 | 138.58173509190902, 58 | -65.80312306904463 59 | ], 60 | [ 61 | 136.76644043427444, 62 | -67.38978802075671 63 | ] 64 | ] 65 | ] 66 | }, 67 | "links": [ 68 | { 69 | "rel": "self", 70 | "href": "/home/tam/Desktop/clay-v1-data/items/landsat-c2l2-sr-LC09_L2SR_086107_20240311_20240312_02_T2_SR.json", 71 | "type": "application/json" 72 | }, 73 | { 74 | "rel": "parent", 75 | "href": "https://landsatlook.usgs.gov/stac-server/collections/landsat-c2l2-sr" 76 | }, 77 | { 78 | "rel": "collection", 79 | "href": "https://landsatlook.usgs.gov/stac-server/collections/landsat-c2l2-sr" 80 | }, 81 | { 82 | "rel": "root", 83 | "href": "https://landsatlook.usgs.gov/stac-server/", 84 | "type": "application/json", 85 | "title": "STAC API" 86 | } 87 | ], 88 | "assets": { 89 | "thumbnail": { 90 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_thumb_small.jpeg", 91 | "type": "image/jpeg", 92 | "title": "Thumbnail image", 93 | "alternate": { 94 | "s3": { 95 | "storage:platform": "AWS", 96 | "storage:requester_pays": true, 97 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_thumb_small.jpeg" 98 | } 99 | }, 100 | "roles": [ 101 | "thumbnail" 102 | ] 103 | }, 104 | "reduced_resolution_browse": { 105 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_thumb_large.jpeg", 106 | "type": "image/jpeg", 107 | "title": "Reduced resolution browse image", 108 | "alternate": { 109 | "s3": { 110 | "storage:platform": "AWS", 111 | "storage:requester_pays": true, 112 | "href": 
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_thumb_large.jpeg" 113 | } 114 | }, 115 | "roles": [ 116 | "overview" 117 | ] 118 | }, 119 | "index": { 120 | "href": "https://landsatlook.usgs.gov/stac-browser/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2", 121 | "type": "text/html", 122 | "title": "HTML index page", 123 | "roles": [ 124 | "metadata" 125 | ] 126 | }, 127 | "MTL.json": { 128 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_MTL.json", 129 | "type": "application/json", 130 | "title": "Product Metadata File (json)", 131 | "description": "Collection 2 Level-2 Product Metadata File (json)", 132 | "alternate": { 133 | "s3": { 134 | "storage:platform": "AWS", 135 | "storage:requester_pays": true, 136 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_MTL.json" 137 | } 138 | }, 139 | "roles": [ 140 | "metadata" 141 | ] 142 | }, 143 | "coastal": { 144 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B1.TIF", 145 | "type": "image/vnd.stac.geotiff; cloud-optimized=true", 146 | "title": "Coastal/Aerosol Band (B1)", 147 | "description": "Collection 2 Level-2 Coastal/Aerosol Band (B1) Surface Reflectance", 148 | "eo:bands": [ 149 | { 150 | "name": "B1", 151 | "common_name": "coastal", 152 | "gsd": 30, 153 | "center_wavelength": 0.44 154 | } 155 | ], 156 | "alternate": { 157 | "s3": { 158 | "storage:platform": "AWS", 159 | "storage:requester_pays": true, 160 | "href": 
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B1.TIF" 161 | } 162 | }, 163 | "roles": [ 164 | "data" 165 | ] 166 | }, 167 | "blue": { 168 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B2.TIF", 169 | "type": "image/vnd.stac.geotiff; cloud-optimized=true", 170 | "title": "Blue Band (B2)", 171 | "description": "Collection 2 Level-2 Blue Band (B2) Surface Reflectance", 172 | "eo:bands": [ 173 | { 174 | "name": "B2", 175 | "common_name": "blue", 176 | "gsd": 30, 177 | "center_wavelength": 0.48 178 | } 179 | ], 180 | "alternate": { 181 | "s3": { 182 | "storage:platform": "AWS", 183 | "storage:requester_pays": true, 184 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B2.TIF" 185 | } 186 | }, 187 | "roles": [ 188 | "data" 189 | ] 190 | }, 191 | "green": { 192 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B3.TIF", 193 | "type": "image/vnd.stac.geotiff; cloud-optimized=true", 194 | "title": "Green Band (B3)", 195 | "description": "Collection 2 Level-2 Green Band (B3) Surface Reflectance", 196 | "eo:bands": [ 197 | { 198 | "name": "B3", 199 | "common_name": "green", 200 | "gsd": 30, 201 | "center_wavelength": 0.56 202 | } 203 | ], 204 | "alternate": { 205 | "s3": { 206 | "storage:platform": "AWS", 207 | "storage:requester_pays": true, 208 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B3.TIF" 209 | } 210 | }, 211 | "roles": [ 212 | "data" 213 | ] 214 | }, 215 | "red": { 216 | "href": 
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B4.TIF", 217 | "type": "image/vnd.stac.geotiff; cloud-optimized=true", 218 | "title": "Red Band (B4)", 219 | "description": "Collection 2 Level-2 Red Band (B4) Surface Reflectance", 220 | "eo:bands": [ 221 | { 222 | "name": "B4", 223 | "common_name": "red", 224 | "gsd": 30, 225 | "center_wavelength": 0.65 226 | } 227 | ], 228 | "alternate": { 229 | "s3": { 230 | "storage:platform": "AWS", 231 | "storage:requester_pays": true, 232 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B4.TIF" 233 | } 234 | }, 235 | "roles": [ 236 | "data" 237 | ] 238 | }, 239 | "nir08": { 240 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B5.TIF", 241 | "type": "image/vnd.stac.geotiff; cloud-optimized=true", 242 | "title": "Near Infrared Band 0.8 (B5)", 243 | "description": "Collection 2 Level-2 Near Infrared Band 0.8 (B5) Surface Reflectance", 244 | "eo:bands": [ 245 | { 246 | "name": "B5", 247 | "common_name": "nir08", 248 | "gsd": 30, 249 | "center_wavelength": 0.86 250 | } 251 | ], 252 | "alternate": { 253 | "s3": { 254 | "storage:platform": "AWS", 255 | "storage:requester_pays": true, 256 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B5.TIF" 257 | } 258 | }, 259 | "roles": [ 260 | "data", 261 | "reflectance" 262 | ] 263 | }, 264 | "swir16": { 265 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B6.TIF", 266 | "type": "image/vnd.stac.geotiff; cloud-optimized=true", 
267 | "title": "Short-wave Infrared Band 1.6 (B6)", 268 | "description": "Collection 2 Level-2 Short-wave Infrared Band 1.6 (B6) Surface Reflectance", 269 | "eo:bands": [ 270 | { 271 | "name": "B6", 272 | "common_name": "swir16", 273 | "gsd": 30, 274 | "center_wavelength": 1.6 275 | } 276 | ], 277 | "alternate": { 278 | "s3": { 279 | "storage:platform": "AWS", 280 | "storage:requester_pays": true, 281 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B6.TIF" 282 | } 283 | }, 284 | "roles": [ 285 | "data", 286 | "reflectance" 287 | ] 288 | }, 289 | "swir22": { 290 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B7.TIF", 291 | "type": "image/vnd.stac.geotiff; cloud-optimized=true", 292 | "title": "Short-wave Infrared Band 2.2 (B7)", 293 | "description": "Collection 2 Level-2 Short-wave Infrared Band 2.2 (B7) Surface Reflectance", 294 | "eo:bands": [ 295 | { 296 | "name": "B7", 297 | "common_name": "swir22", 298 | "gsd": 30, 299 | "center_wavelength": 2.2 300 | } 301 | ], 302 | "alternate": { 303 | "s3": { 304 | "storage:platform": "AWS", 305 | "storage:requester_pays": true, 306 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B7.TIF" 307 | } 308 | }, 309 | "roles": [ 310 | "data", 311 | "reflectance" 312 | ] 313 | }, 314 | "qa_aerosol": { 315 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_QA_AEROSOL.TIF", 316 | "type": "image/vnd.stac.geotiff; cloud-optimized=true", 317 | "title": "Aerosol Quality Analysis Band", 318 | "description": "Collection 2 Level-2 Aerosol Quality Analysis Band 
Surface Reflectance", 319 | "classification:bitfields": [ 320 | { 321 | "name": "fill", 322 | "description": "Corresponding pixels in L1 image bands are fill", 323 | "offset": 0, 324 | "length": 1, 325 | "classes": [ 326 | { 327 | "name": "not_fill", 328 | "description": "L1 image band pixels are not fill", 329 | "value": 0 330 | }, 331 | { 332 | "name": "fill", 333 | "description": "L1 image band pixels are fill", 334 | "value": 1 335 | } 336 | ] 337 | }, 338 | { 339 | "name": "retrieval", 340 | "description": "Valid aerosol retrieval", 341 | "offset": 1, 342 | "length": 1, 343 | "classes": [ 344 | { 345 | "name": "not_valid", 346 | "description": "Aerosol retrieval is not valid", 347 | "value": 0 348 | }, 349 | { 350 | "name": "valid", 351 | "description": "Aerosol retrieval is valid", 352 | "value": 1 353 | } 354 | ] 355 | }, 356 | { 357 | "name": "water", 358 | "description": "Water mask", 359 | "offset": 2, 360 | "length": 1, 361 | "classes": [ 362 | { 363 | "name": "not_water", 364 | "description": "Not water", 365 | "value": 0 366 | }, 367 | { 368 | "name": "water", 369 | "description": "Water", 370 | "value": 1 371 | } 372 | ] 373 | }, 374 | { 375 | "name": "unused", 376 | "description": "Unused bit", 377 | "offset": 3, 378 | "length": 1, 379 | "classes": [ 380 | { 381 | "name": "unused", 382 | "description": "Unused bit", 383 | "value": 0 384 | } 385 | ] 386 | }, 387 | { 388 | "name": "unused", 389 | "description": "Unused bit", 390 | "offset": 4, 391 | "length": 1, 392 | "classes": [ 393 | { 394 | "name": "unused", 395 | "description": "Unused bit", 396 | "value": 0 397 | } 398 | ] 399 | }, 400 | { 401 | "name": "interpolated", 402 | "description": "Aerosol is interpolated", 403 | "offset": 5, 404 | "length": 1, 405 | "classes": [ 406 | { 407 | "name": "not_interpolated", 408 | "description": "Aerosol is not interpolated", 409 | "value": 0 410 | }, 411 | { 412 | "name": "interpolated", 413 | "description": "Aerosol is interpolated", 414 | "value": 1 415 | 
} 416 | ] 417 | }, 418 | { 419 | "name": "level", 420 | "description": "Aerosol level", 421 | "offset": 6, 422 | "length": 2, 423 | "classes": [ 424 | { 425 | "name": "climatology", 426 | "description": "No aerosol correction applied", 427 | "value": 0 428 | }, 429 | { 430 | "name": "low", 431 | "description": "Low aerosol level", 432 | "value": 1 433 | }, 434 | { 435 | "name": "medium", 436 | "description": "Medium aerosol level", 437 | "value": 2 438 | }, 439 | { 440 | "name": "high", 441 | "description": "High aerosol level", 442 | "value": 3 443 | } 444 | ] 445 | } 446 | ], 447 | "alternate": { 448 | "s3": { 449 | "storage:platform": "AWS", 450 | "storage:requester_pays": true, 451 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_QA_AEROSOL.TIF" 452 | } 453 | }, 454 | "roles": [ 455 | "metadata", 456 | "data-mask", 457 | "water-mask" 458 | ] 459 | }, 460 | "qa_pixel": { 461 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_QA_PIXEL.TIF", 462 | "type": "image/vnd.stac.geotiff; cloud-optimized=true", 463 | "title": "Pixel Quality Assessment Band", 464 | "description": "Collection 2 Level-2 Pixel Quality Assessment Band Surface Reflectance", 465 | "classification:bitfields": [ 466 | { 467 | "name": "fill", 468 | "description": "Corresponding pixels in L1 image bands are fill", 469 | "offset": 0, 470 | "length": 1, 471 | "classes": [ 472 | { 473 | "name": "not_fill", 474 | "description": "L1 image band pixels are not fill", 475 | "value": 0 476 | }, 477 | { 478 | "name": "fill", 479 | "description": "L1 image band pixels are fill", 480 | "value": 1 481 | } 482 | ] 483 | }, 484 | { 485 | "name": "dilated", 486 | "description": "Dilated cloud", 487 | "offset": 1, 488 | "length": 1, 489 | "classes": [ 490 | { 491 | "name": 
"not_dilated", 492 | "description": "Cloud is not dilated or no cloud", 493 | "value": 0 494 | }, 495 | { 496 | "name": "dilated", 497 | "description": "Cloud dilation", 498 | "value": 1 499 | } 500 | ] 501 | }, 502 | { 503 | "name": "cirrus", 504 | "description": "Cirrus mask", 505 | "offset": 2, 506 | "length": 1, 507 | "classes": [ 508 | { 509 | "name": "not_cirrus", 510 | "description": "No confidence level set or low confidence cirrus", 511 | "value": 0 512 | }, 513 | { 514 | "name": "cirrus", 515 | "description": "High confidence cirrus", 516 | "value": 1 517 | } 518 | ] 519 | }, 520 | { 521 | "name": "cloud", 522 | "description": "Cloud mask", 523 | "offset": 3, 524 | "length": 1, 525 | "classes": [ 526 | { 527 | "name": "not_cloud", 528 | "description": "Cloud confidence is not high", 529 | "value": 0 530 | }, 531 | { 532 | "name": "cloud", 533 | "description": "High confidence cloud", 534 | "value": 1 535 | } 536 | ] 537 | }, 538 | { 539 | "name": "shadow", 540 | "description": "Cloud shadow mask", 541 | "offset": 4, 542 | "length": 1, 543 | "classes": [ 544 | { 545 | "name": "not_shadow", 546 | "description": "Cloud shadow confidence is not high", 547 | "value": 0 548 | }, 549 | { 550 | "name": "shadow", 551 | "description": "High confidence cloud shadow", 552 | "value": 1 553 | } 554 | ] 555 | }, 556 | { 557 | "name": "snow", 558 | "description": "Snow/Ice mask", 559 | "offset": 5, 560 | "length": 1, 561 | "classes": [ 562 | { 563 | "name": "not_snow", 564 | "description": "Snow/Ice confidence is not high", 565 | "value": 0 566 | }, 567 | { 568 | "name": "snow", 569 | "description": "High confidence snow cover", 570 | "value": 1 571 | } 572 | ] 573 | }, 574 | { 575 | "name": "clear", 576 | "description": "Cloud or dilated cloud bits set", 577 | "offset": 6, 578 | "length": 1, 579 | "classes": [ 580 | { 581 | "name": "not_clear", 582 | "description": "Cloud or dilated cloud bits are set", 583 | "value": 0 584 | }, 585 | { 586 | "name": "clear", 587 | 
"description": "Cloud and dilated cloud bits are not set", 588 | "value": 1 589 | } 590 | ] 591 | }, 592 | { 593 | "name": "water", 594 | "description": "Water mask", 595 | "offset": 7, 596 | "length": 1, 597 | "classes": [ 598 | { 599 | "name": "not_water", 600 | "description": "Land or cloud", 601 | "value": 0 602 | }, 603 | { 604 | "name": "water", 605 | "description": "Water", 606 | "value": 1 607 | } 608 | ] 609 | }, 610 | { 611 | "name": "cloud_confidence", 612 | "description": "Cloud confidence levels", 613 | "offset": 8, 614 | "length": 2, 615 | "classes": [ 616 | { 617 | "name": "not_set", 618 | "description": "No confidence level set", 619 | "value": 0 620 | }, 621 | { 622 | "name": "low", 623 | "description": "Low confidence cloud", 624 | "value": 1 625 | }, 626 | { 627 | "name": "medium", 628 | "description": "Medium confidence cloud", 629 | "value": 2 630 | }, 631 | { 632 | "name": "high", 633 | "description": "High confidence cloud", 634 | "value": 3 635 | } 636 | ] 637 | }, 638 | { 639 | "name": "shadow_confidence", 640 | "description": "Cloud shadow confidence levels", 641 | "offset": 10, 642 | "length": 2, 643 | "classes": [ 644 | { 645 | "name": "not_set", 646 | "description": "No confidence level set", 647 | "value": 0 648 | }, 649 | { 650 | "name": "low", 651 | "description": "Low confidence cloud shadow", 652 | "value": 1 653 | }, 654 | { 655 | "name": "reserved", 656 | "description": "Reserved - value not used", 657 | "value": 2 658 | }, 659 | { 660 | "name": "high", 661 | "description": "High confidence cloud shadow", 662 | "value": 3 663 | } 664 | ] 665 | }, 666 | { 667 | "name": "snow_confidence", 668 | "description": "Snow/Ice confidence levels", 669 | "offset": 12, 670 | "length": 2, 671 | "classes": [ 672 | { 673 | "name": "not_set", 674 | "description": "No confidence level set", 675 | "value": 0 676 | }, 677 | { 678 | "name": "low", 679 | "description": "Low confidence snow/ice", 680 | "value": 1 681 | }, 682 | { 683 | "name": 
"reserved", 684 | "description": "Reserved - value not used", 685 | "value": 2 686 | }, 687 | { 688 | "name": "high", 689 | "description": "High confidence snow/ice", 690 | "value": 3 691 | } 692 | ] 693 | }, 694 | { 695 | "name": "cirrus_confidence", 696 | "description": "Cirrus confidence levels", 697 | "offset": 14, 698 | "length": 2, 699 | "classes": [ 700 | { 701 | "name": "not_set", 702 | "description": "No confidence level set", 703 | "value": 0 704 | }, 705 | { 706 | "name": "low", 707 | "description": "Low confidence cirrus", 708 | "value": 1 709 | }, 710 | { 711 | "name": "reserved", 712 | "description": "Reserved - value not used", 713 | "value": 2 714 | }, 715 | { 716 | "name": "high", 717 | "description": "High confidence cirrus", 718 | "value": 3 719 | } 720 | ] 721 | } 722 | ], 723 | "alternate": { 724 | "s3": { 725 | "storage:platform": "AWS", 726 | "storage:requester_pays": true, 727 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_QA_PIXEL.TIF" 728 | } 729 | }, 730 | "roles": [ 731 | "cloud", 732 | "cloud-shadow", 733 | "snow-ice", 734 | "water-mask" 735 | ] 736 | }, 737 | "qa_radsat": { 738 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_QA_RADSAT.TIF", 739 | "type": "image/vnd.stac.geotiff; cloud-optimized=true", 740 | "title": "Radiometric Saturation Quality Assessment Band", 741 | "description": "Collection 2 Level-2 Radiometric Saturation Quality Assessment Band Surface Reflectance", 742 | "classification:bitfields": [ 743 | { 744 | "name": "band1", 745 | "description": "Band 1 radiometric saturation", 746 | "offset": 0, 747 | "length": 1, 748 | "classes": [ 749 | { 750 | "name": "not_saturated", 751 | "description": "Band 1 is not saturated", 752 | "value": 0 753 | }, 754 | { 755 | "name": 
"saturated", 756 | "description": "Band 1 is saturated", 757 | "value": 1 758 | } 759 | ] 760 | }, 761 | { 762 | "name": "band2", 763 | "description": "Band 2 radiometric saturation", 764 | "offset": 1, 765 | "length": 1, 766 | "classes": [ 767 | { 768 | "name": "not_saturated", 769 | "description": "Band 2 is not saturated", 770 | "value": 0 771 | }, 772 | { 773 | "name": "saturated", 774 | "description": "Band 2 is saturated", 775 | "value": 1 776 | } 777 | ] 778 | }, 779 | { 780 | "name": "band3", 781 | "description": "Band 3 radiometric saturation", 782 | "offset": 2, 783 | "length": 1, 784 | "classes": [ 785 | { 786 | "name": "not_saturated", 787 | "description": "Band 3 is not saturated", 788 | "value": 0 789 | }, 790 | { 791 | "name": "saturated", 792 | "description": "Band 3 is saturated", 793 | "value": 1 794 | } 795 | ] 796 | }, 797 | { 798 | "name": "band4", 799 | "description": "Band 4 radiometric saturation", 800 | "offset": 3, 801 | "length": 1, 802 | "classes": [ 803 | { 804 | "name": "not_saturated", 805 | "description": "Band 4 is not saturated", 806 | "value": 0 807 | }, 808 | { 809 | "name": "saturated", 810 | "description": "Band 4 is saturated", 811 | "value": 1 812 | } 813 | ] 814 | }, 815 | { 816 | "name": "band5", 817 | "description": "Band 5 radiometric saturation", 818 | "offset": 4, 819 | "length": 1, 820 | "classes": [ 821 | { 822 | "name": "not_saturated", 823 | "description": "Band 5 is not saturated", 824 | "value": 0 825 | }, 826 | { 827 | "name": "saturated", 828 | "description": "Band 5 is saturated", 829 | "value": 1 830 | } 831 | ] 832 | }, 833 | { 834 | "name": "band6", 835 | "description": "Band 6 radiometric saturation", 836 | "offset": 5, 837 | "length": 1, 838 | "classes": [ 839 | { 840 | "name": "not_saturated", 841 | "description": "Band 6 is not saturated", 842 | "value": 0 843 | }, 844 | { 845 | "name": "saturated", 846 | "description": "Band 6 is saturated", 847 | "value": 1 848 | } 849 | ] 850 | }, 851 | { 852 | 
"name": "band7", 853 | "description": "Band 7 radiometric saturation", 854 | "offset": 6, 855 | "length": 1, 856 | "classes": [ 857 | { 858 | "name": "not_saturated", 859 | "description": "Band 7 is not saturated", 860 | "value": 0 861 | }, 862 | { 863 | "name": "saturated", 864 | "description": "Band 7 is saturated", 865 | "value": 1 866 | } 867 | ] 868 | }, 869 | { 870 | "name": "unused", 871 | "description": "Unused bit", 872 | "offset": 7, 873 | "length": 1, 874 | "classes": [ 875 | { 876 | "name": "unused", 877 | "description": "Unused bit", 878 | "value": 0 879 | } 880 | ] 881 | }, 882 | { 883 | "name": "band9", 884 | "description": "Band 9 radiometric saturation", 885 | "offset": 8, 886 | "length": 1, 887 | "classes": [ 888 | { 889 | "name": "not_saturated", 890 | "description": "Band 9 is not saturated", 891 | "value": 0 892 | }, 893 | { 894 | "name": "saturated", 895 | "description": "Band 9 is saturated", 896 | "value": 1 897 | } 898 | ] 899 | }, 900 | { 901 | "name": "unused", 902 | "description": "Unused bit", 903 | "offset": 9, 904 | "length": 1, 905 | "classes": [ 906 | { 907 | "name": "unused", 908 | "description": "Unused bit", 909 | "value": 0 910 | } 911 | ] 912 | }, 913 | { 914 | "name": "unused", 915 | "description": "Unused bit", 916 | "offset": 10, 917 | "length": 1, 918 | "classes": [ 919 | { 920 | "name": "unused", 921 | "description": "Unused bit", 922 | "value": 0 923 | } 924 | ] 925 | }, 926 | { 927 | "name": "occlusion", 928 | "description": "Terrain not visible from sensor due to intervening terrain", 929 | "offset": 11, 930 | "length": 1, 931 | "classes": [ 932 | { 933 | "name": "not_occluded", 934 | "description": "Terrain is not occluded", 935 | "value": 0 936 | }, 937 | { 938 | "name": "occluded", 939 | "description": "Terrain is occluded", 940 | "value": 1 941 | } 942 | ] 943 | }, 944 | { 945 | "name": "unused", 946 | "description": "Unused bit", 947 | "offset": 12, 948 | "length": 1, 949 | "classes": [ 950 | { 951 | "name": 
"unused", 952 | "description": "Unused bit", 953 | "value": 0 954 | } 955 | ] 956 | }, 957 | { 958 | "name": "unused", 959 | "description": "Unused bit", 960 | "offset": 13, 961 | "length": 1, 962 | "classes": [ 963 | { 964 | "name": "unused", 965 | "description": "Unused bit", 966 | "value": 0 967 | } 968 | ] 969 | }, 970 | { 971 | "name": "unused", 972 | "description": "Unused bit", 973 | "offset": 14, 974 | "length": 1, 975 | "classes": [ 976 | { 977 | "name": "unused", 978 | "description": "Unused bit", 979 | "value": 0 980 | } 981 | ] 982 | }, 983 | { 984 | "name": "unused", 985 | "description": "Unused bit", 986 | "offset": 15, 987 | "length": 1, 988 | "classes": [ 989 | { 990 | "name": "unused", 991 | "description": "Unused bit", 992 | "value": 0 993 | } 994 | ] 995 | } 996 | ], 997 | "alternate": { 998 | "s3": { 999 | "storage:platform": "AWS", 1000 | "storage:requester_pays": true, 1001 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_QA_RADSAT.TIF" 1002 | } 1003 | }, 1004 | "roles": [ 1005 | "saturation" 1006 | ] 1007 | }, 1008 | "ANG.txt": { 1009 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_ANG.txt", 1010 | "type": "text/plain", 1011 | "title": "Angle Coefficients File", 1012 | "description": "Collection 2 Level-2 Angle Coefficients File (ANG)", 1013 | "alternate": { 1014 | "s3": { 1015 | "storage:platform": "AWS", 1016 | "storage:requester_pays": true, 1017 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_ANG.txt" 1018 | } 1019 | }, 1020 | "roles": [ 1021 | "metadata" 1022 | ] 1023 | }, 1024 | "MTL.txt": { 1025 | "href": 
"https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_MTL.txt", 1026 | "type": "text/plain", 1027 | "title": "Product Metadata File", 1028 | "description": "Collection 2 Level-2 Product Metadata File (MTL)", 1029 | "alternate": { 1030 | "s3": { 1031 | "storage:platform": "AWS", 1032 | "storage:requester_pays": true, 1033 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_MTL.txt" 1034 | } 1035 | }, 1036 | "roles": [ 1037 | "metadata" 1038 | ] 1039 | }, 1040 | "MTL.xml": { 1041 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_MTL.xml", 1042 | "type": "application/xml", 1043 | "title": "Product Metadata File (xml)", 1044 | "description": "Collection 2 Level-2 Product Metadata File (xml)", 1045 | "alternate": { 1046 | "s3": { 1047 | "storage:platform": "AWS", 1048 | "storage:requester_pays": true, 1049 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_MTL.xml" 1050 | } 1051 | }, 1052 | "roles": [ 1053 | "metadata" 1054 | ] 1055 | } 1056 | }, 1057 | "bbox": [ 1058 | 136.76644043427444, 1059 | -68.07227732087095, 1060 | 142.5380790735029, 1061 | -65.80312306904463 1062 | ], 1063 | "stac_extensions": [ 1064 | "https://landsat.usgs.gov/stac/landsat-extension/v1.1.1/schema.json", 1065 | "https://stac-extensions.github.io/view/v1.0.0/schema.json", 1066 | "https://stac-extensions.github.io/projection/v1.1.0/schema.json", 1067 | "https://stac-extensions.github.io/eo/v1.1.0/schema.json", 1068 | "https://stac-extensions.github.io/alternate-assets/v1.1.0/schema.json", 1069 | 
"https://stac-extensions.github.io/storage/v1.0.0/schema.json", 1070 | "https://stac-extensions.github.io/classification/v1.1.0/schema.json" 1071 | ], 1072 | "collection": "landsat-c2l2-sr", 1073 | "description": "Landsat Collection 2 Level-2 Surface Reflectance Product" 1074 | } -------------------------------------------------------------------------------- /tests/data/naip_m_4207009_ne_19_060_20211024.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "Feature", 3 | "stac_version": "1.0.0", 4 | "id": "m_4207009_ne_19_060_20211024.tif", 5 | "properties": { 6 | "proj:epsg": 26919, 7 | "proj:geometry": { 8 | "type": "Polygon", 9 | "coordinates": [ 10 | [ 11 | [ 12 | 341338.2, 13 | 4741453.800000001 14 | ], 15 | [ 16 | 347160.60000000003, 17 | 4741453.800000001 18 | ], 19 | [ 20 | 347160.60000000003, 21 | 4749053.4 22 | ], 23 | [ 24 | 341338.2, 25 | 4749053.4 26 | ], 27 | [ 28 | 341338.2, 29 | 4741453.800000001 30 | ] 31 | ] 32 | ] 33 | }, 34 | "proj:bbox": [ 35 | 341338.2, 36 | 4741453.800000001, 37 | 347160.60000000003, 38 | 4749053.4 39 | ], 40 | "proj:shape": [ 41 | 12666, 42 | 9704 43 | ], 44 | "proj:transform": [ 45 | 0.6, 46 | 0, 47 | 341338.2, 48 | 0, 49 | -0.6, 50 | 4749053.4, 51 | 0, 52 | 0, 53 | 1 54 | ], 55 | "proj:projjson": { 56 | "$schema": "https://proj.org/schemas/v0.7/projjson.schema.json", 57 | "type": "ProjectedCRS", 58 | "name": "NAD83 / UTM zone 19N", 59 | "base_crs": { 60 | "name": "NAD83", 61 | "datum": { 62 | "type": "GeodeticReferenceFrame", 63 | "name": "North American Datum 1983", 64 | "ellipsoid": { 65 | "name": "GRS 1980", 66 | "semi_major_axis": 6378137, 67 | "inverse_flattening": 298.257222101 68 | } 69 | }, 70 | "coordinate_system": { 71 | "subtype": "ellipsoidal", 72 | "axis": [ 73 | { 74 | "name": "Geodetic latitude", 75 | "abbreviation": "Lat", 76 | "direction": "north", 77 | "unit": "degree" 78 | }, 79 | { 80 | "name": "Geodetic longitude", 81 | "abbreviation": "Lon", 82 | 
"direction": "east", 83 | "unit": "degree" 84 | } 85 | ] 86 | }, 87 | "id": { 88 | "authority": "EPSG", 89 | "code": 4269 90 | } 91 | }, 92 | "conversion": { 93 | "name": "UTM zone 19N", 94 | "method": { 95 | "name": "Transverse Mercator", 96 | "id": { 97 | "authority": "EPSG", 98 | "code": 9807 99 | } 100 | }, 101 | "parameters": [ 102 | { 103 | "name": "Latitude of natural origin", 104 | "value": 0, 105 | "unit": "degree", 106 | "id": { 107 | "authority": "EPSG", 108 | "code": 8801 109 | } 110 | }, 111 | { 112 | "name": "Longitude of natural origin", 113 | "value": -69, 114 | "unit": "degree", 115 | "id": { 116 | "authority": "EPSG", 117 | "code": 8802 118 | } 119 | }, 120 | { 121 | "name": "Scale factor at natural origin", 122 | "value": 0.9996, 123 | "unit": "unity", 124 | "id": { 125 | "authority": "EPSG", 126 | "code": 8805 127 | } 128 | }, 129 | { 130 | "name": "False easting", 131 | "value": 500000, 132 | "unit": "metre", 133 | "id": { 134 | "authority": "EPSG", 135 | "code": 8806 136 | } 137 | }, 138 | { 139 | "name": "False northing", 140 | "value": 0, 141 | "unit": "metre", 142 | "id": { 143 | "authority": "EPSG", 144 | "code": 8807 145 | } 146 | } 147 | ] 148 | }, 149 | "coordinate_system": { 150 | "subtype": "Cartesian", 151 | "axis": [ 152 | { 153 | "name": "Easting", 154 | "abbreviation": "", 155 | "direction": "east", 156 | "unit": "metre" 157 | }, 158 | { 159 | "name": "Northing", 160 | "abbreviation": "", 161 | "direction": "north", 162 | "unit": "metre" 163 | } 164 | ] 165 | }, 166 | "id": { 167 | "authority": "EPSG", 168 | "code": 26919 169 | } 170 | }, 171 | "proj:wkt2": "PROJCS[\"NAD83 / UTM zone 19N\",GEOGCS[\"NAD83\",DATUM[\"North_American_Datum_1983\",SPHEROID[\"GRS 
1980\",6378137,298.257222101,AUTHORITY[\"EPSG\",\"7019\"]],AUTHORITY[\"EPSG\",\"6269\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AUTHORITY[\"EPSG\",\"4269\"]],PROJECTION[\"Transverse_Mercator\"],PARAMETER[\"latitude_of_origin\",0],PARAMETER[\"central_meridian\",-69],PARAMETER[\"scale_factor\",0.9996],PARAMETER[\"false_easting\",500000],PARAMETER[\"false_northing\",0],UNIT[\"metre\",1,AUTHORITY[\"EPSG\",\"9001\"]],AXIS[\"Easting\",EAST],AXIS[\"Northing\",NORTH],AUTHORITY[\"EPSG\",\"26919\"]]", 172 | "datetime": "2021-10-24T12:33:05.496897Z" 173 | }, 174 | "geometry": { 175 | "type": "Polygon", 176 | "coordinates": [ 177 | [ 178 | [ 179 | -70.9405470386063, 180 | 42.80920310538916 181 | ], 182 | [ 183 | -70.86937257210027, 184 | 42.81038748290737 185 | ], 186 | [ 187 | -70.8714366864438, 188 | 42.87878792763812 189 | ], 190 | [ 191 | -70.94268962889282, 192 | 42.877600665218694 193 | ], 194 | [ 195 | -70.9405470386063, 196 | 42.80920310538916 197 | ] 198 | ] 199 | ] 200 | }, 201 | "links": [], 202 | "assets": { 203 | "asset": { 204 | "href": "s3://naip-source/ma/2021/60cm/rgbir/42070/m_4207009_ne_19_060_20211024.tif", 205 | "eo:bands": [ 206 | { 207 | "name": "b1", 208 | "description": "red" 209 | }, 210 | { 211 | "name": "b2", 212 | "description": "green" 213 | }, 214 | { 215 | "name": "b3", 216 | "description": "blue" 217 | }, 218 | { 219 | "name": "b4", 220 | "description": "undefined" 221 | } 222 | ], 223 | "roles": [] 224 | } 225 | }, 226 | "bbox": [ 227 | -70.94268962889282, 228 | 42.80920310538916, 229 | -70.86937257210027, 230 | 42.87878792763812 231 | ], 232 | "stac_extensions": [ 233 | "https://stac-extensions.github.io/projection/v1.1.0/schema.json", 234 | "https://stac-extensions.github.io/eo/v1.1.0/schema.json" 235 | ] 236 | } 237 | -------------------------------------------------------------------------------- /tests/data/sentinel-2-l2a-S2A_T20HNJ_20240311T140636_L2A.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "type": "Feature", 3 | "stac_version": "1.0.0", 4 | "id": "S2A_T20HNJ_20240311T140636_L2A", 5 | "properties": { 6 | "created": "2024-03-11T22:12:22.219Z", 7 | "platform": "sentinel-2a", 8 | "constellation": "sentinel-2", 9 | "instruments": [ 10 | "msi" 11 | ], 12 | "eo:cloud_cover": 18.431592, 13 | "proj:epsg": 32720, 14 | "proj:centroid": { 15 | "lat": -33.06046, 16 | "lon": -62.24989 17 | }, 18 | "mgrs:utm_zone": 20, 19 | "mgrs:latitude_band": "H", 20 | "mgrs:grid_square": "NJ", 21 | "grid:code": "MGRS-20HNJ", 22 | "view:azimuth": 103.15279043292124, 23 | "view:incidence_angle": 8.717368857696117, 24 | "view:sun_azimuth": 51.9541040600266, 25 | "view:sun_elevation": 47.9530815619878, 26 | "s2:tile_id": "S2A_OPER_MSI_L2A_TL_2APS_20240311T194050_A045538_T20HNJ_N05.10", 27 | "s2:degraded_msi_data_percentage": 0.0417, 28 | "s2:nodata_pixel_percentage": 28.38603, 29 | "s2:saturated_defective_pixel_percentage": 0, 30 | "s2:dark_features_percentage": 0.003961, 31 | "s2:cloud_shadow_percentage": 6.891545, 32 | "s2:vegetation_percentage": 57.82398, 33 | "s2:not_vegetated_percentage": 15.324496, 34 | "s2:water_percentage": 0.209678, 35 | "s2:unclassified_percentage": 1.314748, 36 | "s2:medium_proba_clouds_percentage": 8.782919, 37 | "s2:high_proba_clouds_percentage": 9.417892, 38 | "s2:thin_cirrus_percentage": 0.230781, 39 | "s2:snow_ice_percentage": 0, 40 | "s2:product_type": "S2MSI2A", 41 | "s2:processing_baseline": "05.10", 42 | "s2:product_uri": "S2A_MSIL2A_20240311T135701_N0510_R067_T20HNJ_20240311T194050.SAFE", 43 | "s2:generation_time": "2024-03-11T19:40:50.000000Z", 44 | "s2:datatake_id": "GS2A_20240311T135701_045538_N05.10", 45 | "s2:datatake_type": "INS-NOBS", 46 | "s2:datastrip_id": "S2A_OPER_MSI_L2A_DS_2APS_20240311T194050_S20240311T140636_N05.10", 47 | "s2:reflectance_conversion_factor": 1.01544682232552, 48 | "datetime": "2024-03-11T14:11:54.463000Z", 49 | 
"earthsearch:payload_id": "roda-sentinel-2-c1-l2a/workflow-sentinel-2-c1-l2a-to-stac/c7c43f015229283de230ed796369ff9e", 50 | "storage:platform": "AWS", 51 | "storage:region": "us-west-2", 52 | "storage:requester_pays": false, 53 | "processing:software": { 54 | "sentinel-2-c1-l2a-to-stac": "v2024.02.01" 55 | }, 56 | "updated": "2024-03-11T22:12:22.219Z" 57 | }, 58 | "geometry": { 59 | "type": "Polygon", 60 | "coordinates": [ 61 | [ 62 | [ 63 | -62.51540734988781, 64 | -32.53643059966684 65 | ], 66 | [ 67 | -62.817517027781356, 68 | -33.52764404692455 69 | ], 70 | [ 71 | -61.81785593725055, 72 | -33.52213683974327 73 | ], 74 | [ 75 | -61.831003262970874, 76 | -32.5319308776855 77 | ], 78 | [ 79 | -62.51540734988781, 80 | -32.53643059966684 81 | ] 82 | ] 83 | ] 84 | }, 85 | "links": [ 86 | { 87 | "rel": "self", 88 | "type": "application/geo+json", 89 | "href": "https://earth-search.aws.element84.com/v1/collections/sentinel-2-c1-l2a/items/S2A_T20HNJ_20240311T140636_L2A" 90 | }, 91 | { 92 | "rel": "canonical", 93 | "href": "s3://e84-earth-search-sentinel-data/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/S2A_T20HNJ_20240311T140636_L2A.json", 94 | "type": "application/json" 95 | }, 96 | { 97 | "rel": "via", 98 | "href": "s3://sentinel-s2-l2a/tiles/20/H/NJ/2024/3/11/0/metadata.xml", 99 | "type": "application/xml", 100 | "title": "Granule Metadata in Sinergize RODA Archive" 101 | }, 102 | { 103 | "rel": "parent", 104 | "type": "application/json", 105 | "href": "https://earth-search.aws.element84.com/v1/collections/sentinel-2-c1-l2a" 106 | }, 107 | { 108 | "rel": "collection", 109 | "type": "application/json", 110 | "href": "https://earth-search.aws.element84.com/v1/collections/sentinel-2-c1-l2a" 111 | }, 112 | { 113 | "rel": "root", 114 | "type": "application/json", 115 | "href": "https://earth-search.aws.element84.com/v1" 116 | }, 117 | { 118 | "rel": "thumbnail", 119 | "href": 
"https://earth-search.aws.element84.com/v1/collections/sentinel-2-c1-l2a/items/S2A_T20HNJ_20240311T140636_L2A/thumbnail" 120 | } 121 | ], 122 | "assets": { 123 | "red": { 124 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/B04.tif", 125 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 126 | "title": "Red - 10m", 127 | "eo:bands": [ 128 | { 129 | "name": "B04", 130 | "common_name": "red", 131 | "center_wavelength": 0.665, 132 | "full_width_half_max": 0.038 133 | } 134 | ], 135 | "gsd": 10, 136 | "proj:shape": [ 137 | 10980, 138 | 10980 139 | ], 140 | "proj:transform": [ 141 | 10, 142 | 0, 143 | 499980, 144 | 0, 145 | -10, 146 | 6400000 147 | ], 148 | "raster:bands": [ 149 | { 150 | "nodata": 0, 151 | "data_type": "uint16", 152 | "spatial_resolution": 10, 153 | "scale": 0.0001, 154 | "offset": -0.1 155 | } 156 | ], 157 | "file:checksum": "1220c24a7922eebdc1124781a9ceb98b2a72e7b950512f6e14d16fe3467549232855", 158 | "file:size": 150228550, 159 | "roles": [ 160 | "data", 161 | "reflectance" 162 | ] 163 | }, 164 | "green": { 165 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/B03.tif", 166 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 167 | "title": "Green - 10m", 168 | "eo:bands": [ 169 | { 170 | "name": "B03", 171 | "common_name": "green", 172 | "center_wavelength": 0.56, 173 | "full_width_half_max": 0.045 174 | } 175 | ], 176 | "gsd": 10, 177 | "proj:shape": [ 178 | 10980, 179 | 10980 180 | ], 181 | "proj:transform": [ 182 | 10, 183 | 0, 184 | 499980, 185 | 0, 186 | -10, 187 | 6400000 188 | ], 189 | "raster:bands": [ 190 | { 191 | "nodata": 0, 192 | "data_type": "uint16", 193 | "spatial_resolution": 10, 194 | "scale": 0.0001, 195 | "offset": -0.1 196 | } 197 | ], 198 | "file:checksum": 
"12207d2fce8ce354be7d0f67fee028c74febc3a8de80bfe7da835ff8219640730ed6", 199 | "file:size": 147422714, 200 | "roles": [ 201 | "data", 202 | "reflectance" 203 | ] 204 | }, 205 | "blue": { 206 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/B02.tif", 207 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 208 | "title": "Blue - 10m", 209 | "eo:bands": [ 210 | { 211 | "name": "B02", 212 | "common_name": "blue", 213 | "center_wavelength": 0.49, 214 | "full_width_half_max": 0.098 215 | } 216 | ], 217 | "gsd": 10, 218 | "proj:shape": [ 219 | 10980, 220 | 10980 221 | ], 222 | "proj:transform": [ 223 | 10, 224 | 0, 225 | 499980, 226 | 0, 227 | -10, 228 | 6400000 229 | ], 230 | "raster:bands": [ 231 | { 232 | "nodata": 0, 233 | "data_type": "uint16", 234 | "spatial_resolution": 10, 235 | "scale": 0.0001, 236 | "offset": -0.1 237 | } 238 | ], 239 | "file:checksum": "1220938da762ddbce00c61f48d9ce7b4bd519b02e0696047e51ec81b49e54ccb87e4", 240 | "file:size": 145995722, 241 | "roles": [ 242 | "data", 243 | "reflectance" 244 | ] 245 | }, 246 | "visual": { 247 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/TCI.tif", 248 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 249 | "title": "True color image", 250 | "eo:bands": [ 251 | { 252 | "name": "B04", 253 | "common_name": "red", 254 | "center_wavelength": 0.665, 255 | "full_width_half_max": 0.038 256 | }, 257 | { 258 | "name": "B03", 259 | "common_name": "green", 260 | "center_wavelength": 0.56, 261 | "full_width_half_max": 0.045 262 | }, 263 | { 264 | "name": "B02", 265 | "common_name": "blue", 266 | "center_wavelength": 0.49, 267 | "full_width_half_max": 0.098 268 | } 269 | ], 270 | "gsd": 10, 271 | "proj:shape": [ 272 | 10980, 273 | 10980 274 | ], 275 | "proj:transform": [ 276 | 10, 277 | 0, 278 | 499980, 279 
| 0, 280 | -10, 281 | 6400000 282 | ], 283 | "file:checksum": "1220187e3210dbc041d74529aed01a1bf838f6fe69404fca1fcd164056671892d250", 284 | "file:size": 180673941, 285 | "roles": [ 286 | "visual" 287 | ] 288 | }, 289 | "nir": { 290 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/B08.tif", 291 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 292 | "title": "NIR 1 - 10m", 293 | "eo:bands": [ 294 | { 295 | "name": "B08", 296 | "common_name": "nir", 297 | "center_wavelength": 0.842, 298 | "full_width_half_max": 0.145 299 | } 300 | ], 301 | "gsd": 10, 302 | "proj:shape": [ 303 | 10980, 304 | 10980 305 | ], 306 | "proj:transform": [ 307 | 10, 308 | 0, 309 | 499980, 310 | 0, 311 | -10, 312 | 6400000 313 | ], 314 | "raster:bands": [ 315 | { 316 | "nodata": 0, 317 | "data_type": "uint16", 318 | "spatial_resolution": 10, 319 | "scale": 0.0001, 320 | "offset": -0.1 321 | } 322 | ], 323 | "file:checksum": "1220bf7d49117a93653c94357f01ec082f2db8528e720c4f5b631b40593c22a731f3", 324 | "file:size": 154662896, 325 | "roles": [ 326 | "data", 327 | "reflectance" 328 | ] 329 | }, 330 | "swir22": { 331 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/B12.tif", 332 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 333 | "title": "SWIR 2.2μm - 20m", 334 | "eo:bands": [ 335 | { 336 | "name": "B12", 337 | "common_name": "swir22", 338 | "center_wavelength": 2.19, 339 | "full_width_half_max": 0.242 340 | } 341 | ], 342 | "gsd": 20, 343 | "proj:shape": [ 344 | 5490, 345 | 5490 346 | ], 347 | "proj:transform": [ 348 | 20, 349 | 0, 350 | 499980, 351 | 0, 352 | -20, 353 | 6400000 354 | ], 355 | "raster:bands": [ 356 | { 357 | "nodata": 0, 358 | "data_type": "uint16", 359 | "spatial_resolution": 20, 360 | "scale": 0.0001, 361 | "offset": -0.1 362 | } 363 | ], 364 | 
"file:checksum": "12203f4b9e02f3121690c3f8fd60b2a61baa1fd04f5d10ee81b972f0a9b700cfd978", 365 | "file:size": 40233757, 366 | "roles": [ 367 | "data", 368 | "reflectance" 369 | ] 370 | }, 371 | "rededge2": { 372 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/B06.tif", 373 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 374 | "title": "Red Edge 2 - 20m", 375 | "eo:bands": [ 376 | { 377 | "name": "B06", 378 | "common_name": "rededge", 379 | "center_wavelength": 0.74, 380 | "full_width_half_max": 0.018 381 | } 382 | ], 383 | "gsd": 20, 384 | "proj:shape": [ 385 | 5490, 386 | 5490 387 | ], 388 | "proj:transform": [ 389 | 20, 390 | 0, 391 | 499980, 392 | 0, 393 | -20, 394 | 6400000 395 | ], 396 | "raster:bands": [ 397 | { 398 | "nodata": 0, 399 | "data_type": "uint16", 400 | "spatial_resolution": 20, 401 | "scale": 0.0001, 402 | "offset": -0.1 403 | } 404 | ], 405 | "file:checksum": "12206fbd7602036614b8cf39ce3fd3c0f3a6962588cb7c7ef3c13148903e597cbd8d", 406 | "file:size": 43744957, 407 | "roles": [ 408 | "data", 409 | "reflectance" 410 | ] 411 | }, 412 | "rededge3": { 413 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/B07.tif", 414 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 415 | "title": "Red Edge 3 - 20m", 416 | "eo:bands": [ 417 | { 418 | "name": "B07", 419 | "common_name": "rededge", 420 | "center_wavelength": 0.783, 421 | "full_width_half_max": 0.028 422 | } 423 | ], 424 | "gsd": 20, 425 | "proj:shape": [ 426 | 5490, 427 | 5490 428 | ], 429 | "proj:transform": [ 430 | 20, 431 | 0, 432 | 499980, 433 | 0, 434 | -20, 435 | 6400000 436 | ], 437 | "raster:bands": [ 438 | { 439 | "nodata": 0, 440 | "data_type": "uint16", 441 | "spatial_resolution": 20, 442 | "scale": 0.0001, 443 | "offset": -0.1 444 | } 445 | ], 446 | "file:checksum": 
"1220977074bace95fca7a4c538a55847db63581f6e2f41452feb791e8a989643ada5", 447 | "file:size": 44360377, 448 | "roles": [ 449 | "data", 450 | "reflectance" 451 | ] 452 | }, 453 | "rededge1": { 454 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/B05.tif", 455 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 456 | "title": "Red Edge 1 - 20m", 457 | "eo:bands": [ 458 | { 459 | "name": "B05", 460 | "common_name": "rededge", 461 | "center_wavelength": 0.704, 462 | "full_width_half_max": 0.019 463 | } 464 | ], 465 | "gsd": 20, 466 | "proj:shape": [ 467 | 5490, 468 | 5490 469 | ], 470 | "proj:transform": [ 471 | 20, 472 | 0, 473 | 499980, 474 | 0, 475 | -20, 476 | 6400000 477 | ], 478 | "raster:bands": [ 479 | { 480 | "nodata": 0, 481 | "data_type": "uint16", 482 | "spatial_resolution": 20, 483 | "scale": 0.0001, 484 | "offset": -0.1 485 | } 486 | ], 487 | "file:checksum": "12206051b3618d00910293f34ea4bc1b28215e29f655f10c06f3c3272ed1a82a2f89", 488 | "file:size": 41066885, 489 | "roles": [ 490 | "data", 491 | "reflectance" 492 | ] 493 | }, 494 | "swir16": { 495 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/B11.tif", 496 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 497 | "title": "SWIR 1.6μm - 20m", 498 | "eo:bands": [ 499 | { 500 | "name": "B11", 501 | "common_name": "swir16", 502 | "center_wavelength": 1.61, 503 | "full_width_half_max": 0.143 504 | } 505 | ], 506 | "gsd": 20, 507 | "proj:shape": [ 508 | 5490, 509 | 5490 510 | ], 511 | "proj:transform": [ 512 | 20, 513 | 0, 514 | 499980, 515 | 0, 516 | -20, 517 | 6400000 518 | ], 519 | "raster:bands": [ 520 | { 521 | "nodata": 0, 522 | "data_type": "uint16", 523 | "spatial_resolution": 20, 524 | "scale": 0.0001, 525 | "offset": -0.1 526 | } 527 | ], 528 | "file:checksum": 
"12208f6ca0cdbf4dcefc57f603bcc776723405855039773cfdbfdbe036bdd0ceb1b4", 529 | "file:size": 40598234, 530 | "roles": [ 531 | "data", 532 | "reflectance" 533 | ] 534 | }, 535 | "wvp": { 536 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/WVP.tif", 537 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 538 | "title": "Water Vapour (WVP)", 539 | "gsd": 20, 540 | "proj:shape": [ 541 | 5490, 542 | 5490 543 | ], 544 | "proj:transform": [ 545 | 20, 546 | 0, 547 | 499980, 548 | 0, 549 | -20, 550 | 6400000 551 | ], 552 | "raster:bands": [ 553 | { 554 | "nodata": 0, 555 | "data_type": "uint16", 556 | "spatial_resolution": 20, 557 | "unit": "cm", 558 | "scale": 0.001, 559 | "offset": 0 560 | } 561 | ], 562 | "file:checksum": "122085b6cacf671829f5352b9a67bb60b83717c90594f3051310c0477b1571f03e8e", 563 | "file:size": 28636030, 564 | "roles": [ 565 | "data" 566 | ] 567 | }, 568 | "nir08": { 569 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/B8A.tif", 570 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 571 | "title": "NIR 2 - 20m", 572 | "eo:bands": [ 573 | { 574 | "name": "B8A", 575 | "common_name": "nir08", 576 | "center_wavelength": 0.865, 577 | "full_width_half_max": 0.033 578 | } 579 | ], 580 | "gsd": 20, 581 | "proj:shape": [ 582 | 5490, 583 | 5490 584 | ], 585 | "proj:transform": [ 586 | 20, 587 | 0, 588 | 499980, 589 | 0, 590 | -20, 591 | 6400000 592 | ], 593 | "raster:bands": [ 594 | { 595 | "nodata": 0, 596 | "data_type": "uint16", 597 | "spatial_resolution": 20, 598 | "scale": 0.0001, 599 | "offset": -0.1 600 | } 601 | ], 602 | "file:checksum": "122037f243f3d93f1a9da2d1476fbbdc765e3e7152c86688e2b09c241430584f8721", 603 | "file:size": 44484301, 604 | "roles": [ 605 | "data", 606 | "reflectance" 607 | ] 608 | }, 609 | "scl": { 610 | "href": 
"https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/SCL.tif", 611 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 612 | "title": "Scene classification map (SCL)", 613 | "gsd": 20, 614 | "proj:shape": [ 615 | 5490, 616 | 5490 617 | ], 618 | "proj:transform": [ 619 | 20, 620 | 0, 621 | 499980, 622 | 0, 623 | -20, 624 | 6400000 625 | ], 626 | "raster:bands": [ 627 | { 628 | "nodata": 0, 629 | "data_type": "uint8", 630 | "spatial_resolution": 20 631 | } 632 | ], 633 | "file:checksum": "12205fe5d875f7f2d2ec95ec66aa7e1887da7c40d33ef86b0931c3717c7eddf6ae3a", 634 | "file:size": 1985202, 635 | "roles": [ 636 | "data" 637 | ] 638 | }, 639 | "aot": { 640 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/AOT.tif", 641 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 642 | "title": "Aerosol optical thickness (AOT)", 643 | "gsd": 20, 644 | "proj:shape": [ 645 | 5490, 646 | 5490 647 | ], 648 | "proj:transform": [ 649 | 20, 650 | 0, 651 | 499980, 652 | 0, 653 | -20, 654 | 6400000 655 | ], 656 | "raster:bands": [ 657 | { 658 | "nodata": 0, 659 | "data_type": "uint16", 660 | "spatial_resolution": 20, 661 | "scale": 0.001, 662 | "offset": 0 663 | } 664 | ], 665 | "file:checksum": "122023975c28613775d7ed4332faac2ba305cbfdf9d78458167fd935bdf483c14419", 666 | "file:size": 1054367, 667 | "roles": [ 668 | "data" 669 | ] 670 | }, 671 | "coastal": { 672 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/B01.tif", 673 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 674 | "title": "Coastal - 60m", 675 | "eo:bands": [ 676 | { 677 | "name": "B01", 678 | "common_name": "coastal", 679 | "center_wavelength": 0.443, 680 | "full_width_half_max": 0.027 681 | } 682 | ], 683 | "gsd": 
60, 684 | "proj:shape": [ 685 | 1830, 686 | 1830 687 | ], 688 | "proj:transform": [ 689 | 60, 690 | 0, 691 | 499980, 692 | 0, 693 | -60, 694 | 6400000 695 | ], 696 | "raster:bands": [ 697 | { 698 | "nodata": 0, 699 | "data_type": "uint16", 700 | "spatial_resolution": 60, 701 | "scale": 0.0001, 702 | "offset": -0.1 703 | } 704 | ], 705 | "file:checksum": "1220bdcf1eae16278be37f3caaef1cd561af11fab3e8a8c3f555171e481624889718", 706 | "file:size": 4384096, 707 | "roles": [ 708 | "data", 709 | "reflectance" 710 | ] 711 | }, 712 | "nir09": { 713 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/B09.tif", 714 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 715 | "title": "NIR 3 - 60m", 716 | "eo:bands": [ 717 | { 718 | "name": "B09", 719 | "common_name": "nir09", 720 | "center_wavelength": 0.945, 721 | "full_width_half_max": 0.026 722 | } 723 | ], 724 | "gsd": 60, 725 | "proj:shape": [ 726 | 1830, 727 | 1830 728 | ], 729 | "proj:transform": [ 730 | 60, 731 | 0, 732 | 499980, 733 | 0, 734 | -60, 735 | 6400000 736 | ], 737 | "raster:bands": [ 738 | { 739 | "nodata": 0, 740 | "data_type": "uint16", 741 | "spatial_resolution": 60, 742 | "scale": 0.0001, 743 | "offset": -0.1 744 | } 745 | ], 746 | "file:checksum": "1220e50446aa3742da30bbd83ba3d7495430301323218f869f0ab5b195fbd20477ca", 747 | "file:size": 4939673, 748 | "roles": [ 749 | "data", 750 | "reflectance" 751 | ] 752 | }, 753 | "cloud": { 754 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/CLD_20m.tif", 755 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 756 | "title": "Cloud Probabilities", 757 | "gsd": 20, 758 | "proj:shape": [ 759 | 5490, 760 | 5490 761 | ], 762 | "proj:transform": [ 763 | 20, 764 | 0, 765 | 499980, 766 | 0, 767 | -20, 768 | 6400000 769 | ], 770 | "raster:bands": [ 771 | { 772 
| "nodata": 0, 773 | "data_type": "uint8", 774 | "spatial_resolution": 20 775 | } 776 | ], 777 | "file:checksum": "1220dede1f04dc46924dfe3255aa767e13a878379ed4339c7981a6aa164e3f748af3", 778 | "file:size": 3352217, 779 | "roles": [ 780 | "data", 781 | "cloud" 782 | ] 783 | }, 784 | "snow": { 785 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/SNW_20m.tif", 786 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 787 | "title": "Snow Probabilities", 788 | "proj:shape": [ 789 | 5490, 790 | 5490 791 | ], 792 | "proj:transform": [ 793 | 20, 794 | 0, 795 | 499980, 796 | 0, 797 | -20, 798 | 6400000 799 | ], 800 | "raster:bands": [ 801 | { 802 | "nodata": 0, 803 | "data_type": "uint8", 804 | "spatial_resolution": 20 805 | } 806 | ], 807 | "file:checksum": "1220088767e0c3695ee7179512e96443e5a790febd074bd7b88d3bf530559af52fdb", 808 | "file:size": 145568, 809 | "roles": [ 810 | "data", 811 | "snow-ice" 812 | ] 813 | }, 814 | "preview": { 815 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/L2A_PVI.tif", 816 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 817 | "title": "True color preview", 818 | "eo:bands": [ 819 | { 820 | "name": "B04", 821 | "common_name": "red", 822 | "center_wavelength": 0.665, 823 | "full_width_half_max": 0.038 824 | }, 825 | { 826 | "name": "B03", 827 | "common_name": "green", 828 | "center_wavelength": 0.56, 829 | "full_width_half_max": 0.045 830 | }, 831 | { 832 | "name": "B02", 833 | "common_name": "blue", 834 | "center_wavelength": 0.49, 835 | "full_width_half_max": 0.098 836 | } 837 | ], 838 | "file:checksum": "1220da6648f65195be831249f3779e483251eb212e9c279d68557a9af1c40f654e63", 839 | "file:size": 189151, 840 | "roles": [ 841 | "overview" 842 | ] 843 | }, 844 | "granule_metadata": { 845 | "href": 
"https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/metadata.xml", 846 | "type": "application/xml", 847 | "file:checksum": "1220a6caf06015a9742a4066eb77bbe9ee583c90351ca35a121b7e96fb3f752d164e", 848 | "file:size": 374744, 849 | "roles": [ 850 | "metadata" 851 | ] 852 | }, 853 | "tileinfo_metadata": { 854 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/tileInfo.json", 855 | "type": "application/json", 856 | "file:checksum": "1220e8c6ae8fdae5bcad7a9821f0397e1bf017df72399397c59bb5d72e66d7c94f0f", 857 | "file:size": 1518, 858 | "roles": [ 859 | "metadata" 860 | ] 861 | }, 862 | "product_metadata": { 863 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/product_metadata.xml", 864 | "type": "application/xml", 865 | "file:checksum": "1220eba33cfb8c573dc004bc7168c1f6dc71991851725f5fbf40215b5c648d46800b", 866 | "file:size": 55234, 867 | "roles": [ 868 | "metadata" 869 | ] 870 | }, 871 | "thumbnail": { 872 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/L2A_PVI.jpg", 873 | "type": "image/jpeg", 874 | "title": "Thumbnail of preview image", 875 | "file:checksum": "1220accda8a2b685258c096cda5bf7f6903dae06f07e10560fe20e80d0099f535de2", 876 | "file:size": 31673, 877 | "roles": [ 878 | "thumbnail" 879 | ] 880 | } 881 | }, 882 | "bbox": [ 883 | -62.817517, 884 | -33.527644, 885 | -61.817856, 886 | -32.531931 887 | ], 888 | "stac_extensions": [ 889 | "https://stac-extensions.github.io/eo/v1.1.0/schema.json", 890 | "https://stac-extensions.github.io/file/v2.1.0/schema.json", 891 | "https://stac-extensions.github.io/grid/v1.1.0/schema.json", 892 | "https://stac-extensions.github.io/mgrs/v1.0.0/schema.json", 893 | 
"https://stac-extensions.github.io/processing/v1.1.0/schema.json", 894 | "https://stac-extensions.github.io/projection/v1.1.0/schema.json", 895 | "https://stac-extensions.github.io/raster/v1.1.0/schema.json", 896 | "https://stac-extensions.github.io/sentinel-2/v1.0.0/schema.json", 897 | "https://stac-extensions.github.io/storage/v1.0.0/schema.json", 898 | "https://stac-extensions.github.io/view/v1.0.0/schema.json" 899 | ], 900 | "collection": "sentinel-2-c1-l2a" 901 | } 902 | -------------------------------------------------------------------------------- /tests/data/stacchip_test_item.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "Feature", 3 | "stac_version": "1.0.0", 4 | "id": "m_4207009_ne_19_060_20211024.tif", 5 | "properties": { 6 | "proj:epsg": 26919, 7 | "proj:geometry": { 8 | "type": "Polygon", 9 | "coordinates": [ 10 | [ 11 | [ 12 | 341000.0, 13 | 4741000.0 14 | ], 15 | [ 16 | 342000.0, 17 | 4741000.0 18 | ], 19 | [ 20 | 342000.0, 21 | 4741800.0 22 | ], 23 | [ 24 | 341000.0, 25 | 4741800.0 26 | ], 27 | [ 28 | 341000.0, 29 | 4741000.0 30 | ] 31 | ] 32 | ] 33 | }, 34 | "proj:bbox": [ 35 | 341000.0, 36 | 4741000.0, 37 | 342000.0, 38 | 4741800.0 39 | ], 40 | "proj:shape": [ 41 | 800, 42 | 1000 43 | ], 44 | "proj:transform": [ 45 | 1, 46 | 0, 47 | 342000.0, 48 | 0, 49 | -1, 50 | 4741800.0, 51 | 0, 52 | 0, 53 | 1 54 | ], 55 | "datetime": "2021-10-24T12:33:05.496897Z" 56 | }, 57 | "geometry": { 58 | "type": "Polygon", 59 | "coordinates": [ 60 | [ 61 | [ 62 | -70.9405470386063, 63 | 42.80920310538916 64 | ], 65 | [ 66 | -70.86937257210027, 67 | 42.81038748290737 68 | ], 69 | [ 70 | -70.8714366864438, 71 | 42.87878792763812 72 | ], 73 | [ 74 | -70.94268962889282, 75 | 42.877600665218694 76 | ], 77 | [ 78 | -70.9405470386063, 79 | 42.80920310538916 80 | ] 81 | ] 82 | ] 83 | }, 84 | "links": [], 85 | "assets": { 86 | "asset": { 87 | "href": 
def test_no_stats_indexer():
    """End-to-end check of Chipper against a NoStatsChipIndexer.

    Writes a small random 2-band GeoTIFF matching the test STAC item's
    ``proj:shape``/``proj:transform``, indexes it, and verifies that the
    chips returned by the Chipper are the expected pixel windows of the
    source raster, and that ``__len__``/``__getitem__``/iteration behave
    consistently.
    """
    with TemporaryDirectory() as dirname:
        mountpath = Path(dirname)
        target_dir = mountpath / "naip/item1"
        target_dir.mkdir(parents=True, exist_ok=True)
        item = Item.from_file("tests/data/stacchip_test_item.json")
        shape = item.properties["proj:shape"]
        size = shape[0] * shape[1]
        trsf = item.properties["proj:transform"]
        bands = 2
        with rasterio.open(
            mountpath / "naip/item1/asset.tif",
            "w",
            width=shape[1],
            height=shape[0],
            count=bands,
            dtype="uint8",
            # STAC proj:transform is row-major Affine order (a, b, c, d, e, f).
            # Pass it as an Affine directly: rasterio expects Affine order and
            # rejects/deprecates GDAL-ordered plain sequences.
            transform=rasterio.Affine(*trsf[:6]),
        ) as rst:
            raster_data = np.random.randint(
                0, 255, bands * size, dtype="uint8"
            ).reshape((bands, *shape))
            rst.write(raster_data)

        item.assets["asset"].href = "s3://example-bucket/naip/item1/asset.tif"
        with open(mountpath / "naip/item1/stac_item.json", "w") as dst:
            dst.write(json.dumps(item.to_dict()))
        indexer = NoStatsChipIndexer(item)
        index = indexer.create_index()
        chipper = Chipper(indexer, mountpath=mountpath)
        # Use the chip coordinates of index row 1 (the same row checked via
        # chipper[1] below). Reading x and y from the same row is required
        # for the chip to correspond to a real index entry.
        x = index.column("chip_index_x")[1].as_py()
        y = index.column("chip_index_y")[1].as_py()
        chip = chipper.chip(x, y)
        assert chip["asset"].shape[0] == raster_data.shape[0]
        assert_array_equal(
            chip["asset"][0],
            raster_data[
                0,
                (y * indexer.chip_size) : ((y + 1) * indexer.chip_size),
                (x * indexer.chip_size) : ((x + 1) * indexer.chip_size),
            ],
        )
        # Test magic functions
        assert len(chipper) == indexer.size
        x_index, y_index, chipper_1 = chipper[1]
        assert x == x_index
        assert y == y_index
        assert_array_equal(chip["asset"][0], chipper_1["asset"][0])
        counter = 0
        for _chip in chipper:
            counter += 1
        assert counter == len(chipper)
str) -> MemoryFile: 49 | meta = { 50 | "driver": "GTiff", 51 | "dtype": "uint8", 52 | "nodata": 0.0, 53 | "width": 5490, 54 | "height": 5490, 55 | "count": 1, 56 | "crs": "EPSG:32720", 57 | "transform": Affine(20.0, 0.0, 499980.0, 0.0, -20.0, 6400000.0), 58 | } 59 | data = 5 * np.ones((1, 5490, 5490), dtype="uint16") 60 | # Make first chip nodata 61 | data[0, :100, :100] = 0 62 | # Make second chip cloudy 63 | data[0, :128, 128:192] = 1 64 | memfile = MemoryFile() 65 | with memfile.open(**meta) as dst: 66 | dst.write(data) 67 | return memfile.open() 68 | 69 | 70 | def test_get_stats_error(): 71 | item = Item.from_file("tests/data/naip_m_4207009_ne_19_060_20211024.json") 72 | indexer = ChipIndexer(item) 73 | with pytest.raises(NotImplementedError): 74 | indexer.create_index() 75 | 76 | 77 | def test_no_stats_indexer(): 78 | item = Item.from_file("tests/data/naip_m_4207009_ne_19_060_20211024.json") 79 | indexer = NoStatsChipIndexer(item) 80 | assert indexer.shape == [12666, 9704] 81 | index = indexer.create_index() 82 | assert str(index.column("chipid")[0]) == "m_4207009_ne_19_060_20211024.tif-0-0" 83 | assert index.column("date")[0] == pa.scalar( 84 | datetime.date(2021, 10, 24), pa.date32() 85 | ) 86 | 87 | point = Point( 88 | indexer.transform[2], 89 | indexer.transform[5], 90 | ) 91 | 92 | target = indexer.reproject(point) 93 | assert min( 94 | [dat["x"] for dat in index.column("geometry")[0].as_py()[0]] 95 | ) == pytest.approx(target.bounds[0]) 96 | assert max( 97 | [dat["y"] for dat in index.column("geometry")[0].as_py()[0]] 98 | ) == pytest.approx(target.bounds[3]) 99 | 100 | 101 | @mock.patch("stacchip.indexer.rasterio.open", rasterio_open_sentinel_mock) 102 | def test_sentinel_2_indexer(): 103 | item = Item.from_file( 104 | "tests/data/sentinel-2-l2a-S2A_T20HNJ_20240311T140636_L2A.json" 105 | ) 106 | indexer = Sentinel2Indexer(item) 107 | assert indexer.shape == [10980, 10980] 108 | index = indexer.create_index() 109 | assert index.shape == (1763, 7) 110 | 
assert str(index.column("chipid")[0]) == "S2A_T20HNJ_20240311T140636_L2A-1-0" 111 | assert index.column("cloud_cover_percentage")[0].as_py() == 0.5 112 | 113 | 114 | @mock.patch("stacchip.indexer.rasterio.open", rasterio_open_ls_mock) 115 | def test_landsat_indexer(): 116 | item = Item.from_file( 117 | "tests/data/landsat-c2l2-sr-LC09_L2SR_086107_20240311_20240312_02_T2_SR.json" 118 | ) 119 | indexer = LandsatIndexer(item) 120 | assert indexer.shape == [8271, 8331] 121 | index = indexer.create_index() 122 | assert isinstance(index, pa.Table) 123 | assert ( 124 | str(index.column("chipid")[0]) 125 | == "LC09_L2SR_086107_20240311_20240312_02_T2_SR-0-0" 126 | ) 127 | assert index.shape == (1024, 7) 128 | assert indexer.x_size == int(8331 / 256) 129 | assert indexer.y_size == int(8271 / 256) 130 | 131 | 132 | @mock.patch("stacchip.indexer.rasterio.open", rasterio_open_ls_nodata_mock) 133 | def test_landsat_indexer_nodata(): 134 | item = Item.from_file( 135 | "tests/data/landsat-c2l2-sr-LC09_L2SR_086107_20240311_20240312_02_T2_SR.json" 136 | ) 137 | indexer = LandsatIndexer(item) 138 | index = indexer.create_index() 139 | assert index.shape == (1023, 7) 140 | assert ( 141 | str(index.column("chipid")[0]) 142 | == "LC09_L2SR_086107_20240311_20240312_02_T2_SR-1-0" 143 | ) 144 | 145 | indexer = LandsatIndexer(item, chip_max_nodata=0.95) 146 | index = indexer.create_index() 147 | assert index.shape == (1024, 7) 148 | 149 | 150 | def test_indexer_manual_shape(): 151 | item = Item.from_file( 152 | "tests/data/landsat-c2l2-sr-LC09_L2SR_086107_20240311_20240312_02_T2_SR.json" 153 | ) 154 | indexer = NoStatsChipIndexer(item, shape=[230, 420], chip_size=100) 155 | assert indexer.shape == [230, 420] 156 | assert indexer.y_size == 2 157 | assert indexer.x_size == 4 158 | --------------------------------------------------------------------------------