├── .github └── workflows │ ├── deploy_mkdocs.yml │ └── tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── LICENSE ├── README.md ├── docs ├── mkdocs.yml └── src │ ├── chipper.md │ ├── index.md │ ├── indexer.md │ ├── naip-tutorial.md │ └── processors.md ├── pyproject.toml ├── stacchip ├── __init__.py ├── chipper.py ├── indexer.py ├── processors │ ├── landsat_processor.py │ ├── linz_processor.py │ ├── modis_processor.py │ ├── naip_processor.py │ ├── prechip.py │ ├── sentinel_1_processor.py │ ├── sentinel_2_processor.py │ └── stats.py └── utils.py └── tests ├── data ├── landsat-c2l2-sr-LC09_L2SR_086107_20240311_20240312_02_T2_SR.json ├── naip_m_4207009_ne_19_060_20211024.json ├── sentinel-2-l2a-S2A_T20HNJ_20240311T140636_L2A.json └── stacchip_test_item.json ├── test_chipper.py └── test_indexer.py /.github/workflows/deploy_mkdocs.yml: -------------------------------------------------------------------------------- 1 | name: Publish docs via GitHub Pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | build: 10 | name: Deploy docs 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout main 14 | uses: actions/checkout@v2 15 | 16 | - name: Set up Python 3.11 17 | uses: actions/setup-python@v2 18 | with: 19 | python-version: 3.11.9 20 | 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | python -m pip install .[docs] 25 | 26 | - name: update API docs 27 | run: | 28 | pdocs as_markdown \ 29 | --output_dir docs/src/api \ 30 | --exclude_source \ 31 | --overwrite \ 32 | stacchip.chipper \ 33 | stacchip.indexer 34 | 35 | - name: Deploy docs 36 | run: mkdocs gh-deploy --force -f docs/mkdocs.yml 37 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: [push] 4 | 5 | jobs: 6 | 7 | tests: 8 | runs-on: ubuntu-latest 9 | strategy: 10 
| matrix: 11 | python-version: ["3.11"] 12 | 13 | steps: 14 | - uses: actions/checkout@v3 15 | - name: Set up Python ${{ matrix.python-version }} 16 | uses: actions/setup-python@v4 17 | with: 18 | python-version: ${{ matrix.python-version }} 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | python -m pip install .[dev] 23 | - name: Lint with ruff 24 | run: | 25 | ruff check . 26 | - name: Test with pytest 27 | run: | 28 | pytest 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | .vscode 163 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/abravalheri/validate-pyproject 3 | rev: v0.12.1 4 | hooks: 5 | - id: validate-pyproject 6 | 7 | - repo: https://github.com/psf/black 8 | rev: 22.12.0 9 | hooks: 10 | - id: black 11 | language_version: python 12 | 13 | - repo: https://github.com/PyCQA/isort 14 | rev: 5.12.0 15 | hooks: 16 | - id: isort 17 | language_version: python 18 | 19 | - repo: https://github.com/charliermarsh/ruff-pre-commit 20 | rev: v0.0.238 21 | hooks: 22 | - id: ruff 23 | args: ["--fix"] 24 | 25 | - repo: https://github.com/pre-commit/mirrors-mypy 26 | rev: v1.3.0 27 | hooks: 28 | - id: mypy 29 | language_version: python 30 | exclude: tests/.* 31 | additional_dependencies: 32 | - types-python-dateutil 33 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Stacchip change log 2 | 3 | ## 0.1.34 4 | 5 | - Add option to manually specify indexer shape. Some STAC items 6 | may not have the property `proj:shape` specified. 7 | 8 | ## 0.1.33 9 | 10 | - Breaking change: `get_chip_bbox` returns shapely polygon instead of wkt 11 | 12 | ## 0.1.32 13 | 14 | - Breacking change: chip iterator returns chip index values, not only image data. 15 | 16 | ## 0.1.31 17 | 18 | - Breaking change: simplify chipper class. Indexer has to be instantiated by the user. -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # stacchip 2 | 3 | Dynamically create image chips for earth observation machine learning 4 | applications using a custom chip index based on STAC items. 5 | 6 | Get a STAC item, index its contents, and create chips dynamically 7 | like so 8 | 9 | ```python 10 | # Get item from an existing STAC catalog 11 | item = stac.search(...) 12 | 13 | # Index all chips that could be derived from the STAC item 14 | index = Indexer(item).create_index() 15 | 16 | # Use the index to get RGB array for a specific chip 17 | chip = Chipper(index).chip(x=23, y=42) 18 | ``` 19 | 20 | ## Installation 21 | 22 | Stacchip is available on pypi 23 | 24 | ```bash 25 | pip install stacchip 26 | ``` 27 | 28 | ## Motivation 29 | 30 | Remote sensing imagery is typically distributed in large files (scenes) 31 | that typically have the order of 10 thousand of pixels in both the x and y 32 | directions. 
This is true for systems like Landsat, Sentinel 1 and 2, and 33 | aerial imagery such as NAIP. 34 | 35 | Machine learning models operate on much smaller image sizes. Many use 36 | 256x256 pixels, and the largest inputs are in the range of 1000 pixels. 37 | 38 | This poses a challenge to modelers, as they have to cut the larger scenes 39 | into pieces before passing them to their models. The smaller image snippets 40 | are typically referred to as "chips". A term we will use throughout this 41 | documentation. 42 | 43 | Creating imagery chips tends to be a tedious and slow process, and it is 44 | specific for each model. Models will have different requirements on image 45 | sizes, datatypes, and the spectral bands to include. A set of chips that 46 | works for one model might be useless for the next. 47 | 48 | Systemizing how chips are tracked, and making the chip creation more dynamic 49 | is a way to work around these difficulties. This is the goal fo stacchip. It 50 | presents an approach that leverages cloud optimized technology to make chipping 51 | simpler, faster, and less static. 52 | 53 | ## Overview 54 | 55 | Stacchip relies on three cloud oriented technologies. Cloud Optimized Geotiffs 56 | (COG), Spatio Temporal Asset Catalogs (STAC), and GeoParquet. Instead of pre-creating millions of files of a fixed size, chips are indexed first in tables, and then created dynamically from the index files when needed. The imagery data itsel is kept in its original format and referenced in STAC items. 57 | 58 | Creating chips with stacchip is composed of two steps: 59 | 60 | 1. Create a stacchip index from a set of STAC 61 | 2. Dynamically create pixel arrays for any chip in the stacchip index 62 | 63 | Indexes can be created separately for different imagery sources, and combined 64 | into larger indexes when needed. 
This makes mixing different imagery sources 65 | simple, and allows for flexibility during the modeling process, as imagery sources 66 | can be added and removed by only updating the combined index. 67 | 68 | The mechanism is purposefully kept as generic as possible. The index creation 69 | is done based on a STAC item alone, no other input is needed. Obtaining image 70 | data for a chip that is registered in a stacchip index only requires a few 71 | lines of code. 72 | 73 | For more information, please consult the [documentation](https://clay-foundation.github.io/stacchip/) 74 | 75 | 76 | ## Build and release 77 | 78 | The following steps to release the latest version 79 | 80 | ```bash 81 | tag=0.1.34 82 | hatch version $tag 83 | git commit -am "Bump version number" 84 | git push 85 | git tag $tag 86 | git push origin $tag 87 | rm -rf dist 88 | python -m build 89 | python3 -m twine upload --repository testpypi dist/* 90 | python3 -m twine upload --repository pypi dist/* 91 | ``` 92 | -------------------------------------------------------------------------------- /docs/mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Stacchip 2 | site_description: Dynamically create image chips for earth observation machine learning applications using a custom chip index based on STAC items. 
3 | 4 | docs_dir: 'src' 5 | site_dir: 'build' 6 | 7 | repo_name: "clay-foundation/stacchip" 8 | repo_url: "https://github.com/clay-foundation/stacchip" 9 | edit_uri: "blob/main/docs/src/" 10 | site_url: "https://clay-foundation.github.io/stacchip/" 11 | 12 | nav: 13 | - Stacchip: "index.md" 14 | - Indexer: "indexer.md" 15 | - Chipper: "chipper.md" 16 | - Processors: "processors.md" 17 | - Tutorial: "naip-tutorial.md" 18 | - API: 19 | - "api/stacchip/chipper.md" 20 | - "api/stacchip/indexer.md" 21 | 22 | plugins: 23 | - search 24 | - mkdocs-jupyter: 25 | include_source: True 26 | 27 | theme: 28 | name: material 29 | palette: 30 | primary: indigo 31 | scheme: default 32 | 33 | markdown_extensions: 34 | - admonition 35 | - attr_list 36 | - codehilite: 37 | guess_lang: false 38 | - def_list 39 | - footnotes 40 | - pymdownx.arithmatex 41 | - pymdownx.betterem 42 | - pymdownx.caret: 43 | insert: false 44 | - pymdownx.details 45 | - pymdownx.emoji 46 | - pymdownx.escapeall: 47 | hardbreak: true 48 | nbsp: true 49 | - pymdownx.magiclink: 50 | hide_protocol: true 51 | repo_url_shortener: true 52 | - pymdownx.smartsymbols 53 | - pymdownx.superfences 54 | - pymdownx.tasklist: 55 | custom_checkbox: true 56 | - pymdownx.tilde 57 | - toc: 58 | permalink: true 59 | -------------------------------------------------------------------------------- /docs/src/chipper.md: -------------------------------------------------------------------------------- 1 | The [Chipper](https://github.com/Clay-foundation/stacchip/blob/main/stacchip/chipper.py) class can be used to create chips based on 2 | an existing stacchip index. 3 | 4 | The chipper class takes as input an Indexer class object. The indexer class can be instantiated using 5 | the `load_indexer_s3` and `load_indexer_local` utils functions for indexes that have been 6 | previously created using stacchip processors. 7 | 8 | For local stacchip indexes, the mountpath can be passed. 
Asset links in the STAC items are then patched 9 | with the local mountpath. 10 | 11 | The chipper also has an `asset_blacklist` argument that allows skipping assets 12 | from the chip retrieval process. This can be used to exclude unnecessary assets 13 | and through that increase loading speed. 14 | 15 | The following code snippet gives an example using a local path. 16 | 17 | ```python 18 | import geoarrow.pyarrow.dataset as gads 19 | 20 | from stacchip.chipper import Chipper 21 | from stacchip.utils import load_indexer_s3 22 | 23 | # Load a stacchip index table 24 | dataset = gads.dataset("/path/to/parquet/index", format="parquet") 25 | table = dataset.to_table() 26 | 27 | # Use util to load indexer using data from a 28 | # remote S3 bucket. 29 | indexer = load_indexer_s3( 30 | bucket="clay-v1-data", 31 | platform=table.column("platform")[row], 32 | item_id = table.column("item")[row], 33 | ) 34 | 35 | # Instantiate chipper 36 | chipper = Chipper(indexer) 37 | 38 | # Get data for a single chip as registered 39 | # in row 42 of the index. 40 | row = 42 41 | chip_index_x = table.column("chip_index_x")[row].as_py() 42 | chip_index_y = table.column("chip_index_y")[row].as_py() 43 | data = chipper.chip(chip_index_x, chip_index_y) 44 | ``` -------------------------------------------------------------------------------- /docs/src/index.md: -------------------------------------------------------------------------------- 1 | # stacchip 2 | 3 | Dynamically create image chips for eath observation machine learning 4 | applications using a custom chip index based on STAC items. 5 | 6 | Get a STAC item, index its contents, and create chips dynamically 7 | like so 8 | 9 | ```python 10 | # Get item from an existing STAC catalog 11 | item = stac.search(...) 
12 | 13 | # Index all chips that could be derived from the STAC item 14 | index = Indexer(item).create_index() 15 | 16 | # Use the index to get RGB array for a specific chip 17 | chip = Chipper(index).chip(x=23, y=42) 18 | ``` 19 | 20 | ## Installation 21 | 22 | Stacchip is available on pypi 23 | 24 | ```bash 25 | pip install stacchip 26 | ``` 27 | 28 | ## Overview 29 | 30 | Stacchip relies on three cloud oriented technologies. Cloud Optimized Geotiffs 31 | (COG), Spatio Temporal Asset Catalogs (STAC), and GeoParquet. Instead of pre-creating millions of files of a fixed size, chips are indexed first in tables, and then created dynamically from the index files when needed. The imagery data itsel is kept in its original format and referenced in STAC items. 32 | 33 | Creating chips with stacchip is composed of two steps: 34 | 35 | 1. Create a stacchip index from a set of STAC 36 | 2. Dynamically create pixel arrays for any chip in the stacchip index 37 | 38 | Indexes can be created separately for different imagery sources, and combined 39 | into larger indexes when needed. This makes mixing different imagery sources 40 | simple, and allows for flexibility during the modeling process, as imagery sources 41 | can be added and removed by only updating the combined index. 42 | 43 | The mechanism is purposefully kept as generic as possible. The index creation 44 | is done based on a STAC item alone, no other input is needed. Obtaining image 45 | data for a chip that is registered in a stacchip index only requires a few 46 | lines of code. 47 | 48 | ## Motivation 49 | 50 | Remote sensing imagery is typically distributed in large files (scenes) 51 | that typically have the order of 10 thousand of pixels in both the x and y 52 | directions. This is true for systems like Landsat, Sentinel 1 and 2, and 53 | aerial imagery such as NAIP. 54 | 55 | Machine learning models operate on much smaller image sizes. 
Many use 56 | 256x256 pixels, and the largest inputs are in the range of 1000 pixels. 57 | 58 | This poses a challenge to modelers, as they have to cut the larger scenes 59 | into pieces before passing them to their models. The smaller image snippets 60 | are typically referred to as "chips". A term we will use throughout this 61 | documentation. 62 | 63 | Creating imagery chips tends to be a tedious and slow process, and it is 64 | specific for each model. Models will have different requirements on image 65 | sizes, datatypes, and the spectral bands to include. A set of chips that 66 | works for one model might be useless for the next. 67 | 68 | Systemizing how chips are tracked, and making the chip creation more dynamic 69 | is a way to work around these difficulties. This is the goal fo stacchip. It 70 | presents an approach that leverages cloud optimized technology to make chipping 71 | simpler, faster, and less static. 72 | 73 | ## License 74 | 75 | This repository is released under an Apache 2.0 license. For more details see 76 | [LICENSE](https://github.com/clay-foundation/stacchip/blob/main/LICENSE) 77 | -------------------------------------------------------------------------------- /docs/src/indexer.md: -------------------------------------------------------------------------------- 1 | The [Indexer](https://github.com/Clay-foundation/stacchip/blob/main/stacchip/indexer.py) class is build to create a chip index for 2 | data registered in a a STAC item. The indexer will calculate the number of available 3 | chips in a STAC item given a chip size. The resulting chip index is stored as a geoparquet table. 
4 | 5 | The following example creates an index the Landsat-9 STAC item from the tests 6 | 7 | ```python 8 | from pystac import Item 9 | from stacchip.indexer import LandsatIndexer 10 | 11 | item = Item.from_file( 12 | "tests/data/landsat-c2l2-sr-LC09_L2SR_086107_20240311_20240312_02_T2_SR.json" 13 | ) 14 | indexer = LandsatIndexer(item) 15 | index = indexer.create_index() 16 | ``` 17 | 18 | ## Nodata and cloud coverage 19 | 20 | Earth observation data often comes in scenes that contain 21 | nodata pixels, and the imagery might contain clouds. Statistics on nodata and cloud cover is relevant information for model training. Typically a model is trained with limited amounts nodata and cloud pixels. 22 | 23 | The indexer therefore needs to be track these two variables so that the modeler can choose how much or how little nodata pixels and cloudy pixels should be passed to the model. However, how this information is stored varies for different image sources. 24 | 25 | The indexer class might need adaption for new data sources. In these cases, 26 | the base class has to be subclassed and the `get_stats` method overridden to produce the right statistics. 27 | 28 | The stacchip library has a generic indexer for sources that have neither nodata or cloudy pixels in them. It has one indexer that takes a nodata mask as input, but assumes that there are no cloudy pixels (useful for sentinel-1). It also contains specific indexers for Landsat and Sentinel-2. For more information consult the reference documentation. 29 | 30 | ## Merging indexes 31 | 32 | Stacchip indexes are geoparquet tables, and as such they can be merged quite 33 | easily in to a single table. The recommendation is to store each stacchip index 34 | for a single STAC item in a subfolder, then the files can be merged and the 35 | STAC item can be tracked using the folder structure using partitioning feature 36 | from pyarrow. 
37 | 38 | The following example assumes that each index file from a single STAC item is 39 | in a subfolder that is named after the STAC item id. 40 | 41 | ```python 42 | from pyarrow import dataset as ds 43 | 44 | part = ds.partitioning(field_names=["item_id"]) 45 | data = ds.dataset( 46 | "/path/to/stacchip/indices", 47 | format="parquet", 48 | partitioning=part, 49 | ) 50 | ds.write_dataset( 51 | data, 52 | "/path/to/combined-index", 53 | format="parquet", 54 | ) 55 | ``` -------------------------------------------------------------------------------- /docs/src/naip-tutorial.md: -------------------------------------------------------------------------------- 1 | The following code example shows how to obtain RGB+NIR chips from 2 | NAIP imagery and plot them. 3 | 4 | ```python 5 | import random 6 | 7 | import pystac_client 8 | from stacchip.indexer import NoStatsChipIndexer 9 | from stacchip.chipper import Chipper 10 | import os 11 | import matplotlib.pyplot as plt 12 | 13 | # Optimize GDAL settings for cloud optimized reading 14 | os.environ["GDAL_DISABLE_READDIR_ON_OPEN"] = "EMPTY_DIR" 15 | os.environ["AWS_REQUEST_PAYER"] = "requester" 16 | 17 | # Query STAC catalog for NAIP data 18 | catalog = pystac_client.Client.open("https://earth-search.aws.element84.com/v1") 19 | 20 | 21 | items = catalog.search( 22 | collections=["naip"], 23 | max_items=100, 24 | ) 25 | 26 | items = items.item_collection() 27 | 28 | items_list = list(items) 29 | random.shuffle(items_list) 30 | 31 | chips = [] 32 | for item in items_list[:10]: 33 | print(f"Working on {item}") 34 | 35 | # Index the chips in the item 36 | indexer = NoStatsChipIndexer(item) 37 | 38 | # Instanciate the chipper 39 | chipper = Chipper(indexer, assets=["image"]) 40 | 41 | # Get first chip for the "image" asset key 42 | for chip_id in random.sample(range(0, len(chipper)), 5): 43 | x_index, y_index, chip = chipper[chip_id] 44 | chips.append(chip["image"]) 45 | 46 | 47 | fig, axs = plt.subplots(5, 10, 
gridspec_kw={'wspace': 0.01, 'hspace': 0.01}, squeeze=True) 48 | 49 | for idx, ax in enumerate(axs.flatten()): 50 | chip = chips[idx] 51 | # Visualize the data 52 | ax.imshow(chip[:3].swapaxes(0, 1).swapaxes(1, 2)) 53 | 54 | plt.tight_layout() 55 | plt.show() 56 | ``` 57 | 58 | Resulting in the following plot 59 | 60 | ![naip-rgb](https://github.com/Clay-foundation/stacchip/assets/901647/86844530-9297-4971-b9e5-dd5c25b28b0e) 61 | -------------------------------------------------------------------------------- /docs/src/processors.md: -------------------------------------------------------------------------------- 1 | To use stacchip for an existing imagery archive, the indexes need to be 2 | created for each scene or STAC item. 3 | 4 | Stacchip comes with [processors](https://github.com/Clay-foundation/stacchip/blob/main/stacchip/processors/) that 5 | can be used to collect and index imagery from multiple data sources. 6 | This will be extended as the package grows. 7 | 8 | Each processor is registered as a command line utility so that it can be 9 | scaled easily. Note that these processors are created to work well with AWS Batch, but are not dependent on it and can be used otherwise too. 10 | 11 | ## Sentinel-2 12 | 13 | The [`stacchip-sentinel-2`](https://github.com/Clay-foundation/stacchip/blob/main/stacchip/processors/sentinel_2_processor.py) 14 | processor CLI command processes Sentinel-2 data. It will process MGRS 15 | tiles from a list of tiles from a layer that can be opened by geopandas. 16 | 17 | Each MGRS tile will be processed by the row index in the source file. 18 | 19 | For each tile it will process the least cloudy image in each quartal 20 | from two random years between 2018 and 2023. 21 | 22 | The script uses environment variables to determine all inputs: 23 | 24 | 1. The index of the MGRS tile to be processed from the source file 25 | 2. The source file for the MGRS tile sample 26 | 3.
A target bucket for writing the assets, stac items, and stacchip index. 27 | 28 | An example set of environment variables to run this script is: 29 | 30 | ```bash 31 | export AWS_BATCH_JOB_ARRAY_INDEX=0 32 | export STACCHIP_MGRS_SOURCE=https://clay-mgrs-samples.s3.amazonaws.com/mgrs_sample_v02.fgb 33 | export STACCHIP_BUCKET=clay-v1-data 34 | ``` 35 | 36 | ## Landsat 37 | 38 | The [`stacchip-landsat`](https://github.com/Clay-foundation/stacchip/blob/main/stacchip/processors/landsat_processor.py) 39 | processor CLI command processes Landsat data. It will process a list 40 | of geometries from a layer that can be opened by geopandas. For each 41 | row, it will use the centroid of the geometry to search for Landsat 42 | scenes. 43 | 44 | For each geometry it will process the least cloudy image in each quartal 45 | from two random years between 2018 and 2023. For one year it will collect 46 | L1 data, and for the other year L2 data. The platform is either Landsat-8 47 | or Landsat-9, depending on availability and cloud cover. 48 | 49 | The script uses environment variables to determine all inputs: 50 | 51 | 1. The index of the geometry to be processed from the source file 52 | 2. The source file for the source sample file 53 | 3. A target bucket for writing the assets, stac items, and stacchip index. 54 | 55 | An example set of environment variables to run this script is: 56 | 57 | ```bash 58 | export AWS_BATCH_JOB_ARRAY_INDEX=0 59 | export STACCHIP_SAMPLE_SOURCE=https://clay-mgrs-samples.s3.amazonaws.com/mgrs_sample_v02.fgb 60 | export STACCHIP_BUCKET=clay-v1-data 61 | ``` 62 | 63 | ## NAIP 64 | 65 | The [`stacchip-naip`](https://github.com/Clay-foundation/stacchip/blob/main/stacchip/processors/naip_processor.py) processor CLI 66 | command processes imagery from the National Agriculture Imagery Program (NAIP). 67 | 68 | The sample locations were created using the [Natural Earth](https://www.naturalearthdata.com) 69 | database as a source.
The sample includes all populated places, protected 70 | areas and parks, airports, and ports. In addition, we sampled one random point 71 | along each river, and one random location within each lake that is registered 72 | in Natural Earth. Finally, we sampled 4000 random points. All data was 73 | filtered to be within the CONUS region. 74 | 75 | Similar to the other processors, the input variables are provided using env vars. 76 | 77 | An example set of environment variables to run this script is: 78 | 79 | ```bash 80 | export AWS_BATCH_JOB_ARRAY_INDEX=0 81 | export STACCHIP_SAMPLE_SOURCE=https://clay-mgrs-samples.s3.amazonaws.com/clay_v1_naip_sample_natural_earth.fgb 82 | export STACCHIP_BUCKET=clay-v1-data 83 | ``` 84 | 85 | ## LINZ 86 | 87 | The [`stacchip-linz`](https://github.com/Clay-foundation/stacchip/blob/main/stacchip/processors/linz_processor.py) processor CLI 88 | processes data from the New Zealand high resolution open aerial imagery. 89 | 90 | As a sample, we randomly select 50% of the scenes, with a minimum of 10 91 | and a maximum of 2000 scenes for each catalog that was included. 92 | We selected the latest imagery for each of the available regions 93 | of New Zealand. The list of catalogs is in the LINZ processor file. 94 | 95 | We also resample all the imagery to 30cm so that the data 96 | is consistent. 97 | 98 | Similar to the other processors, the input variables are provided using env vars. 99 | 100 | An example set of environment variables to run this script is: 101 | 102 | ```bash 103 | export AWS_BATCH_JOB_ARRAY_INDEX=0 104 | export STACCHIP_BUCKET=clay-v1-data 105 | ``` 106 | 107 | ## MODIS 108 | 109 | The [`stacchip-modis`](https://github.com/Clay-foundation/stacchip/blob/main/stacchip/processors/modis_processor.py) processor CLI 110 | processes data from the MODIS archive. The modis scenes are reprojected to 111 | the web mercator projection, and stored in S3. Then the indexer will create 112 | one index table per modis scene.
We use 233 modis SIN grid tiles, with 4 random 113 | dates of 4 years of data for each SIN grid tile. 114 | 115 | Similar to the other processors, the input variables are provided using env vars. 116 | 117 | An example set of environment variables to run this script is: 118 | 119 | ```bash 120 | export AWS_BATCH_JOB_ARRAY_INDEX=0 121 | export STACCHIP_BUCKET=clay-v1-data 122 | ``` 123 | 124 | ## Batch processing 125 | 126 | The following base image can be used for batch processing. Installing the package 127 | will include the command line utilities for each processor. 128 | 129 | ```dockerfile 130 | FROM python:3.11 131 | 132 | RUN pip install stacchip 133 | ``` 134 | 135 | ## Prechip 136 | 137 | In cases where chips need to be computed in advance, the 138 | [`stacchip-prechip`](https://github.com/Clay-foundation/stacchip/blob/main/stacchip/processors/naip_processor.py) cli script 139 | is a helper to create npz files from the chips. 140 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "stacchip" 7 | authors = [ 8 | { name="Daniel Wiesmann", email="danielwiesmann@developmentseed.org" }, 9 | ] 10 | description = "Dynamically create image chips from STAC items" 11 | readme = "README.md" 12 | requires-python = ">=3.10" 13 | dependencies = [ 14 | "pystac>=1.9.0", 15 | "pystac-client>=0.7.5", 16 | "pyarrow>=14.0.1", 17 | "geoarrow-pyarrow>=0.1.2", 18 | "rasterio>=1.3.9", 19 | "numpy>=1.26.0,<2.0", 20 | "boto3>=1.29.0", 21 | "geopandas>=0.14.1", 22 | "rio-stac>=0.9.0", 23 | "planetary-computer>=1.0.0", 24 | ] 25 | dynamic = [ 26 | "version", 27 | ] 28 | 29 | [project.urls] 30 | "Homepage" = "https://github.com/Clay-foundation/stacchip" 31 | "Bug Tracker" = "https://github.com/Clay-foundation/stacchip/issues" 32 | 33 | 
[project.optional-dependencies] 34 | dev = [ 35 | "ruff", 36 | "pytest", 37 | "mock", 38 | "build", 39 | "types-python-dateutil", 40 | ] 41 | docs = [ 42 | "nbconvert", 43 | "mkdocs", 44 | "mkdocs-jupyter", 45 | "mkdocs-material", 46 | "pygments", 47 | "pdocs", 48 | ] 49 | 50 | 51 | [project.scripts] 52 | stacchip-sentinel-1 = "stacchip.processors.sentinel_1_processor:process" 53 | stacchip-sentinel-2 = "stacchip.processors.sentinel_2_processor:process" 54 | stacchip-landsat = "stacchip.processors.landsat_processor:process" 55 | stacchip-naip = "stacchip.processors.naip_processor:process" 56 | stacchip-linz = "stacchip.processors.linz_processor:process" 57 | stacchip-modis = "stacchip.processors.modis_processor:process" 58 | stacchip-prechip = "stacchip.processors.prechip:process" 59 | stacchip-stats = "stacchip.processors.stats:process" 60 | 61 | [tool.hatch.version] 62 | path = "stacchip/__init__.py" 63 | 64 | [tool.isort] 65 | profile = "black" 66 | 67 | [tool.ruff] 68 | select = [ 69 | "D1", # pydocstyle errors 70 | "E", # pycodestyle errors 71 | "W", # pycodestyle warnings 72 | "F", # flake8 73 | "C", # flake8-comprehensions 74 | "B", # flake8-bugbear 75 | ] 76 | ignore = [ 77 | "E501", # line too long, handled by black 78 | "B008", # do not perform function calls in argument defaults 79 | "B905", # ignore zip() without an explicit strict= parameter, only support with python >3.10 80 | "D100", 81 | "D103", 82 | "C901", 83 | ] 84 | 85 | [tool.mypy] 86 | no_implicit_optional = true 87 | strict_optional = true 88 | namespace_packages = true 89 | explicit_package_bases = true -------------------------------------------------------------------------------- /stacchip/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | stacchip 3 | """ 4 | 5 | __version__ = "0.1.38" 6 | -------------------------------------------------------------------------------- /stacchip/chipper.py: 
-------------------------------------------------------------------------------- 1 | import math 2 | from pathlib import Path 3 | from typing import List, Optional 4 | from urllib.parse import urlparse 5 | 6 | import rasterio 7 | from numpy.typing import ArrayLike 8 | from rasterio.enums import Resampling 9 | from rasterio.windows import Window 10 | 11 | from stacchip.indexer import ChipIndexer 12 | 13 | 14 | class Chipper: 15 | """ 16 | Chipper class for managing and processing raster data chips. 17 | """ 18 | 19 | def __init__( 20 | self, 21 | indexer: ChipIndexer, 22 | mountpath: Optional[str] = None, 23 | assets: Optional[List[str]] = None, 24 | asset_blacklist: Optional[List[str]] = None, 25 | ) -> None: 26 | """ 27 | Initializes the Chipper class. 28 | 29 | Args: 30 | indexer (Type[ChipIndexer]): Input data which has to be of type ChipIndexer. 31 | mountpath (Optional[str]): Path to the mount directory for raster indexer. 32 | Defaults to None. 33 | assets (Optional[List[str]]): List of asset names to include for processing. 34 | If not provided, all assets are processed. Defaults to None. 35 | asset_blacklist (Optional[List[str]]): List of asset names to exclude from 36 | processing. Defaults to None. 37 | 38 | """ 39 | self.mountpath = None if mountpath is None else Path(mountpath) 40 | self.assets = assets 41 | self.asset_blacklist = asset_blacklist 42 | self.indexer = indexer 43 | 44 | def __len__(self) -> int: 45 | """ 46 | Returns the number of chips available. 47 | 48 | Returns: 49 | int: Number of chips available based on the indexer size. 50 | """ 51 | return self.indexer.size 52 | 53 | def __getitem__(self, index: int) -> tuple: 54 | """ 55 | Gets the chip by a single index. 56 | 57 | Args: 58 | index (int): Index of the chip to retrieve. 59 | 60 | Returns: 61 | tuple: A tuple containing x index, y index, and the chip data. 
62 | """ 63 | y_index = index // self.indexer.x_size 64 | x_index = index % self.indexer.x_size 65 | return x_index, y_index, self.chip(x_index, y_index) 66 | 67 | def __iter__(self): 68 | """ 69 | Iterates over chips. 70 | 71 | Yields: 72 | tuple: The next chip data in the sequence. 73 | """ 74 | counter = 0 75 | while counter < self.indexer.size: 76 | yield self[counter] 77 | counter += 1 78 | 79 | def get_pixels_for_asset(self, key: str, x: int, y: int) -> ArrayLike: 80 | """ 81 | Extracts chip pixel values for one asset. 82 | 83 | Args: 84 | key (str): The asset key to extract pixels from. 85 | x (int): The x index of the chip. 86 | y (int): The y index of the chip. 87 | 88 | Returns: 89 | ArrayLike: Array of pixel values for the specified asset. 90 | 91 | Raises: 92 | ValueError: If asset dimensions are not multiples of the highest resolution dimensions. 93 | """ 94 | asset = self.indexer.item.assets[key] 95 | 96 | srcpath = asset.href 97 | if self.mountpath: 98 | url = urlparse(srcpath, allow_fragments=False) 99 | srcpath = self.mountpath / Path(url.path.lstrip("/")) 100 | 101 | with rasterio.open(srcpath) as src: 102 | # Currently assume that different assets may be at different 103 | # resolutions, but are aligned and the gsd differs by an integer 104 | # multiplier. 
105 | if self.indexer.shape[0] % src.height: 106 | raise ValueError( 107 | f"Asset height {src.height} is not a multiple of highest resolution height {self.indexer.shape[0]}" # noqa: E501 108 | ) 109 | 110 | if self.indexer.shape[1] % src.width: 111 | raise ValueError( 112 | f"Asset width {src.width} is not a multiple of highest resolution width {self.indexer.shape[1]}" # noqa: E501 113 | ) 114 | 115 | factor = self.indexer.shape[0] / src.height 116 | 117 | chip_window = Window( 118 | math.floor(x * self.indexer.chip_size / factor), 119 | math.floor(y * self.indexer.chip_size / factor), 120 | math.ceil(self.indexer.chip_size / factor), 121 | math.ceil(self.indexer.chip_size / factor), 122 | ) 123 | 124 | return src.read( 125 | window=chip_window, 126 | out_shape=(src.count, self.indexer.chip_size, self.indexer.chip_size), 127 | resampling=Resampling.nearest, 128 | ) 129 | 130 | def chip(self, x: int, y: int) -> dict: 131 | """ 132 | Retrieves chip pixel array for the specified x and y index numbers. 133 | 134 | Args: 135 | x (int): The x index of the chip. 136 | y (int): The y index of the chip. 137 | 138 | Returns: 139 | dict: A dictionary where keys are asset names and values are arrays of pixel values. 
140 | """ 141 | if self.assets is not None: 142 | keys = self.assets 143 | else: 144 | keys = list(self.indexer.item.assets.keys()) 145 | 146 | if self.asset_blacklist is not None: 147 | keys = [key for key in keys if key not in self.asset_blacklist] 148 | 149 | return {key: self.get_pixels_for_asset(key, x, y) for key in keys} 150 | -------------------------------------------------------------------------------- /stacchip/indexer.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from functools import cached_property 3 | from math import floor 4 | from typing import Tuple 5 | 6 | import geoarrow.pyarrow as ga 7 | import numpy as np 8 | import pyarrow as pa 9 | import pyarrow.compute as pc 10 | import pyproj 11 | import rasterio 12 | from numpy.typing import ArrayLike 13 | from pystac import Item 14 | from rasterio.crs import CRS 15 | from rasterio.enums import Resampling 16 | from shapely import GeometryType, Polygon 17 | from shapely.geometry import box 18 | from shapely.ops import transform 19 | 20 | warnings.filterwarnings( 21 | "ignore", 22 | message=( 23 | "The argument 'infer_datetime_format' is deprecated and will" 24 | " be removed in a future version. A strict version of it is now " 25 | "the default, see https://pandas.pydata.org/pdeps/0004-consistent" 26 | "-to-datetime-parsing.html. You can safely remove this argument." 
27 | ), 28 | ) 29 | 30 | 31 | class ChipIndexer: 32 | """ 33 | Indexer base class 34 | """ 35 | 36 | def __init__( 37 | self, 38 | item: Item, 39 | chip_size: int = 256, 40 | chip_max_nodata: float = 0.5, 41 | shape=None, 42 | ) -> None: 43 | """ 44 | Init ChipIndexer 45 | """ 46 | self.item = item 47 | self.chip_size = chip_size 48 | self.chip_max_nodata = chip_max_nodata 49 | self._shape = shape 50 | 51 | assert self.item.ext.has("proj") 52 | 53 | self.assert_units_metre() 54 | self.setup_projector() 55 | 56 | def assert_units_metre(self) -> None: 57 | """ 58 | Ensure input data has meters as units 59 | """ 60 | assert self.crs.linear_units.lower() in ["metre", "meter"] 61 | 62 | @property 63 | def crs(self) -> CRS: 64 | """ 65 | Get coordinate reference system for the assets in this index 66 | """ 67 | if self.item.properties.get("proj:epsg", None): 68 | return CRS.from_epsg(self.item.properties["proj:epsg"]) 69 | elif "proj:wkt2" in self.item.properties: 70 | return CRS.from_string(self.item.properties["proj:wkt2"]) 71 | else: 72 | raise ValueError("Could not identify CRS of source files") 73 | 74 | def setup_projector(self): 75 | """ 76 | Prepare projection function to project geometries into WGS84 77 | """ 78 | wgs84 = pyproj.CRS("EPSG:4326") 79 | self._projector = pyproj.Transformer.from_crs( 80 | self.crs, wgs84, always_xy=True 81 | ).transform 82 | 83 | def reproject(self, geom) -> GeometryType: 84 | """ 85 | Reproject a geometry into WGS84 86 | """ 87 | return transform(self._projector, geom) 88 | 89 | def _get_trsf_or_shape(self, key: str) -> list: 90 | """ 91 | The shape of the hightest resolution band 92 | """ 93 | data = [] 94 | if key in self.item.properties: 95 | data = self.item.properties[key] 96 | else: 97 | for asset in self.item.assets.values(): 98 | if key not in asset.extra_fields: 99 | continue 100 | if not data or data[0] < asset.extra_fields[key][0]: 101 | data = asset.extra_fields[key] 102 | if not data: 103 | raise ValueError("Could not 
determine {key} for this STAC item") 104 | 105 | return data 106 | 107 | @cached_property 108 | def shape(self) -> list: 109 | """ 110 | Shape of the STAC item data 111 | 112 | Obtains the shape of the highest resolution band from 113 | all the available bands. 114 | """ 115 | if self._shape is not None: 116 | return self._shape 117 | else: 118 | return self._get_trsf_or_shape("proj:shape") 119 | 120 | @cached_property 121 | def transform(self) -> list: 122 | """ 123 | The transform property from the STAC item 124 | """ 125 | return self._get_trsf_or_shape("proj:transform") 126 | 127 | @property 128 | def x_size(self) -> int: 129 | """ 130 | Number of tiles vailable in x direction 131 | """ 132 | return floor(self.shape[1] / self.chip_size) 133 | 134 | @property 135 | def y_size(self) -> int: 136 | """ 137 | Number of tiles vailable in y direction 138 | """ 139 | return floor(self.shape[0] / self.chip_size) 140 | 141 | @property 142 | def size(self) -> int: 143 | """ 144 | Number of tiles in this STAC item 145 | """ 146 | return self.x_size * self.y_size 147 | 148 | @property 149 | def bbox(self) -> Tuple[float, float, float, float]: 150 | """ 151 | Bounding box that covers all tiles 152 | 153 | This is different from the bounding box of the STAC item 154 | if the tiles don't fit into the number of pixels perfectly. 
155 | """ 156 | return ( 157 | self.transform[2], 158 | self.transform[5] + self.transform[4] * self.shape[0], 159 | self.transform[2] + self.transform[0] * self.shape[1], 160 | self.transform[5], 161 | ) 162 | 163 | def get_stats(self, x: int, y: int) -> Tuple[float, float]: 164 | """ 165 | A function to write for each indexer that returns nodata and 166 | cloud statistics for a chip 167 | """ 168 | raise NotImplementedError() 169 | 170 | def get_chip_bbox(self, x: int, y: int) -> Polygon: 171 | """ 172 | Bounding box for a chip 173 | """ 174 | chip_box = box( 175 | self.bbox[0] + x * self.transform[0] * self.chip_size, 176 | self.bbox[3] + y * self.transform[4] * self.chip_size, 177 | self.bbox[0] + (x + 1) * self.transform[0] * self.chip_size, 178 | self.bbox[3] + (y + 1) * self.transform[4] * self.chip_size, 179 | ) 180 | 181 | return self.reproject(chip_box) 182 | 183 | def create_index(self) -> pa.Table: 184 | """ 185 | The index for this STAC item 186 | """ 187 | index = { 188 | "chipid": np.empty(self.size, dtype=" Tuple[float, float]: 229 | """ 230 | Cloud and nodata percentage for a chip 231 | """ 232 | return 0.0, 0.0 233 | 234 | 235 | class NoDataMaskChipIndexer(ChipIndexer): 236 | """ 237 | Chip indexer that takes the nodata mask as input and assumes that 238 | there are no clouds in the image 239 | """ 240 | 241 | def __init__( 242 | self, 243 | item: Item, 244 | nodata_mask: ArrayLike, 245 | chip_size: int = 256, 246 | chip_max_nodata: float = 0.5, 247 | ) -> None: 248 | """ 249 | Init NoDataMaskChipIndexer 250 | """ 251 | super().__init__(item, chip_size, chip_max_nodata) 252 | self.nodata_mask = nodata_mask 253 | 254 | def get_stats(self, x: int, y: int) -> Tuple[float, float]: 255 | """ 256 | Cloud and nodata percentage for a chip 257 | 258 | Assumes there are no cloudy pixels and computes nodata from mask 259 | """ 260 | nodata_percentage = np.sum( 261 | self.nodata_mask[ 262 | y * self.chip_size : (y + 1) * self.chip_size, 263 | x * 
self.chip_size : (x + 1) * self.chip_size, 264 | ] 265 | ) / (self.chip_size**2) 266 | 267 | return 0.0, nodata_percentage 268 | 269 | 270 | class LandsatIndexer(ChipIndexer): 271 | """ 272 | Chip indexer for Landsat 8 and 9 STAC items 273 | """ 274 | 275 | @cached_property 276 | def qa(self): 277 | """ 278 | The quality band data for the STAC item 279 | """ 280 | print("Loading qa band") 281 | self.item.assets["qa_pixel"].href = self.item.assets["qa_pixel"].extra_fields[ 282 | "alternate" 283 | ]["s3"]["href"] 284 | with rasterio.open(self.item.assets["qa_pixel"].href) as src: 285 | return src.read(1) 286 | 287 | def get_stats(self, x: int, y: int) -> Tuple[float, float]: 288 | """ 289 | Cloud and nodata percentage for a chip 290 | 291 | Uses the qa band to compute these values. 292 | """ 293 | qa = self.qa[ 294 | y * self.chip_size : (y + 1) * self.chip_size, 295 | x * self.chip_size : (x + 1) * self.chip_size, 296 | ] 297 | 298 | # Bit 1 is dilated cloud, 3 is cloud, 4 is cloud shadow. 
299 | nodata_byte = np.array(1 << 0, dtype=qa.dtype) 300 | dilated_cloud_byte = np.array(1 << 1, dtype=qa.dtype) 301 | cloud_byte = np.array(1 << 3, dtype=qa.dtype) 302 | shadow_byte = np.array(1 << 4, dtype=qa.dtype) 303 | 304 | nodata_mask = np.bitwise_and(qa, nodata_byte) 305 | dilated_cloud = np.bitwise_and(qa, dilated_cloud_byte) 306 | cloud = np.bitwise_and(qa, cloud_byte) 307 | shadow = np.bitwise_and(qa, shadow_byte) 308 | 309 | layer_clouds = (dilated_cloud | cloud | shadow).astype(dtype="bool") 310 | 311 | cloud_percentage = np.sum(layer_clouds) / qa.size 312 | nodata_percentage = np.sum(nodata_mask) / qa.size 313 | 314 | return cloud_percentage, nodata_percentage 315 | 316 | 317 | class Sentinel2Indexer(ChipIndexer): 318 | """ 319 | Indexer for Sentinel-2 STAC items 320 | """ 321 | 322 | scl_filter = [1, 3, 8, 9, 10] 323 | nodata_value = 0 324 | 325 | @cached_property 326 | def scl(self): 327 | """ 328 | The Scene Classification (SCL) band data for the STAC item 329 | """ 330 | print("Loading scl band") 331 | with rasterio.open(self.item.assets["scl"].href) as src: 332 | return src.read(out_shape=(1, *self.shape), resampling=Resampling.nearest)[ 333 | 0 334 | ] 335 | 336 | def get_stats(self, x: int, y: int) -> Tuple[float, float]: 337 | """ 338 | Cloud and nodata percentage for a chip 339 | 340 | Uses the SCL band to compute these values. 
341 | """ 342 | scl = self.scl[ 343 | y * self.chip_size : (y + 1) * self.chip_size, 344 | x * self.chip_size : (x + 1) * self.chip_size, 345 | ] 346 | 347 | cloud_percentage = int(np.isin(scl, self.scl_filter).sum()) / scl.size 348 | 349 | nodata_percentage = np.sum(scl == self.nodata_value) / scl.size 350 | 351 | return cloud_percentage, nodata_percentage 352 | 353 | 354 | class ModisIndexer(ChipIndexer): 355 | """ 356 | Indexer for MODIS STAC items 357 | """ 358 | 359 | @cached_property 360 | def quality(self): 361 | """ 362 | The Quality band data for the STAC item 363 | """ 364 | print("Loading quality band") 365 | with rasterio.open(self.item.assets["sur_refl_qc_500m"].href) as src: 366 | return src.read(out_shape=(1, *self.shape), resampling=Resampling.nearest)[ 367 | 0 368 | ] 369 | 370 | def get_stats(self, x: int, y: int) -> Tuple[float, float]: 371 | """ 372 | Cloud and nodata percentage for a chip 373 | """ 374 | qa = self.quality[ 375 | y * self.chip_size : (y + 1) * self.chip_size, 376 | x * self.chip_size : (x + 1) * self.chip_size, 377 | ] 378 | byte1 = np.array(1 << 0, dtype=qa.dtype) 379 | byte2 = np.array(1 << 1, dtype=qa.dtype) 380 | b1mask = np.bitwise_and(qa, byte1) 381 | b2mask = np.bitwise_and(qa, byte2) 382 | 383 | # Clouds are flagged as 10 in the first two bytes, nodata is flagged 384 | # as 11 in the first two bytes. 
Extracte from table 10 in 385 | # https://lpdaac.usgs.gov/documents/925/MOD09_User_Guide_V61.pdf 386 | cloud_mask = np.logical_and(b1mask, np.logical_not(b2mask)) 387 | nodata_mask = np.logical_and(b1mask, b2mask) 388 | 389 | nodata_percentage = np.sum(nodata_mask) / nodata_mask.size 390 | cloud_percentage = np.sum(cloud_mask) / cloud_mask.size 391 | 392 | return cloud_percentage, nodata_percentage 393 | -------------------------------------------------------------------------------- /stacchip/processors/landsat_processor.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import random 4 | from pathlib import Path 5 | from urllib.parse import urlparse 6 | 7 | import boto3 8 | import geopandas as gp 9 | import pyarrow as pa 10 | import pystac_client 11 | from geoarrow.pyarrow import io 12 | 13 | from stacchip.indexer import LandsatIndexer 14 | 15 | STAC_API = "https://landsatlook.usgs.gov/stac-server" 16 | 17 | LS_ASSETS_L1 = [ 18 | "blue", 19 | "green", 20 | "red", 21 | "nir08", 22 | "swir16", 23 | "lwir", 24 | "lwir_high", 25 | "swir22", 26 | "pan", 27 | "qa_pixel", 28 | ] 29 | LS_ASSETS_L2 = [ 30 | "blue", 31 | "green", 32 | "red", 33 | "nir08", 34 | "swir16", 35 | "swir22", 36 | "qa_pixel", 37 | ] 38 | ABSOLUTE_CLOUD_COVER_FILTER = 75 39 | PLATFORM_NAME_L2 = "landsat-c2l2-sr" 40 | PLATFORM_NAME_L1 = "landsat-c2l1" 41 | quartals = [ 42 | "{year}-01-01/{year}-03-31", 43 | "{year}-04-01/{year}-06-30", 44 | "{year}-07-01/{year}-09-30", 45 | "{year}-10-01/{year}-12-31", 46 | ] 47 | 48 | 49 | def process_landsat_tile(index: int, sample_source: str, bucket: str) -> None: 50 | # Prepare resources for the job 51 | catalog = pystac_client.Client.open(STAC_API) 52 | s3 = boto3.resource("s3") 53 | data = gp.read_file(sample_source) 54 | row = data.iloc[index] 55 | 56 | print("MGRS", row["name"]) 57 | for platform_name in [PLATFORM_NAME_L1, PLATFORM_NAME_L2]: 58 | random.seed(index) 59 | for year in 
random.sample(range(2018, 2024), 1): 60 | print(f"Year {year}") 61 | for quartal in quartals: 62 | print(f"Quartal {quartal.format(year=year)}") 63 | items = catalog.search( 64 | collections=[platform_name], 65 | datetime=quartal.format(year=year), 66 | max_items=1, 67 | intersects=row.geometry.centroid, 68 | sortby="properties.eo:cloud_cover", 69 | query={ 70 | "platform": {"in": ["LANDSAT_8", "LANDSAT_9"]}, 71 | }, 72 | ) 73 | item = items.item_collection()[0] 74 | 75 | if item.properties["eo:cloud_cover"] > ABSOLUTE_CLOUD_COVER_FILTER: 76 | continue 77 | 78 | print( 79 | f"Cloud cover is {item.properties['eo:cloud_cover']} ({item.properties['platform']})" 80 | ) 81 | 82 | for key in list(item.assets.keys()): 83 | if ( 84 | platform_name == PLATFORM_NAME_L1 and key not in LS_ASSETS_L1 85 | ) or (key not in LS_ASSETS_L2): 86 | del item.assets[key] 87 | else: 88 | href = item.assets[key].extra_fields["alternate"]["s3"]["href"] 89 | url = urlparse(href) 90 | copy_source = { 91 | "Bucket": url.netloc, 92 | "Key": url.path.lstrip("/"), 93 | } 94 | print(f"Copying {key} band to {copy_source}") 95 | new_key = f"{platform_name}/{item.id}/{Path(href).name}" 96 | s3.meta.client.copy( 97 | copy_source, 98 | bucket, 99 | new_key, 100 | ExtraArgs={"RequestPayer": "requester"}, 101 | ) 102 | item.assets[key].href = f"s3://{bucket}/{new_key}" 103 | 104 | # Convert Dictionary to JSON String 105 | data_string = json.dumps(item.to_dict()) 106 | 107 | # Upload JSON String to an S3 Object 108 | s3_bucket = s3.Bucket(name=bucket) 109 | s3_bucket.put_object( 110 | Key=f"{platform_name}/{item.id}/stac_item.json", 111 | Body=data_string, 112 | ) 113 | 114 | indexer = LandsatIndexer(item, chip_max_nodata=0) 115 | chip_index = indexer.create_index() 116 | 117 | writer = pa.BufferOutputStream() 118 | io.write_geoparquet_table(chip_index, writer) 119 | body = bytes(writer.getvalue()) 120 | # Centralize the index files to make combining them easier later on 121 | s3_bucket.put_object( 122 | 
Body=body, 123 | Key=f"index/{platform_name}/{item.id}/index_{item.id}.parquet", 124 | ) 125 | 126 | 127 | def process() -> None: 128 | 129 | if "AWS_BATCH_JOB_ARRAY_INDEX" not in os.environ: 130 | raise ValueError("AWS_BATCH_JOB_ARRAY_INDEX env var not set") 131 | if "STACCHIP_SAMPLE_SOURCE" not in os.environ: 132 | raise ValueError("STACCHIP_SAMPLE_SOURCE env var not set") 133 | if "STACCHIP_BUCKET" not in os.environ: 134 | raise ValueError("STACCHIP_BUCKET env var not set") 135 | 136 | index = int(os.environ["AWS_BATCH_JOB_ARRAY_INDEX"]) 137 | sample_source = os.environ["STACCHIP_SAMPLE_SOURCE"] 138 | bucket = os.environ["STACCHIP_BUCKET"] 139 | 140 | process_landsat_tile(index, sample_source, bucket) 141 | -------------------------------------------------------------------------------- /stacchip/processors/linz_processor.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import random 4 | import tempfile 5 | from pathlib import Path 6 | 7 | import boto3 8 | import pyarrow as pa 9 | import rasterio 10 | from dateutil import parser 11 | from geoarrow.pyarrow import io 12 | from pystac import Item 13 | from rasterio.enums import Resampling 14 | from rio_stac import create_stac_item 15 | 16 | from stacchip.indexer import NoDataMaskChipIndexer 17 | 18 | PLATFORM_NAME = "linz" 19 | 20 | TARGET_RESOLUTION = 0.3 21 | 22 | nz_prefixes = [ 23 | "auckland/auckland_2022_0.075m/", 24 | "wellington/wellington_2021_0.075m/", 25 | "wellington/wellington_2021_0.3m/", 26 | "bay-of-plenty/bay-of-plenty_2023_0.1m/", 27 | "bay-of-plenty/tauranga_2022_0.1m/", 28 | "bay-of-plenty/tauranga-winter_2022_0.1m/", 29 | "canterbury/canterbury_2023_0.3m/", 30 | "canterbury/ashburton_2023_0.1m/", 31 | "canterbury/hurunui_2023_0.075m/", 32 | "canterbury/timaru_2022-2023_0.1m/", 33 | "canterbury/selwyn_2022-2023_0.075m/", 34 | "gisborne/gisborne_2023_0.075m/", 35 | "hawkes-bay/hawkes-bay_2022_0.05m/", 36 | 
"hawkes-bay/napier_2017-2018_0.05m/", 37 | "hawkes-bay/wairoa_2014-2015_0.1m/", 38 | "manawatu-whanganui/manawatu-whanganui_2021-2022_0.3m/", 39 | "manawatu-whanganui/palmerston-north_2022_0.125m/", 40 | "manawatu-whanganui/rangitikei_2021_0.125m/", 41 | "manawatu-whanganui/tararua_2024_0.1m/", 42 | "manawatu-whanganui/whanganui_2022_0.075m/", 43 | "marlborough/marlborough_2023_0.075m/", 44 | "nelson/nelson_2022_0.075m/", 45 | "northland/northland_2016_0.1m/", 46 | "otago/queenstown_2021_0.1m/", 47 | "otago/otago_2018_0.1m/", 48 | "otago/dunedin_2018-2019_0.1m/", 49 | "southland/southland_2023_0.1m/", 50 | "southland/invercargill_2022_0.05m/", 51 | "taranaki/taranaki_2022_0.05m/", 52 | "taranaki/new-plymouth_2017_0.1m/", 53 | "tasman/tasman_2023_0.075m/", 54 | "waikato/hamilton_2023_0.05m/", 55 | "waikato/otorohanga_2021_0.1m/", 56 | "waikato/taupo_2023_0.075m/", 57 | "waikato/thames-coromandel_2021_0.1m/", 58 | "waikato/waikato_2021_0.1m/", 59 | "waikato/waipa_2021_0.1m/", 60 | "west-coast/buller_2020_0.2m/", 61 | "west-coast/west-coast_2016_0.1m/", 62 | ] 63 | 64 | 65 | def get_linz_tiffs(prefix) -> list: 66 | 67 | s3_resource = boto3.resource("s3") 68 | s3_bucket = s3_resource.Bucket(name="nz-imagery") 69 | 70 | files = [] 71 | s3_object_iterator = s3_bucket.objects.filter(Prefix=prefix) 72 | 73 | for s3_object in s3_object_iterator: 74 | if s3_object.key.endswith(".tiff"): 75 | files.append(s3_object.key) 76 | 77 | # Sample a percentage of all scenes 78 | sample_size = max(min(int(len(files) / 2), 2000), 10) 79 | print(f"Found {len(files)} scenes for {prefix}, keeping {sample_size}") 80 | random.seed(42) 81 | return random.sample(files, sample_size) 82 | 83 | 84 | def get_original_item(key: str) -> Item: 85 | s3_resource = boto3.resource("s3") 86 | content_object = s3_resource.Object( 87 | "nz-imagery", key.replace(".tiff", "") + ".json" 88 | ) 89 | file_content = content_object.get()["Body"].read().decode("utf-8") 90 | json_content = json.loads(file_content) 
91 | return Item.from_dict(json_content) 92 | 93 | 94 | def process_linz_tile(index, bucket): 95 | 96 | tiffs = get_linz_tiffs(nz_prefixes[index]) 97 | 98 | for key in tiffs: 99 | print(f"Working on {key}") 100 | 101 | href = f"s3://nz-imagery/{key}" 102 | 103 | original_item = get_original_item(key) 104 | 105 | # For now, resample so we have a constant gsd for all images 106 | with rasterio.open(href) as dataset: 107 | 108 | gsd = abs(dataset.transform[0]) 109 | 110 | upscale_factor = gsd / TARGET_RESOLUTION 111 | 112 | data = dataset.read( 113 | out_shape=( 114 | dataset.count, 115 | int(dataset.height * upscale_factor), 116 | int(dataset.width * upscale_factor), 117 | ), 118 | resampling=Resampling.bilinear, 119 | ) 120 | 121 | # Drop alpha band if present 122 | data = data[:3] 123 | 124 | # scale image transform 125 | transform = dataset.transform * dataset.transform.scale( 126 | (dataset.width / data.shape[-1]), (dataset.height / data.shape[-2]) 127 | ) 128 | 129 | new_key = f"{PLATFORM_NAME}/{original_item.id}/{Path(href).name}" 130 | new_href = f"s3://{bucket}/{new_key}" 131 | 132 | meta = dataset.meta.copy() 133 | meta["transform"] = transform 134 | meta["width"] = data.shape[2] 135 | meta["height"] = data.shape[1] 136 | meta["compress"] = "deflate" 137 | meta["count"] = 3 138 | 139 | with tempfile.NamedTemporaryFile(mode="w") as temp_file: 140 | with rasterio.open(temp_file.name, "w", **meta) as dst: 141 | dst.write(data) 142 | 143 | s3_client = boto3.client("s3") 144 | s3_client.upload_file(temp_file.name, bucket, new_key) 145 | 146 | item = create_stac_item(new_href, with_proj=True) 147 | item.datetime = parser.parse(original_item.properties["start_datetime"]) 148 | item.id = original_item.id 149 | 150 | # Convert Dictionary to JSON String 151 | data_string = json.dumps(item.to_dict()) 152 | 153 | # Upload JSON String to an S3 Object 154 | s3 = boto3.resource("s3") 155 | s3_bucket = s3.Bucket(name=bucket) 156 | s3_bucket.put_object( 157 | 
Key=f"{PLATFORM_NAME}/{item.id}/stac_item.json", 158 | Body=data_string, 159 | ) 160 | 161 | indexer = NoDataMaskChipIndexer(item, nodata_mask=data[0] == 0) 162 | index = indexer.create_index() 163 | 164 | writer = pa.BufferOutputStream() 165 | io.write_geoparquet_table(index, writer) 166 | body = bytes(writer.getvalue()) 167 | # Centralize the index files to make combining them easier later on 168 | s3_bucket.put_object( 169 | Body=body, 170 | Key=f"index/{PLATFORM_NAME}/{item.id}/index_{item.id}.parquet", 171 | ) 172 | 173 | 174 | def process() -> None: 175 | 176 | if "AWS_BATCH_JOB_ARRAY_INDEX" not in os.environ: 177 | raise ValueError("AWS_BATCH_JOB_ARRAY_INDEX env var not set") 178 | if "STACCHIP_BUCKET" not in os.environ: 179 | raise ValueError("STACCHIP_BUCKET env var not set") 180 | 181 | index = int(os.environ["AWS_BATCH_JOB_ARRAY_INDEX"]) 182 | bucket = os.environ["STACCHIP_BUCKET"] 183 | 184 | process_linz_tile(index, bucket) 185 | -------------------------------------------------------------------------------- /stacchip/processors/modis_processor.py: -------------------------------------------------------------------------------- 1 | import calendar 2 | import json 3 | import os 4 | import tempfile 5 | from datetime import datetime 6 | from pathlib import Path 7 | 8 | import boto3 9 | import planetary_computer as pc 10 | import pyarrow as pa 11 | import pystac_client 12 | import rasterio 13 | from geoarrow.pyarrow import io 14 | from rasterio.warp import Resampling, calculate_default_transform, reproject 15 | 16 | from stacchip.indexer import ModisIndexer 17 | 18 | STAC_API = "https://planetarycomputer.microsoft.com/api/stac/v1" 19 | COLLECTION = "modis-09A1-061" 20 | BANDS = [ 21 | "sur_refl_b01", 22 | "sur_refl_b02", 23 | "sur_refl_b03", 24 | "sur_refl_b04", 25 | "sur_refl_b05", 26 | "sur_refl_b06", 27 | "sur_refl_b07", 28 | "sur_refl_qc_500m", 29 | ] 30 | # The grid tiles were selected to not have nodata 31 | # in the SIN projection. 
This is to avoid effects 32 | # of the international dateline cutoff. 33 | SIN_GRID_TILES = [ 34 | (2, 12), 35 | (2, 13), 36 | (2, 14), 37 | (2, 15), 38 | (2, 16), 39 | (2, 17), 40 | (2, 18), 41 | (2, 19), 42 | (2, 20), 43 | (2, 21), 44 | (2, 22), 45 | (2, 23), 46 | (3, 9), 47 | (3, 11), 48 | (3, 12), 49 | (3, 13), 50 | (3, 14), 51 | (3, 15), 52 | (3, 17), 53 | (3, 18), 54 | (3, 19), 55 | (3, 20), 56 | (3, 21), 57 | (3, 22), 58 | (3, 23), 59 | (3, 24), 60 | (3, 25), 61 | (3, 26), 62 | (4, 8), 63 | (4, 10), 64 | (4, 11), 65 | (4, 12), 66 | (4, 13), 67 | (4, 14), 68 | (4, 17), 69 | (4, 18), 70 | (4, 19), 71 | (4, 20), 72 | (4, 21), 73 | (4, 22), 74 | (4, 23), 75 | (4, 24), 76 | (4, 25), 77 | (4, 26), 78 | (4, 27), 79 | (4, 28), 80 | (5, 7), 81 | (5, 8), 82 | (5, 9), 83 | (5, 10), 84 | (5, 11), 85 | (5, 12), 86 | (5, 15), 87 | (5, 16), 88 | (5, 17), 89 | (5, 18), 90 | (5, 19), 91 | (5, 20), 92 | (5, 21), 93 | (5, 23), 94 | (5, 24), 95 | (5, 25), 96 | (5, 26), 97 | (5, 27), 98 | (5, 28), 99 | (5, 29), 100 | (5, 30), 101 | (6, 3), 102 | (6, 7), 103 | (6, 8), 104 | (6, 9), 105 | (6, 10), 106 | (6, 11), 107 | (6, 16), 108 | (6, 17), 109 | (6, 18), 110 | (6, 19), 111 | (6, 20), 112 | (6, 21), 113 | (6, 22), 114 | (6, 23), 115 | (6, 24), 116 | (6, 25), 117 | (6, 26), 118 | (6, 27), 119 | (6, 28), 120 | (6, 29), 121 | (6, 30), 122 | (6, 31), 123 | (7, 3), 124 | (7, 7), 125 | (7, 8), 126 | (7, 9), 127 | (7, 10), 128 | (7, 11), 129 | (7, 15), 130 | (7, 16), 131 | (7, 17), 132 | (7, 18), 133 | (7, 19), 134 | (7, 20), 135 | (7, 21), 136 | (7, 22), 137 | (7, 23), 138 | (7, 24), 139 | (7, 25), 140 | (7, 26), 141 | (7, 27), 142 | (7, 28), 143 | (7, 29), 144 | (7, 30), 145 | (7, 31), 146 | (7, 32), 147 | (7, 33), 148 | (8, 1), 149 | (8, 2), 150 | (8, 8), 151 | (8, 9), 152 | (8, 10), 153 | (8, 11), 154 | (8, 12), 155 | (8, 13), 156 | (8, 16), 157 | (8, 18), 158 | (8, 19), 159 | (8, 20), 160 | (8, 21), 161 | (8, 22), 162 | (8, 23), 163 | (8, 25), 164 | (8, 26), 165 | (8, 27), 166 | 
(8, 28), 167 | (8, 29), 168 | (8, 30), 169 | (8, 31), 170 | (8, 32), 171 | (8, 33), 172 | (8, 34), 173 | (9, 1), 174 | (9, 2), 175 | (9, 3), 176 | (9, 4), 177 | (9, 8), 178 | (9, 9), 179 | (9, 10), 180 | (9, 11), 181 | (9, 12), 182 | (9, 13), 183 | (9, 14), 184 | (9, 16), 185 | (9, 19), 186 | (9, 21), 187 | (9, 22), 188 | (9, 23), 189 | (9, 25), 190 | (9, 27), 191 | (9, 28), 192 | (9, 29), 193 | (9, 30), 194 | (9, 31), 195 | (9, 32), 196 | (9, 33), 197 | (9, 34), 198 | (10, 2), 199 | (10, 3), 200 | (10, 4), 201 | (10, 5), 202 | (10, 10), 203 | (10, 11), 204 | (10, 12), 205 | (10, 13), 206 | (10, 14), 207 | (10, 17), 208 | (10, 19), 209 | (10, 20), 210 | (10, 21), 211 | (10, 22), 212 | (10, 23), 213 | (10, 27), 214 | (10, 28), 215 | (10, 29), 216 | (10, 30), 217 | (10, 31), 218 | (10, 32), 219 | (10, 33), 220 | (11, 3), 221 | (11, 4), 222 | (11, 5), 223 | (11, 6), 224 | (11, 8), 225 | (11, 10), 226 | (11, 11), 227 | (11, 12), 228 | (11, 13), 229 | (11, 14), 230 | (11, 15), 231 | (11, 19), 232 | (11, 20), 233 | (11, 21), 234 | (11, 22), 235 | (11, 23), 236 | (11, 27), 237 | (11, 28), 238 | (11, 29), 239 | (11, 30), 240 | (11, 31), 241 | (11, 32), 242 | (12, 11), 243 | (12, 12), 244 | (12, 13), 245 | (12, 16), 246 | (12, 17), 247 | (12, 19), 248 | (12, 20), 249 | (12, 24), 250 | (12, 27), 251 | (12, 28), 252 | (12, 29), 253 | (12, 30), 254 | (13, 12), 255 | (13, 13), 256 | (13, 17), 257 | (13, 20), 258 | (13, 21), 259 | (13, 22), 260 | (13, 28), 261 | (14, 13), 262 | (14, 14), 263 | (14, 15), 264 | (14, 16), 265 | (14, 18), 266 | (14, 22), 267 | ] 268 | PLATFORM_NAME = "modis" 269 | DST_CRS = "EPSG:3857" 270 | 271 | 272 | def process_modis_tile( 273 | index: int, 274 | bucket: str, 275 | ) -> None: 276 | 277 | # Prepare resources for the job 278 | catalog = pystac_client.Client.open(STAC_API, modifier=pc.sign_inplace) 279 | 280 | s3 = boto3.resource("s3") 281 | 282 | i, j = SIN_GRID_TILES[index] 283 | 284 | items_to_process = [] 285 | for year in range(2018, 2024): 
286 | for month in range(1, 13): 287 | # Compute date range for this month 288 | end = calendar.monthrange(year, month)[1] 289 | timerange = ( 290 | f"{year}-{str(month).zfill(2)}-01/" 291 | f"{year}-{str(month).zfill(2)}-{str(end).zfill(2)}" 292 | ) 293 | # Query catalog 294 | items = catalog.search( 295 | collections=[COLLECTION], 296 | datetime=timerange, 297 | query={ 298 | "modis:vertical-tile": { 299 | "eq": i, 300 | }, 301 | "modis:horizontal-tile": { 302 | "eq": j, 303 | }, 304 | }, 305 | max_items=1, 306 | ) 307 | items = list(items.item_collection()) 308 | 309 | if not len(items): 310 | print(f"No items found for timerange {timerange}") 311 | continue 312 | 313 | items_to_process.append(items[0]) 314 | 315 | for item in items_to_process: 316 | for key in list(item.assets.keys()): 317 | if key not in BANDS: 318 | del item.assets[key] 319 | 320 | # Manually set datetime to end date. Modis products are 321 | # composited from a date range. 322 | item.datetime = datetime.strptime( 323 | item.properties["end_datetime"], "%Y-%m-%dT%H:%M:%SZ" 324 | ) 325 | 326 | for key, asset in item.assets.items(): 327 | new_key = f"{PLATFORM_NAME}/{item.id}/{Path(asset.href.split('?')[0]).name}" 328 | new_href = f"s3://{bucket}/{new_key}" 329 | 330 | with rasterio.open(asset.href) as src: 331 | transform, width, height = calculate_default_transform( 332 | src.crs, DST_CRS, src.width, src.height, *src.bounds 333 | ) 334 | kwargs = src.meta.copy() 335 | kwargs.update( 336 | { 337 | "crs": DST_CRS, 338 | "transform": transform, 339 | "width": width, 340 | "height": height, 341 | "compress": "deflate", 342 | } 343 | ) 344 | with tempfile.NamedTemporaryFile(mode="w") as temp_file: 345 | with rasterio.open(temp_file.name, "w", **kwargs) as dst: 346 | for i in range(1, src.count + 1): 347 | reproject( 348 | source=rasterio.band(src, i), 349 | destination=rasterio.band(dst, i), 350 | src_transform=src.transform, 351 | src_crs=src.crs, 352 | dst_transform=transform, 353 | 
dst_crs=DST_CRS, 354 | resampling=Resampling.nearest, 355 | ) 356 | s3_client = boto3.client("s3") 357 | s3_client.upload_file(temp_file.name, bucket, new_key) 358 | 359 | item.assets[key].href = new_href 360 | 361 | # Update proj extension to match new data format 362 | item.properties["proj:shape"] = (height, width) 363 | item.properties["proj:epsg"] = 3857 364 | del item.properties["proj:wkt2"] 365 | item.properties["proj:transform"] = transform 366 | 367 | # Convert Dictionary to JSON String 368 | data_string = json.dumps(item.to_dict()) 369 | 370 | # Upload JSON String to an S3 Object 371 | s3_bucket = s3.Bucket(name=bucket) 372 | s3_bucket.put_object( 373 | Key=f"{PLATFORM_NAME}/{item.id}/stac_item.json", 374 | Body=data_string, 375 | ) 376 | 377 | indexer = ModisIndexer(item) 378 | index = indexer.create_index() 379 | print("Indexer info", indexer.x_size, indexer.y_size, indexer.shape) 380 | 381 | writer = pa.BufferOutputStream() 382 | io.write_geoparquet_table(index, writer) 383 | body = bytes(writer.getvalue()) 384 | # Centralize the index files to make combining them easier later on 385 | s3_bucket.put_object( 386 | Body=body, 387 | Key=f"index/{PLATFORM_NAME}/{item.id}/index_{item.id}.parquet", 388 | ) 389 | 390 | 391 | def process() -> None: 392 | 393 | if "AWS_BATCH_JOB_ARRAY_INDEX" not in os.environ: 394 | raise ValueError("AWS_BATCH_JOB_ARRAY_INDEX env var not set") 395 | if "STACCHIP_BUCKET" not in os.environ: 396 | raise ValueError("STACCHIP_BUCKET env var not set") 397 | 398 | index = int(os.environ["AWS_BATCH_JOB_ARRAY_INDEX"]) 399 | bucket = os.environ["STACCHIP_BUCKET"] 400 | 401 | process_modis_tile(index, bucket) 402 | -------------------------------------------------------------------------------- /stacchip/processors/naip_processor.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import random 4 | from pathlib import Path 5 | from urllib.parse import urlparse 6 | 7 | 
import boto3 8 | import geopandas as gp 9 | import pyarrow as pa 10 | import pystac_client 11 | from botocore.exceptions import ClientError 12 | from geoarrow.pyarrow import io 13 | 14 | from stacchip.indexer import NoStatsChipIndexer 15 | 16 | STAC_API = "https://planetarycomputer.microsoft.com/api/stac/v1" 17 | 18 | AWS_S3_URL = ( 19 | "s3://naip-analytic/{state}/{year}/{resolution}/rgbir_cog/{block}{subblock}/{name}" 20 | ) 21 | PLATFORM_NAME = "naip" 22 | 23 | 24 | def process_naip_tile( 25 | index: int, sample_source: str, bucket: str, latest_only: bool = False 26 | ) -> None: 27 | # Prepare resources for the job 28 | catalog = pystac_client.Client.open(STAC_API) 29 | s3 = boto3.resource("s3") 30 | data = gp.read_file(sample_source) 31 | row = data.iloc[index] 32 | 33 | items = catalog.search( 34 | collections=["naip"], 35 | intersects=row.geometry.centroid, 36 | sortby="properties.naip:year", 37 | max_items=10, 38 | ) 39 | items = list(items.item_collection()) 40 | 41 | if not len(items): 42 | print(f"No items found, skipping index {index}") 43 | return 44 | 45 | latest_item = items.pop() 46 | items_to_process = [latest_item] 47 | if not latest_only: 48 | random.seed(index) 49 | random_item = random.choice(items) 50 | items_to_process.append(random_item) 51 | 52 | for item in items_to_process: 53 | print(f"Processing item {item.id}") 54 | for key in list(item.assets.keys()): 55 | if key != "image": 56 | del item.assets[key] 57 | continue 58 | 59 | new_key = f"{PLATFORM_NAME}/{item.id}/{Path(item.assets[key].href).name}" 60 | try: 61 | href = AWS_S3_URL.format( 62 | year=item.properties["naip:year"], 63 | state=item.properties["naip:state"], 64 | resolution=f"{int(item.properties['gsd'] * 100)}cm", 65 | block=item.id.split("_")[2][:5], 66 | subblock=f"/{item.id.split('_')[2][5:]}", 67 | name=item.assets["image"].href.split("/")[-1], 68 | ) 69 | url = urlparse(href) 70 | copy_source = { 71 | "Bucket": "naip-analytic", 72 | "Key": url.path.lstrip("/"), 73 | } 74 
| print(f"Copying {copy_source}") 75 | s3.Object("naip-analytic", url.path.lstrip("/")).load( 76 | RequestPayer="requester" 77 | ) 78 | s3.meta.client.copy( 79 | copy_source, 80 | bucket, 81 | new_key, 82 | ExtraArgs={"RequestPayer": "requester"}, 83 | ) 84 | except ClientError: 85 | href = AWS_S3_URL.format( 86 | year=item.properties["naip:year"], 87 | state=item.properties["naip:state"], 88 | resolution=f"{int(item.properties['gsd'] * 100)}cm", 89 | block=item.id.split("_")[2][:5], 90 | subblock="", 91 | name=item.assets["image"].href.split("/")[-1], 92 | ) 93 | url = urlparse(href) 94 | copy_source = { 95 | "Bucket": "naip-analytic", 96 | "Key": url.path.lstrip("/"), 97 | } 98 | print(f"Failed, now copying {copy_source}") 99 | s3.Object("naip-analytic", url.path.lstrip("/")).load( 100 | RequestPayer="requester" 101 | ) 102 | s3.meta.client.copy( 103 | copy_source, 104 | bucket, 105 | new_key, 106 | ExtraArgs={"RequestPayer": "requester"}, 107 | ) 108 | 109 | item.assets[key].href = f"s3://{bucket}/{new_key}" 110 | 111 | # Convert Dictionary to JSON String 112 | data_string = json.dumps(item.to_dict()) 113 | 114 | # Upload JSON String to an S3 Object 115 | s3_bucket = s3.Bucket(name=bucket) 116 | s3_bucket.put_object( 117 | Key=f"{PLATFORM_NAME}/{item.id}/stac_item.json", 118 | Body=data_string, 119 | ) 120 | 121 | indexer = NoStatsChipIndexer(item) 122 | index = indexer.create_index() 123 | print("Indexer info", indexer.x_size, indexer.y_size, indexer.shape) 124 | 125 | writer = pa.BufferOutputStream() 126 | io.write_geoparquet_table(index, writer) 127 | body = bytes(writer.getvalue()) 128 | # Centralize the index files to make combining them easier later on 129 | s3_bucket.put_object( 130 | Body=body, 131 | Key=f"index/{PLATFORM_NAME}/{item.id}/index_{item.id}.parquet", 132 | ) 133 | 134 | 135 | def process() -> None: 136 | 137 | if "AWS_BATCH_JOB_ARRAY_INDEX" not in os.environ: 138 | raise ValueError("AWS_BATCH_JOB_ARRAY_INDEX env var not set") 139 | if 
"STACCHIP_SAMPLE_SOURCE" not in os.environ: 140 | raise ValueError("STACCHIP_SAMPLE_SOURCE env var not set") 141 | if "STACCHIP_BUCKET" not in os.environ: 142 | raise ValueError("STACCHIP_BUCKET env var not set") 143 | 144 | index = int(os.environ["AWS_BATCH_JOB_ARRAY_INDEX"]) 145 | sample_source = os.environ["STACCHIP_SAMPLE_SOURCE"] 146 | bucket = os.environ["STACCHIP_BUCKET"] 147 | 148 | process_naip_tile(index, sample_source, bucket) 149 | -------------------------------------------------------------------------------- /stacchip/processors/prechip.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import math 3 | import os 4 | from io import BytesIO 5 | from multiprocessing import Pool 6 | from typing import Union 7 | 8 | import boto3 9 | import numpy as np 10 | import pyarrow as pa 11 | from pyarrow import dataset as da 12 | 13 | from stacchip.chipper import Chipper 14 | from stacchip.utils import load_indexer_s3 15 | 16 | VERSION = "mode_v1_chipper_v2" 17 | 18 | CUBESIZE = 128 19 | 20 | S2_BANDS = [ 21 | "blue", 22 | "green", 23 | "red", 24 | "rededge1", 25 | "rededge2", 26 | "rededge3", 27 | "nir", 28 | "nir08", 29 | "swir16", 30 | "swir22", 31 | ] 32 | LS_BANDS = [ 33 | "red", 34 | "green", 35 | "blue", 36 | "nir08", 37 | "swir16", 38 | "swir22", 39 | ] 40 | NAIP_BANDS = ["red", "green", "blue", "nir"] 41 | LINZ_BANDS = ["red", "green", "blue"] 42 | S1_BANDS = ["vv", "vh"] 43 | MODIS_BANDS = [ 44 | "sur_refl_b01", 45 | "sur_refl_b02", 46 | "sur_refl_b03", 47 | "sur_refl_b04", 48 | "sur_refl_b05", 49 | "sur_refl_b06", 50 | "sur_refl_b07", 51 | ] 52 | 53 | 54 | def normalize_timestamp(date): 55 | 56 | week = date.isocalendar().week * 2 * np.pi / 52 57 | hour = date.hour * 2 * np.pi / 24 58 | 59 | return (math.sin(week), math.cos(week)), (math.sin(hour), math.cos(hour)) 60 | 61 | 62 | def normalize_latlon(bounds): 63 | lon = bounds[0] + (bounds[2] - bounds[0]) / 2 64 | lat = bounds[1] + (bounds[3] - 
bounds[1]) / 2 65 | 66 | lat = lat * np.pi / 180 67 | lon = lon * np.pi / 180 68 | 69 | return (math.sin(lat), math.cos(lat)), (math.sin(lon), math.cos(lon)) 70 | 71 | 72 | def stack_chips(chips: list, cube_id: int, chip_bucket: str, platform: str): 73 | print(f"Writing cube {cube_id}") 74 | 75 | pixels = np.stack([chip["pixels"] for chip in chips], dtype="float32") 76 | lon_norm = np.vstack([chip["lon_norm"] for chip in chips], dtype="float32") 77 | lat_norm = np.vstack([chip["lat_norm"] for chip in chips], dtype="float32") 78 | week_norm = np.vstack([chip["week_norm"] for chip in chips], dtype="float32") 79 | hour_norm = np.vstack([chip["hour_norm"] for chip in chips], dtype="float32") 80 | 81 | key = f"{VERSION}/{platform}/cube_{cube_id}.npz" 82 | 83 | client = boto3.client("s3") 84 | with BytesIO() as bytes: 85 | np.savez_compressed( 86 | file=bytes, 87 | pixels=pixels, 88 | lon_norm=lon_norm, 89 | lat_norm=lat_norm, 90 | week_norm=week_norm, 91 | hour_norm=hour_norm, 92 | ) 93 | bytes.seek(0) 94 | client.upload_fileobj(Fileobj=bytes, Bucket=chip_bucket, Key=key) 95 | 96 | 97 | def get_chip( 98 | data_bucket: str, 99 | row: int, 100 | platform: str, 101 | item_id: str, 102 | date: Union[datetime.date, datetime.datetime], 103 | chip_index_x: int, 104 | chip_index_y: int, 105 | ): 106 | print( 107 | "Getting chip", 108 | data_bucket, 109 | row, 110 | platform, 111 | item_id, 112 | date, 113 | chip_index_x, 114 | chip_index_y, 115 | ) 116 | 117 | indexer = load_indexer_s3( 118 | bucket=data_bucket, 119 | platform=platform, 120 | item_id=item_id, 121 | ) 122 | chipper = Chipper(indexer) 123 | 124 | chip = chipper.chip(chip_index_x, chip_index_y) 125 | 126 | if platform == "naip": 127 | pixels = chip["image"] 128 | bands = NAIP_BANDS 129 | elif platform == "linz": 130 | pixels = chip["asset"] 131 | bands = LINZ_BANDS 132 | elif platform == "sentinel-2-l2a": 133 | pixels = np.vstack([chip[band] for band in S2_BANDS]) 134 | bands = S2_BANDS 135 | elif platform in 
["landsat-c2l2-sr", "landsat-c2l1"]: 136 | pixels = np.vstack([chip[band] for band in LS_BANDS]) 137 | bands = LS_BANDS 138 | elif platform == "sentinel-1-rtc": 139 | if any(band not in chip for band in S1_BANDS): 140 | return 141 | pixels = np.vstack([chip[band] for band in S1_BANDS]) 142 | bands = S1_BANDS 143 | elif platform == "modis": 144 | pixels = np.vstack([chip[band] for band in MODIS_BANDS]) 145 | bands = MODIS_BANDS 146 | 147 | if len(pixels) != len(bands): 148 | raise ValueError( 149 | f"Pixels shape {pixels.shape} is not equal to nr of bands {bands} for item {item_id}" 150 | ) 151 | 152 | if isinstance(date, datetime.date): 153 | # Assume noon for dates without timestamp 154 | date = datetime.datetime(date.year, date.month, date.day, 12) 155 | week_norm, hour_norm = normalize_timestamp(date) 156 | 157 | bounds = chipper.indexer.get_chip_bbox(chip_index_x, chip_index_y).bounds 158 | lon_norm, lat_norm = normalize_latlon(bounds) 159 | 160 | return { 161 | "pixels": pixels, 162 | "lon_norm": lon_norm, 163 | "lat_norm": lat_norm, 164 | "week_norm": week_norm, 165 | "hour_norm": hour_norm, 166 | } 167 | 168 | 169 | def process() -> None: 170 | # GDAL read optimization is recommended 171 | # os.environ["GDAL_DISABLE_READDIR_ON_OPEN"] = "YES" 172 | # os.environ["CPL_VSIL_CURL_ALLOWED_EXTENSIONS"] = ".tif,.png,.jp2,.tiff" 173 | 174 | if "AWS_BATCH_JOB_ARRAY_INDEX" not in os.environ: 175 | raise ValueError("AWS_BATCH_JOB_ARRAY_INDEX env var not set") 176 | if "STACCHIP_DATA_BUCKET" not in os.environ: 177 | raise ValueError("STACCHIP_DATA_BUCKET env var not set") 178 | if "STACCHIP_INDEXPATH" not in os.environ: 179 | raise ValueError("STACCHIP_INDEXPATH env var not set") 180 | if "STACCHIP_CHIP_BUCKET" not in os.environ: 181 | raise ValueError("STACCHIP_TARGETPATH env var not set") 182 | 183 | index = int(os.environ["AWS_BATCH_JOB_ARRAY_INDEX"]) 184 | data_bucket = os.environ["STACCHIP_DATA_BUCKET"] 185 | indexpath = os.environ["STACCHIP_INDEXPATH"] 186 | 
chip_bucket = os.environ["STACCHIP_CHIP_BUCKET"] 187 | platform = os.environ.get("STACCHIP_PLATFORM", "") 188 | cubes_per_job = int(os.environ.get("STACCHIP_CUBES_PER_JOB", 10)) 189 | pool_size = int(os.environ.get("STACCHIP_POOL_SIZE", 10)) 190 | chip_max_nodata = float(os.environ.get("STACCHIP_MAX_NODATA", 0.05)) 191 | 192 | # Open table 193 | table = da.dataset(indexpath, format="parquet").to_table( 194 | columns=[ 195 | "chipid", 196 | "platform", 197 | "item_id", 198 | "date", 199 | "chip_index_x", 200 | "chip_index_y", 201 | "nodata_percentage", 202 | ] 203 | ) 204 | if platform: 205 | table = table.filter(pa.compute.field("platform") == platform) 206 | 207 | initial_count = len(table) 208 | if chip_max_nodata: 209 | table = table.filter(pa.compute.field("nodata_percentage") <= chip_max_nodata) 210 | print( 211 | f"Dropped {initial_count - len(table)} chips due to nodata filter, keeping {len(table)}" 212 | ) 213 | 214 | np.random.seed(42) 215 | random_rows = np.random.randint(0, len(table), len(table)) 216 | 217 | for cube_id in range(index * cubes_per_job, (index + 1) * cubes_per_job): 218 | random_rows_cube = random_rows[cube_id * CUBESIZE : (cube_id + 1) * CUBESIZE] 219 | if len(random_rows_cube) != CUBESIZE: 220 | print("Finishing because of incomplete cubes") 221 | return 222 | 223 | # Extract chips data for this job 224 | all_chips = [] 225 | for row in random_rows_cube: 226 | all_chips.append( 227 | ( 228 | data_bucket, 229 | row, 230 | table.column("platform")[row].as_py(), 231 | table.column("item_id")[row].as_py(), 232 | table.column("date")[row].as_py(), 233 | table.column("chip_index_x")[row].as_py(), 234 | table.column("chip_index_y")[row].as_py(), 235 | ) 236 | ) 237 | 238 | with Pool(pool_size) as pl: 239 | data = pl.starmap( 240 | get_chip, 241 | all_chips, 242 | ) 243 | 244 | if None in data: 245 | print(f"Not all cubes are complete, skipping stacking for cube {cube_id}") 246 | continue 247 | 248 | stack_chips(data, cube_id=cube_id, 
chip_bucket=chip_bucket, platform=platform) 249 | -------------------------------------------------------------------------------- /stacchip/processors/sentinel_1_processor.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import random 4 | from pathlib import Path 5 | from urllib.parse import urlparse 6 | 7 | import boto3 8 | import geopandas as gp 9 | import planetary_computer as pc 10 | import pyarrow as pa 11 | import pystac_client 12 | import rasterio 13 | from geoarrow.pyarrow import io 14 | from rasterio.io import MemoryFile 15 | 16 | from stacchip.indexer import NoDataMaskChipIndexer 17 | 18 | STAC_API = "https://planetarycomputer.microsoft.com/api/stac/v1" 19 | S1_ASSETS = [ 20 | "vv", 21 | "vh", 22 | ] 23 | PLATFORM_NAME = "sentinel-1-rtc" 24 | quartals = [ 25 | "{year}-01-01/{year}-03-31", 26 | "{year}-04-01/{year}-06-30", 27 | "{year}-07-01/{year}-09-30", 28 | "{year}-10-01/{year}-12-31", 29 | ] 30 | 31 | 32 | def process_mgrs_tile(index: int, mgrs_source: str, bucket: str) -> None: 33 | # Prepare resources for the job 34 | s3 = boto3.resource("s3") 35 | data = gp.read_file(mgrs_source) 36 | row = data.iloc[index] 37 | catalog = pystac_client.Client.open(STAC_API, modifier=pc.sign_inplace) 38 | print("MGRS", row["name"]) 39 | random.seed(index) 40 | for year in random.sample(range(2018, 2024), 1): 41 | print(f"Year {year}") 42 | for quartal in random.sample(quartals, 1): 43 | print(f"Quartal {quartal.format(year=year)}") 44 | items = catalog.search( 45 | max_items=1, 46 | filter_lang="cql2-json", 47 | filter={ 48 | "op": "and", 49 | "args": [ 50 | # { 51 | # "op": "s_intersects", 52 | # "args": [ 53 | # {"property": "geometry"}, 54 | # row.geometry.__geo_interface__, 55 | # ], 56 | # }, 57 | { 58 | "op": "anyinteracts", 59 | "args": [ 60 | {"property": "datetime"}, 61 | quartal.format(year=year), 62 | ], 63 | }, 64 | { 65 | "op": "=", 66 | "args": [{"property": "collection"}, 
"sentinel-1-rtc"], 67 | }, 68 | ], 69 | }, 70 | ) 71 | item = items.item_collection()[0] 72 | 73 | nodata_mask = None 74 | for key in list(item.assets.keys()): 75 | if key not in S1_ASSETS: 76 | del item.assets[key] 77 | else: 78 | url = item.assets[key].href 79 | with rasterio.open(url) as rst: 80 | data = rst.read() 81 | meta = rst.meta.copy() 82 | if nodata_mask is None: 83 | nodata_mask = data[0] == rst.nodata 84 | 85 | with MemoryFile() as memfile: 86 | with memfile.open(**meta, compress="deflate") as dst: 87 | dst.write(data) 88 | 89 | memfile.seek(0) 90 | 91 | s3_bucket = s3.Bucket(name=bucket) 92 | new_key = ( 93 | f"{PLATFORM_NAME}/{item.id}/{Path(urlparse(url).path).name}" 94 | ) 95 | print(f"Copying {urlparse(url).path}") 96 | s3_bucket.put_object( 97 | Key=new_key, 98 | Body=memfile.read(), 99 | ) 100 | 101 | item.assets[key].href = f"s3://{bucket}/{new_key}" 102 | 103 | # Convert Dictionary to JSON String 104 | data_string = json.dumps(item.to_dict()) 105 | 106 | # Upload JSON String to an S3 Object 107 | s3_bucket = s3.Bucket(name=bucket) 108 | s3_bucket.put_object( 109 | Key=f"{PLATFORM_NAME}/{item.id}/stac_item.json", 110 | Body=data_string, 111 | ) 112 | indexer = NoDataMaskChipIndexer(item, nodata_mask=nodata_mask) 113 | index = indexer.create_index() 114 | 115 | writer = pa.BufferOutputStream() 116 | io.write_geoparquet_table(index, writer) 117 | body = bytes(writer.getvalue()) 118 | # Centralize the index files to make combining them easier later on 119 | s3_bucket.put_object( 120 | Body=body, 121 | Key=f"index/{PLATFORM_NAME}/{item.id}/index_{item.id}.parquet", 122 | ) 123 | 124 | 125 | def process() -> None: 126 | 127 | if "AWS_BATCH_JOB_ARRAY_INDEX" not in os.environ: 128 | raise ValueError("AWS_BATCH_JOB_ARRAY_INDEX env var not set") 129 | if "STACCHIP_MGRS_SOURCE" not in os.environ: 130 | raise ValueError("STACCHIP_MGRS_SOURCE env var not set") 131 | if "STACCHIP_BUCKET" not in os.environ: 132 | raise ValueError("STACCHIP_BUCKET env var not 
set") 133 | 134 | index = int(os.environ["AWS_BATCH_JOB_ARRAY_INDEX"]) 135 | mgrs_source = os.environ["STACCHIP_MGRS_SOURCE"] 136 | bucket = os.environ["STACCHIP_BUCKET"] 137 | 138 | process_mgrs_tile(index, mgrs_source, bucket) 139 | -------------------------------------------------------------------------------- /stacchip/processors/sentinel_2_processor.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import random 4 | from pathlib import Path 5 | from urllib.parse import urlparse 6 | 7 | import boto3 8 | import geopandas as gp 9 | import pyarrow as pa 10 | import pystac_client 11 | from geoarrow.pyarrow import io 12 | 13 | from stacchip.indexer import Sentinel2Indexer 14 | 15 | STAC_API = "https://earth-search.aws.element84.com/v1" 16 | S2_ASSETS = [ 17 | "blue", 18 | "green", 19 | "nir", 20 | "nir08", 21 | "red", 22 | "rededge1", 23 | "rededge2", 24 | "rededge3", 25 | "scl", 26 | "swir16", 27 | "swir22", 28 | ] 29 | ABSOLUTE_CLOUD_COVER_FILTER = 0.75 30 | PLATFORM_NAME = "sentinel-2-l2a" 31 | SCENE_NODATA_LIMIT = 20 32 | quartals = [ 33 | "{year}-01-01/{year}-03-31", 34 | "{year}-04-01/{year}-06-30", 35 | "{year}-07-01/{year}-09-30", 36 | "{year}-10-01/{year}-12-31", 37 | ] 38 | 39 | 40 | def process_mgrs_tile(index: int, mgrs_source: str, bucket: str) -> None: 41 | # Prepare resources for the job 42 | catalog = pystac_client.Client.open(STAC_API) 43 | s3 = boto3.resource("s3") 44 | data = gp.read_file(mgrs_source) 45 | row = data.iloc[index] 46 | 47 | print("MGRS", row["name"]) 48 | random.seed(index) 49 | for year in random.sample(range(2018, 2024), 2): 50 | print(f"Year {year}") 51 | for quartal in quartals: 52 | print(f"Quartal {quartal.format(year=year)}") 53 | items = catalog.search( 54 | collections=["sentinel-2-l2a"], 55 | datetime=quartal.format(year=year), 56 | max_items=1, 57 | intersects=row.geometry, 58 | sortby="properties.eo:cloud_cover", 59 | query={ 60 | "grid:code": { 61 | 
"eq": f"MGRS-{row['name']}", 62 | }, 63 | "s2:nodata_pixel_percentage": {"lte": SCENE_NODATA_LIMIT}, 64 | }, 65 | ) 66 | item = items.item_collection()[0] 67 | 68 | if item.properties["eo:cloud_cover"] > ABSOLUTE_CLOUD_COVER_FILTER: 69 | continue 70 | 71 | print(f"Cloud cover is {item.properties['eo:cloud_cover']}") 72 | 73 | for key in list(item.assets.keys()): 74 | if key not in S2_ASSETS: 75 | del item.assets[key] 76 | else: 77 | url = urlparse(item.assets[key].href) 78 | copy_source = { 79 | "Bucket": "sentinel-cogs", 80 | "Key": url.path.lstrip("/"), 81 | } 82 | print(f"Copying {copy_source}") 83 | new_key = ( 84 | f"{PLATFORM_NAME}/{item.id}/{Path(item.assets[key].href).name}" 85 | ) 86 | s3.meta.client.copy(copy_source, bucket, new_key) 87 | item.assets[key].href = f"s3://{bucket}/{new_key}" 88 | 89 | # Convert Dictionary to JSON String 90 | data_string = json.dumps(item.to_dict()) 91 | 92 | # Upload JSON String to an S3 Object 93 | s3_bucket = s3.Bucket(name=bucket) 94 | s3_bucket.put_object( 95 | Key=f"{PLATFORM_NAME}/{item.id}/stac_item.json", 96 | Body=data_string, 97 | ) 98 | 99 | indexer = Sentinel2Indexer(item) 100 | index = indexer.create_index() 101 | 102 | writer = pa.BufferOutputStream() 103 | io.write_geoparquet_table(index, writer) 104 | body = bytes(writer.getvalue()) 105 | # Centralize the index files to make combining them easier later on 106 | s3_bucket.put_object( 107 | Body=body, 108 | Key=f"index/{PLATFORM_NAME}/{item.id}/index_{item.id}.parquet", 109 | ) 110 | 111 | 112 | def process() -> None: 113 | 114 | if "AWS_BATCH_JOB_ARRAY_INDEX" not in os.environ: 115 | raise ValueError("AWS_BATCH_JOB_ARRAY_INDEX env var not set") 116 | if "STACCHIP_MGRS_SOURCE" not in os.environ: 117 | raise ValueError("STACCHIP_MGRS_SOURCE env var not set") 118 | if "STACCHIP_BUCKET" not in os.environ: 119 | raise ValueError("STACCHIP_BUCKET env var not set") 120 | 121 | index = int(os.environ["AWS_BATCH_JOB_ARRAY_INDEX"]) 122 | mgrs_source = 
def get_stats_keys(key):
    """Compute per-band pixel statistics for a single data cube on S3.

    Downloads the npz cube stored at ``key`` in the ``clay-v1-data-cubes``
    bucket, masks out platform-specific nodata pixels, and reduces the cube
    to per-band aggregates that can be combined across many cubes.

    Args:
        key: S3 object key of an npz file containing a "pixels" array.
            The array is swapped so the band axis comes first before
            reducing; assumes layout (chip, band, y, x) — TODO confirm.

    Returns:
        Tuple of (pixel_count, pixel_sum, pixel_sqr): masked per-band
        counts, sums, and sums of squares over all remaining axes.
    """
    print(f"Processing {key}")
    # Platform-specific nodata values. This must be a single if/elif chain:
    # with separate if-statements the else branch resets the Sentinel-1
    # value back to 0, masking the wrong pixels.
    if "sentinel-1-rtc" in key:
        nodata = -32768
    elif "modis" in key:
        nodata = -28672
    else:
        nodata = 0

    s3_session = boto3.resource("s3")
    obj = s3_session.Object("clay-v1-data-cubes", key)
    body = obj.get()["Body"].read()
    with io.BytesIO(body) as f:
        f.seek(0)
        data = np.load(f)["pixels"]

    # Band axis first so reductions over the remaining axes are per band.
    data = data.astype("float64").swapaxes(0, 1)

    data = np.ma.array(data, mask=data == nodata)

    pixel_count = np.ma.count(data, axis=(1, 2, 3))
    pixel_sum = np.ma.sum(data, axis=(1, 2, 3))
    pixel_sqr = np.ma.sum(np.ma.power(data, 2), axis=(1, 2, 3))

    return pixel_count, pixel_sum, pixel_sqr
def process():
    """Aggregate per-band mean and standard deviation across data cubes.

    Lists npz cubes for the platform named by the STACCHIP_PLATFORM env var,
    computes per-band pixel statistics for up to STACCHIP_MAX_CUBES cubes in
    a worker pool of STACCHIP_POOL_SIZE processes, and prints the combined
    per-band mean and standard deviation.

    Raises:
        ValueError: If STACCHIP_PLATFORM is unset or not a known platform.
    """
    if "STACCHIP_PLATFORM" not in os.environ:
        raise ValueError("STACCHIP_PLATFORM env var not set")
    pool_size = int(os.environ.get("STACCHIP_POOL_SIZE", 4))
    max_cubes = int(os.environ.get("STACCHIP_MAX_CUBES", 4))

    platform = os.environ.get("STACCHIP_PLATFORM")
    # Map each platform to its band list; both landsat collections share one.
    bands_by_platform = {
        "naip": NAIP_BANDS,
        "linz": LINZ_BANDS,
        "sentinel-2-l2a": S2_BANDS,
        "landsat-c2l2-sr": LS_BANDS,
        "landsat-c2l1": LS_BANDS,
        "sentinel-1-rtc": S1_BANDS,
        "modis": MODIS_BANDS,
    }
    if platform not in bands_by_platform:
        raise ValueError(f"Platform {platform} not found")
    bands = bands_by_platform[platform]

    client = boto3.client("s3")
    paginator = client.get_paginator("list_objects_v2")
    page_iterator = paginator.paginate(
        Bucket="clay-v1-data-cubes", Prefix=f"mode_v1_chipper_v2/{platform}"
    )

    # Collect at most max_cubes object keys from the paginated listing.
    all_keys = []
    for page in page_iterator:
        for entry in page["Contents"]:
            all_keys.append(entry["Key"])
            if len(all_keys) == max_cubes:
                break
        if len(all_keys) == max_cubes:
            break

    with Pool(pool_size) as workers:
        results = workers.map(get_stats_keys, all_keys)

    # Combine the per-cube reductions into global per-band accumulators.
    band_count = len(bands)
    pixel_count = np.zeros(band_count)
    pixel_sum = np.zeros(band_count)
    pixel_sqr = np.zeros(band_count)
    for count, total, sq_total in results:
        pixel_count = np.add(pixel_count, count)
        pixel_sum = np.add(pixel_sum, total)
        pixel_sqr = np.add(pixel_sqr, sq_total)

    # Mean/std from sums and sums of squares:
    # https://stackoverflow.com/questions/1174984/how-to-efficiently-calculate-a-running-standard-deviation
    mean = pixel_sum / pixel_count
    stdev = np.sqrt((pixel_sqr / pixel_count) - (mean * mean))

    print("-- Mean by band")
    for band, val in zip(bands, mean):
        print(f"{band}: {val}")

    print("-- Std by band")
    for band, val in zip(bands, stdev):
        print(f"{band}: {val}")
def load_indexer_local(mountpath: Path, platform: str, item_id: str) -> ChipIndexer:
    """
    Load stacchip index table from local file
    """
    item_path = mountpath / Path(f"{platform}/{item_id}/stac_item.json")
    return ChipIndexer(Item.from_file(item_path))
-66.44760162836175 55 | ], 56 | [ 57 | 138.58173509190902, 58 | -65.80312306904463 59 | ], 60 | [ 61 | 136.76644043427444, 62 | -67.38978802075671 63 | ] 64 | ] 65 | ] 66 | }, 67 | "links": [ 68 | { 69 | "rel": "self", 70 | "href": "/home/tam/Desktop/clay-v1-data/items/landsat-c2l2-sr-LC09_L2SR_086107_20240311_20240312_02_T2_SR.json", 71 | "type": "application/json" 72 | }, 73 | { 74 | "rel": "parent", 75 | "href": "https://landsatlook.usgs.gov/stac-server/collections/landsat-c2l2-sr" 76 | }, 77 | { 78 | "rel": "collection", 79 | "href": "https://landsatlook.usgs.gov/stac-server/collections/landsat-c2l2-sr" 80 | }, 81 | { 82 | "rel": "root", 83 | "href": "https://landsatlook.usgs.gov/stac-server/", 84 | "type": "application/json", 85 | "title": "STAC API" 86 | } 87 | ], 88 | "assets": { 89 | "thumbnail": { 90 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_thumb_small.jpeg", 91 | "type": "image/jpeg", 92 | "title": "Thumbnail image", 93 | "alternate": { 94 | "s3": { 95 | "storage:platform": "AWS", 96 | "storage:requester_pays": true, 97 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_thumb_small.jpeg" 98 | } 99 | }, 100 | "roles": [ 101 | "thumbnail" 102 | ] 103 | }, 104 | "reduced_resolution_browse": { 105 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_thumb_large.jpeg", 106 | "type": "image/jpeg", 107 | "title": "Reduced resolution browse image", 108 | "alternate": { 109 | "s3": { 110 | "storage:platform": "AWS", 111 | "storage:requester_pays": true, 112 | "href": 
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_thumb_large.jpeg" 113 | } 114 | }, 115 | "roles": [ 116 | "overview" 117 | ] 118 | }, 119 | "index": { 120 | "href": "https://landsatlook.usgs.gov/stac-browser/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2", 121 | "type": "text/html", 122 | "title": "HTML index page", 123 | "roles": [ 124 | "metadata" 125 | ] 126 | }, 127 | "MTL.json": { 128 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_MTL.json", 129 | "type": "application/json", 130 | "title": "Product Metadata File (json)", 131 | "description": "Collection 2 Level-2 Product Metadata File (json)", 132 | "alternate": { 133 | "s3": { 134 | "storage:platform": "AWS", 135 | "storage:requester_pays": true, 136 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_MTL.json" 137 | } 138 | }, 139 | "roles": [ 140 | "metadata" 141 | ] 142 | }, 143 | "coastal": { 144 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B1.TIF", 145 | "type": "image/vnd.stac.geotiff; cloud-optimized=true", 146 | "title": "Coastal/Aerosol Band (B1)", 147 | "description": "Collection 2 Level-2 Coastal/Aerosol Band (B1) Surface Reflectance", 148 | "eo:bands": [ 149 | { 150 | "name": "B1", 151 | "common_name": "coastal", 152 | "gsd": 30, 153 | "center_wavelength": 0.44 154 | } 155 | ], 156 | "alternate": { 157 | "s3": { 158 | "storage:platform": "AWS", 159 | "storage:requester_pays": true, 160 | "href": 
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B1.TIF" 161 | } 162 | }, 163 | "roles": [ 164 | "data" 165 | ] 166 | }, 167 | "blue": { 168 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B2.TIF", 169 | "type": "image/vnd.stac.geotiff; cloud-optimized=true", 170 | "title": "Blue Band (B2)", 171 | "description": "Collection 2 Level-2 Blue Band (B2) Surface Reflectance", 172 | "eo:bands": [ 173 | { 174 | "name": "B2", 175 | "common_name": "blue", 176 | "gsd": 30, 177 | "center_wavelength": 0.48 178 | } 179 | ], 180 | "alternate": { 181 | "s3": { 182 | "storage:platform": "AWS", 183 | "storage:requester_pays": true, 184 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B2.TIF" 185 | } 186 | }, 187 | "roles": [ 188 | "data" 189 | ] 190 | }, 191 | "green": { 192 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B3.TIF", 193 | "type": "image/vnd.stac.geotiff; cloud-optimized=true", 194 | "title": "Green Band (B3)", 195 | "description": "Collection 2 Level-2 Green Band (B3) Surface Reflectance", 196 | "eo:bands": [ 197 | { 198 | "name": "B3", 199 | "common_name": "green", 200 | "gsd": 30, 201 | "center_wavelength": 0.56 202 | } 203 | ], 204 | "alternate": { 205 | "s3": { 206 | "storage:platform": "AWS", 207 | "storage:requester_pays": true, 208 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B3.TIF" 209 | } 210 | }, 211 | "roles": [ 212 | "data" 213 | ] 214 | }, 215 | "red": { 216 | "href": 
"s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B4.TIF", 217 | "type": "image/vnd.stac.geotiff; cloud-optimized=true", 218 | "title": "Red Band (B4)", 219 | "description": "Collection 2 Level-2 Red Band (B4) Surface Reflectance", 220 | "eo:bands": [ 221 | { 222 | "name": "B4", 223 | "common_name": "red", 224 | "gsd": 30, 225 | "center_wavelength": 0.65 226 | } 227 | ], 228 | "alternate": { 229 | "s3": { 230 | "storage:platform": "AWS", 231 | "storage:requester_pays": true, 232 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B4.TIF" 233 | } 234 | }, 235 | "roles": [ 236 | "data" 237 | ] 238 | }, 239 | "nir08": { 240 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B5.TIF", 241 | "type": "image/vnd.stac.geotiff; cloud-optimized=true", 242 | "title": "Near Infrared Band 0.8 (B5)", 243 | "description": "Collection 2 Level-2 Near Infrared Band 0.8 (B5) Surface Reflectance", 244 | "eo:bands": [ 245 | { 246 | "name": "B5", 247 | "common_name": "nir08", 248 | "gsd": 30, 249 | "center_wavelength": 0.86 250 | } 251 | ], 252 | "alternate": { 253 | "s3": { 254 | "storage:platform": "AWS", 255 | "storage:requester_pays": true, 256 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B5.TIF" 257 | } 258 | }, 259 | "roles": [ 260 | "data", 261 | "reflectance" 262 | ] 263 | }, 264 | "swir16": { 265 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B6.TIF", 266 | "type": "image/vnd.stac.geotiff; cloud-optimized=true", 
267 | "title": "Short-wave Infrared Band 1.6 (B6)", 268 | "description": "Collection 2 Level-2 Short-wave Infrared Band 1.6 (B6) Surface Reflectance", 269 | "eo:bands": [ 270 | { 271 | "name": "B6", 272 | "common_name": "swir16", 273 | "gsd": 30, 274 | "center_wavelength": 1.6 275 | } 276 | ], 277 | "alternate": { 278 | "s3": { 279 | "storage:platform": "AWS", 280 | "storage:requester_pays": true, 281 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B6.TIF" 282 | } 283 | }, 284 | "roles": [ 285 | "data", 286 | "reflectance" 287 | ] 288 | }, 289 | "swir22": { 290 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B7.TIF", 291 | "type": "image/vnd.stac.geotiff; cloud-optimized=true", 292 | "title": "Short-wave Infrared Band 2.2 (B7)", 293 | "description": "Collection 2 Level-2 Short-wave Infrared Band 2.2 (B7) Surface Reflectance", 294 | "eo:bands": [ 295 | { 296 | "name": "B7", 297 | "common_name": "swir22", 298 | "gsd": 30, 299 | "center_wavelength": 2.2 300 | } 301 | ], 302 | "alternate": { 303 | "s3": { 304 | "storage:platform": "AWS", 305 | "storage:requester_pays": true, 306 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_B7.TIF" 307 | } 308 | }, 309 | "roles": [ 310 | "data", 311 | "reflectance" 312 | ] 313 | }, 314 | "qa_aerosol": { 315 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_QA_AEROSOL.TIF", 316 | "type": "image/vnd.stac.geotiff; cloud-optimized=true", 317 | "title": "Aerosol Quality Analysis Band", 318 | "description": "Collection 2 Level-2 Aerosol Quality Analysis Band 
Surface Reflectance", 319 | "classification:bitfields": [ 320 | { 321 | "name": "fill", 322 | "description": "Corresponding pixels in L1 image bands are fill", 323 | "offset": 0, 324 | "length": 1, 325 | "classes": [ 326 | { 327 | "name": "not_fill", 328 | "description": "L1 image band pixels are not fill", 329 | "value": 0 330 | }, 331 | { 332 | "name": "fill", 333 | "description": "L1 image band pixels are fill", 334 | "value": 1 335 | } 336 | ] 337 | }, 338 | { 339 | "name": "retrieval", 340 | "description": "Valid aerosol retrieval", 341 | "offset": 1, 342 | "length": 1, 343 | "classes": [ 344 | { 345 | "name": "not_valid", 346 | "description": "Aerosol retrieval is not valid", 347 | "value": 0 348 | }, 349 | { 350 | "name": "valid", 351 | "description": "Aerosol retrieval is valid", 352 | "value": 1 353 | } 354 | ] 355 | }, 356 | { 357 | "name": "water", 358 | "description": "Water mask", 359 | "offset": 2, 360 | "length": 1, 361 | "classes": [ 362 | { 363 | "name": "not_water", 364 | "description": "Not water", 365 | "value": 0 366 | }, 367 | { 368 | "name": "water", 369 | "description": "Water", 370 | "value": 1 371 | } 372 | ] 373 | }, 374 | { 375 | "name": "unused", 376 | "description": "Unused bit", 377 | "offset": 3, 378 | "length": 1, 379 | "classes": [ 380 | { 381 | "name": "unused", 382 | "description": "Unused bit", 383 | "value": 0 384 | } 385 | ] 386 | }, 387 | { 388 | "name": "unused", 389 | "description": "Unused bit", 390 | "offset": 4, 391 | "length": 1, 392 | "classes": [ 393 | { 394 | "name": "unused", 395 | "description": "Unused bit", 396 | "value": 0 397 | } 398 | ] 399 | }, 400 | { 401 | "name": "interpolated", 402 | "description": "Aerosol is interpolated", 403 | "offset": 5, 404 | "length": 1, 405 | "classes": [ 406 | { 407 | "name": "not_interpolated", 408 | "description": "Aerosol is not interpolated", 409 | "value": 0 410 | }, 411 | { 412 | "name": "interpolated", 413 | "description": "Aerosol is interpolated", 414 | "value": 1 415 | 
} 416 | ] 417 | }, 418 | { 419 | "name": "level", 420 | "description": "Aerosol level", 421 | "offset": 6, 422 | "length": 2, 423 | "classes": [ 424 | { 425 | "name": "climatology", 426 | "description": "No aerosol correction applied", 427 | "value": 0 428 | }, 429 | { 430 | "name": "low", 431 | "description": "Low aerosol level", 432 | "value": 1 433 | }, 434 | { 435 | "name": "medium", 436 | "description": "Medium aerosol level", 437 | "value": 2 438 | }, 439 | { 440 | "name": "high", 441 | "description": "High aerosol level", 442 | "value": 3 443 | } 444 | ] 445 | } 446 | ], 447 | "alternate": { 448 | "s3": { 449 | "storage:platform": "AWS", 450 | "storage:requester_pays": true, 451 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_SR_QA_AEROSOL.TIF" 452 | } 453 | }, 454 | "roles": [ 455 | "metadata", 456 | "data-mask", 457 | "water-mask" 458 | ] 459 | }, 460 | "qa_pixel": { 461 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_QA_PIXEL.TIF", 462 | "type": "image/vnd.stac.geotiff; cloud-optimized=true", 463 | "title": "Pixel Quality Assessment Band", 464 | "description": "Collection 2 Level-2 Pixel Quality Assessment Band Surface Reflectance", 465 | "classification:bitfields": [ 466 | { 467 | "name": "fill", 468 | "description": "Corresponding pixels in L1 image bands are fill", 469 | "offset": 0, 470 | "length": 1, 471 | "classes": [ 472 | { 473 | "name": "not_fill", 474 | "description": "L1 image band pixels are not fill", 475 | "value": 0 476 | }, 477 | { 478 | "name": "fill", 479 | "description": "L1 image band pixels are fill", 480 | "value": 1 481 | } 482 | ] 483 | }, 484 | { 485 | "name": "dilated", 486 | "description": "Dilated cloud", 487 | "offset": 1, 488 | "length": 1, 489 | "classes": [ 490 | { 491 | "name": 
"not_dilated", 492 | "description": "Cloud is not dilated or no cloud", 493 | "value": 0 494 | }, 495 | { 496 | "name": "dilated", 497 | "description": "Cloud dilation", 498 | "value": 1 499 | } 500 | ] 501 | }, 502 | { 503 | "name": "cirrus", 504 | "description": "Cirrus mask", 505 | "offset": 2, 506 | "length": 1, 507 | "classes": [ 508 | { 509 | "name": "not_cirrus", 510 | "description": "No confidence level set or low confidence cirrus", 511 | "value": 0 512 | }, 513 | { 514 | "name": "cirrus", 515 | "description": "High confidence cirrus", 516 | "value": 1 517 | } 518 | ] 519 | }, 520 | { 521 | "name": "cloud", 522 | "description": "Cloud mask", 523 | "offset": 3, 524 | "length": 1, 525 | "classes": [ 526 | { 527 | "name": "not_cloud", 528 | "description": "Cloud confidence is not high", 529 | "value": 0 530 | }, 531 | { 532 | "name": "cloud", 533 | "description": "High confidence cloud", 534 | "value": 1 535 | } 536 | ] 537 | }, 538 | { 539 | "name": "shadow", 540 | "description": "Cloud shadow mask", 541 | "offset": 4, 542 | "length": 1, 543 | "classes": [ 544 | { 545 | "name": "not_shadow", 546 | "description": "Cloud shadow confidence is not high", 547 | "value": 0 548 | }, 549 | { 550 | "name": "shadow", 551 | "description": "High confidence cloud shadow", 552 | "value": 1 553 | } 554 | ] 555 | }, 556 | { 557 | "name": "snow", 558 | "description": "Snow/Ice mask", 559 | "offset": 5, 560 | "length": 1, 561 | "classes": [ 562 | { 563 | "name": "not_snow", 564 | "description": "Snow/Ice confidence is not high", 565 | "value": 0 566 | }, 567 | { 568 | "name": "snow", 569 | "description": "High confidence snow cover", 570 | "value": 1 571 | } 572 | ] 573 | }, 574 | { 575 | "name": "clear", 576 | "description": "Cloud or dilated cloud bits set", 577 | "offset": 6, 578 | "length": 1, 579 | "classes": [ 580 | { 581 | "name": "not_clear", 582 | "description": "Cloud or dilated cloud bits are set", 583 | "value": 0 584 | }, 585 | { 586 | "name": "clear", 587 | 
"description": "Cloud and dilated cloud bits are not set", 588 | "value": 1 589 | } 590 | ] 591 | }, 592 | { 593 | "name": "water", 594 | "description": "Water mask", 595 | "offset": 7, 596 | "length": 1, 597 | "classes": [ 598 | { 599 | "name": "not_water", 600 | "description": "Land or cloud", 601 | "value": 0 602 | }, 603 | { 604 | "name": "water", 605 | "description": "Water", 606 | "value": 1 607 | } 608 | ] 609 | }, 610 | { 611 | "name": "cloud_confidence", 612 | "description": "Cloud confidence levels", 613 | "offset": 8, 614 | "length": 2, 615 | "classes": [ 616 | { 617 | "name": "not_set", 618 | "description": "No confidence level set", 619 | "value": 0 620 | }, 621 | { 622 | "name": "low", 623 | "description": "Low confidence cloud", 624 | "value": 1 625 | }, 626 | { 627 | "name": "medium", 628 | "description": "Medium confidence cloud", 629 | "value": 2 630 | }, 631 | { 632 | "name": "high", 633 | "description": "High confidence cloud", 634 | "value": 3 635 | } 636 | ] 637 | }, 638 | { 639 | "name": "shadow_confidence", 640 | "description": "Cloud shadow confidence levels", 641 | "offset": 10, 642 | "length": 2, 643 | "classes": [ 644 | { 645 | "name": "not_set", 646 | "description": "No confidence level set", 647 | "value": 0 648 | }, 649 | { 650 | "name": "low", 651 | "description": "Low confidence cloud shadow", 652 | "value": 1 653 | }, 654 | { 655 | "name": "reserved", 656 | "description": "Reserved - value not used", 657 | "value": 2 658 | }, 659 | { 660 | "name": "high", 661 | "description": "High confidence cloud shadow", 662 | "value": 3 663 | } 664 | ] 665 | }, 666 | { 667 | "name": "snow_confidence", 668 | "description": "Snow/Ice confidence levels", 669 | "offset": 12, 670 | "length": 2, 671 | "classes": [ 672 | { 673 | "name": "not_set", 674 | "description": "No confidence level set", 675 | "value": 0 676 | }, 677 | { 678 | "name": "low", 679 | "description": "Low confidence snow/ice", 680 | "value": 1 681 | }, 682 | { 683 | "name": 
"reserved", 684 | "description": "Reserved - value not used", 685 | "value": 2 686 | }, 687 | { 688 | "name": "high", 689 | "description": "High confidence snow/ice", 690 | "value": 3 691 | } 692 | ] 693 | }, 694 | { 695 | "name": "cirrus_confidence", 696 | "description": "Cirrus confidence levels", 697 | "offset": 14, 698 | "length": 2, 699 | "classes": [ 700 | { 701 | "name": "not_set", 702 | "description": "No confidence level set", 703 | "value": 0 704 | }, 705 | { 706 | "name": "low", 707 | "description": "Low confidence cirrus", 708 | "value": 1 709 | }, 710 | { 711 | "name": "reserved", 712 | "description": "Reserved - value not used", 713 | "value": 2 714 | }, 715 | { 716 | "name": "high", 717 | "description": "High confidence cirrus", 718 | "value": 3 719 | } 720 | ] 721 | } 722 | ], 723 | "alternate": { 724 | "s3": { 725 | "storage:platform": "AWS", 726 | "storage:requester_pays": true, 727 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_QA_PIXEL.TIF" 728 | } 729 | }, 730 | "roles": [ 731 | "cloud", 732 | "cloud-shadow", 733 | "snow-ice", 734 | "water-mask" 735 | ] 736 | }, 737 | "qa_radsat": { 738 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_QA_RADSAT.TIF", 739 | "type": "image/vnd.stac.geotiff; cloud-optimized=true", 740 | "title": "Radiometric Saturation Quality Assessment Band", 741 | "description": "Collection 2 Level-2 Radiometric Saturation Quality Assessment Band Surface Reflectance", 742 | "classification:bitfields": [ 743 | { 744 | "name": "band1", 745 | "description": "Band 1 radiometric saturation", 746 | "offset": 0, 747 | "length": 1, 748 | "classes": [ 749 | { 750 | "name": "not_saturated", 751 | "description": "Band 1 is not saturated", 752 | "value": 0 753 | }, 754 | { 755 | "name": 
"saturated", 756 | "description": "Band 1 is saturated", 757 | "value": 1 758 | } 759 | ] 760 | }, 761 | { 762 | "name": "band2", 763 | "description": "Band 2 radiometric saturation", 764 | "offset": 1, 765 | "length": 1, 766 | "classes": [ 767 | { 768 | "name": "not_saturated", 769 | "description": "Band 2 is not saturated", 770 | "value": 0 771 | }, 772 | { 773 | "name": "saturated", 774 | "description": "Band 2 is saturated", 775 | "value": 1 776 | } 777 | ] 778 | }, 779 | { 780 | "name": "band3", 781 | "description": "Band 3 radiometric saturation", 782 | "offset": 2, 783 | "length": 1, 784 | "classes": [ 785 | { 786 | "name": "not_saturated", 787 | "description": "Band 3 is not saturated", 788 | "value": 0 789 | }, 790 | { 791 | "name": "saturated", 792 | "description": "Band 3 is saturated", 793 | "value": 1 794 | } 795 | ] 796 | }, 797 | { 798 | "name": "band4", 799 | "description": "Band 4 radiometric saturation", 800 | "offset": 3, 801 | "length": 1, 802 | "classes": [ 803 | { 804 | "name": "not_saturated", 805 | "description": "Band 4 is not saturated", 806 | "value": 0 807 | }, 808 | { 809 | "name": "saturated", 810 | "description": "Band 4 is saturated", 811 | "value": 1 812 | } 813 | ] 814 | }, 815 | { 816 | "name": "band5", 817 | "description": "Band 5 radiometric saturation", 818 | "offset": 4, 819 | "length": 1, 820 | "classes": [ 821 | { 822 | "name": "not_saturated", 823 | "description": "Band 5 is not saturated", 824 | "value": 0 825 | }, 826 | { 827 | "name": "saturated", 828 | "description": "Band 5 is saturated", 829 | "value": 1 830 | } 831 | ] 832 | }, 833 | { 834 | "name": "band6", 835 | "description": "Band 6 radiometric saturation", 836 | "offset": 5, 837 | "length": 1, 838 | "classes": [ 839 | { 840 | "name": "not_saturated", 841 | "description": "Band 6 is not saturated", 842 | "value": 0 843 | }, 844 | { 845 | "name": "saturated", 846 | "description": "Band 6 is saturated", 847 | "value": 1 848 | } 849 | ] 850 | }, 851 | { 852 | 
"name": "band7", 853 | "description": "Band 7 radiometric saturation", 854 | "offset": 6, 855 | "length": 1, 856 | "classes": [ 857 | { 858 | "name": "not_saturated", 859 | "description": "Band 7 is not saturated", 860 | "value": 0 861 | }, 862 | { 863 | "name": "saturated", 864 | "description": "Band 7 is saturated", 865 | "value": 1 866 | } 867 | ] 868 | }, 869 | { 870 | "name": "unused", 871 | "description": "Unused bit", 872 | "offset": 7, 873 | "length": 1, 874 | "classes": [ 875 | { 876 | "name": "unused", 877 | "description": "Unused bit", 878 | "value": 0 879 | } 880 | ] 881 | }, 882 | { 883 | "name": "band9", 884 | "description": "Band 9 radiometric saturation", 885 | "offset": 8, 886 | "length": 1, 887 | "classes": [ 888 | { 889 | "name": "not_saturated", 890 | "description": "Band 9 is not saturated", 891 | "value": 0 892 | }, 893 | { 894 | "name": "saturated", 895 | "description": "Band 9 is saturated", 896 | "value": 1 897 | } 898 | ] 899 | }, 900 | { 901 | "name": "unused", 902 | "description": "Unused bit", 903 | "offset": 9, 904 | "length": 1, 905 | "classes": [ 906 | { 907 | "name": "unused", 908 | "description": "Unused bit", 909 | "value": 0 910 | } 911 | ] 912 | }, 913 | { 914 | "name": "unused", 915 | "description": "Unused bit", 916 | "offset": 10, 917 | "length": 1, 918 | "classes": [ 919 | { 920 | "name": "unused", 921 | "description": "Unused bit", 922 | "value": 0 923 | } 924 | ] 925 | }, 926 | { 927 | "name": "occlusion", 928 | "description": "Terrain not visible from sensor due to intervening terrain", 929 | "offset": 11, 930 | "length": 1, 931 | "classes": [ 932 | { 933 | "name": "not_occluded", 934 | "description": "Terrain is not occluded", 935 | "value": 0 936 | }, 937 | { 938 | "name": "occluded", 939 | "description": "Terrain is occluded", 940 | "value": 1 941 | } 942 | ] 943 | }, 944 | { 945 | "name": "unused", 946 | "description": "Unused bit", 947 | "offset": 12, 948 | "length": 1, 949 | "classes": [ 950 | { 951 | "name": 
"unused", 952 | "description": "Unused bit", 953 | "value": 0 954 | } 955 | ] 956 | }, 957 | { 958 | "name": "unused", 959 | "description": "Unused bit", 960 | "offset": 13, 961 | "length": 1, 962 | "classes": [ 963 | { 964 | "name": "unused", 965 | "description": "Unused bit", 966 | "value": 0 967 | } 968 | ] 969 | }, 970 | { 971 | "name": "unused", 972 | "description": "Unused bit", 973 | "offset": 14, 974 | "length": 1, 975 | "classes": [ 976 | { 977 | "name": "unused", 978 | "description": "Unused bit", 979 | "value": 0 980 | } 981 | ] 982 | }, 983 | { 984 | "name": "unused", 985 | "description": "Unused bit", 986 | "offset": 15, 987 | "length": 1, 988 | "classes": [ 989 | { 990 | "name": "unused", 991 | "description": "Unused bit", 992 | "value": 0 993 | } 994 | ] 995 | } 996 | ], 997 | "alternate": { 998 | "s3": { 999 | "storage:platform": "AWS", 1000 | "storage:requester_pays": true, 1001 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_QA_RADSAT.TIF" 1002 | } 1003 | }, 1004 | "roles": [ 1005 | "saturation" 1006 | ] 1007 | }, 1008 | "ANG.txt": { 1009 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_ANG.txt", 1010 | "type": "text/plain", 1011 | "title": "Angle Coefficients File", 1012 | "description": "Collection 2 Level-2 Angle Coefficients File (ANG)", 1013 | "alternate": { 1014 | "s3": { 1015 | "storage:platform": "AWS", 1016 | "storage:requester_pays": true, 1017 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_ANG.txt" 1018 | } 1019 | }, 1020 | "roles": [ 1021 | "metadata" 1022 | ] 1023 | }, 1024 | "MTL.txt": { 1025 | "href": 
"https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_MTL.txt", 1026 | "type": "text/plain", 1027 | "title": "Product Metadata File", 1028 | "description": "Collection 2 Level-2 Product Metadata File (MTL)", 1029 | "alternate": { 1030 | "s3": { 1031 | "storage:platform": "AWS", 1032 | "storage:requester_pays": true, 1033 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_MTL.txt" 1034 | } 1035 | }, 1036 | "roles": [ 1037 | "metadata" 1038 | ] 1039 | }, 1040 | "MTL.xml": { 1041 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_MTL.xml", 1042 | "type": "application/xml", 1043 | "title": "Product Metadata File (xml)", 1044 | "description": "Collection 2 Level-2 Product Metadata File (xml)", 1045 | "alternate": { 1046 | "s3": { 1047 | "storage:platform": "AWS", 1048 | "storage:requester_pays": true, 1049 | "href": "s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/086/107/LC09_L2SR_086107_20240311_20240312_02_T2/LC09_L2SR_086107_20240311_20240312_02_T2_MTL.xml" 1050 | } 1051 | }, 1052 | "roles": [ 1053 | "metadata" 1054 | ] 1055 | } 1056 | }, 1057 | "bbox": [ 1058 | 136.76644043427444, 1059 | -68.07227732087095, 1060 | 142.5380790735029, 1061 | -65.80312306904463 1062 | ], 1063 | "stac_extensions": [ 1064 | "https://landsat.usgs.gov/stac/landsat-extension/v1.1.1/schema.json", 1065 | "https://stac-extensions.github.io/view/v1.0.0/schema.json", 1066 | "https://stac-extensions.github.io/projection/v1.1.0/schema.json", 1067 | "https://stac-extensions.github.io/eo/v1.1.0/schema.json", 1068 | "https://stac-extensions.github.io/alternate-assets/v1.1.0/schema.json", 1069 | 
"https://stac-extensions.github.io/storage/v1.0.0/schema.json", 1070 | "https://stac-extensions.github.io/classification/v1.1.0/schema.json" 1071 | ], 1072 | "collection": "landsat-c2l2-sr", 1073 | "description": "Landsat Collection 2 Level-2 Surface Reflectance Product" 1074 | } -------------------------------------------------------------------------------- /tests/data/naip_m_4207009_ne_19_060_20211024.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "Feature", 3 | "stac_version": "1.0.0", 4 | "id": "m_4207009_ne_19_060_20211024.tif", 5 | "properties": { 6 | "proj:epsg": 26919, 7 | "proj:geometry": { 8 | "type": "Polygon", 9 | "coordinates": [ 10 | [ 11 | [ 12 | 341338.2, 13 | 4741453.800000001 14 | ], 15 | [ 16 | 347160.60000000003, 17 | 4741453.800000001 18 | ], 19 | [ 20 | 347160.60000000003, 21 | 4749053.4 22 | ], 23 | [ 24 | 341338.2, 25 | 4749053.4 26 | ], 27 | [ 28 | 341338.2, 29 | 4741453.800000001 30 | ] 31 | ] 32 | ] 33 | }, 34 | "proj:bbox": [ 35 | 341338.2, 36 | 4741453.800000001, 37 | 347160.60000000003, 38 | 4749053.4 39 | ], 40 | "proj:shape": [ 41 | 12666, 42 | 9704 43 | ], 44 | "proj:transform": [ 45 | 0.6, 46 | 0, 47 | 341338.2, 48 | 0, 49 | -0.6, 50 | 4749053.4, 51 | 0, 52 | 0, 53 | 1 54 | ], 55 | "proj:projjson": { 56 | "$schema": "https://proj.org/schemas/v0.7/projjson.schema.json", 57 | "type": "ProjectedCRS", 58 | "name": "NAD83 / UTM zone 19N", 59 | "base_crs": { 60 | "name": "NAD83", 61 | "datum": { 62 | "type": "GeodeticReferenceFrame", 63 | "name": "North American Datum 1983", 64 | "ellipsoid": { 65 | "name": "GRS 1980", 66 | "semi_major_axis": 6378137, 67 | "inverse_flattening": 298.257222101 68 | } 69 | }, 70 | "coordinate_system": { 71 | "subtype": "ellipsoidal", 72 | "axis": [ 73 | { 74 | "name": "Geodetic latitude", 75 | "abbreviation": "Lat", 76 | "direction": "north", 77 | "unit": "degree" 78 | }, 79 | { 80 | "name": "Geodetic longitude", 81 | "abbreviation": "Lon", 82 | 
"direction": "east", 83 | "unit": "degree" 84 | } 85 | ] 86 | }, 87 | "id": { 88 | "authority": "EPSG", 89 | "code": 4269 90 | } 91 | }, 92 | "conversion": { 93 | "name": "UTM zone 19N", 94 | "method": { 95 | "name": "Transverse Mercator", 96 | "id": { 97 | "authority": "EPSG", 98 | "code": 9807 99 | } 100 | }, 101 | "parameters": [ 102 | { 103 | "name": "Latitude of natural origin", 104 | "value": 0, 105 | "unit": "degree", 106 | "id": { 107 | "authority": "EPSG", 108 | "code": 8801 109 | } 110 | }, 111 | { 112 | "name": "Longitude of natural origin", 113 | "value": -69, 114 | "unit": "degree", 115 | "id": { 116 | "authority": "EPSG", 117 | "code": 8802 118 | } 119 | }, 120 | { 121 | "name": "Scale factor at natural origin", 122 | "value": 0.9996, 123 | "unit": "unity", 124 | "id": { 125 | "authority": "EPSG", 126 | "code": 8805 127 | } 128 | }, 129 | { 130 | "name": "False easting", 131 | "value": 500000, 132 | "unit": "metre", 133 | "id": { 134 | "authority": "EPSG", 135 | "code": 8806 136 | } 137 | }, 138 | { 139 | "name": "False northing", 140 | "value": 0, 141 | "unit": "metre", 142 | "id": { 143 | "authority": "EPSG", 144 | "code": 8807 145 | } 146 | } 147 | ] 148 | }, 149 | "coordinate_system": { 150 | "subtype": "Cartesian", 151 | "axis": [ 152 | { 153 | "name": "Easting", 154 | "abbreviation": "", 155 | "direction": "east", 156 | "unit": "metre" 157 | }, 158 | { 159 | "name": "Northing", 160 | "abbreviation": "", 161 | "direction": "north", 162 | "unit": "metre" 163 | } 164 | ] 165 | }, 166 | "id": { 167 | "authority": "EPSG", 168 | "code": 26919 169 | } 170 | }, 171 | "proj:wkt2": "PROJCS[\"NAD83 / UTM zone 19N\",GEOGCS[\"NAD83\",DATUM[\"North_American_Datum_1983\",SPHEROID[\"GRS 
1980\",6378137,298.257222101,AUTHORITY[\"EPSG\",\"7019\"]],AUTHORITY[\"EPSG\",\"6269\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AUTHORITY[\"EPSG\",\"4269\"]],PROJECTION[\"Transverse_Mercator\"],PARAMETER[\"latitude_of_origin\",0],PARAMETER[\"central_meridian\",-69],PARAMETER[\"scale_factor\",0.9996],PARAMETER[\"false_easting\",500000],PARAMETER[\"false_northing\",0],UNIT[\"metre\",1,AUTHORITY[\"EPSG\",\"9001\"]],AXIS[\"Easting\",EAST],AXIS[\"Northing\",NORTH],AUTHORITY[\"EPSG\",\"26919\"]]", 172 | "datetime": "2021-10-24T12:33:05.496897Z" 173 | }, 174 | "geometry": { 175 | "type": "Polygon", 176 | "coordinates": [ 177 | [ 178 | [ 179 | -70.9405470386063, 180 | 42.80920310538916 181 | ], 182 | [ 183 | -70.86937257210027, 184 | 42.81038748290737 185 | ], 186 | [ 187 | -70.8714366864438, 188 | 42.87878792763812 189 | ], 190 | [ 191 | -70.94268962889282, 192 | 42.877600665218694 193 | ], 194 | [ 195 | -70.9405470386063, 196 | 42.80920310538916 197 | ] 198 | ] 199 | ] 200 | }, 201 | "links": [], 202 | "assets": { 203 | "asset": { 204 | "href": "s3://naip-source/ma/2021/60cm/rgbir/42070/m_4207009_ne_19_060_20211024.tif", 205 | "eo:bands": [ 206 | { 207 | "name": "b1", 208 | "description": "red" 209 | }, 210 | { 211 | "name": "b2", 212 | "description": "green" 213 | }, 214 | { 215 | "name": "b3", 216 | "description": "blue" 217 | }, 218 | { 219 | "name": "b4", 220 | "description": "undefined" 221 | } 222 | ], 223 | "roles": [] 224 | } 225 | }, 226 | "bbox": [ 227 | -70.94268962889282, 228 | 42.80920310538916, 229 | -70.86937257210027, 230 | 42.87878792763812 231 | ], 232 | "stac_extensions": [ 233 | "https://stac-extensions.github.io/projection/v1.1.0/schema.json", 234 | "https://stac-extensions.github.io/eo/v1.1.0/schema.json" 235 | ] 236 | } 237 | -------------------------------------------------------------------------------- /tests/data/sentinel-2-l2a-S2A_T20HNJ_20240311T140636_L2A.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "type": "Feature", 3 | "stac_version": "1.0.0", 4 | "id": "S2A_T20HNJ_20240311T140636_L2A", 5 | "properties": { 6 | "created": "2024-03-11T22:12:22.219Z", 7 | "platform": "sentinel-2a", 8 | "constellation": "sentinel-2", 9 | "instruments": [ 10 | "msi" 11 | ], 12 | "eo:cloud_cover": 18.431592, 13 | "proj:epsg": 32720, 14 | "proj:centroid": { 15 | "lat": -33.06046, 16 | "lon": -62.24989 17 | }, 18 | "mgrs:utm_zone": 20, 19 | "mgrs:latitude_band": "H", 20 | "mgrs:grid_square": "NJ", 21 | "grid:code": "MGRS-20HNJ", 22 | "view:azimuth": 103.15279043292124, 23 | "view:incidence_angle": 8.717368857696117, 24 | "view:sun_azimuth": 51.9541040600266, 25 | "view:sun_elevation": 47.9530815619878, 26 | "s2:tile_id": "S2A_OPER_MSI_L2A_TL_2APS_20240311T194050_A045538_T20HNJ_N05.10", 27 | "s2:degraded_msi_data_percentage": 0.0417, 28 | "s2:nodata_pixel_percentage": 28.38603, 29 | "s2:saturated_defective_pixel_percentage": 0, 30 | "s2:dark_features_percentage": 0.003961, 31 | "s2:cloud_shadow_percentage": 6.891545, 32 | "s2:vegetation_percentage": 57.82398, 33 | "s2:not_vegetated_percentage": 15.324496, 34 | "s2:water_percentage": 0.209678, 35 | "s2:unclassified_percentage": 1.314748, 36 | "s2:medium_proba_clouds_percentage": 8.782919, 37 | "s2:high_proba_clouds_percentage": 9.417892, 38 | "s2:thin_cirrus_percentage": 0.230781, 39 | "s2:snow_ice_percentage": 0, 40 | "s2:product_type": "S2MSI2A", 41 | "s2:processing_baseline": "05.10", 42 | "s2:product_uri": "S2A_MSIL2A_20240311T135701_N0510_R067_T20HNJ_20240311T194050.SAFE", 43 | "s2:generation_time": "2024-03-11T19:40:50.000000Z", 44 | "s2:datatake_id": "GS2A_20240311T135701_045538_N05.10", 45 | "s2:datatake_type": "INS-NOBS", 46 | "s2:datastrip_id": "S2A_OPER_MSI_L2A_DS_2APS_20240311T194050_S20240311T140636_N05.10", 47 | "s2:reflectance_conversion_factor": 1.01544682232552, 48 | "datetime": "2024-03-11T14:11:54.463000Z", 49 | 
"earthsearch:payload_id": "roda-sentinel-2-c1-l2a/workflow-sentinel-2-c1-l2a-to-stac/c7c43f015229283de230ed796369ff9e", 50 | "storage:platform": "AWS", 51 | "storage:region": "us-west-2", 52 | "storage:requester_pays": false, 53 | "processing:software": { 54 | "sentinel-2-c1-l2a-to-stac": "v2024.02.01" 55 | }, 56 | "updated": "2024-03-11T22:12:22.219Z" 57 | }, 58 | "geometry": { 59 | "type": "Polygon", 60 | "coordinates": [ 61 | [ 62 | [ 63 | -62.51540734988781, 64 | -32.53643059966684 65 | ], 66 | [ 67 | -62.817517027781356, 68 | -33.52764404692455 69 | ], 70 | [ 71 | -61.81785593725055, 72 | -33.52213683974327 73 | ], 74 | [ 75 | -61.831003262970874, 76 | -32.5319308776855 77 | ], 78 | [ 79 | -62.51540734988781, 80 | -32.53643059966684 81 | ] 82 | ] 83 | ] 84 | }, 85 | "links": [ 86 | { 87 | "rel": "self", 88 | "type": "application/geo+json", 89 | "href": "https://earth-search.aws.element84.com/v1/collections/sentinel-2-c1-l2a/items/S2A_T20HNJ_20240311T140636_L2A" 90 | }, 91 | { 92 | "rel": "canonical", 93 | "href": "s3://e84-earth-search-sentinel-data/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/S2A_T20HNJ_20240311T140636_L2A.json", 94 | "type": "application/json" 95 | }, 96 | { 97 | "rel": "via", 98 | "href": "s3://sentinel-s2-l2a/tiles/20/H/NJ/2024/3/11/0/metadata.xml", 99 | "type": "application/xml", 100 | "title": "Granule Metadata in Sinergize RODA Archive" 101 | }, 102 | { 103 | "rel": "parent", 104 | "type": "application/json", 105 | "href": "https://earth-search.aws.element84.com/v1/collections/sentinel-2-c1-l2a" 106 | }, 107 | { 108 | "rel": "collection", 109 | "type": "application/json", 110 | "href": "https://earth-search.aws.element84.com/v1/collections/sentinel-2-c1-l2a" 111 | }, 112 | { 113 | "rel": "root", 114 | "type": "application/json", 115 | "href": "https://earth-search.aws.element84.com/v1" 116 | }, 117 | { 118 | "rel": "thumbnail", 119 | "href": 
"https://earth-search.aws.element84.com/v1/collections/sentinel-2-c1-l2a/items/S2A_T20HNJ_20240311T140636_L2A/thumbnail" 120 | } 121 | ], 122 | "assets": { 123 | "red": { 124 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/B04.tif", 125 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 126 | "title": "Red - 10m", 127 | "eo:bands": [ 128 | { 129 | "name": "B04", 130 | "common_name": "red", 131 | "center_wavelength": 0.665, 132 | "full_width_half_max": 0.038 133 | } 134 | ], 135 | "gsd": 10, 136 | "proj:shape": [ 137 | 10980, 138 | 10980 139 | ], 140 | "proj:transform": [ 141 | 10, 142 | 0, 143 | 499980, 144 | 0, 145 | -10, 146 | 6400000 147 | ], 148 | "raster:bands": [ 149 | { 150 | "nodata": 0, 151 | "data_type": "uint16", 152 | "spatial_resolution": 10, 153 | "scale": 0.0001, 154 | "offset": -0.1 155 | } 156 | ], 157 | "file:checksum": "1220c24a7922eebdc1124781a9ceb98b2a72e7b950512f6e14d16fe3467549232855", 158 | "file:size": 150228550, 159 | "roles": [ 160 | "data", 161 | "reflectance" 162 | ] 163 | }, 164 | "green": { 165 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/B03.tif", 166 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 167 | "title": "Green - 10m", 168 | "eo:bands": [ 169 | { 170 | "name": "B03", 171 | "common_name": "green", 172 | "center_wavelength": 0.56, 173 | "full_width_half_max": 0.045 174 | } 175 | ], 176 | "gsd": 10, 177 | "proj:shape": [ 178 | 10980, 179 | 10980 180 | ], 181 | "proj:transform": [ 182 | 10, 183 | 0, 184 | 499980, 185 | 0, 186 | -10, 187 | 6400000 188 | ], 189 | "raster:bands": [ 190 | { 191 | "nodata": 0, 192 | "data_type": "uint16", 193 | "spatial_resolution": 10, 194 | "scale": 0.0001, 195 | "offset": -0.1 196 | } 197 | ], 198 | "file:checksum": 
"12207d2fce8ce354be7d0f67fee028c74febc3a8de80bfe7da835ff8219640730ed6", 199 | "file:size": 147422714, 200 | "roles": [ 201 | "data", 202 | "reflectance" 203 | ] 204 | }, 205 | "blue": { 206 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/B02.tif", 207 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 208 | "title": "Blue - 10m", 209 | "eo:bands": [ 210 | { 211 | "name": "B02", 212 | "common_name": "blue", 213 | "center_wavelength": 0.49, 214 | "full_width_half_max": 0.098 215 | } 216 | ], 217 | "gsd": 10, 218 | "proj:shape": [ 219 | 10980, 220 | 10980 221 | ], 222 | "proj:transform": [ 223 | 10, 224 | 0, 225 | 499980, 226 | 0, 227 | -10, 228 | 6400000 229 | ], 230 | "raster:bands": [ 231 | { 232 | "nodata": 0, 233 | "data_type": "uint16", 234 | "spatial_resolution": 10, 235 | "scale": 0.0001, 236 | "offset": -0.1 237 | } 238 | ], 239 | "file:checksum": "1220938da762ddbce00c61f48d9ce7b4bd519b02e0696047e51ec81b49e54ccb87e4", 240 | "file:size": 145995722, 241 | "roles": [ 242 | "data", 243 | "reflectance" 244 | ] 245 | }, 246 | "visual": { 247 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/TCI.tif", 248 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 249 | "title": "True color image", 250 | "eo:bands": [ 251 | { 252 | "name": "B04", 253 | "common_name": "red", 254 | "center_wavelength": 0.665, 255 | "full_width_half_max": 0.038 256 | }, 257 | { 258 | "name": "B03", 259 | "common_name": "green", 260 | "center_wavelength": 0.56, 261 | "full_width_half_max": 0.045 262 | }, 263 | { 264 | "name": "B02", 265 | "common_name": "blue", 266 | "center_wavelength": 0.49, 267 | "full_width_half_max": 0.098 268 | } 269 | ], 270 | "gsd": 10, 271 | "proj:shape": [ 272 | 10980, 273 | 10980 274 | ], 275 | "proj:transform": [ 276 | 10, 277 | 0, 278 | 499980, 279 
| 0, 280 | -10, 281 | 6400000 282 | ], 283 | "file:checksum": "1220187e3210dbc041d74529aed01a1bf838f6fe69404fca1fcd164056671892d250", 284 | "file:size": 180673941, 285 | "roles": [ 286 | "visual" 287 | ] 288 | }, 289 | "nir": { 290 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/B08.tif", 291 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 292 | "title": "NIR 1 - 10m", 293 | "eo:bands": [ 294 | { 295 | "name": "B08", 296 | "common_name": "nir", 297 | "center_wavelength": 0.842, 298 | "full_width_half_max": 0.145 299 | } 300 | ], 301 | "gsd": 10, 302 | "proj:shape": [ 303 | 10980, 304 | 10980 305 | ], 306 | "proj:transform": [ 307 | 10, 308 | 0, 309 | 499980, 310 | 0, 311 | -10, 312 | 6400000 313 | ], 314 | "raster:bands": [ 315 | { 316 | "nodata": 0, 317 | "data_type": "uint16", 318 | "spatial_resolution": 10, 319 | "scale": 0.0001, 320 | "offset": -0.1 321 | } 322 | ], 323 | "file:checksum": "1220bf7d49117a93653c94357f01ec082f2db8528e720c4f5b631b40593c22a731f3", 324 | "file:size": 154662896, 325 | "roles": [ 326 | "data", 327 | "reflectance" 328 | ] 329 | }, 330 | "swir22": { 331 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/B12.tif", 332 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 333 | "title": "SWIR 2.2μm - 20m", 334 | "eo:bands": [ 335 | { 336 | "name": "B12", 337 | "common_name": "swir22", 338 | "center_wavelength": 2.19, 339 | "full_width_half_max": 0.242 340 | } 341 | ], 342 | "gsd": 20, 343 | "proj:shape": [ 344 | 5490, 345 | 5490 346 | ], 347 | "proj:transform": [ 348 | 20, 349 | 0, 350 | 499980, 351 | 0, 352 | -20, 353 | 6400000 354 | ], 355 | "raster:bands": [ 356 | { 357 | "nodata": 0, 358 | "data_type": "uint16", 359 | "spatial_resolution": 20, 360 | "scale": 0.0001, 361 | "offset": -0.1 362 | } 363 | ], 364 | 
"file:checksum": "12203f4b9e02f3121690c3f8fd60b2a61baa1fd04f5d10ee81b972f0a9b700cfd978", 365 | "file:size": 40233757, 366 | "roles": [ 367 | "data", 368 | "reflectance" 369 | ] 370 | }, 371 | "rededge2": { 372 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/B06.tif", 373 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 374 | "title": "Red Edge 2 - 20m", 375 | "eo:bands": [ 376 | { 377 | "name": "B06", 378 | "common_name": "rededge", 379 | "center_wavelength": 0.74, 380 | "full_width_half_max": 0.018 381 | } 382 | ], 383 | "gsd": 20, 384 | "proj:shape": [ 385 | 5490, 386 | 5490 387 | ], 388 | "proj:transform": [ 389 | 20, 390 | 0, 391 | 499980, 392 | 0, 393 | -20, 394 | 6400000 395 | ], 396 | "raster:bands": [ 397 | { 398 | "nodata": 0, 399 | "data_type": "uint16", 400 | "spatial_resolution": 20, 401 | "scale": 0.0001, 402 | "offset": -0.1 403 | } 404 | ], 405 | "file:checksum": "12206fbd7602036614b8cf39ce3fd3c0f3a6962588cb7c7ef3c13148903e597cbd8d", 406 | "file:size": 43744957, 407 | "roles": [ 408 | "data", 409 | "reflectance" 410 | ] 411 | }, 412 | "rededge3": { 413 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/B07.tif", 414 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 415 | "title": "Red Edge 3 - 20m", 416 | "eo:bands": [ 417 | { 418 | "name": "B07", 419 | "common_name": "rededge", 420 | "center_wavelength": 0.783, 421 | "full_width_half_max": 0.028 422 | } 423 | ], 424 | "gsd": 20, 425 | "proj:shape": [ 426 | 5490, 427 | 5490 428 | ], 429 | "proj:transform": [ 430 | 20, 431 | 0, 432 | 499980, 433 | 0, 434 | -20, 435 | 6400000 436 | ], 437 | "raster:bands": [ 438 | { 439 | "nodata": 0, 440 | "data_type": "uint16", 441 | "spatial_resolution": 20, 442 | "scale": 0.0001, 443 | "offset": -0.1 444 | } 445 | ], 446 | "file:checksum": 
"1220977074bace95fca7a4c538a55847db63581f6e2f41452feb791e8a989643ada5", 447 | "file:size": 44360377, 448 | "roles": [ 449 | "data", 450 | "reflectance" 451 | ] 452 | }, 453 | "rededge1": { 454 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/B05.tif", 455 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 456 | "title": "Red Edge 1 - 20m", 457 | "eo:bands": [ 458 | { 459 | "name": "B05", 460 | "common_name": "rededge", 461 | "center_wavelength": 0.704, 462 | "full_width_half_max": 0.019 463 | } 464 | ], 465 | "gsd": 20, 466 | "proj:shape": [ 467 | 5490, 468 | 5490 469 | ], 470 | "proj:transform": [ 471 | 20, 472 | 0, 473 | 499980, 474 | 0, 475 | -20, 476 | 6400000 477 | ], 478 | "raster:bands": [ 479 | { 480 | "nodata": 0, 481 | "data_type": "uint16", 482 | "spatial_resolution": 20, 483 | "scale": 0.0001, 484 | "offset": -0.1 485 | } 486 | ], 487 | "file:checksum": "12206051b3618d00910293f34ea4bc1b28215e29f655f10c06f3c3272ed1a82a2f89", 488 | "file:size": 41066885, 489 | "roles": [ 490 | "data", 491 | "reflectance" 492 | ] 493 | }, 494 | "swir16": { 495 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/B11.tif", 496 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 497 | "title": "SWIR 1.6μm - 20m", 498 | "eo:bands": [ 499 | { 500 | "name": "B11", 501 | "common_name": "swir16", 502 | "center_wavelength": 1.61, 503 | "full_width_half_max": 0.143 504 | } 505 | ], 506 | "gsd": 20, 507 | "proj:shape": [ 508 | 5490, 509 | 5490 510 | ], 511 | "proj:transform": [ 512 | 20, 513 | 0, 514 | 499980, 515 | 0, 516 | -20, 517 | 6400000 518 | ], 519 | "raster:bands": [ 520 | { 521 | "nodata": 0, 522 | "data_type": "uint16", 523 | "spatial_resolution": 20, 524 | "scale": 0.0001, 525 | "offset": -0.1 526 | } 527 | ], 528 | "file:checksum": 
"12208f6ca0cdbf4dcefc57f603bcc776723405855039773cfdbfdbe036bdd0ceb1b4", 529 | "file:size": 40598234, 530 | "roles": [ 531 | "data", 532 | "reflectance" 533 | ] 534 | }, 535 | "wvp": { 536 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/WVP.tif", 537 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 538 | "title": "Water Vapour (WVP)", 539 | "gsd": 20, 540 | "proj:shape": [ 541 | 5490, 542 | 5490 543 | ], 544 | "proj:transform": [ 545 | 20, 546 | 0, 547 | 499980, 548 | 0, 549 | -20, 550 | 6400000 551 | ], 552 | "raster:bands": [ 553 | { 554 | "nodata": 0, 555 | "data_type": "uint16", 556 | "spatial_resolution": 20, 557 | "unit": "cm", 558 | "scale": 0.001, 559 | "offset": 0 560 | } 561 | ], 562 | "file:checksum": "122085b6cacf671829f5352b9a67bb60b83717c90594f3051310c0477b1571f03e8e", 563 | "file:size": 28636030, 564 | "roles": [ 565 | "data" 566 | ] 567 | }, 568 | "nir08": { 569 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/B8A.tif", 570 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 571 | "title": "NIR 2 - 20m", 572 | "eo:bands": [ 573 | { 574 | "name": "B8A", 575 | "common_name": "nir08", 576 | "center_wavelength": 0.865, 577 | "full_width_half_max": 0.033 578 | } 579 | ], 580 | "gsd": 20, 581 | "proj:shape": [ 582 | 5490, 583 | 5490 584 | ], 585 | "proj:transform": [ 586 | 20, 587 | 0, 588 | 499980, 589 | 0, 590 | -20, 591 | 6400000 592 | ], 593 | "raster:bands": [ 594 | { 595 | "nodata": 0, 596 | "data_type": "uint16", 597 | "spatial_resolution": 20, 598 | "scale": 0.0001, 599 | "offset": -0.1 600 | } 601 | ], 602 | "file:checksum": "122037f243f3d93f1a9da2d1476fbbdc765e3e7152c86688e2b09c241430584f8721", 603 | "file:size": 44484301, 604 | "roles": [ 605 | "data", 606 | "reflectance" 607 | ] 608 | }, 609 | "scl": { 610 | "href": 
"https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/SCL.tif", 611 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 612 | "title": "Scene classification map (SCL)", 613 | "gsd": 20, 614 | "proj:shape": [ 615 | 5490, 616 | 5490 617 | ], 618 | "proj:transform": [ 619 | 20, 620 | 0, 621 | 499980, 622 | 0, 623 | -20, 624 | 6400000 625 | ], 626 | "raster:bands": [ 627 | { 628 | "nodata": 0, 629 | "data_type": "uint8", 630 | "spatial_resolution": 20 631 | } 632 | ], 633 | "file:checksum": "12205fe5d875f7f2d2ec95ec66aa7e1887da7c40d33ef86b0931c3717c7eddf6ae3a", 634 | "file:size": 1985202, 635 | "roles": [ 636 | "data" 637 | ] 638 | }, 639 | "aot": { 640 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/AOT.tif", 641 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 642 | "title": "Aerosol optical thickness (AOT)", 643 | "gsd": 20, 644 | "proj:shape": [ 645 | 5490, 646 | 5490 647 | ], 648 | "proj:transform": [ 649 | 20, 650 | 0, 651 | 499980, 652 | 0, 653 | -20, 654 | 6400000 655 | ], 656 | "raster:bands": [ 657 | { 658 | "nodata": 0, 659 | "data_type": "uint16", 660 | "spatial_resolution": 20, 661 | "scale": 0.001, 662 | "offset": 0 663 | } 664 | ], 665 | "file:checksum": "122023975c28613775d7ed4332faac2ba305cbfdf9d78458167fd935bdf483c14419", 666 | "file:size": 1054367, 667 | "roles": [ 668 | "data" 669 | ] 670 | }, 671 | "coastal": { 672 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/B01.tif", 673 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 674 | "title": "Coastal - 60m", 675 | "eo:bands": [ 676 | { 677 | "name": "B01", 678 | "common_name": "coastal", 679 | "center_wavelength": 0.443, 680 | "full_width_half_max": 0.027 681 | } 682 | ], 683 | "gsd": 
60, 684 | "proj:shape": [ 685 | 1830, 686 | 1830 687 | ], 688 | "proj:transform": [ 689 | 60, 690 | 0, 691 | 499980, 692 | 0, 693 | -60, 694 | 6400000 695 | ], 696 | "raster:bands": [ 697 | { 698 | "nodata": 0, 699 | "data_type": "uint16", 700 | "spatial_resolution": 60, 701 | "scale": 0.0001, 702 | "offset": -0.1 703 | } 704 | ], 705 | "file:checksum": "1220bdcf1eae16278be37f3caaef1cd561af11fab3e8a8c3f555171e481624889718", 706 | "file:size": 4384096, 707 | "roles": [ 708 | "data", 709 | "reflectance" 710 | ] 711 | }, 712 | "nir09": { 713 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/B09.tif", 714 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 715 | "title": "NIR 3 - 60m", 716 | "eo:bands": [ 717 | { 718 | "name": "B09", 719 | "common_name": "nir09", 720 | "center_wavelength": 0.945, 721 | "full_width_half_max": 0.026 722 | } 723 | ], 724 | "gsd": 60, 725 | "proj:shape": [ 726 | 1830, 727 | 1830 728 | ], 729 | "proj:transform": [ 730 | 60, 731 | 0, 732 | 499980, 733 | 0, 734 | -60, 735 | 6400000 736 | ], 737 | "raster:bands": [ 738 | { 739 | "nodata": 0, 740 | "data_type": "uint16", 741 | "spatial_resolution": 60, 742 | "scale": 0.0001, 743 | "offset": -0.1 744 | } 745 | ], 746 | "file:checksum": "1220e50446aa3742da30bbd83ba3d7495430301323218f869f0ab5b195fbd20477ca", 747 | "file:size": 4939673, 748 | "roles": [ 749 | "data", 750 | "reflectance" 751 | ] 752 | }, 753 | "cloud": { 754 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/CLD_20m.tif", 755 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 756 | "title": "Cloud Probabilities", 757 | "gsd": 20, 758 | "proj:shape": [ 759 | 5490, 760 | 5490 761 | ], 762 | "proj:transform": [ 763 | 20, 764 | 0, 765 | 499980, 766 | 0, 767 | -20, 768 | 6400000 769 | ], 770 | "raster:bands": [ 771 | { 772 
| "nodata": 0, 773 | "data_type": "uint8", 774 | "spatial_resolution": 20 775 | } 776 | ], 777 | "file:checksum": "1220dede1f04dc46924dfe3255aa767e13a878379ed4339c7981a6aa164e3f748af3", 778 | "file:size": 3352217, 779 | "roles": [ 780 | "data", 781 | "cloud" 782 | ] 783 | }, 784 | "snow": { 785 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/SNW_20m.tif", 786 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 787 | "title": "Snow Probabilities", 788 | "proj:shape": [ 789 | 5490, 790 | 5490 791 | ], 792 | "proj:transform": [ 793 | 20, 794 | 0, 795 | 499980, 796 | 0, 797 | -20, 798 | 6400000 799 | ], 800 | "raster:bands": [ 801 | { 802 | "nodata": 0, 803 | "data_type": "uint8", 804 | "spatial_resolution": 20 805 | } 806 | ], 807 | "file:checksum": "1220088767e0c3695ee7179512e96443e5a790febd074bd7b88d3bf530559af52fdb", 808 | "file:size": 145568, 809 | "roles": [ 810 | "data", 811 | "snow-ice" 812 | ] 813 | }, 814 | "preview": { 815 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/L2A_PVI.tif", 816 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 817 | "title": "True color preview", 818 | "eo:bands": [ 819 | { 820 | "name": "B04", 821 | "common_name": "red", 822 | "center_wavelength": 0.665, 823 | "full_width_half_max": 0.038 824 | }, 825 | { 826 | "name": "B03", 827 | "common_name": "green", 828 | "center_wavelength": 0.56, 829 | "full_width_half_max": 0.045 830 | }, 831 | { 832 | "name": "B02", 833 | "common_name": "blue", 834 | "center_wavelength": 0.49, 835 | "full_width_half_max": 0.098 836 | } 837 | ], 838 | "file:checksum": "1220da6648f65195be831249f3779e483251eb212e9c279d68557a9af1c40f654e63", 839 | "file:size": 189151, 840 | "roles": [ 841 | "overview" 842 | ] 843 | }, 844 | "granule_metadata": { 845 | "href": 
"https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/metadata.xml", 846 | "type": "application/xml", 847 | "file:checksum": "1220a6caf06015a9742a4066eb77bbe9ee583c90351ca35a121b7e96fb3f752d164e", 848 | "file:size": 374744, 849 | "roles": [ 850 | "metadata" 851 | ] 852 | }, 853 | "tileinfo_metadata": { 854 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/tileInfo.json", 855 | "type": "application/json", 856 | "file:checksum": "1220e8c6ae8fdae5bcad7a9821f0397e1bf017df72399397c59bb5d72e66d7c94f0f", 857 | "file:size": 1518, 858 | "roles": [ 859 | "metadata" 860 | ] 861 | }, 862 | "product_metadata": { 863 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/product_metadata.xml", 864 | "type": "application/xml", 865 | "file:checksum": "1220eba33cfb8c573dc004bc7168c1f6dc71991851725f5fbf40215b5c648d46800b", 866 | "file:size": 55234, 867 | "roles": [ 868 | "metadata" 869 | ] 870 | }, 871 | "thumbnail": { 872 | "href": "https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/20/H/NJ/2024/3/S2A_T20HNJ_20240311T140636_L2A/L2A_PVI.jpg", 873 | "type": "image/jpeg", 874 | "title": "Thumbnail of preview image", 875 | "file:checksum": "1220accda8a2b685258c096cda5bf7f6903dae06f07e10560fe20e80d0099f535de2", 876 | "file:size": 31673, 877 | "roles": [ 878 | "thumbnail" 879 | ] 880 | } 881 | }, 882 | "bbox": [ 883 | -62.817517, 884 | -33.527644, 885 | -61.817856, 886 | -32.531931 887 | ], 888 | "stac_extensions": [ 889 | "https://stac-extensions.github.io/eo/v1.1.0/schema.json", 890 | "https://stac-extensions.github.io/file/v2.1.0/schema.json", 891 | "https://stac-extensions.github.io/grid/v1.1.0/schema.json", 892 | "https://stac-extensions.github.io/mgrs/v1.0.0/schema.json", 893 | 
"https://stac-extensions.github.io/processing/v1.1.0/schema.json", 894 | "https://stac-extensions.github.io/projection/v1.1.0/schema.json", 895 | "https://stac-extensions.github.io/raster/v1.1.0/schema.json", 896 | "https://stac-extensions.github.io/sentinel-2/v1.0.0/schema.json", 897 | "https://stac-extensions.github.io/storage/v1.0.0/schema.json", 898 | "https://stac-extensions.github.io/view/v1.0.0/schema.json" 899 | ], 900 | "collection": "sentinel-2-c1-l2a" 901 | } 902 | -------------------------------------------------------------------------------- /tests/data/stacchip_test_item.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "Feature", 3 | "stac_version": "1.0.0", 4 | "id": "m_4207009_ne_19_060_20211024.tif", 5 | "properties": { 6 | "proj:epsg": 26919, 7 | "proj:geometry": { 8 | "type": "Polygon", 9 | "coordinates": [ 10 | [ 11 | [ 12 | 341000.0, 13 | 4741000.0 14 | ], 15 | [ 16 | 342000.0, 17 | 4741000.0 18 | ], 19 | [ 20 | 342000.0, 21 | 4741800.0 22 | ], 23 | [ 24 | 341000.0, 25 | 4741800.0 26 | ], 27 | [ 28 | 341000.0, 29 | 4741000.0 30 | ] 31 | ] 32 | ] 33 | }, 34 | "proj:bbox": [ 35 | 341000.0, 36 | 4741000.0, 37 | 342000.0, 38 | 4741800.0 39 | ], 40 | "proj:shape": [ 41 | 800, 42 | 1000 43 | ], 44 | "proj:transform": [ 45 | 1, 46 | 0, 47 | 342000.0, 48 | 0, 49 | -1, 50 | 4741800.0, 51 | 0, 52 | 0, 53 | 1 54 | ], 55 | "datetime": "2021-10-24T12:33:05.496897Z" 56 | }, 57 | "geometry": { 58 | "type": "Polygon", 59 | "coordinates": [ 60 | [ 61 | [ 62 | -70.9405470386063, 63 | 42.80920310538916 64 | ], 65 | [ 66 | -70.86937257210027, 67 | 42.81038748290737 68 | ], 69 | [ 70 | -70.8714366864438, 71 | 42.87878792763812 72 | ], 73 | [ 74 | -70.94268962889282, 75 | 42.877600665218694 76 | ], 77 | [ 78 | -70.9405470386063, 79 | 42.80920310538916 80 | ] 81 | ] 82 | ] 83 | }, 84 | "links": [], 85 | "assets": { 86 | "asset": { 87 | "href": 
def test_no_stats_indexer():
    """End-to-end check of Chipper against a NoStatsChipIndexer.

    Writes a small random 2-band GeoTIFF matching the test STAC item's
    ``proj:shape``/``proj:transform``, indexes it, and verifies that the
    chips returned by the Chipper are the expected pixel windows of the
    source raster, and that ``__len__``/``__getitem__``/iteration behave
    consistently.
    """
    with TemporaryDirectory() as dirname:
        mountpath = Path(dirname)
        target_dir = mountpath / "naip/item1"
        target_dir.mkdir(parents=True, exist_ok=True)
        item = Item.from_file("tests/data/stacchip_test_item.json")
        shape = item.properties["proj:shape"]
        size = shape[0] * shape[1]
        trsf = item.properties["proj:transform"]
        bands = 2
        with rasterio.open(
            mountpath / "naip/item1/asset.tif",
            "w",
            width=shape[1],
            height=shape[0],
            count=bands,
            dtype="uint8",
            # STAC proj:transform is row-major Affine order (a, b, c, d, e, f).
            # Pass it as an Affine directly: rasterio expects Affine order and
            # rejects/deprecates GDAL-ordered plain sequences.
            transform=rasterio.Affine(*trsf[:6]),
        ) as rst:
            raster_data = np.random.randint(
                0, 255, bands * size, dtype="uint8"
            ).reshape((bands, *shape))
            rst.write(raster_data)

        item.assets["asset"].href = "s3://example-bucket/naip/item1/asset.tif"
        with open(mountpath / "naip/item1/stac_item.json", "w") as dst:
            dst.write(json.dumps(item.to_dict()))
        indexer = NoStatsChipIndexer(item)
        index = indexer.create_index()
        chipper = Chipper(indexer, mountpath=mountpath)
        # Use the chip coordinates of index row 1 (the same row checked via
        # chipper[1] below). Reading x and y from the same row is required
        # for the chip to correspond to a real index entry.
        x = index.column("chip_index_x")[1].as_py()
        y = index.column("chip_index_y")[1].as_py()
        chip = chipper.chip(x, y)
        assert chip["asset"].shape[0] == raster_data.shape[0]
        assert_array_equal(
            chip["asset"][0],
            raster_data[
                0,
                (y * indexer.chip_size) : ((y + 1) * indexer.chip_size),
                (x * indexer.chip_size) : ((x + 1) * indexer.chip_size),
            ],
        )
        # Test magic functions
        assert len(chipper) == indexer.size
        x_index, y_index, chipper_1 = chipper[1]
        assert x == x_index
        assert y == y_index
        assert_array_equal(chip["asset"][0], chipper_1["asset"][0])
        counter = 0
        for _chip in chipper:
            counter += 1
        assert counter == len(chipper)
str) -> MemoryFile: 49 | meta = { 50 | "driver": "GTiff", 51 | "dtype": "uint8", 52 | "nodata": 0.0, 53 | "width": 5490, 54 | "height": 5490, 55 | "count": 1, 56 | "crs": "EPSG:32720", 57 | "transform": Affine(20.0, 0.0, 499980.0, 0.0, -20.0, 6400000.0), 58 | } 59 | data = 5 * np.ones((1, 5490, 5490), dtype="uint16") 60 | # Make first chip nodata 61 | data[0, :100, :100] = 0 62 | # Make second chip cloudy 63 | data[0, :128, 128:192] = 1 64 | memfile = MemoryFile() 65 | with memfile.open(**meta) as dst: 66 | dst.write(data) 67 | return memfile.open() 68 | 69 | 70 | def test_get_stats_error(): 71 | item = Item.from_file("tests/data/naip_m_4207009_ne_19_060_20211024.json") 72 | indexer = ChipIndexer(item) 73 | with pytest.raises(NotImplementedError): 74 | indexer.create_index() 75 | 76 | 77 | def test_no_stats_indexer(): 78 | item = Item.from_file("tests/data/naip_m_4207009_ne_19_060_20211024.json") 79 | indexer = NoStatsChipIndexer(item) 80 | assert indexer.shape == [12666, 9704] 81 | index = indexer.create_index() 82 | assert str(index.column("chipid")[0]) == "m_4207009_ne_19_060_20211024.tif-0-0" 83 | assert index.column("date")[0] == pa.scalar( 84 | datetime.date(2021, 10, 24), pa.date32() 85 | ) 86 | 87 | point = Point( 88 | indexer.transform[2], 89 | indexer.transform[5], 90 | ) 91 | 92 | target = indexer.reproject(point) 93 | assert min( 94 | [dat["x"] for dat in index.column("geometry")[0].as_py()[0]] 95 | ) == pytest.approx(target.bounds[0]) 96 | assert max( 97 | [dat["y"] for dat in index.column("geometry")[0].as_py()[0]] 98 | ) == pytest.approx(target.bounds[3]) 99 | 100 | 101 | @mock.patch("stacchip.indexer.rasterio.open", rasterio_open_sentinel_mock) 102 | def test_sentinel_2_indexer(): 103 | item = Item.from_file( 104 | "tests/data/sentinel-2-l2a-S2A_T20HNJ_20240311T140636_L2A.json" 105 | ) 106 | indexer = Sentinel2Indexer(item) 107 | assert indexer.shape == [10980, 10980] 108 | index = indexer.create_index() 109 | assert index.shape == (1763, 7) 110 | 
assert str(index.column("chipid")[0]) == "S2A_T20HNJ_20240311T140636_L2A-1-0" 111 | assert index.column("cloud_cover_percentage")[0].as_py() == 0.5 112 | 113 | 114 | @mock.patch("stacchip.indexer.rasterio.open", rasterio_open_ls_mock) 115 | def test_landsat_indexer(): 116 | item = Item.from_file( 117 | "tests/data/landsat-c2l2-sr-LC09_L2SR_086107_20240311_20240312_02_T2_SR.json" 118 | ) 119 | indexer = LandsatIndexer(item) 120 | assert indexer.shape == [8271, 8331] 121 | index = indexer.create_index() 122 | assert isinstance(index, pa.Table) 123 | assert ( 124 | str(index.column("chipid")[0]) 125 | == "LC09_L2SR_086107_20240311_20240312_02_T2_SR-0-0" 126 | ) 127 | assert index.shape == (1024, 7) 128 | assert indexer.x_size == int(8331 / 256) 129 | assert indexer.y_size == int(8271 / 256) 130 | 131 | 132 | @mock.patch("stacchip.indexer.rasterio.open", rasterio_open_ls_nodata_mock) 133 | def test_landsat_indexer_nodata(): 134 | item = Item.from_file( 135 | "tests/data/landsat-c2l2-sr-LC09_L2SR_086107_20240311_20240312_02_T2_SR.json" 136 | ) 137 | indexer = LandsatIndexer(item) 138 | index = indexer.create_index() 139 | assert index.shape == (1023, 7) 140 | assert ( 141 | str(index.column("chipid")[0]) 142 | == "LC09_L2SR_086107_20240311_20240312_02_T2_SR-1-0" 143 | ) 144 | 145 | indexer = LandsatIndexer(item, chip_max_nodata=0.95) 146 | index = indexer.create_index() 147 | assert index.shape == (1024, 7) 148 | 149 | 150 | def test_indexer_manual_shape(): 151 | item = Item.from_file( 152 | "tests/data/landsat-c2l2-sr-LC09_L2SR_086107_20240311_20240312_02_T2_SR.json" 153 | ) 154 | indexer = NoStatsChipIndexer(item, shape=[230, 420], chip_size=100) 155 | assert indexer.shape == [230, 420] 156 | assert indexer.y_size == 2 157 | assert indexer.x_size == 4 158 | --------------------------------------------------------------------------------