├── .github
├── dependabot.yml
└── workflows
│ ├── nightly-build.yaml
│ ├── publish-book.yaml
│ ├── trigger-book-build.yaml
│ ├── trigger-delete-preview.yaml
│ ├── trigger-link-check.yaml
│ └── trigger-preview.yaml
├── .gitignore
├── CITATION.cff
├── LICENSE
├── README.md
├── _config.yml
├── _gallery_info.yml
├── _static
├── custom.css
└── footer-logo-nsf.png
├── _templates
└── footer-extra.html
├── _toc.yml
├── environment.yml
├── notebooks
├── 0.0_Intro_Landsat.ipynb
├── 1.0_Data_Ingestion-Geospatial.ipynb
├── 1.1_Data_Ingestion-General.ipynb
├── 2.0_Spectral_Clustering_PC.ipynb
├── data
│ ├── catalog.yml
│ ├── landsat5_bands.csv
│ ├── landsat5_crop.nc
│ └── landsat8_bands.csv
└── images
│ ├── L-Next-SpectralBands-stack.png
│ ├── ProjectPythia_Logo_Final-01-Blue.svg
│ ├── icons
│ └── favicon.ico
│ ├── intake_landsat.png
│ ├── landsat_8_rend-sm1.png
│ ├── landsat_timeline.png
│ ├── logos
│ ├── NSF-NCAR_Lockup-UCAR-Dark_102523.svg
│ ├── UAlbany-A2-logo-purple-gold.svg
│ ├── Unidata_logo_horizontal_1200x300.svg
│ ├── pythia_logo-white-notext.svg
│ └── pythia_logo-white-rtext.svg
│ ├── nasa_bands.png
│ ├── nasa_landsat8.jpg
│ ├── planetary_computer_header_800w.png
│ ├── pystac.png
│ ├── spectral_clustering.png
│ └── spectral_clustering_lake.png
└── thumbnail.png
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | # - package-ecosystem: pip
4 | # directory: "/"
5 | # schedule:
6 | # interval: daily
7 | - package-ecosystem: 'github-actions'
8 | directory: '/'
9 | schedule:
10 | # Check for updates once a week
11 | interval: 'weekly'
12 |
--------------------------------------------------------------------------------
/.github/workflows/nightly-build.yaml:
--------------------------------------------------------------------------------
1 | name: nightly-build
2 |
3 | on:
4 | workflow_dispatch:
5 | schedule:
6 | - cron: '0 0 * * *' # Daily "At 00:00"
7 |
8 | jobs:
9 | build:
10 | if: ${{ github.repository_owner == 'ProjectPythia' }}
11 | uses: ProjectPythia/cookbook-actions/.github/workflows/build-book.yaml@main
12 | with:
13 | environment_name: cookbook-dev
14 |
15 | link-check:
16 | if: ${{ github.repository_owner == 'ProjectPythia' }}
17 | uses: ProjectPythia/cookbook-actions/.github/workflows/link-checker.yaml@main
18 |
--------------------------------------------------------------------------------
/.github/workflows/publish-book.yaml:
--------------------------------------------------------------------------------
1 | name: publish-book
2 |
3 | on:
4 | # Trigger the workflow on push to main branch
5 | push:
6 | branches:
7 | - main
8 | workflow_dispatch:
9 |
10 | jobs:
11 | build:
12 | uses: ProjectPythia/cookbook-actions/.github/workflows/build-book.yaml@main
13 | with:
14 | environment_name: cookbook-dev
15 |
16 | deploy:
17 | needs: build
18 | uses: ProjectPythia/cookbook-actions/.github/workflows/deploy-book.yaml@main
19 |
--------------------------------------------------------------------------------
/.github/workflows/trigger-book-build.yaml:
--------------------------------------------------------------------------------
1 | name: trigger-book-build
2 | on:
3 | pull_request:
4 |
5 | jobs:
6 | build:
7 | uses: ProjectPythia/cookbook-actions/.github/workflows/build-book.yaml@main
8 | with:
9 | environment_name: cookbook-dev
10 | artifact_name: book-zip-${{ github.event.number }}
11 | # Other input options are possible, see ProjectPythia/cookbook-actions/.github/workflows/build-book.yaml
12 |
--------------------------------------------------------------------------------
/.github/workflows/trigger-delete-preview.yaml:
--------------------------------------------------------------------------------
1 | name: trigger-delete-preview
2 |
3 | on:
4 | pull_request_target:
5 | types: closed
6 |
7 | jobs:
8 | delete:
9 | uses: ProjectPythia/cookbook-actions/.github/workflows/delete-preview.yaml@main
--------------------------------------------------------------------------------
/.github/workflows/trigger-link-check.yaml:
--------------------------------------------------------------------------------
1 | name: trigger-link-check
2 | on:
3 | pull_request:
4 |
5 | jobs:
6 | link-check:
7 | uses: ProjectPythia/cookbook-actions/.github/workflows/link-checker.yaml@main
8 |
--------------------------------------------------------------------------------
/.github/workflows/trigger-preview.yaml:
--------------------------------------------------------------------------------
1 | name: trigger-preview
2 | on:
3 | workflow_run:
4 | workflows:
5 | - trigger-book-build
6 | types:
7 | - requested
8 | - completed
9 |
10 | jobs:
11 | find-pull-request:
12 | uses: ProjectPythia/cookbook-actions/.github/workflows/find-pull-request.yaml@main
13 | deploy-preview:
14 | needs: find-pull-request
15 | if: github.event.workflow_run.conclusion == 'success'
16 | uses: ProjectPythia/cookbook-actions/.github/workflows/deploy-book.yaml@main
17 | with:
18 | artifact_name: book-zip-${{ needs.find-pull-request.outputs.number }}
19 | destination_dir: _preview/${{ needs.find-pull-request.outputs.number }} # deploy to subdirectory labeled with PR number
20 | is_preview: 'true'
21 |
22 | preview-comment:
23 | needs: find-pull-request
24 | uses: ProjectPythia/cookbook-actions/.github/workflows/preview-comment.yaml@main
25 | with:
26 | pull_request_number: ${{ needs.find-pull-request.outputs.number }}
27 | sha: ${{ needs.find-pull-request.outputs.sha }}
28 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | _build/
13 | notebooks/_build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | wheels/
25 | pip-wheel-metadata/
26 | share/python-wheels/
27 | *.egg-info/
28 | .installed.cfg
29 | *.egg
30 | MANIFEST
31 |
32 | # PyInstaller
33 | # Usually these files are written by a python script from a template
34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest
36 | *.spec
37 |
38 | # Installer logs
39 | pip-log.txt
40 | pip-delete-this-directory.txt
41 |
42 | # Unit test / coverage reports
43 | htmlcov/
44 | .tox/
45 | .nox/
46 | .coverage
47 | .coverage.*
48 | .cache
49 | nosetests.xml
50 | coverage.xml
51 | *.cover
52 | *.py,cover
53 | .hypothesis/
54 | .pytest_cache/
55 |
56 | # Translations
57 | *.mo
58 | *.pot
59 |
60 | # Django stuff:
61 | *.log
62 | local_settings.py
63 | db.sqlite3
64 | db.sqlite3-journal
65 |
66 | # Flask stuff:
67 | instance/
68 | .webassets-cache
69 |
70 | # Scrapy stuff:
71 | .scrapy
72 |
73 | # Sphinx documentation
74 | docs/_build/
75 |
76 | # PyBuilder
77 | target/
78 |
79 | # Jupyter Notebook
80 | .ipynb_checkpoints
81 |
82 | # IPython
83 | profile_default/
84 | ipython_config.py
85 |
86 | # pyenv
87 | .python-version
88 |
89 | # pipenv
90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
93 | # install all needed dependencies.
94 | #Pipfile.lock
95 |
96 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
97 | __pypackages__/
98 |
99 | # Celery stuff
100 | celerybeat-schedule
101 | celerybeat.pid
102 |
103 | # SageMath parsed files
104 | *.sage.py
105 |
106 | # Environments
107 | .env
108 | .venv
109 | env/
110 | venv/
111 | ENV/
112 | env.bak/
113 | venv.bak/
114 |
115 | # Spyder project settings
116 | .spyderproject
117 | .spyproject
118 |
119 | # Rope project settings
120 | .ropeproject
121 |
122 | # mkdocs documentation
123 | /site
124 |
125 | # mypy
126 | .mypy_cache/
127 | .dmypy.json
128 | dmypy.json
129 |
130 | # Pyre type checker
131 | .pyre/
132 |
133 | # DS store
134 | **/.DS_Store
135 |
136 | # IDEs
137 | .vscode/
138 |
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: 1.2.0
2 | message: "If you use this cookbook, please cite it as below."
3 | authors:
4 | # add additional entries for each author -- see https://github.com/citation-file-format/citation-file-format/blob/main/schema-guide.md
5 | - family-names: Roumis
6 | given-names: Demetris
7 | website: https://github.com/droumis
8 | orcid: https://orcid.org/0000-0003-4670-1657
9 | - name: "Landsat ML Cookbook contributors" # use the 'name' field to acknowledge organizations
10 | website: "https://github.com/ProjectPythia/landsat-ml-cookbook/graphs/contributors"
11 | title: "Landsat ML Cookbook"
12 | abstract: "Machine learning on Landsat data."
13 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 |
3 | # Landsat ML Cookbook
4 |
5 | [](https://github.com/ProjectPythia/landsat-ml-cookbook/actions/workflows/nightly-build.yaml)
6 | [](http://binder.projectpythia.org/v2/gh/ProjectPythia/landsat-ml-cookbook/main?labpath=notebooks)
7 | [](https://zenodo.org/badge/latestdoi/563445694)
8 |
9 | This Project Pythia Cookbook covers the essential materials for working with Landsat data in the context of machine learning workflows.
10 |
11 | ## Motivation
12 |
13 | Once you complete this cookbook, you will have the skills to access, resample, regrid, reshape, and rescale satellite data, as well as the foundation for applying machine learning to it. You will also learn how to interactively visualize your data at every step in the process.
14 |
15 | ## Authors
16 |
17 | [Demetris Roumis](https://github.com/droumis)
18 | [Andrew Huang](https://github.com/ahuang11)
19 |
20 | ### Contributors
21 |
22 |
23 |
24 |
25 |
26 |
27 | This cookbook was initially inspired by the [EarthML](https://github.com/pyviz-topics/EarthML) project. See a list of the EarthML contributors [here](https://github.com/pyviz-topics/EarthML/graphs/contributors).
28 |
29 |
30 |
31 |
32 | ## Structure
33 | This cookbook is broken up into two main sections - "Foundations" and "Example Workflows."
34 |
35 | ### Foundations
36 | The foundational content includes:
37 | - Start Here - Introduction to Landsat data.
38 | - Data Ingestion - Geospatial-Specific Tooling - Demonstrating a method for loading and accessing Landsat data from Microsoft's Planetary Computer platform with tooling from pystac and odc.
39 | - Data Ingestion - General Purpose Tooling - Demonstrating approaches for domain-independent data access using Intake.
40 |
41 | ### Example Workflows
42 | Example workflows include:
43 | - Spectral Clustering - Demonstrating a machine learning approach to cluster pixels of satellite data and comparing cluster results across time
44 |
45 | ## Running the Notebooks
46 | You can either run the notebook using [Binder](https://binder.projectpythia.org/) or on your local machine.
47 |
48 | ### Running on Binder
49 |
50 | The simplest way to interact with a Jupyter Notebook is through
51 | [Binder](https://binder.projectpythia.org/), which enables the execution of a
52 | [Jupyter Book](https://jupyterbook.org) in the cloud. The details of how this works are not
53 | important for now. All you need to know is how to launch a Pythia
54 | Cookbooks chapter via Binder. Simply navigate your mouse to
55 | the top right corner of the book chapter you are viewing and click
56 | on the rocket ship icon, (see figure below), and be sure to select
57 | “launch Binder”. After a moment you should be presented with a
58 | notebook that you can interact with. I.e. you’ll be able to execute
59 | and even change the example programs. You’ll see that the code cells
60 | have no output at first, until you execute them by pressing
61 | {kbd}`Shift`\+{kbd}`Enter`. Complete details on how to interact with
62 | a live Jupyter notebook are described in [Getting Started with
63 | Jupyter](https://foundations.projectpythia.org/foundations/getting-started-jupyter.html).
64 |
65 | ### Running on Your Own Machine
66 | If you are interested in running this material locally on your computer, you will need to follow this workflow:
67 |
68 | 1. Clone the Landsat ML Cookbook repository:
69 |
70 | ```bash
71 | git clone https://github.com/ProjectPythia/landsat-ml-cookbook.git
72 | ```
73 | 1. Move into the `landsat-ml-cookbook` directory
74 | ```bash
75 | cd landsat-ml-cookbook
76 | ```
77 | 1. Create and activate your conda environment from the `environment.yml` file
78 | ```bash
79 | conda env create -f environment.yml
80 | conda activate landsat-ml-cookbook
81 | ```
82 | 1. Move into the `notebooks` directory and start up Jupyterlab
83 | ```bash
84 | cd notebooks/
85 | jupyter lab
86 | ```
87 |
--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | # Book settings
2 | # Learn more at https://jupyterbook.org/customize/config.html
3 |
4 | title: Landsat ML Cookbook
5 | author: Demetris Roumis
6 | logo: notebooks/images/logos/pythia_logo-white-rtext.svg
7 | copyright: '2024'
8 |
9 | execute:
10 | # To execute notebooks via a binder instead, replace 'cache' with 'binder'
11 | execute_notebooks: force
12 | timeout: 600
13 | allow_errors: True
14 |
15 | # Add a few extensions to help with parsing content
16 | parse:
17 | myst_enable_extensions: # default extensions to enable in the myst parser. See https://myst-parser.readthedocs.io/en/latest/using/syntax-optional.html
18 | - amsmath
19 | - colon_fence
20 | - deflist
21 | - dollarmath
22 | - html_admonition
23 | - html_image
24 | - replacements
25 | - smartquotes
26 | - substitution
27 |
28 | sphinx:
29 | config:
30 | html_favicon: notebooks/images/icons/favicon.ico
31 | html_last_updated_fmt: '%-d %B %Y'
32 | html_theme: sphinx_pythia_theme
33 | html_permalinks_icon: ''
34 | html_theme_options:
35 | home_page_in_toc: true
36 | repository_url: https://github.com/ProjectPythia/landsat-ml-cookbook/ # Online location of your book
37 | repository_branch: main # Which branch of the repository should be used when creating links (optional)
38 | use_issues_button: true
39 | use_repository_button: true
40 | use_edit_page_button: true
41 | use_fullscreen_button: true
42 | analytics:
43 | google_analytics_id: G-T52X8HNYE8
44 | github_url: https://github.com/ProjectPythia
45 | twitter_url: https://twitter.com/project_pythia
46 | icon_links:
47 | - name: YouTube
48 | url: https://www.youtube.com/channel/UCoZPBqJal5uKpO8ZiwzavCw
49 | icon: fab fa-youtube-square
50 | type: fontawesome
51 | launch_buttons:
52 | binderhub_url: https://binder.projectpythia.org
53 | notebook_interface: jupyterlab
54 | logo:
55 | link: https://projectpythia.org
56 | navbar_start:
57 | - navbar-logo
58 | navbar_end:
59 | - navbar-icon-links
60 | navbar_links:
61 | - name: Home
62 | url: https://projectpythia.org
63 | - name: Foundations
64 | url: https://foundations.projectpythia.org
65 | - name: Cookbooks
66 | url: https://cookbooks.projectpythia.org
67 | - name: Resources
68 | url: https://projectpythia.org/resource-gallery.html
69 | - name: Community
70 | url: https://projectpythia.org/index.html#join-us
71 | footer_logos:
72 | NCAR: notebooks/images/logos/NSF-NCAR_Lockup-UCAR-Dark_102523.svg
73 | Unidata: notebooks/images/logos/Unidata_logo_horizontal_1200x300.svg
74 | UAlbany: notebooks/images/logos/UAlbany-A2-logo-purple-gold.svg
75 | footer_start:
76 | - footer-logos
77 | - footer-info
78 | - footer-extra
79 |
--------------------------------------------------------------------------------
/_gallery_info.yml:
--------------------------------------------------------------------------------
1 | thumbnail: thumbnail.png
2 | tags:
3 | domains:
4 | - satellite
5 | - ml
6 | - climate
7 | packages:
8 | - hvPlot
9 | - intake
10 | - xarray
11 | - dask
--------------------------------------------------------------------------------
/_static/custom.css:
--------------------------------------------------------------------------------
1 | .bd-main .bd-content .bd-article-container {
2 | max-width: 100%; /* default is 60em */
3 | }
4 | .bd-page-width {
5 | max-width: 100%; /* default is 88rem */
6 | }
7 |
--------------------------------------------------------------------------------
/_static/footer-logo-nsf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ProjectPythia/landsat-ml-cookbook/0854cfe3bcc5b27ee99e64363c216b8d9711593b/_static/footer-logo-nsf.png
--------------------------------------------------------------------------------
/_templates/footer-extra.html:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/_toc.yml:
--------------------------------------------------------------------------------
1 | format: jb-book
2 | root: README
3 | parts:
4 | - caption: Preamble
5 | chapters:
6 | - file: notebooks/how-to-cite
7 | - caption: Foundations
8 | chapters:
9 | - file: notebooks/0.0_Intro_Landsat
10 | - file: notebooks/1.0_Data_Ingestion-Geospatial
11 | - file: notebooks/1.1_Data_Ingestion-General
12 | - caption: Example Workflows
13 | chapters:
14 | - file: notebooks/2.0_Spectral_Clustering_PC
15 |
16 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: landsat-ml-cookbook
2 | channels:
3 | - nodefaults
4 | - conda-forge
5 | dependencies:
6 | - python=3.10
7 | - shapely<2.0.0
8 | - pandas
9 | - xarray-datatree
10 | - planetary-computer
11 | - pystac
12 | - pystac-client
13 | - odc-stac
14 | - ipykernel
15 | - hvplot
16 | - panel<1.4.0
17 | - geoviews
18 | - datashader
19 | - colorcet
20 | - intake-xarray
21 | - xarray<2023.04
22 | - bokeh<3.4.0
23 | - dask
24 | - dask-ml
25 | - pandas
26 | - numpy
27 | - cartopy
28 | - rioxarray
29 | - rasterio
30 | - s3fs
31 | - jupyter-book
32 | - jupyterlab
33 | - jupyter_server<2
34 | - pyopenssl>22
35 | - adlfs
36 | - pip
37 | - pip:
38 | - sphinx-pythia-theme
39 | - stac_geoparquet
40 | - dask_geopandas
41 | - jupyter_bokeh
42 | - pygeos
43 | - intake>=2.0.4
44 |
--------------------------------------------------------------------------------
/notebooks/0.0_Intro_Landsat.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "e9a66402-d146-40a2-a013-ef1078026efa",
6 | "metadata": {},
7 | "source": [
8 | "# Start Here - Intro to Landsat Data"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "id": "477b00c6-6565-4a16-b302-46dac2fff9de",
14 | "metadata": {},
15 | "source": [
16 | ""
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "id": "8a8416c4-c56b-4b53-bc3f-8eb6285dfcac",
22 | "metadata": {},
23 | "source": [
24 | "---"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "id": "a6587d1b-8e22-4404-abf0-0da0bfde096e",
30 | "metadata": {},
31 | "source": [
32 | "## Overview\n",
33 | "\n",
34 | "In this cookbook, you will access, process, analyze, and visualize satellite data in the context of machine learning workflows. This particular cookbook notebook will provide an introduction to Landsat data to build our intuition as we move toward data ingestion, processing, and analysis."
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "id": "a8cb6848-e976-4390-9195-64ffb9049d81",
40 | "metadata": {},
41 | "source": [
42 | "- **Time to learn**: 5 minutes"
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "id": "5ecd112d-3666-4412-acce-05a6dd770de9",
48 | "metadata": {},
49 | "source": [
50 | "## Landsat Data"
51 | ]
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "id": "3b8e2b5e-9a96-4161-96b6-f0bbe6b17f47",
56 | "metadata": {},
57 | "source": [
58 | "The data in this cookbook originally come from the [Landsat](https://en.wikipedia.org/wiki/Landsat_program) program, which is the longest record of moderate resolution multispectral data of the Earth’s surface. This program has launched several different satellites spanning many years which are designated as Landsat 1-9.\n",
59 | "\n",
60 | "\n",
61 | "\n",
62 | "\n",
63 |     "When accessing the data, it's important to keep in mind a couple key points. First, the instruments on different Landsat missions (1-9) varied in certain aspects. Second, Landsat data is available from multiple providers (USGS, NASA, Google, Microsoft, AWS, etc) but may vary in completeness and the level of processing applied. For the dataset that you end up using, it is crucial to review the relevant information from the particular data provider and the specific Landsat mission to understand the details, especially if you are comparing data across providers or missions.\n",
64 | "\n",
65 |     "In general, a common aspect of Landsat data is the use of different wavelength-bands to capture multiple images of the same area - together providing much more information about different features on the ground than a single image alone. This provides us with a stack of images for each spatial region that we might be interested in.\n",
66 | "\n",
67 | "\n",
68 | "\n",
69 | "Additionally, whenever we are looking at changes in satellite images over time, we will have an additional time dimension. For example, we will consider two stacks of images from different years to look at the change in the water level around a lake.\n"
70 | ]
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "id": "e7b29113-9cf8-40d8-b03b-0b7f0a6a6604",
75 | "metadata": {},
76 | "source": [
77 | "___"
78 | ]
79 | },
80 | {
81 | "cell_type": "markdown",
82 | "id": "eb228292-bbda-437e-a6e8-2133cbb8e18b",
83 | "metadata": {},
84 | "source": [
85 | "## Summary\n",
86 | "Before accessing any data, it's a good idea to start by learning about the context and details of the dataset. This will give you the intuition to make informed decisions as you form a processing and analysis pipeline.\n",
87 | "\n",
88 | "### What's next?\n",
89 | "Next, we'll learn about loading the data using the Microsoft Planetary Computer platform."
90 | ]
91 | },
92 | {
93 | "cell_type": "markdown",
94 | "id": "6e54a0ab-93e4-4299-a036-0fa7c3e23ec5",
95 | "metadata": {},
96 | "source": [
97 | "## Resources and references\n",
98 | "- The Landsat timeline image is originally from [USGS](https://www.usgs.gov/landsat-missions/landsat-satellite-missions?qt-science_support_page_related_con=2) but discovered through [earthsciencedata.org](https://www.earthdatascience.org/courses/use-data-open-source-python/multispectral-remote-sensing/landsat-in-Python/)\n",
99 | "- The Landsat 8 banner image is from NASA\n",
100 |     "- The Landsat spectral bands image is from [NASA](https://landsat.gsfc.nasa.gov/satellites/landsat-next/)\n",
101 | "- This page was authored by Demetris Roumis circa Jan, 2023"
102 | ]
103 | }
104 | ],
105 | "metadata": {
106 | "kernelspec": {
107 | "display_name": "Python 3 (ipykernel)",
108 | "language": "python",
109 | "name": "python3"
110 | },
111 | "language_info": {
112 | "codemirror_mode": {
113 | "name": "ipython",
114 | "version": 3
115 | },
116 | "file_extension": ".py",
117 | "mimetype": "text/x-python",
118 | "name": "python",
119 | "nbconvert_exporter": "python",
120 | "pygments_lexer": "ipython3",
121 | "version": "3.10.13"
122 | },
123 | "vscode": {
124 | "interpreter": {
125 | "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
126 | }
127 | }
128 | },
129 | "nbformat": 4,
130 | "nbformat_minor": 5
131 | }
132 |
--------------------------------------------------------------------------------
/notebooks/1.0_Data_Ingestion-Geospatial.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "65c844ec-98e2-40e0-9312-9d6bcd30e4a4",
6 | "metadata": {},
7 | "source": [
8 | "# Data Ingestion - Geospatial-Specific Tooling"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "id": "53b39a59-2225-4406-9136-65b0a4956a6c",
14 | "metadata": {},
15 | "source": [
16 | ""
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "id": "27bd740d-8c47-4843-9899-40a282eb5a18",
22 | "metadata": {},
23 | "source": [
24 | "---"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "id": "091f838a-f459-4c41-957d-5f04083f95da",
30 | "metadata": {},
31 | "source": [
32 | "## Overview"
33 | ]
34 | },
35 | {
36 | "cell_type": "markdown",
37 | "id": "488398aa-5d16-4a7a-b074-c2de5f6fac24",
38 | "metadata": {},
39 | "source": [
40 | "In this notebook, you will ingest Landsat data for use in machine learning. Machine learning tasks often involve a lot of data, and in Python, data is typically stored in memory as simple [NumPy](https://foundations.projectpythia.org/core/numpy.html) arrays. However, higher-level containers built on top of NumPy arrays provide more functionality for multidimensional gridded data ([xarray](http://xarray.pydata.org)) or out-of-core and distributed data ([Dask](http://dask.pydata.org)). Our goal for data ingestion will be to load specific Landsat data of interest into one of these higher-level containers.\n",
41 | "\n",
42 |     "[Microsoft Planetary Computer](https://planetarycomputer.microsoft.com/docs/overview/about) is one of several providers of [Landsat Data](https://planetarycomputer.microsoft.com/dataset/group/landsat). We are using it together with [pystac-client](https://pystac-client.readthedocs.io/en/stable/index.html) and [odc-stac](https://odc-stac.readthedocs.io/en/latest/index.html) because together they provide a nice Python API for searching and loading with specific criteria such as spatial area, datetime, Landsat mission, and cloud coverage.\n",
43 | "\n",
44 |     "Earth science datasets are often stored on remote servers that may be too large to download locally. Therefore, in this cookbook, we will focus primarily on ingestion approaches that load small portions of data from a remote source, as needed. However, the approach for your own work will depend not only on data size and location but also the intended analysis, so in a follow-up notebook, you will see an alternative approach for generalized data access and management."
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "id": "836bb190-f0a1-4cb4-b599-386ed72a63ff",
50 | "metadata": {},
51 | "source": [
52 | "## Prerequisites\n",
53 | "\n",
54 | "| Concepts | Importance | Notes |\n",
55 | "| --- | --- | --- |\n",
56 | "| [Intro to Landsat](./0.0_Intro_Landsat.ipynb) | Necessary | Background |\n",
57 | "| [About the Microsoft Planetary Computer](https://planetarycomputer.microsoft.com/docs/overview/about) | Helpful | Background |\n",
58 | "| [pystac-client Usage](https://pystac-client.readthedocs.io/en/stable/usage.html) | Helpful | Consult as needed |\n",
59 | "| [odc.stac.load Reference](https://odc-stac.readthedocs.io/en/latest/_api/odc.stac.load.html) | Helpful | Consult as needed |\n",
60 | "| [xarray](https://foundations.projectpythia.org/core/xarray.html) | Necessary | |\n",
61 | "| [Intro to Dask Array](https://docs.dask.org/en/stable/array.html) | Helpful | |\n",
62 | "| [Panel Getting Started Guide](https://panel.holoviz.org/getting_started/build_app.html) | Helpful | |\n",
63 | "\n",
64 | "- **Time to learn**: 10 minutes"
65 | ]
66 | },
67 | {
68 | "cell_type": "markdown",
69 | "id": "70f621c3-8cdc-401b-a21c-79f31abd7bbf",
70 | "metadata": {},
71 | "source": [
72 | "## Imports"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": null,
78 | "id": "28f179f7-1dbc-4127-b284-8ebabe3eff72",
79 | "metadata": {},
80 | "outputs": [],
81 | "source": [
82 | "import odc.stac\n",
83 | "import pandas as pd\n",
84 | "import planetary_computer\n",
85 | "import pystac_client\n",
86 | "import xarray as xr\n",
87 | "from pystac.extensions.eo import EOExtension as eo\n",
88 | "\n",
89 | "# Viz\n",
90 | "import hvplot.xarray\n",
91 | "import panel as pn\n",
92 | "\n",
93 | "pn.extension()"
94 | ]
95 | },
96 | {
97 | "cell_type": "markdown",
98 | "id": "ec00475e-01fa-48f4-9323-0deaff92086b",
99 | "metadata": {},
100 | "source": [
101 | "## Open and read the root of the STAC catalog"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": null,
107 | "id": "a4007807-b3b1-40fe-8aae-a01a0d01b03b",
108 | "metadata": {},
109 | "outputs": [],
110 | "source": [
111 | "catalog = pystac_client.Client.open(\n",
112 | " \"https://planetarycomputer.microsoft.com/api/stac/v1\",\n",
113 | " modifier=planetary_computer.sign_inplace,\n",
114 | ")\n",
115 | "catalog.title"
116 | ]
117 | },
118 | {
119 | "cell_type": "markdown",
120 | "id": "3ec64999-76f4-41db-b89b-b8c55c1ba788",
121 | "metadata": {},
122 | "source": [
123 | "Microsoft Planetary Computer has a public STAC metadata but the actual data assets are in private Azure Blob Storage containers and require authentication. `pystac-client` provides a `modifier` keyword that we can use to manually sign the item. Otherwise, we'd get an error when trying to access the asset."
124 | ]
125 | },
126 | {
127 | "cell_type": "markdown",
128 | "id": "21cb2e57-3fd5-4fae-8fa3-beefc415478e",
129 | "metadata": {},
130 | "source": [
131 | "# Search for Landsat Data"
132 | ]
133 | },
134 | {
135 | "cell_type": "markdown",
136 | "id": "8444b837-8826-45f7-841e-4b053a4bea86",
137 | "metadata": {},
138 | "source": [
139 | "Let's say that an analysis we want to run requires landsat data over a specific region and from a specific time period. We can use our catalog to search for assets that fit our search criteria."
140 | ]
141 | },
142 | {
143 | "cell_type": "markdown",
144 | "id": "42052634-9c2f-4073-9080-a346703b2081",
145 | "metadata": {},
146 | "source": [
147 | "First, let's find the name of the landsat dataset. [This page](https://planetarycomputer.microsoft.com/catalog) is a nice resource for browsing the available collections, but we can also just search the catalog for 'landsat':"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": null,
153 | "id": "ba583769-59d7-49bd-b506-79c744430a42",
154 | "metadata": {},
155 | "outputs": [],
156 | "source": [
157 | "all_collections = [i.id for i in catalog.get_collections()]\n",
158 | "landsat_collections = [\n",
159 | " collection for collection in all_collections if \"landsat\" in collection\n",
160 | "]\n",
161 | "landsat_collections"
162 | ]
163 | },
164 | {
165 | "cell_type": "markdown",
166 | "id": "a15872e0-c837-48a0-833b-9815ef4262f6",
167 | "metadata": {},
168 | "source": [
169 | "We'll use the `landsat-c2-l2` dataset, which stands for Collection 2 Level-2. It contains data from several landsat missions and has better data quality than Level 1 (`landsat-c2-l1`). Microsoft Planetary Computer has descriptions of [Level 1](https://planetarycomputer.microsoft.com/dataset/landsat-c2-l1) and [Level 2](https://planetarycomputer.microsoft.com/dataset/landsat-c2-l2), but a direct and succinct comparison can be found in [this community post](https://gis.stackexchange.com/questions/439767/landsat-collections), and the information can be verified with [USGS](https://www.usgs.gov/landsat-missions/landsat-collection-2)."
170 | ]
171 | },
172 | {
173 | "cell_type": "markdown",
174 | "id": "a05e403e-92ae-46b4-b925-28c4f9628aa2",
175 | "metadata": {},
176 | "source": [
177 | "Now, let's set our search parameters. You may already know the bounding box (region/area of interest) coordinates, but if you don't, there are many useful tools like [bboxfinder.com](http://bboxfinder.com/) that can help."
178 | ]
179 | },
180 | {
181 | "cell_type": "code",
182 | "execution_count": null,
183 | "id": "77b40b78-c7f5-493d-b6a2-e5c765d2aaae",
184 | "metadata": {},
185 | "outputs": [],
186 | "source": [
187 | "bbox = [-118.89, 38.54, -118.57, 38.84] # Region over a lake in Nevada, USA\n",
188 | "datetime = \"2017-06-01/2017-09-30\" # Summer months of 2017\n",
189 | "collection = \"landsat-c2-l2\""
190 | ]
191 | },
192 | {
193 | "cell_type": "markdown",
194 | "id": "1f3f7c56-4cfb-4764-b7ee-c01f58667d13",
195 | "metadata": {},
196 | "source": [
197 | "We can also specify other parameters in the query, such as a specific landsat mission and the max percent of cloud cover:"
198 | ]
199 | },
200 | {
201 | "cell_type": "code",
202 | "execution_count": null,
203 | "id": "ce29099d-1a57-447b-83dc-957f3b9d0096",
204 | "metadata": {},
205 | "outputs": [],
206 | "source": [
207 | "platform = \"landsat-8\"\n",
208 | "cloudy_less_than = 1 # percent"
209 | ]
210 | },
211 | {
212 | "cell_type": "markdown",
213 | "id": "e44b0124-aaeb-461a-a48c-9e576da3bc54",
214 | "metadata": {},
215 | "source": [
216 | "Now we run the search and list the results:"
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": null,
222 | "id": "4d40e83c-d293-43d6-8f54-75ee67f66b99",
223 | "metadata": {},
224 | "outputs": [],
225 | "source": [
226 | "search = catalog.search(\n",
227 | " collections=[\"landsat-c2-l2\"],\n",
228 | " bbox=bbox,\n",
229 | " datetime=datetime,\n",
230 | " query={\"eo:cloud_cover\": {\"lt\": cloudy_less_than}, \"platform\": {\"in\": [platform]}},\n",
231 | ")\n",
232 | "items = search.item_collection()\n",
233 | "print(f\"Returned {len(items)} Items:\")\n",
234 | "item_id = {(i, item.id): i for i, item in enumerate(items)}\n",
235 | "item_id"
236 | ]
237 | },
238 | {
239 | "cell_type": "markdown",
240 | "id": "c246504f-f36c-4e9c-a740-8041a4d13612",
241 | "metadata": {},
242 | "source": [
243 |     "It looks like there were three image stacks taken by Landsat 8 over this spatial region during the summer months of 2017 that have less than 1 percent cloud cover."
244 | ]
245 | },
246 | {
247 | "cell_type": "markdown",
248 | "id": "1ec703af-b574-42eb-b97c-756fd6f8f909",
249 | "metadata": {},
250 | "source": [
251 | "## Preview Results and Select a Dataset"
252 | ]
253 | },
254 | {
255 | "cell_type": "markdown",
256 | "id": "478b863e-e32c-499c-8a39-79c66ca1fb33",
257 | "metadata": {},
258 | "source": [
259 | "Before loading one of the available image stacks, it would be useful to get a visual check of the results. Many datasets have a rendered preview or thumbnail image that can be accessed without having to load the full resolution data.\n",
260 | "\n",
261 |     "We can create a simple interactive application using the [Panel](https://panel.holoviz.org/index.html) library to access and display rendered PNG previews of our search results. Note that these pre-rendered images are of large tiles that span beyond our bounding box of interest. In the next steps, we will only be loading in a small area around the lake."
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": null,
267 | "id": "1f8e05bf-1c36-446b-8bc2-58bbecf6b68a",
268 | "metadata": {},
269 | "outputs": [],
270 | "source": [
271 | "item_sel = pn.widgets.Select(value=1, options=item_id, name=\"item\")\n",
272 | "\n",
273 | "def get_preview(i):\n",
274 | " return pn.panel(items[i].assets[\"rendered_preview\"].href, height=300)\n",
275 | "\n",
276 | "\n",
277 | "pn.Row(item_sel, pn.bind(get_preview, item_sel))"
278 | ]
279 | },
280 | {
281 | "cell_type": "code",
282 | "execution_count": null,
283 | "id": "18f2d557-f274-4076-b328-39a06dc066c9",
284 | "metadata": {},
285 | "outputs": [],
286 | "source": [
287 | "selected_item = items[1]\n",
288 | "selected_item"
289 | ]
290 | },
291 | {
292 | "cell_type": "markdown",
293 | "id": "311bb64c-230c-4a09-acd0-1c6d48dc927e",
294 | "metadata": {},
295 | "source": [
296 | "## Access the Data"
297 | ]
298 | },
299 | {
300 | "cell_type": "markdown",
301 | "id": "7e253896-b4cb-4111-8094-8572e2c6d2ff",
302 | "metadata": {},
303 | "source": [
304 |     "Now that we have selected a dataset from our catalog, we can proceed to access the data. We want to be very selective about the data that we read and when we read it because the amount of downloaded data can quickly get out of hand. Therefore, let's select only a subset of images.\n",
305 | "\n",
306 | "First, we'll preview the different image assets (or [Bands](https://github.com/stac-extensions/eo)) available in the Landsat item."
307 | ]
308 | },
309 | {
310 | "cell_type": "code",
311 | "execution_count": null,
312 | "id": "d49a164f-3d5d-4a7d-a5cb-029d6bede1db",
313 | "metadata": {},
314 | "outputs": [],
315 | "source": [
316 | "assets = []\n",
317 | "for _, asset in selected_item.assets.items():\n",
318 | " try:\n",
319 | " assets.append(asset.extra_fields[\"eo:bands\"][0])\n",
320 | " except:\n",
321 | " pass\n",
322 | "\n",
323 | "cols_ordered = [\n",
324 | " \"common_name\",\n",
325 | " \"description\",\n",
326 | " \"name\",\n",
327 | " \"center_wavelength\",\n",
328 | " \"full_width_half_max\",\n",
329 | "]\n",
330 | "bands = pd.DataFrame.from_dict(assets)[cols_ordered]\n",
331 | "bands"
332 | ]
333 | },
334 | {
335 | "cell_type": "markdown",
336 | "id": "f8cbc4e1-7754-47b2-8477-e9ed3fef00f7",
337 | "metadata": {},
338 | "source": [
339 | "Then we will select a few bands (images) of interest:"
340 | ]
341 | },
342 | {
343 | "cell_type": "code",
344 | "execution_count": null,
345 | "id": "1831d5f4-f068-476e-80fe-de32a851d12a",
346 | "metadata": {},
347 | "outputs": [],
348 | "source": [
349 | "bands_of_interest = [\"red\", \"green\", \"blue\"]"
350 | ]
351 | },
352 | {
353 | "cell_type": "markdown",
354 | "id": "764ecd52-b893-4aed-b021-02d25b257c12",
355 | "metadata": {},
356 | "source": [
357 | "Finally, we lazily load the selected data. We will use the package called `odc` which allows us to load only a specific region of interest (bounding box or 'bbox') and specific bands (images) of interest. We will also use the `chunks` argument to load the data as dask arrays; this will load the metadata now and delay the loading until we actually use the data, or until we force the data to be loaded by using `.compute()`."
358 | ]
359 | },
360 | {
361 | "cell_type": "code",
362 | "execution_count": null,
363 | "id": "eca1a4a4-c0bc-4c81-960f-fb3550048a33",
364 | "metadata": {},
365 | "outputs": [],
366 | "source": [
367 | "ds = odc.stac.stac_load(\n",
368 | " [selected_item],\n",
369 | " bands=bands_of_interest,\n",
370 | " bbox=bbox,\n",
371 | " chunks={}, # <-- use Dask\n",
372 | ").isel(time=0)\n",
373 | "ds"
374 | ]
375 | },
376 | {
377 | "cell_type": "markdown",
378 | "id": "1e410e59-8f71-4b0c-b2db-177b7ce00278",
379 | "metadata": {},
380 | "source": [
381 | "Let's combine the bands of the dataset into a single DataArray that has the band names as coordinates of a new 'band' dimension, and also call `.compute()` to finally load the data."
382 | ]
383 | },
384 | {
385 | "cell_type": "code",
386 | "execution_count": null,
387 | "id": "e63d16a2-a36d-4967-808c-ffcb1479088b",
388 | "metadata": {
389 | "tags": []
390 | },
391 | "outputs": [],
392 | "source": [
393 | "da = ds.to_array(dim=\"band\").compute()\n",
394 | "da"
395 | ]
396 | },
397 | {
398 | "cell_type": "markdown",
399 | "id": "4ac0f3c1-3ff8-46c5-aa51-b06c73d96c66",
400 | "metadata": {},
401 | "source": [
402 | "## Visualize the data"
403 | ]
404 | },
405 | {
406 | "cell_type": "markdown",
407 | "id": "ce573b8c-815d-439b-a49f-fa258d0e07a7",
408 | "metadata": {},
409 | "source": [
410 | "Often, data ingestion involves quickly visualizing your raw data to get a sense that things are proceeding accordingly. As we have created an array with red, blue, and green bands, we can quickly display a natural color image of the lake using the `.plot.imshow()` function of `xarray`. We'll use the `robust=True` argument because the data values are outside the range of typical RGB images."
411 | ]
412 | },
413 | {
414 | "cell_type": "code",
415 | "execution_count": null,
416 | "id": "30c9eec0-6975-4b1e-9a7c-318b98c332b2",
417 | "metadata": {
418 | "tags": []
419 | },
420 | "outputs": [],
421 | "source": [
422 | "da.plot.imshow(robust=True, size=3)"
423 | ]
424 | },
425 | {
426 | "cell_type": "markdown",
427 | "id": "c188d2c2-9eb9-4030-8fd0-54f88bb252c3",
428 | "metadata": {},
429 | "source": [
430 |     "Now, let's use `hvplot` to provide an interactive visualization of the individual bands in our array."
431 | ]
432 | },
433 | {
434 | "cell_type": "code",
435 | "execution_count": null,
436 | "id": "31d88fa3-dbb9-4c14-b086-51ee20b52602",
437 | "metadata": {},
438 | "outputs": [],
439 | "source": [
440 | "ds"
441 | ]
442 | },
443 | {
444 | "cell_type": "code",
445 | "execution_count": null,
446 | "id": "c0cc8aa2-2878-4d2e-8bf3-6d9f1fcf9a58",
447 | "metadata": {},
448 | "outputs": [],
449 | "source": [
450 | "da.hvplot.image(x=\"x\", y=\"y\", cmap=\"viridis\", aspect=1)"
451 | ]
452 | },
453 | {
454 | "cell_type": "markdown",
455 | "id": "a9724bab-42c1-4275-a6dc-562c16b907f7",
456 | "metadata": {},
457 | "source": [
458 |     "Let's plot the bands as separate columns by specifying a dimension to expand with `col='band'`. We can also set `rasterize=True` to use [Datashader](https://datashader.org/) (another HoloViz tool) to render large data into a 2D histogram, where every array cell counts the data points falling into that pixel, as set by the resolution of your screen. This is especially important for large and high resolution images that would otherwise cause issues when attempting to render in a browser."
459 | ]
460 | },
461 | {
462 | "cell_type": "code",
463 | "execution_count": null,
464 | "id": "2ef8c6d8-3717-44ca-8096-216e00116d45",
465 | "metadata": {},
466 | "outputs": [],
467 | "source": [
468 | "da.hvplot.image(\n",
469 | " x=\"x\", y=\"y\", col=\"band\", cmap=\"viridis\", xaxis=False, yaxis=False, colorbar=False, rasterize=True\n",
470 | ")"
471 | ]
472 | },
473 | {
474 | "cell_type": "markdown",
475 | "id": "651f855c-0b9b-49d8-a851-5717a08532e7",
476 | "metadata": {},
477 | "source": [
478 |     "Select the zoom tool and zoom in on one of the plots to see that the images are all automatically linked!"
479 | ]
480 | },
481 | {
482 | "cell_type": "markdown",
483 | "id": "bbb72422-b9d6-4eee-a070-7f5ae5bca468",
484 | "metadata": {},
485 | "source": [
486 | "## Retain Attributes"
487 | ]
488 | },
489 | {
490 | "cell_type": "markdown",
491 | "id": "32708bb1-adf1-423c-b76b-d2c2e0ced5cd",
492 | "metadata": {},
493 | "source": [
494 | "When working with many image arrays, it's critical to retain the data properties as xarray attributes:"
495 | ]
496 | },
497 | {
498 | "cell_type": "code",
499 | "execution_count": null,
500 | "id": "1c746f5e-c68b-40ac-9672-8c184c6b2d30",
501 | "metadata": {},
502 | "outputs": [],
503 | "source": [
504 | "da.attrs = selected_item.properties\n",
505 | "da"
506 | ]
507 | },
508 | {
509 | "cell_type": "markdown",
510 | "id": "146c063b-563a-46fb-b964-676adb4a09c2",
511 | "metadata": {
512 | "jupyter": {
513 | "outputs_hidden": true
514 | },
515 | "tags": []
516 | },
517 | "source": [
518 | "Notice that you can now expand the `Attributes: ` dropdown to see the properties of this data."
519 | ]
520 | },
521 | {
522 | "cell_type": "markdown",
523 | "id": "63747da5-e126-4104-a27c-0501da98121b",
524 | "metadata": {},
525 | "source": [
526 | "## Set the `crs` attribute"
527 | ]
528 | },
529 | {
530 | "cell_type": "markdown",
531 | "id": "8f315ec7-c868-4946-afe8-0a9fb5632aea",
532 | "metadata": {},
533 | "source": [
534 |     "As the data is in 'meter' units from a reference point, we can plot in commonly used longitude, latitude coordinates with `.hvplot(geo=True)` if our array has a valid coordinate reference system (CRS) attribute. This value is provided from Microsoft Planetary Computer as the `proj:epsg` property, so we just need to copy it to a new attribute `crs` so that hvPlot can automatically find it, without us having to further specify anything in our plotting code.\n",
535 | "\n",
536 | "Note, this CRS is referenced by an EPSG code that can be accessed from the metadata of our selected catalog search result. We can see more about this dataset's specific code at [EPSG.io/32611](https://epsg.io/32611). You can also read more about EPSG codes in general in this [Coordinate Reference Systems: EPSG codes](https://pygis.io/docs/d_understand_crs_codes.html#epsg-codes) online book chapter. "
537 | ]
538 | },
539 | {
540 | "cell_type": "code",
541 | "execution_count": null,
542 | "id": "cfb957fb-94e7-458f-a4dd-cff04d6331ec",
543 | "metadata": {},
544 | "outputs": [],
545 | "source": [
546 | "da.attrs[\"crs\"] = f\"epsg:{selected_item.properties['proj:epsg']}\"\n",
547 | "da.attrs[\"crs\"]"
548 | ]
549 | },
550 | {
551 | "cell_type": "markdown",
552 | "id": "36637826-b5de-41bf-86fc-5d8d659a8217",
553 | "metadata": {},
554 | "source": [
555 | "Now we can use `.hvplot(geo=True)` to plot in longitude and latitude coordinates. Informing `hvPlot` that this is geographic data also allows us to overlay data on aligned geographic tiles using the `tiles` parameter."
556 | ]
557 | },
558 | {
559 | "cell_type": "code",
560 | "execution_count": null,
561 | "id": "87dbdd45-6a65-48c1-b0b0-559f31cb873f",
562 | "metadata": {},
563 | "outputs": [],
564 | "source": [
565 | "da.hvplot.image(\n",
566 | " x=\"x\", y=\"y\", cmap=\"viridis\", geo=True, alpha=.9, tiles=\"ESRI\", xlabel=\"Longitude\", ylabel=\"Latitude\", colorbar=False, aspect=1,\n",
567 | ")"
568 | ]
569 | },
570 | {
571 | "cell_type": "markdown",
572 | "id": "54214e63-420e-436c-aa95-c806c74c4c02",
573 | "metadata": {},
574 | "source": [
575 | "___"
576 | ]
577 | },
578 | {
579 | "cell_type": "markdown",
580 | "id": "356f1670-ffea-4e9c-a7df-632864aecff0",
581 | "metadata": {},
582 | "source": [
583 | "## Summary\n",
584 |     "The data access approach should adapt to features of the data and your intended analysis. As Landsat data is large and multidimensional, a good approach is to use [Microsoft Planetary Computer](https://planetarycomputer.microsoft.com/docs/overview/about), [pystac-client](https://pystac-client.readthedocs.io/en/stable/index.html), and [odc-stac](https://odc-stac.readthedocs.io/en/latest/index.html) together for searching the metadata catalog and lazily loading specific data chunks. Once you have accessed data, visualize it with hvPlot to ensure that it matches your expectations.\n",
585 | "\n",
586 | "### What's next?\n",
587 | "Before we proceed to workflow examples, we can explore an alternate way of accessing data using generalized tooling."
588 | ]
589 | },
590 | {
591 | "cell_type": "markdown",
592 | "id": "d200eed4-b3be-4a09-ae4a-671fa6f22e23",
593 | "metadata": {},
594 | "source": [
595 | "## Resources and References\n",
596 | "- Authored by Demetris Roumis circa Jan, 2023\n",
597 | "- Guidance for parts of this notebook was provided by Microsoft in ['Reading Data from the STAC API'](https://planetarycomputer.microsoft.com/docs/quickstarts/reading-stac/)\n",
598 | "- The image used in the banner is from an announcement about PySTAC from Azavea"
599 | ]
600 | },
601 | {
602 | "cell_type": "code",
603 | "execution_count": null,
604 | "id": "22affb0b-715d-4520-9d5b-2631bf1105c3",
605 | "metadata": {},
606 | "outputs": [],
607 | "source": []
608 | }
609 | ],
610 | "metadata": {
611 | "kernelspec": {
612 | "display_name": "Python 3 (ipykernel)",
613 | "language": "python",
614 | "name": "python3"
615 | },
616 | "language_info": {
617 | "codemirror_mode": {
618 | "name": "ipython",
619 | "version": 3
620 | },
621 | "file_extension": ".py",
622 | "mimetype": "text/x-python",
623 | "name": "python",
624 | "nbconvert_exporter": "python",
625 | "pygments_lexer": "ipython3",
626 | "version": "3.10.13"
627 | },
628 | "vscode": {
629 | "interpreter": {
630 | "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
631 | }
632 | }
633 | },
634 | "nbformat": 4,
635 | "nbformat_minor": 5
636 | }
637 |
--------------------------------------------------------------------------------
/notebooks/1.1_Data_Ingestion-General.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Data Ingestion - General Purpose Tooling"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | " "
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "---"
22 | ]
23 | },
24 | {
25 | "cell_type": "markdown",
26 | "metadata": {},
27 | "source": [
28 | "## Overview\n",
29 | "\n",
30 | "If the specialized geospatial tools discussed in the previous notebook suit your needs, feel free to proceed to explore a workflow example, such as [Spectral Clustering](2.0_Spectral_Clustering_PC.ipynb). However, if you're seeking a tool that is adaptable across a wider range of data types and sources, welcome to this introduction to [Intake V2](https://intake.readthedocs.io), a general-purpose data ingestion and management library.\n",
31 | "\n",
32 | "Intake is a high-level library designed for data ingestion and management. While the [geospatial-specific tooling](1.0_Data_Ingestion-Geospatial.ipynb) approach is optimized for satellite data, Intake offers a broader and potentially more flexible approach for multimodal data workflows, characterized by:\n",
33 | "\n",
34 | "- **Unified Interface**: Abstracts the details of data sources, enabling users to interact with a consistent API regardless of the data's underlying format.\n",
35 | "- **Dynamic and Shareable Catalogs**: Facilitates the creation and sharing of data catalogs that can be version-controlled, updated, and maintained.\n",
36 | "- **Extensible**: Facilitates the addition of new data sources and formats through its plugin system.\n",
37 | "\n",
38 | "In the following sections, we will guide you through an introduction to various Intake functionalities that simplify data access and enhance both modularity and reproducibility in geospatial workflows.\n"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {},
44 | "source": [
45 | "## Prerequisites\n",
46 | "\n",
47 | "| Concepts | Importance | Notes |\n",
48 | "| --- | --- | --- |\n",
49 | "| [Intro to Landsat](./0.0_Intro_Landsat.ipynb) | Necessary | Background |\n",
50 | "| [Data Ingestion - Geospatial-Specific Tooling](1.0_Data_Ingestion-Geospatial.ipynb) | Helpful | |\n",
51 | "| [Pandas Cookbook](https://foundations.projectpythia.org/core/pandas.html) | Helpful | |\n",
52 | "| [xarray Cookbook](https://foundations.projectpythia.org/core/xarray.html) | Necessary | |\n",
53 | "| [Intake Quickstart](https://intake.readthedocs.io/en/latest/index.html) | Helpful | |\n",
54 | "|[Intake Cookbook](https://projectpythia.org/intake-cookbook/README.html)| Necessary | |\n",
55 | "\n",
56 | "- **Time to learn**: 20 minutes"
57 | ]
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {},
62 | "source": [
63 | "---"
64 | ]
65 | },
66 | {
67 | "cell_type": "markdown",
68 | "metadata": {},
69 | "source": [
70 | "## Imports"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": null,
76 | "metadata": {
77 | "tags": []
78 | },
79 | "outputs": [],
80 | "source": [
81 | "import intake\n",
82 | "import planetary_computer\n",
83 | "from pprint import pprint\n",
84 | "\n",
85 | "# Viz\n",
86 | "import hvplot.xarray\n",
87 | "import panel as pn\n",
88 | "\n",
89 | "pn.extension()"
90 | ]
91 | },
92 | {
93 | "cell_type": "markdown",
94 | "metadata": {},
95 | "source": [
96 | "## Connecting to Data Sources"
97 | ]
98 | },
99 | {
100 | "cell_type": "markdown",
101 | "metadata": {},
102 | "source": [
103 | "To get started, we need to provide a STAC URL (or any other data source URL) to intake, and we can ask intake to recommend some suitable datatypes."
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": null,
109 | "metadata": {
110 | "tags": []
111 | },
112 | "outputs": [],
113 | "source": [
114 | "url = \"https://planetarycomputer.microsoft.com/api/stac/v1\"\n",
115 | "data_types = intake.readers.datatypes.recommend(url)\n",
116 | "pprint(data_types)"
117 | ]
118 | },
119 | {
120 | "cell_type": "markdown",
121 | "metadata": {},
122 | "source": [
123 | "## Selecting the Appropriate Data Type\n",
124 | "After identifying the possible data types, we choose the one that best suits our needs. For handling STAC formatted JSON data from our URL, we will proceed with `STACJSON`."
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": null,
130 | "metadata": {},
131 | "outputs": [],
132 | "source": [
133 | "data_type = intake.datatypes.STACJSON(url)\n",
134 | "data_type"
135 | ]
136 | },
137 | {
138 | "cell_type": "markdown",
139 | "metadata": {},
140 | "source": [
141 | "This object now represents the specific data type we will work with, allowing us to streamline subsequent data operations."
142 | ]
143 | },
144 | {
145 | "cell_type": "markdown",
146 | "metadata": {},
147 | "source": [
148 | "## Initializing Data Readers\n",
149 | "\n",
150 | "With the `STACJSON` data type specified, we explore available methods to read the data."
151 | ]
152 | },
153 | {
154 | "cell_type": "code",
155 | "execution_count": null,
156 | "metadata": {},
157 | "outputs": [],
158 | "source": [
159 | "readers = data_type.possible_readers\n",
160 | "pprint(readers)"
161 | ]
162 | },
163 | {
164 | "cell_type": "markdown",
165 | "metadata": {},
166 | "source": [
167 | "This output presents us with options that can interpret the `STACJSON` data format effectively. The `StacCatalogReader` is probably the most suitable for our use case. We can use it to read the STAC catalog and explore the available contents."
168 | ]
169 | },
170 | {
171 | "cell_type": "markdown",
172 | "metadata": {},
173 | "source": [
174 | "## Reading the Catalog\n",
175 | "Next, we can access the data catalog through our reader."
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "metadata": {},
182 | "outputs": [],
183 | "source": [
184 | "reader = intake.catalogs.StacCatalogReader(\n",
185 | " data_type, signer=planetary_computer.sign_inplace\n",
186 | ")\n",
187 | "reader"
188 | ]
189 | },
190 | {
191 | "cell_type": "markdown",
192 | "metadata": {},
193 | "source": [
194 | "This reader is now configured to handle interactions with the data catalog."
195 | ]
196 | },
197 | {
198 | "cell_type": "markdown",
199 | "metadata": {},
200 | "source": [
201 | "## List Catalog Contents\n",
202 | "Once the catalog is accessible, we `read()` it and then collect each dataset's `description` to identify datasets of interest. For our purposes, we will just print the entries that include the word `'landsat'`."
203 | ]
204 | },
205 | {
206 | "cell_type": "code",
207 | "execution_count": null,
208 | "metadata": {
209 | "tags": []
210 | },
211 | "outputs": [],
212 | "source": [
213 | "stac_cat = reader.read()\n",
214 | "\n",
215 | "description = {}\n",
216 | "for data_description in stac_cat.data.values():\n",
217 | " data = data_description.kwargs[\"data\"]\n",
218 | " description[data[\"id\"]] = data[\"description\"]"
219 | ]
220 | },
221 | {
222 | "cell_type": "code",
223 | "execution_count": null,
224 | "metadata": {
225 | "tags": []
226 | },
227 | "outputs": [],
228 | "source": [
229 | "# Print only keys that include the word 'landsat'\n",
230 | "pprint([key for key in description.keys() if 'landsat' in key.lower()])"
231 | ]
232 | },
233 | {
234 | "cell_type": "markdown",
235 | "metadata": {},
236 | "source": [
237 | "## Detailed Dataset Examination"
238 | ]
239 | },
240 | {
241 | "cell_type": "markdown",
242 | "metadata": {},
243 | "source": [
244 | "By examining specific datasets more closely, we understand their content and relevance to our project goals. We can now print the description of the desired landsat IDs."
245 | ]
246 | },
247 | {
248 | "cell_type": "code",
249 | "execution_count": null,
250 | "metadata": {
251 | "scrolled": true
252 | },
253 | "outputs": [],
254 | "source": [
255 | "print(\"1:\", description[\"landsat-c2-l1\"])\n",
256 | "print('-------------------------------\\n')\n",
257 | "print(\"2:\", description[\"landsat-c2-l2\"])"
258 | ]
259 | },
260 | {
261 | "cell_type": "markdown",
262 | "metadata": {},
263 | "source": [
264 | "## Selecting and Accessing Data\n",
265 | "\n",
266 | "We want `\"landsat-c2-l2\"`, so with a chosen dataset, we can now access it directly and view the `metadata` specific to this dataset - key details that are important for analysis and interpretation. Since the output is long, we'll utilize the HoloViz Panel library to wrap the output in a scrollable element."
267 | ]
268 | },
269 | {
270 | "cell_type": "code",
271 | "execution_count": null,
272 | "metadata": {
273 | "tags": []
274 | },
275 | "outputs": [],
276 | "source": [
277 | "landsat_reader = stac_cat[\"landsat-c2-l2\"]\n",
278 | "landsat_metadata = landsat_reader.read().metadata\n",
279 | "\n",
280 | "# View extensive metadata in scrollable block\n",
281 | "json_pane = pn.pane.JSON(landsat_metadata, name='Metadata', max_height=400, sizing_mode='stretch_width', depth=-1, theme='light')\n",
282 | "scrollable_output = pn.Column(json_pane, height=400, sizing_mode='stretch_width', scroll=True, styles={'background': 'lightgrey'})\n",
283 | "scrollable_output"
284 | ]
285 | },
286 | {
287 | "cell_type": "markdown",
288 | "metadata": {},
289 | "source": [
290 | "## Visual Preview\n",
291 | "\n",
292 | "To get a visual preview of the dataset, particularly to check its quality and relevance, we use the following commands:"
293 | ]
294 | },
295 | {
296 | "cell_type": "code",
297 | "execution_count": null,
298 | "metadata": {
299 | "scrolled": true
300 | },
301 | "outputs": [],
302 | "source": [
303 | "landsat_reader[\"thumbnail\"].read()"
304 | ]
305 | },
306 | {
307 | "cell_type": "markdown",
308 | "metadata": {},
309 | "source": [
310 | "## Accessing Geospatial Data Items\n",
311 | "\n",
312 | "Once we have selected the appropriate dataset, the next step is to access the specific data items. These items typically represent individual data files or collections that are part of the dataset.\n",
313 | "\n",
314 | "The following code retrieves a handle to the 'geoparquet-items' from the Landsat dataset, which are optimized for efficient geospatial operations and queries."
315 | ]
316 | },
317 | {
318 | "cell_type": "code",
319 | "execution_count": null,
320 | "metadata": {},
321 | "outputs": [],
322 | "source": [
323 | "landsat_items = landsat_reader[\"geoparquet-items\"]\n",
324 | "landsat_items"
325 | ]
326 | },
327 | {
328 | "cell_type": "markdown",
329 | "metadata": {},
330 | "source": [
331 | "## Converting Data for Analysis\n",
332 | "\n",
333 | "To facilitate analysis, the following code selects the last few entries (`tail`) of the dataset, converts them into a GeoDataFrame, and reads it back into a STAC catalog format. This format is particularly suited for geospatial data and necessary for compatibility with geospatial analysis tools and libraries like Geopandas."
334 | ]
335 | },
336 | {
337 | "cell_type": "code",
338 | "execution_count": null,
339 | "metadata": {
340 | "tags": []
341 | },
342 | "outputs": [],
343 | "source": [
344 | "cat = landsat_items.tail(output_instance=\"geopandas:GeoDataFrame\").GeoDataFrameToSTACCatalog.read()"
345 | ]
346 | },
347 | {
348 | "cell_type": "markdown",
349 | "metadata": {},
350 | "source": [
351 | "## Exploring Data Collections\n",
352 | "\n",
353 | "After conversion, we explore the structure of the data collection. Each \"item\" in this collection corresponds to a set of assets, providing a structured way to access multiple related data files. We'll simply print the structure of the catalog to understand the available items and their organization.\n"
354 | ]
355 | },
356 | {
357 | "cell_type": "code",
358 | "execution_count": null,
359 | "metadata": {},
360 | "outputs": [],
361 | "source": [
362 | "cat"
363 | ]
364 | },
365 | {
366 | "cell_type": "markdown",
367 | "metadata": {},
368 | "source": [
369 | "## Accessing Sub-Collections\n",
370 | "\n",
371 | "To dive deeper into the data, we access a specific sub-collection based on its key. This allows us to focus on a particular geographic area or time period. We'll select the first item in the catalog for now."
372 | ]
373 | },
374 | {
375 | "cell_type": "code",
376 | "execution_count": null,
377 | "metadata": {},
378 | "outputs": [],
379 | "source": [
380 | "item_key = list(cat.entries.keys())[0]\n",
381 | "subcat = cat[item_key].read()\n",
382 | "subcat"
383 | ]
384 | },
385 | {
386 | "cell_type": "markdown",
387 | "metadata": {},
388 | "source": [
389 | "## Reading Specific Data Bands\n",
390 | "\n",
391 | "For detailed analysis, especially in remote sensing, accessing specific spectral bands is crucial. Here, we read the red spectral band, which is often used in vegetation analysis and other remote sensing applications."
392 | ]
393 | },
394 | {
395 | "cell_type": "code",
396 | "execution_count": null,
397 | "metadata": {},
398 | "outputs": [],
399 | "source": [
400 | "subcat.red.read()"
401 | ]
402 | },
403 | {
404 | "cell_type": "markdown",
405 | "metadata": {},
406 | "source": [
407 | "## Preparing for Multiband Analysis\n",
408 | "To analyze true color imagery, we need to stack multiple spectral bands. Here, we prepare for this by setting up a band-stacking operation. Note, re-signing might be necessary at this point."
409 | ]
410 | },
411 | {
412 | "cell_type": "code",
413 | "execution_count": null,
414 | "metadata": {},
415 | "outputs": [],
416 | "source": [
417 | "catbands = cat[item_key].to_reader(reader=\"StackBands\", bands=[\"red\", \"green\", \"blue\"], signer=planetary_computer.sign_inplace)"
418 | ]
419 | },
420 | {
421 | "cell_type": "markdown",
422 | "metadata": {},
423 | "source": [
424 | "## Loading and Visualizing True Color Imagery\n",
425 | "\n",
426 | "After setting up the band-stacking, we read the multiband data and prepare it for visualization."
427 | ]
428 | },
429 | {
430 | "cell_type": "code",
431 | "execution_count": null,
432 | "metadata": {},
433 | "outputs": [],
434 | "source": [
435 | "data = catbands.read(dim=\"band\")\n",
436 | "data"
437 | ]
438 | },
439 | {
440 | "cell_type": "markdown",
441 | "metadata": {},
442 | "source": [
443 | "## Visualizing Data\n",
444 | "Finally, we visualize the true color imagery. This visualization helps in assessing the quality of the data and the appropriateness of the bands used."
445 | ]
446 | },
447 | {
448 | "cell_type": "code",
449 | "execution_count": null,
450 | "metadata": {},
451 | "outputs": [],
452 | "source": [
453 | "data.plot.imshow(robust=True, figsize=(10, 10))"
454 | ]
455 | },
456 | {
457 | "cell_type": "markdown",
458 | "metadata": {},
459 | "source": [
460 | "## Summary\n",
461 | "As earth science data becomes integrated with other types of data, a powerful approach is to utilize a general purpose set of tools, including Intake and Xarray. Once you have accessed data, visualize it with hvPlot to ensure that it matches your expectations.\n",
462 | "\n",
463 | "\n",
464 | "\n",
465 | "### What's next?\n",
466 |     "Now that we know how to access the data, it’s time to proceed to analysis, where we will explore some simple machine learning approaches.\n",
467 | "\n",
468 | "\n",
469 | "## Resources and references\n",
470 | "Authored by Demetris Roumis and Andrew Huang circa April, 2024, with guidance from [Martin Durant](https://github.com/martindurant).\n",
471 | "\n",
472 | "The banner image is a mashup of a Landsat 8 image from NASA and the Intake logo.\n"
473 | ]
474 | }
475 | ],
476 | "metadata": {
477 | "kernelspec": {
478 | "display_name": "Python 3 (ipykernel)",
479 | "language": "python",
480 | "name": "python3"
481 | },
482 | "language_info": {
483 | "codemirror_mode": {
484 | "name": "ipython",
485 | "version": 3
486 | },
487 | "file_extension": ".py",
488 | "mimetype": "text/x-python",
489 | "name": "python",
490 | "nbconvert_exporter": "python",
491 | "pygments_lexer": "ipython3",
492 | "version": "3.10.13"
493 | },
494 | "nbdime-conflicts": {
495 | "local_diff": [
496 | {
497 | "diff": [
498 | {
499 | "diff": [
500 | {
501 | "key": 0,
502 | "op": "addrange",
503 | "valuelist": [
504 | "Python 3"
505 | ]
506 | },
507 | {
508 | "key": 0,
509 | "length": 1,
510 | "op": "removerange"
511 | }
512 | ],
513 | "key": "display_name",
514 | "op": "patch"
515 | }
516 | ],
517 | "key": "kernelspec",
518 | "op": "patch"
519 | }
520 | ],
521 | "remote_diff": [
522 | {
523 | "diff": [
524 | {
525 | "diff": [
526 | {
527 | "key": 0,
528 | "op": "addrange",
529 | "valuelist": [
530 | "Python3"
531 | ]
532 | },
533 | {
534 | "key": 0,
535 | "length": 1,
536 | "op": "removerange"
537 | }
538 | ],
539 | "key": "display_name",
540 | "op": "patch"
541 | }
542 | ],
543 | "key": "kernelspec",
544 | "op": "patch"
545 | }
546 | ]
547 | },
548 | "toc-autonumbering": false,
549 | "vscode": {
550 | "interpreter": {
551 | "hash": "d2ed0a8e3e051554a0b51e3917f81e884b169a97835ad70210b3681eb3cb39c7"
552 | }
553 | }
554 | },
555 | "nbformat": 4,
556 | "nbformat_minor": 4
557 | }
558 |
--------------------------------------------------------------------------------
/notebooks/2.0_Spectral_Clustering_PC.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "4257bf18-ba7c-42a6-81e3-2b5e48b3bc8b",
6 | "metadata": {},
7 | "source": [
8 | "# Spectral Clustering"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "id": "95d6fc25-545a-4bcc-97ff-a17df7f6082e",
14 | "metadata": {},
15 | "source": [
16 | ""
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "id": "796926b4-a134-4daf-ae19-46978acf5e89",
22 | "metadata": {},
23 | "source": [
24 | "---"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "id": "9e7946b7-d7c6-4d60-bfb2-474188cfeb54",
30 | "metadata": {},
31 | "source": [
32 | "## Overview"
33 | ]
34 | },
35 | {
36 | "cell_type": "markdown",
37 | "id": "09b5645f-ea45-4f49-9811-18bff4192034",
38 | "metadata": {},
39 | "source": [
40 | "The current notebook will demonstrate a simplified machine learning approach to observe the change in a lake water's extent across time. In order to identify the water, we can use spectral clustering to classify each grid cell into a category based on the similarity of the combined set of pixels across [wavelength-bands](./0.0_Intro_Landsat) in our image stacks.\n",
41 | "\n",
42 | "Our example approach uses a version of spectral clustering from [dask_ml](http://ml.dask.org/clustering.html#spectral-clustering) that is a scalable equivalent of what is available in [scikit-learn](https://scikit-learn.org/stable/modules/clustering.html#spectral-clustering). We will begin this approach with a single image stack and then conduct a direct comparison on the results from different time points.\n",
43 | "\n",
44 | "This workflow uses data from Microsoft Planetary Computer but it can be adapted to work with any data ingestion approach from this cookbook."
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "id": "f2d4e405-956d-4400-86f5-219664ef7b79",
50 | "metadata": {},
51 | "source": [
52 | "## Prerequisites\n",
53 | "\n",
54 | "| Concepts | Importance | Notes |\n",
55 | "| --- | --- | --- |\n",
56 | "| [Data Ingestion - Geospatial-Specific Tooling](1.0_Data_Ingestion-Geospatial.ipynb) | Necessary | |\n",
57 |     "| [scikit-learn](https://scikit-learn.org/stable/modules/clustering.html#spectral-clustering) | Helpful | Spectral clustering |\n",
58 | "| [dask_ml](http://ml.dask.org/clustering.html#spectral-clustering) | Helpful | Spectral clustering at scale | \n",
59 | "\n",
60 | "\n",
61 | "- **Time to learn**: 20 minutes."
62 | ]
63 | },
64 | {
65 | "cell_type": "markdown",
66 | "id": "dd99cc55-fb0f-4bdf-bc7a-82044188a2f2",
67 | "metadata": {},
68 | "source": [
69 | "## Imports"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": null,
75 | "id": "e106b4ce-e682-4a71-817b-966bbf926989",
76 | "metadata": {},
77 | "outputs": [],
78 | "source": [
79 | "import numpy as np\n",
80 | "import odc.stac\n",
81 | "import pandas as pd\n",
82 | "import planetary_computer\n",
83 | "import pystac_client\n",
84 | "import xarray as xr\n",
85 | "from dask.distributed import Client\n",
86 | "from pystac.extensions.eo import EOExtension as eo\n",
87 | "from dask_ml.cluster import SpectralClustering\n",
88 | "import pyproj\n",
89 | "\n",
90 | "# Viz\n",
91 | "import hvplot.xarray"
92 | ]
93 | },
94 | {
95 | "cell_type": "markdown",
96 | "id": "2eef62e1-2df6-4e67-9e10-92c9bf74c136",
97 | "metadata": {},
98 | "source": [
99 | "## Loading Data"
100 | ]
101 | },
102 | {
103 | "cell_type": "markdown",
104 | "id": "41855f82-661f-4c02-9c48-6893a694bfe4",
105 | "metadata": {},
106 | "source": [
107 |     "Let's start by loading some Landsat data. These steps are covered in the [Data Ingestion - Geospatial-Specific Tooling](1.0_Data_Ingestion-Geospatial.ipynb) prerequisite."
108 | ]
109 | },
110 | {
111 | "cell_type": "markdown",
112 | "id": "5dbc14ea-a560-4b01-81c0-4fc01f767de9",
113 | "metadata": {},
114 | "source": [
115 | "### Search the catalog"
116 | ]
117 | },
118 | {
119 | "cell_type": "code",
120 | "execution_count": null,
121 | "id": "c5059488-8f7e-446f-97d4-992eab1e7928",
122 | "metadata": {},
123 | "outputs": [],
124 | "source": [
125 | "catalog = pystac_client.Client.open(\n",
126 | " \"https://planetarycomputer.microsoft.com/api/stac/v1\",\n",
127 | " modifier=planetary_computer.sign_inplace,\n",
128 | ")\n",
129 | "\n",
130 | "bbox = [-118.89, 38.54, -118.57, 38.84] # Region over a lake in Nevada, USA\n",
131 | "datetime = \"2017-06-01/2017-09-30\" # Summer months of 2017\n",
132 | "collection = \"landsat-c2-l2\"\n",
133 | "platform = \"landsat-8\"\n",
134 | "cloudy_less_than = 1 # percent\n",
135 | "\n",
136 | "search = catalog.search(\n",
137 | " collections=[\"landsat-c2-l2\"],\n",
138 | " bbox=bbox,\n",
139 | " datetime=datetime,\n",
140 | " query={\"eo:cloud_cover\": {\"lt\": cloudy_less_than}, \"platform\": {\"in\": [platform]}},\n",
141 | ")\n",
142 | "items = search.get_all_items()\n",
143 | "print(f\"Returned {len(items)} Items:\")\n",
144 | "[[i, item.id] for i, item in enumerate(items)]"
145 | ]
146 | },
147 | {
148 | "cell_type": "markdown",
149 | "id": "e89ef2a4-8b5d-4799-a529-3d9adbc61a89",
150 | "metadata": {},
151 | "source": [
152 | "### Load a dataset"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": null,
158 | "id": "4fbc1c60-ab38-49b8-beb2-167dc4b6f298",
159 | "metadata": {},
160 | "outputs": [],
161 | "source": [
162 | "item = items[1] # select one of the results"
163 | ]
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": null,
168 | "id": "d5f6b4f4-80aa-4e82-bc9b-bd70b6d63d49",
169 | "metadata": {},
170 | "outputs": [],
171 | "source": [
172 | "assets = []\n",
173 | "for _, asset in item.assets.items():\n",
174 | " try:\n",
175 | " assets.append(asset.extra_fields[\"eo:bands\"][0])\n",
176 | " except:\n",
177 | " pass\n",
178 | "\n",
179 | "cols_ordered = [\n",
180 | " \"common_name\",\n",
181 | " \"description\",\n",
182 | " \"name\",\n",
183 | " \"center_wavelength\",\n",
184 | " \"full_width_half_max\",\n",
185 | "]\n",
186 | "bands = pd.DataFrame.from_dict(assets)[cols_ordered]\n",
187 | "bands"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": null,
193 | "id": "d17bcf9e-116c-4471-90ca-754a3b3fb305",
194 | "metadata": {},
195 | "outputs": [],
196 | "source": [
197 | "ds_2017 = odc.stac.stac_load(\n",
198 | " [item],\n",
199 | " bands=bands.common_name.values,\n",
200 | " bbox=bbox,\n",
201 | " chunks={}, # <-- use Dask\n",
202 | ").isel(time=0)"
203 | ]
204 | },
205 | {
206 | "cell_type": "markdown",
207 | "id": "03e8f2cb-31ef-4e42-a16f-ffa7f2e79d78",
208 | "metadata": {},
209 | "source": [
210 | "### Retain CRS Attribute"
211 | ]
212 | },
213 | {
214 | "cell_type": "code",
215 | "execution_count": null,
216 | "id": "a8f8e88f-a517-4a13-b823-e8dff53b0e47",
217 | "metadata": {},
218 | "outputs": [],
219 | "source": [
220 | "epsg = item.properties[\"proj:epsg\"]\n",
221 | "ds_2017.attrs[\"crs\"] = f\"epsg:{epsg}\""
222 | ]
223 | },
224 | {
225 | "cell_type": "code",
226 | "execution_count": null,
227 | "id": "7b7d3201-bb41-48d0-90b7-565c2e454f9c",
228 | "metadata": {},
229 | "outputs": [],
230 | "source": [
231 | "da_2017 = ds_2017.to_array(dim=\"band\")\n",
232 | "da_2017"
233 | ]
234 | },
235 | {
236 | "cell_type": "markdown",
237 | "id": "865eb3d1-59a9-401a-b1f1-7588bb5c1700",
238 | "metadata": {},
239 | "source": [
240 | "## Reshaping Data\n",
241 | "\n",
242 | "The shape of our data is currently `n_bands`, `n_y`, `n_x`. In order for dask-ml / scikit-learn to consume our data, we'll need to reshape our image stacks into `n_samples, n_features`, where `n_features` is the number of wavelength-bands and `n_samples` is the total number of pixels in each wavelength-band image. Essentially, we'll be creating a vector of pixels out of each image, where each pixel has multiple features (bands), but the ordering of the pixels is no longer relevant to the computation. "
243 | ]
244 | },
245 | {
246 | "cell_type": "markdown",
247 | "id": "042bfffb-c979-4958-9086-646a83918d61",
248 | "metadata": {},
249 | "source": [
250 | "By using xarray methods to flatten the data, we can keep track of the coordinate labels 'x' and 'y' along the way. This means that we have the ability to reshape back to our original array at any time with no information loss!"
251 | ]
252 | },
253 | {
254 | "cell_type": "code",
255 | "execution_count": null,
256 | "id": "653adf8b-da16-4eb6-ae64-d172c8eae75f",
257 | "metadata": {},
258 | "outputs": [],
259 | "source": [
260 | "flattened_xda = da_2017.stack(z=(\"x\", \"y\")) # flatten each band\n",
261 | "flattened_t_xda = flattened_xda.transpose(\"z\", \"band\")\n",
262 | "flattened_t_xda"
263 | ]
264 | },
265 | {
266 | "cell_type": "markdown",
267 | "id": "e427b1dd-08c3-4657-84e7-09b0269edad9",
268 | "metadata": {},
269 | "source": [
270 | "## Standardize Data\n",
271 | "\n",
272 |     "Now that we have the data in the correct shape, let's standardize (or rescale) the values of the data. We do this to get all the flattened image vectors onto a common scale while preserving the differences in the ranges of values. Again, we'll use xarray methods so that the coordinate labels are preserved through the operation."
273 | ]
274 | },
275 | {
276 | "cell_type": "code",
277 | "execution_count": null,
278 | "id": "d8cd5c3c-1256-400a-9cea-32debd51ec4d",
279 | "metadata": {},
280 | "outputs": [],
281 | "source": [
282 | "with xr.set_options(keep_attrs=True):\n",
283 | " rescaled_xda = (flattened_t_xda - flattened_t_xda.mean()) / flattened_t_xda.std()\n",
284 | "rescaled_xda"
285 | ]
286 | },
287 | {
288 | "cell_type": "markdown",
289 | "id": "37a47e0b-7f74-404f-8f16-bf80c3bb1995",
290 | "metadata": {},
291 | "source": [
292 |     "<div class=\"admonition alert alert-info\">\n",
293 |     "    <p class=\"admonition-title\" style=\"font-weight:bold\">Info</p>\n",
294 |     "    Above, we are using a context manager \"with xr.set_options(keep_attrs=True):\" to retain the array's attributes through the operations. That is, we want any metadata like 'crs' to stay with our result so we can use 'geo=True' in our plotting.\n",
295 |     "</div>"
296 | ]
297 | },
298 | {
299 | "cell_type": "markdown",
300 | "id": "27d69080-eff4-471e-9536-02f99420bd0a",
301 | "metadata": {},
302 | "source": [
303 | "As `rescaled_xda` is still a Dask object, if we wanted to actually run the rescaling at this point (provided that all the data can fit into memory), we would use `rescaled_xda.compute()`."
304 | ]
305 | },
306 | {
307 | "cell_type": "markdown",
308 | "id": "07b01093-da37-419c-93f2-9bba60be1578",
309 | "metadata": {},
310 | "source": [
311 | "\n",
312 | "## ML pipeline\n",
313 | "Now that our data is in the proper shape and value range, we are ready to conduct spectral clustering. Here we will use a version of [spectral clustering from dask_ml](https://ml.dask.org/modules/generated/dask_ml.cluster.SpectralClustering.html) that is a scalable equivalent to operations from Scikit-learn that cluster pixels based on similarity (across all wavelength-bands, which makes it spectral clustering by spectra!)"
314 | ]
315 | },
316 | {
317 | "cell_type": "code",
318 | "execution_count": null,
319 | "id": "62075e17-55d1-470c-8df6-55be6ab895bd",
320 | "metadata": {},
321 | "outputs": [],
322 | "source": [
323 | "client = Client(processes=False)\n",
324 | "client"
325 | ]
326 | },
327 | {
328 | "cell_type": "markdown",
329 | "id": "2d68c9d8-9b46-4123-8ab1-cef625ad9ea6",
330 | "metadata": {},
331 | "source": [
332 | "Now we will compute and persist the rescaled data to feed into the ML pipeline. Notice that our `X` matrix below has the shape: `n_samples, n_features` as discussed earlier. "
333 | ]
334 | },
335 | {
336 | "cell_type": "code",
337 | "execution_count": null,
338 | "id": "083a3ea0-1ae2-42d0-ac83-31d66a01b11d",
339 | "metadata": {},
340 | "outputs": [],
341 | "source": [
342 | "X = client.persist(rescaled_xda)\n",
343 | "X.shape"
344 | ]
345 | },
346 | {
347 | "cell_type": "markdown",
348 | "id": "c0f22024-87fd-49d2-8e06-6e6eff714708",
349 | "metadata": {},
350 | "source": [
351 | "First we will set up the model with the number of clusters, and other options."
352 | ]
353 | },
354 | {
355 | "cell_type": "code",
356 | "execution_count": null,
357 | "id": "4afcf563-8d46-4b35-9936-3d330e226d26",
358 | "metadata": {},
359 | "outputs": [],
360 | "source": [
361 | "clf = SpectralClustering(\n",
362 | " n_clusters=4,\n",
363 | " random_state=0,\n",
364 | " gamma=None,\n",
365 | " kmeans_params={\"init_max_iter\": 5},\n",
366 | " persist_embedding=True,\n",
367 | ")"
368 | ]
369 | },
370 | {
371 | "cell_type": "markdown",
372 | "id": "0666c6af-b948-41ed-a789-7f7cae9459f0",
373 | "metadata": {},
374 | "source": [
375 | "**This next step is the slow part.** We'll fit the model to our matrix `X`. Depending on your setup, it could take seconds to minutes to run depending on the size of our data."
376 | ]
377 | },
378 | {
379 | "cell_type": "code",
380 | "execution_count": null,
381 | "id": "372c890c-8ce7-4ce8-958b-43909c3de0e9",
382 | "metadata": {},
383 | "outputs": [],
384 | "source": [
385 | "%time clf.fit(X)"
386 | ]
387 | },
388 | {
389 | "cell_type": "markdown",
390 | "id": "60e746bd-5aa6-436a-84de-3b93e1bed1dd",
391 | "metadata": {},
392 | "source": [
393 | "Let's check the shape of the result:"
394 | ]
395 | },
396 | {
397 | "cell_type": "code",
398 | "execution_count": null,
399 | "id": "73e9af39-dadf-4956-b23e-5dc3feb65c56",
400 | "metadata": {},
401 | "outputs": [],
402 | "source": [
403 | "labels = clf.assign_labels_.labels_.compute()\n",
404 | "labels.shape"
405 | ]
406 | },
407 | {
408 | "cell_type": "code",
409 | "execution_count": null,
410 | "id": "f9e1a56c-0d6e-4c89-bbe8-0c8d9d0dd05f",
411 | "metadata": {},
412 | "outputs": [],
413 | "source": [
414 | "labels"
415 | ]
416 | },
417 | {
418 | "cell_type": "markdown",
419 | "id": "e8e59036-e8c0-4a0e-8d78-9037509b8c90",
420 | "metadata": {},
421 | "source": [
422 | "The result is a single vector of cluster labels."
423 | ]
424 | },
425 | {
426 | "cell_type": "markdown",
427 | "id": "084207cc-3e00-419e-a61d-300ad0868a6e",
428 | "metadata": {
429 | "tags": []
430 | },
431 | "source": [
432 | "## Un-flattening\n",
433 | "\n",
434 | "Once the computation is done, we can use the coordinates of our input array to restack our output array back into an image. Again, one of the main benefits of using `xarray` for this stacking and unstacking is that it keeps track of the coordinate information for us. "
435 | ]
436 | },
437 | {
438 | "cell_type": "markdown",
439 | "id": "625ae4c4-64cd-46fd-b6ec-43810c8be359",
440 | "metadata": {},
441 | "source": [
442 |     "Since the original array is n_samples by n_features (90000, 6) and the cluster label output is (90000,), we just need the coordinates from one of the original features in the shape of n_samples. We can just copy the coordinates from the first input feature and populate it with our output data:"
443 | ]
444 | },
445 | {
446 | "cell_type": "code",
447 | "execution_count": null,
448 | "id": "642cf283-64d8-4c9e-91c3-32be8ec35c4b",
449 | "metadata": {},
450 | "outputs": [],
451 | "source": [
452 | "template = flattened_t_xda[:, 0]\n",
453 | "output_array = template.copy(data=labels)\n",
454 | "output_array"
455 | ]
456 | },
457 | {
458 | "cell_type": "markdown",
459 | "id": "d321a58f-9617-4540-98c7-8a7bccd414ae",
460 | "metadata": {},
461 | "source": [
462 | "With this new output array with coordinates copied from the input array, we can unstack back to the original `x` and `y` image dimensions by just using `.unstack()`."
463 | ]
464 | },
465 | {
466 | "cell_type": "code",
467 | "execution_count": null,
468 | "id": "d16ec3f0-c6dc-470b-9e56-7c3540066af7",
469 | "metadata": {},
470 | "outputs": [],
471 | "source": [
472 | "unstacked_2017 = output_array.unstack()\n",
473 | "unstacked_2017"
474 | ]
475 | },
476 | {
477 | "cell_type": "markdown",
478 | "id": "588814fe-3776-4ef5-9392-dd60e6e6776c",
479 | "metadata": {},
480 | "source": [
481 |     "Finally, we can visualize the results! By hovering over the resulting image, we can see that the lake water has been clustered with a certain label or 'value'."
482 | ]
483 | },
484 | {
485 | "cell_type": "code",
486 | "execution_count": null,
487 | "id": "a9e6863f-0cec-436b-bdcc-9aa533e6df9f",
488 | "metadata": {},
489 | "outputs": [],
490 | "source": [
491 | "raw_plot_2017 = da_2017.sel(band=\"red\").hvplot.image(\n",
492 | " x=\"x\", y=\"y\", geo=True, xlabel=\"lon\", ylabel=\"lat\", datashade=True, cmap=\"greys\", title=\"Raw Image 2017\",\n",
493 | ")\n",
494 | "\n",
495 | "result_plot_2017 = unstacked_2017.hvplot(\n",
496 | " x=\"x\", y=\"y\", cmap=\"Set3\", geo=True, xlabel=\"lon\", ylabel=\"lat\", colorbar=False, title=\"Spectral Clustering 2017\",\n",
497 | ")\n",
498 | "\n",
499 | "raw_plot_2017 + result_plot_2017"
500 | ]
501 | },
502 | {
503 | "cell_type": "markdown",
504 | "id": "ba47bdd3-995b-4b98-8cf0-efeea65688da",
505 | "metadata": {},
506 | "source": [
507 | "## Spectral Clustering for 1988"
508 | ]
509 | },
510 | {
511 | "cell_type": "markdown",
512 | "id": "bea4b2b5-5e74-4f8c-b171-89a24e57b8ae",
513 | "metadata": {},
514 | "source": [
515 | "We have conducted the spectral clustering for 2017 and now we want to compare this result to the lake in 1988. Let's load data from 1988 and run the same analysis as above."
516 | ]
517 | },
518 | {
519 | "cell_type": "markdown",
520 | "id": "f4d284d4-23b9-4c77-9275-58aeb9c6e563",
521 | "metadata": {},
522 | "source": [
523 |     "We will use the same catalog, but we will search it for a different point in time and a different Landsat mission."
524 | ]
525 | },
526 | {
527 | "cell_type": "markdown",
528 | "id": "dcb18049-fb4c-44e4-9d72-90bd24094bd7",
529 | "metadata": {},
530 | "source": [
531 | "### Load the data"
532 | ]
533 | },
534 | {
535 | "cell_type": "code",
536 | "execution_count": null,
537 | "id": "e00ab9e6-b36b-47ec-a5a3-4b6bbf6a9ca6",
538 | "metadata": {},
539 | "outputs": [],
540 | "source": [
541 | "bbox = [-118.89, 38.54, -118.57, 38.84] # Region over a lake in Nevada, USA\n",
542 | "datetime = \"1988-06-01/1988-09-30\" # Summer months of 1988\n",
543 | "collection = \"landsat-c2-l2\"\n",
544 | "platform = \"landsat-5\" # Searching through an earlier landsat mission\n",
545 | "cloudy_less_than = 1 # percent\n",
546 | "\n",
547 | "search = catalog.search(\n",
548 | " collections=[\"landsat-c2-l2\"],\n",
549 | " bbox=bbox,\n",
550 | " datetime=datetime,\n",
551 | " query={\"eo:cloud_cover\": {\"lt\": cloudy_less_than}, \"platform\": {\"in\": [platform]}},\n",
552 | ")\n",
553 | "\n",
554 | "items = search.get_all_items()\n",
555 | "item = items[1] # select one of the results"
556 | ]
557 | },
558 | {
559 | "cell_type": "markdown",
560 | "id": "1428fff4-fc72-4692-9c20-e9e168337fc2",
561 | "metadata": {},
562 | "source": [
563 | "Notice that Landsat 5 data from 1988 has slightly different spectra than Landsat 8 from 2017. Details like this are important to keep in mind when performing analyses that directly compare across missions."
564 | ]
565 | },
566 | {
567 | "cell_type": "code",
568 | "execution_count": null,
569 | "id": "665e250d-6790-402d-b877-bbe967556089",
570 | "metadata": {},
571 | "outputs": [],
572 | "source": [
573 | "assets = []\n",
574 | "for _, asset in item.assets.items():\n",
575 | " try:\n",
576 | " assets.append(asset.extra_fields[\"eo:bands\"][0])\n",
577 | " except:\n",
578 | " pass\n",
579 | "\n",
580 | "cols_ordered = [\n",
581 | " \"common_name\",\n",
582 | " \"description\",\n",
583 | " \"name\",\n",
584 | " \"center_wavelength\",\n",
585 | " \"full_width_half_max\",\n",
586 | "]\n",
587 | "bands = pd.DataFrame.from_dict(assets)[cols_ordered]\n",
588 | "bands"
589 | ]
590 | },
591 | {
592 | "cell_type": "code",
593 | "execution_count": null,
594 | "id": "cef800e2-55f5-4777-90a5-63d466ce5f6f",
595 | "metadata": {},
596 | "outputs": [],
597 | "source": [
598 | "ds_1988 = odc.stac.stac_load(\n",
599 | " [item],\n",
600 | " bands=bands.common_name.values,\n",
601 | " bbox=bbox,\n",
602 | " chunks={}, # <-- use Dask\n",
603 | ").isel(time=0)\n",
604 | "\n",
605 | "epsg = item.properties[\"proj:epsg\"]\n",
606 | "ds_1988.attrs[\"crs\"] = f\"epsg:{epsg}\"\n",
607 | "\n",
608 | "da_1988 = ds_1988.to_array(dim=\"band\")\n",
609 | "da_1988"
610 | ]
611 | },
612 | {
613 | "cell_type": "markdown",
614 | "id": "057bd00b-193a-45b1-97ac-62a77fa0a322",
615 | "metadata": {},
616 | "source": [
617 | "### Reshape and Standardize"
618 | ]
619 | },
620 | {
621 | "cell_type": "code",
622 | "execution_count": null,
623 | "id": "ceb4ee87-9a8b-4278-9721-c2af8a56f6b8",
624 | "metadata": {},
625 | "outputs": [],
626 | "source": [
627 | "flattened_xda = da_1988.stack(z=(\"x\", \"y\"))\n",
628 | "flattened_t_xda = flattened_xda.transpose(\"z\", \"band\")\n",
629 | "with xr.set_options(keep_attrs=True):\n",
630 | " rescaled_xda = (flattened_t_xda - flattened_t_xda.mean()) / flattened_t_xda.std()\n",
631 | "rescaled_xda"
632 | ]
633 | },
634 | {
635 | "cell_type": "markdown",
636 | "id": "61bf4b05-3266-4bbd-b618-f4c01b56eccd",
637 | "metadata": {},
638 | "source": [
639 | "### Spectral Clustering"
640 | ]
641 | },
642 | {
643 | "cell_type": "code",
644 | "execution_count": null,
645 | "id": "4a4a50c6-f6dd-46c2-afb3-95196b341e21",
646 | "metadata": {},
647 | "outputs": [],
648 | "source": [
649 | "X = client.persist(rescaled_xda)\n",
650 | "clf = SpectralClustering(\n",
651 | " n_clusters=4,\n",
652 | " random_state=0,\n",
653 | " gamma=None,\n",
654 | " kmeans_params={\"init_max_iter\": 5},\n",
655 | " persist_embedding=True,\n",
656 | ")"
657 | ]
658 | },
659 | {
660 | "cell_type": "code",
661 | "execution_count": null,
662 | "id": "229368ab-ed3a-4fc1-b328-d84b7e7ebebb",
663 | "metadata": {},
664 | "outputs": [],
665 | "source": [
666 | "%time clf.fit(X)"
667 | ]
668 | },
669 | {
670 | "cell_type": "code",
671 | "execution_count": null,
672 | "id": "ec353001-4e0e-44af-b38d-f602f5a1dab8",
673 | "metadata": {},
674 | "outputs": [],
675 | "source": [
676 | "labels = clf.assign_labels_.labels_.compute()\n",
677 | "labels.shape"
678 | ]
679 | },
680 | {
681 | "cell_type": "code",
682 | "execution_count": null,
683 | "id": "937d7db4-b848-499b-8da2-8d00287b5d81",
684 | "metadata": {},
685 | "outputs": [],
686 | "source": [
687 | "labels"
688 | ]
689 | },
690 | {
691 | "cell_type": "markdown",
692 | "id": "1c690262-bb13-48cd-9e24-d3be8ed8fb91",
693 | "metadata": {},
694 | "source": [
695 | "### Unstack and Visualize"
696 | ]
697 | },
698 | {
699 | "cell_type": "code",
700 | "execution_count": null,
701 | "id": "ba40cfc1-2da7-4be2-80f8-4bd0838dd64a",
702 | "metadata": {},
703 | "outputs": [],
704 | "source": [
705 | "template = flattened_t_xda[:, 0]\n",
706 | "output_array = template.copy(data=labels)\n",
707 | "unstacked_1988 = output_array.unstack()"
708 | ]
709 | },
710 | {
711 | "cell_type": "code",
712 | "execution_count": null,
713 | "id": "b3be0319-7f51-4384-befe-f0afcd2fecff",
714 | "metadata": {},
715 | "outputs": [],
716 | "source": [
717 | "unstacked_1988"
718 | ]
719 | },
720 | {
721 | "cell_type": "code",
722 | "execution_count": null,
723 | "id": "d41ed729-af6c-4817-aad1-154671e743c5",
724 | "metadata": {},
725 | "outputs": [],
726 | "source": [
727 | "raw_plot_1988 = da_1988.sel(band=\"red\").hvplot.image(\n",
728 | " x=\"x\", y=\"y\", geo=True, xlabel=\"lon\", ylabel=\"lat\", datashade=True, cmap=\"greys\", title=\"Raw 1988\"\n",
729 | ")\n",
730 | "\n",
731 | "result_plot_1988 = unstacked_1988.hvplot(\n",
732 | " x=\"x\", y=\"y\", cmap=\"Set3\", geo=True, xlabel=\"lon\", ylabel=\"lat\", colorbar=False, title=\"Spectral Clustering 1988\",\n",
733 | ")\n",
734 | "\n",
735 | "raw_plot_1988 + result_plot_1988"
736 | ]
737 | },
738 | {
739 | "cell_type": "markdown",
740 | "id": "6ff424f8-b243-4a20-a069-ae45f2490370",
741 | "metadata": {},
742 | "source": [
743 | "## Spectral Clustering Over Time"
744 | ]
745 | },
746 | {
747 | "cell_type": "markdown",
748 | "id": "c872fea9-55bc-4c60-8979-91796ef649a9",
749 | "metadata": {},
750 | "source": [
751 | "Our hypothesis is that the lake's area is receding over time and so we want to visualize the potential change. Let's first visually compare the plot of the clustering results from the different time points."
752 | ]
753 | },
754 | {
755 | "cell_type": "code",
756 | "execution_count": null,
757 | "id": "5b3f7a6d-3651-4324-8bed-a044034e3e67",
758 | "metadata": {},
759 | "outputs": [],
760 | "source": [
761 | "result_plot_1988 + result_plot_2017"
762 | ]
763 | },
764 | {
765 | "cell_type": "markdown",
766 | "id": "4b170f41-3442-4fce-91be-40a41317f38d",
767 | "metadata": {},
768 | "source": [
769 |        "By hovering over the lake in each image, we can see that the water was labeled ('value') with a certain cluster number in both images. We will programmatically grab the water cluster value from the middle of the lake using pyproj to convert from longitude/latitude coordinates."
770 | ]
771 | },
772 | {
773 | "cell_type": "code",
774 | "execution_count": null,
775 | "id": "59dc881c-0ef8-425c-b18d-b11581c8bbcf",
776 | "metadata": {
777 | "tags": []
778 | },
779 | "outputs": [],
780 | "source": [
781 | "lon_lake_center = -118.71\n",
782 | "lat_lake_center = 38.7\n",
783 | "\n",
784 | "proj = pyproj.Proj(unstacked_1988.crs)\n",
785 | "lake_center_x, lake_center_y = proj(lon_lake_center, lat_lake_center)\n",
786 | "\n",
787 | "water_cluster_1988 = int(unstacked_1988.sel(x=lake_center_x, y=lake_center_y, method='nearest'))\n",
788 | "water_cluster_2017 = int(unstacked_2017.sel(x=lake_center_x, y=lake_center_y, method='nearest'))\n",
789 | "\n",
790 | "print('water cluster values:', water_cluster_1988, water_cluster_2017)"
791 | ]
792 | },
793 | {
794 | "cell_type": "markdown",
795 | "id": "0b51d984-1b83-408b-9758-c843e9d8c58e",
796 | "metadata": {},
797 | "source": [
798 | "Now, let's set any value that isn't our water cluster label to 0."
799 | ]
800 | },
801 | {
802 | "cell_type": "code",
803 | "execution_count": null,
804 | "id": "f20df2e1-b66d-4857-a39d-9a16ee50ad22",
805 | "metadata": {},
806 | "outputs": [],
807 | "source": [
808 | "with xr.set_options(keep_attrs=True):\n",
809 | " water_1988 = (unstacked_1988 == water_cluster_1988).astype(int)\n",
810 | " water_2017 = (unstacked_2017 == water_cluster_2017).astype(int)\n"
811 | ]
812 | },
813 | {
814 | "cell_type": "code",
815 | "execution_count": null,
816 | "id": "b90cabc5-cac6-4e53-8730-9b808c688104",
817 | "metadata": {},
818 | "outputs": [],
819 | "source": [
820 | "water_1988_plot = water_1988.hvplot(\n",
821 | " x=\"x\", y=\"y\", cmap=\"greys\", geo=True, colorbar=False, title=\"1988 Water\"\n",
822 | ")\n",
823 | "\n",
824 | "water_2017_plot = water_2017.hvplot(\n",
825 | " x=\"x\", y=\"y\", cmap=\"greys\", geo=True, colorbar=False, title=\"2017 Water\"\n",
826 | ")\n",
827 | "\n",
828 | "water_1988_plot + water_2017_plot"
829 | ]
830 | },
831 | {
832 | "cell_type": "markdown",
833 | "id": "313e08b0-bbec-43cf-94fa-a4b8a84e1177",
834 | "metadata": {},
835 | "source": [
836 |        "Now we can take the difference of these water label arrays to see exactly where the water level has changed."
837 | ]
838 | },
839 | {
840 | "cell_type": "code",
841 | "execution_count": null,
842 | "id": "4af122d1-3044-48cc-a0be-3922839474c1",
843 | "metadata": {},
844 | "outputs": [],
845 | "source": [
846 | "with xr.set_options(keep_attrs=True):\n",
847 | " water_diff = water_1988 - water_2017"
848 | ]
849 | },
850 | {
851 | "cell_type": "markdown",
852 | "id": "d5517bf1-bced-455e-a053-528b47809e0a",
853 | "metadata": {},
854 | "source": [
855 | "Red pixels (array value '1') of our image below are where water was lost from 1988 to 2017."
856 | ]
857 | },
858 | {
859 | "cell_type": "code",
860 | "execution_count": null,
861 | "id": "2b9ac483-0240-4ffc-b167-f84eac832a32",
862 | "metadata": {},
863 | "outputs": [],
864 | "source": [
865 | "water_diff.hvplot(\n",
866 | " x=\"x\", y=\"y\", cmap='coolwarm', geo=True, xlabel=\"long\", ylabel=\"lat\", colorbar=False, title=\"Water Change 1988-2017\",\n",
867 | ")"
868 | ]
869 | },
870 | {
871 | "cell_type": "markdown",
872 | "id": "1466477f-19b5-4866-940c-5cc52c979b83",
873 | "metadata": {},
874 | "source": [
875 | "We did it! We are observing the change in the lake shoreline over time using a simple spectral clustering approach."
876 | ]
877 | },
878 | {
879 | "cell_type": "markdown",
880 | "id": "d14ad11a-18df-4082-a596-a0be684e75a7",
881 | "metadata": {},
882 | "source": [
883 | "Let's finish things off by adding some geo tiles as a background. To only display the colored pixels overlaid on geo tiles, we could either set the array's background value ('0') to 'Not a Number' (NaN), or we could just inform hvPlot that we want the background valued pixels to be transparent with `.redim.nodata(value=0)`."
884 | ]
885 | },
886 | {
887 | "cell_type": "code",
888 | "execution_count": null,
889 | "id": "17ad0e8a-c880-4c67-9994-4e1425da2829",
890 | "metadata": {},
891 | "outputs": [],
892 | "source": [
893 | "water_diff.hvplot(\n",
894 | " x=\"x\", y=\"y\", width=400, height=400, cmap='coolwarm', geo=True, xlabel=\"lon\", ylabel=\"lat\", alpha=1, colorbar=False, title=\"Water Loss from 1988 to 2017\", tiles=\"ESRI\",\n",
895 | ").redim.nodata(value=0)\n",
896 | "\n"
897 | ]
898 | },
899 | {
900 | "cell_type": "markdown",
901 | "id": "46b24b86-d262-4ce3-8cae-5b4185574dc7",
902 | "metadata": {},
903 | "source": [
904 | "___"
905 | ]
906 | },
907 | {
908 | "cell_type": "markdown",
909 | "id": "072e479f-7b69-495c-83a3-9b77352be007",
910 | "metadata": {},
911 | "source": [
912 | "## Summary\n",
913 | "Starting from raw Landsat data, we have used a simple spectral clustering approach to observe the change in a lake water's extent across time.\n",
914 | "\n",
915 | "### What's next?\n",
916 | "Adapt this notebook for your own use case or select another workflow example notebook."
917 | ]
918 | },
919 | {
920 | "cell_type": "markdown",
921 | "id": "ebee9cbd-ed88-4827-809c-c820b8af509a",
922 | "metadata": {},
923 | "source": [
924 | "## Resources and References\n",
925 | "- Authored by Demetris Roumis circa Jan, 2023"
926 | ]
927 | },
928 | {
929 | "cell_type": "code",
930 | "execution_count": null,
931 | "id": "3993488c-4dcf-4613-8c21-b02431112866",
932 | "metadata": {},
933 | "outputs": [],
934 | "source": []
935 | }
936 | ],
937 | "metadata": {
938 | "kernelspec": {
939 | "display_name": "Python 3 (ipykernel)",
940 | "language": "python",
941 | "name": "python3"
942 | },
943 | "language_info": {
944 | "codemirror_mode": {
945 | "name": "ipython",
946 | "version": 3
947 | },
948 | "file_extension": ".py",
949 | "mimetype": "text/x-python",
950 | "name": "python",
951 | "nbconvert_exporter": "python",
952 | "pygments_lexer": "ipython3",
953 | "version": "3.10.13"
954 | },
955 | "vscode": {
956 | "interpreter": {
957 | "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
958 | }
959 | }
960 | },
961 | "nbformat": 4,
962 | "nbformat_minor": 5
963 | }
964 |
--------------------------------------------------------------------------------
/notebooks/data/catalog.yml:
--------------------------------------------------------------------------------
1 | sources:
2 | landsat_5_small:
3 | description: Small version of Landsat 5 Surface Reflectance Level-2 Science Product.
4 | driver: rasterio
5 | cache:
6 | - argkey: urlpath
7 | regex: 'earth-data/landsat'
8 | type: file
9 | args:
10 | urlpath: 's3://earth-data/landsat/small/LT05_L1TP_042033_19881022_20161001_01_T1_sr_band{band:d}.tif'
11 | chunks:
12 | band: 1
13 | x: 50
14 | y: 50
15 | concat_dim: band
16 | storage_options: {'anon': True}
17 | metadata:
18 | plots:
19 | band_image:
20 | kind: 'image'
21 | x: 'x'
22 | y: 'y'
23 | groupby: 'band'
24 | rasterize: True
25 | width: 400
26 | dynamic: False
27 |
28 | landsat_8_small:
29 | description: Small version of Landsat 8 Surface Reflectance Level-2 Science Product.
30 | driver: rasterio
31 | cache:
32 | - argkey: urlpath
33 | regex: 'earth-data/landsat'
34 | type: file
35 | args:
36 | urlpath: 's3://earth-data/landsat/small/LC08_L1TP_042033_20171022_20171107_01_T1_sr_band{band:d}.tif'
37 | chunks:
38 | band: 1
39 | x: 50
40 | y: 50
41 | concat_dim: band
42 | storage_options: {'anon': True}
43 |
44 | landsat_5:
45 | description: Images contain Landsat 5 Surface Reflectance Level-2 Science Product.
46 | driver: rasterio
47 | cache:
48 | - argkey: urlpath
49 | regex: 'earth-data/landsat'
50 | type: file
51 | args:
52 | urlpath: 's3://earth-data/landsat/LT05_L1TP_042033_19881022_20161001_01_T1_sr_band{band:d}.tif'
53 | chunks:
54 | band: 1
55 | x: 256
56 | y: 256
57 | concat_dim: band
58 | storage_options: {'anon': True}
59 | metadata:
60 | plots:
61 | band_image:
62 | kind: 'image'
63 | x: 'x'
64 | y: 'y'
65 | groupby: 'band'
66 | rasterize: True
67 | width: 400
68 |
69 | landsat_8:
70 | description: Images contain Landsat 8 Surface Reflectance Level-2 Science Product.
71 | driver: rasterio
72 | cache:
73 | - argkey: urlpath
74 | regex: 'earth-data/landsat'
75 | type: file
76 | args:
77 | urlpath: 's3://earth-data/landsat/LC08_L1TP_042033_20171022_20171107_01_T1_sr_band{band:d}.tif'
78 | chunks:
79 | band: 1
80 | x: 256
81 | y: 256
82 | concat_dim: band
83 | storage_options: {'anon': True}
84 |
85 | google_landsat_band:
86 | description: Landsat bands from Google Cloud Storage
87 | driver: rasterio
88 | parameters:
89 | path:
90 | description: landsat path
91 | type: int
92 | row:
93 | description: landsat row
94 | type: int
95 | product_id:
96 | description: landsat file id
97 | type: str
98 | band:
99 | description: band
100 | type: int
101 | args:
102 | urlpath: https://storage.googleapis.com/gcp-public-data-landsat/LC08/01/{{ '%03d' % path }}/{{ '%03d' % row }}/{{ product_id }}/{{ product_id }}_B{{ band }}.TIF
103 | chunks:
104 | band: 1
105 | x: 256
106 | y: 256
107 |
108 | amazon_landsat_band:
109 | description: Landsat bands from Amazon Web Services S3
110 | driver: rasterio
111 | parameters:
112 | path:
113 | description: landsat path
114 | type: int
115 | row:
116 | description: landsat row
117 | type: int
118 | product_id:
119 | description: landsat file id
120 | type: str
121 | band:
122 | description: band
123 | type: int
124 | cache:
125 | - argkey: urlpath
126 | regex: 'landsat-pds'
127 | type: file
128 | args:
129 | urlpath: s3://landsat-pds/c1/L8/{{ '%03d' % path }}/{{ '%03d' % row }}/{{ product_id }}/{{ product_id }}_B{{ band }}.TIF
130 | chunks:
131 | band: 1
132 | x: 256
133 | y: 256
134 | storage_options: {'anon': True}
135 |
136 | fluxnet_daily:
137 | driver: csv
138 | parameters:
139 | s3_path:
140 | description: Filename to load
141 | type: str
142 | default: earth-data/carbon_flux/nee_data_fusion/FLX_AR-SLu_FLUXNET2015_FULLSET_DD_2009-2011_1-3.csv
143 | cache:
144 | - argkey: urlpath
145 | regex: 'earth-data'
146 | type: file
147 | args:
148 | urlpath: "s3://{{ s3_path }}"
149 | path_as_pattern: 'FLX_{site}_FLUXNET2015_FULLSET_DD_{}.csv'
150 | csv_kwargs:
151 | assume_missing: true
152 | na_values: [-9999]
153 | parse_dates: ['TIMESTAMP']
154 | storage_options: {'anon': True}
155 |
156 | fluxnet_metadata:
157 | driver: csv
158 | cache:
159 | - argkey: urlpath
160 | regex: 'earth-data'
161 | type: file
162 | args:
163 | urlpath: "s3://earth-data/carbon_flux/nee_data_fusion/allflux_metadata.txt"
164 | csv_kwargs:
165 | header: null
166 | names: ['site', 'lat', 'lon', 'igbp', 'network']
167 | usecols: ['site', 'lat', 'lon', 'igbp']
168 | storage_options: {'anon': True}
169 |
170 | seattle_lidar:
171 | driver: csv
172 | cache:
173 | - argkey: urlpath
174 | regex: 'https://s3.amazonaws.com/earth-data'
175 | type: compressed
176 | decomp: infer
177 | args:
178 | urlpath: "https://s3.amazonaws.com/earth-data/seattle-lidar.zip"
179 | storage_options: {'anon': True}
180 | metadata:
181 | crs: State Plane Coordinate System Washington North FIPS 4601
182 |
--------------------------------------------------------------------------------
/notebooks/data/landsat5_bands.csv:
--------------------------------------------------------------------------------
1 | Band, Description, Range (nm)
2 | 1, Blue, 450-520
3 | 2, Green, 520-600
4 | 3, Red, 630-690
5 | 4, Near-Infrared, 760-900
6 | 5, Short Wavelength Infrared 1, 1550-1750
7 | 6, Thermal Infrared, 10400-12500
8 | 7, Short Wavelength Infrared 2, 2080-2350
9 |
--------------------------------------------------------------------------------
/notebooks/data/landsat5_crop.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ProjectPythia/landsat-ml-cookbook/0854cfe3bcc5b27ee99e64363c216b8d9711593b/notebooks/data/landsat5_crop.nc
--------------------------------------------------------------------------------
/notebooks/data/landsat8_bands.csv:
--------------------------------------------------------------------------------
1 | Band, Description, Range (nm)
2 | 1, Coastal Aerosol, 435-451
3 | 2, Blue, 452-512
4 | 3, Green, 533-590
5 | 4, Red, 636-673
6 | 5, Near-Infrared, 851-879
7 | 6, Short Wavelength Infrared 1, 1566-1651
8 | 7, Short Wavelength Infrared 2, 2107-2294
--------------------------------------------------------------------------------
/notebooks/images/L-Next-SpectralBands-stack.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ProjectPythia/landsat-ml-cookbook/0854cfe3bcc5b27ee99e64363c216b8d9711593b/notebooks/images/L-Next-SpectralBands-stack.png
--------------------------------------------------------------------------------
/notebooks/images/ProjectPythia_Logo_Final-01-Blue.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/notebooks/images/icons/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ProjectPythia/landsat-ml-cookbook/0854cfe3bcc5b27ee99e64363c216b8d9711593b/notebooks/images/icons/favicon.ico
--------------------------------------------------------------------------------
/notebooks/images/intake_landsat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ProjectPythia/landsat-ml-cookbook/0854cfe3bcc5b27ee99e64363c216b8d9711593b/notebooks/images/intake_landsat.png
--------------------------------------------------------------------------------
/notebooks/images/landsat_8_rend-sm1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ProjectPythia/landsat-ml-cookbook/0854cfe3bcc5b27ee99e64363c216b8d9711593b/notebooks/images/landsat_8_rend-sm1.png
--------------------------------------------------------------------------------
/notebooks/images/landsat_timeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ProjectPythia/landsat-ml-cookbook/0854cfe3bcc5b27ee99e64363c216b8d9711593b/notebooks/images/landsat_timeline.png
--------------------------------------------------------------------------------
/notebooks/images/logos/Unidata_logo_horizontal_1200x300.svg:
--------------------------------------------------------------------------------
1 |
2 |
892 |
--------------------------------------------------------------------------------
/notebooks/images/logos/pythia_logo-white-notext.svg:
--------------------------------------------------------------------------------
1 |
2 |
129 |
--------------------------------------------------------------------------------
/notebooks/images/logos/pythia_logo-white-rtext.svg:
--------------------------------------------------------------------------------
1 |
2 |
226 |
--------------------------------------------------------------------------------
/notebooks/images/nasa_bands.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ProjectPythia/landsat-ml-cookbook/0854cfe3bcc5b27ee99e64363c216b8d9711593b/notebooks/images/nasa_bands.png
--------------------------------------------------------------------------------
/notebooks/images/nasa_landsat8.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ProjectPythia/landsat-ml-cookbook/0854cfe3bcc5b27ee99e64363c216b8d9711593b/notebooks/images/nasa_landsat8.jpg
--------------------------------------------------------------------------------
/notebooks/images/planetary_computer_header_800w.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ProjectPythia/landsat-ml-cookbook/0854cfe3bcc5b27ee99e64363c216b8d9711593b/notebooks/images/planetary_computer_header_800w.png
--------------------------------------------------------------------------------
/notebooks/images/pystac.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ProjectPythia/landsat-ml-cookbook/0854cfe3bcc5b27ee99e64363c216b8d9711593b/notebooks/images/pystac.png
--------------------------------------------------------------------------------
/notebooks/images/spectral_clustering.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ProjectPythia/landsat-ml-cookbook/0854cfe3bcc5b27ee99e64363c216b8d9711593b/notebooks/images/spectral_clustering.png
--------------------------------------------------------------------------------
/notebooks/images/spectral_clustering_lake.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ProjectPythia/landsat-ml-cookbook/0854cfe3bcc5b27ee99e64363c216b8d9711593b/notebooks/images/spectral_clustering_lake.png
--------------------------------------------------------------------------------
/thumbnail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ProjectPythia/landsat-ml-cookbook/0854cfe3bcc5b27ee99e64363c216b8d9711593b/thumbnail.png
--------------------------------------------------------------------------------