├── docs
    └── README.md
├── img
    └── README.md
├── .gitignore
├── r
    ├── HLS_Tutorial.Rproj
    ├── earthdata_netrc_setup.R
    └── HLS_Tutorial.Rmd
├── data
    └── Field_Boundary.geojson
├── CODE_OF_CONDUCT.md
├── python
    ├── scripts
    │   └── HLS_SuPER
    │   │   ├── HLS_Su.py
    │   │   ├── HLS_PER.py
    │   │   ├── README.md
    │   │   └── HLS_SuPER.py
    └── tutorials
    │   └── EVI_timeseries_with_odc_stac.ipynb
├── README.md
├── CONTRIBUTE.md
├── CHANGELOG.md
├── bash
    └── hls-bulk-download
    │   ├── README.md
    │   └── getHLS.sh
└── LICENSE.md


/docs/README.md:
--------------------------------------------------------------------------------
1 | # HLS Data User Resources
2 | 
3 | The space contain tutorials and how-to guides that highlight the use of graphical user interfaces (GUI).
4 | 


--------------------------------------------------------------------------------
/img/README.md:
--------------------------------------------------------------------------------
1 | # HLS Data User Resources
2 | 
3 | This space contain images, charts, and visualizations that are used or references in other resources within the repository.
4 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.ipynb_checkpoints/
 2 | ignore
 3 | *.tif*
 4 | *data/HLS-Derived_EVI_Stats.csv*
 5 | *.txt*
 6 | *.rcookies*
 7 | *.Rproj.user*
 8 | *.Rhistory*
 9 | *.RData*
10 | *.Ruserdata*
11 | *.Rproj.user*
12 | *data/R_Output/*
13 | 


--------------------------------------------------------------------------------
/r/HLS_Tutorial.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 


--------------------------------------------------------------------------------
/data/Field_Boundary.geojson:
--------------------------------------------------------------------------------
1 | {"type":"FeatureCollection","features":[{"type":"Feature","properties":{},"geometry":{"coordinates":[[[-122.09684570249401,39.90665319161309],[-122.09382925224304,39.89913263372122],[-122.09381158862462,39.89193960036616],[-122.0452080243209,39.89362074700392],[-122.03465069273044,39.89728856017888],[-122.0503870926483,39.92326328372664],[-122.09102285952527,39.90997156746636],[-122.09684570249401,39.90665319161309]]],"type":"Polygon"}}]}


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Code of Conduct
 2 | 
 3 | ## 1. Our Commitment
 4 | 
 5 | We are dedicated to fostering a respectful environment for everyone contributing to this project. We expect all participants to treat each other with respect, professionalism, and kindness.
 6 | 
 7 | ## 2. Expected Behavior
 8 | 
 9 | - Be respectful and considerate of others.
10 | - Engage in constructive discussions and offer helpful feedback.
11 | - Gracefully accept constructive criticism.
12 | 
13 | ## 3. Unacceptable Behavior
14 | 
15 | The following behaviors will not be tolerated:
16 | 
17 | - Harassment, discrimination, or intimidation of any kind.
18 | - Offensive, abusive, or derogatory language and actions.
19 | - Personal attacks or insults.
20 | - Trolling or disruptive conduct.
21 | - Sharing inappropriate content.
22 | 
23 | ## 4. Reporting Violations
24 | If you experience or witness any behavior that violates this Code of Conduct, please report it by contacting the project maintainers. All reports will be reviewed confidentially.
25 | 
26 | ## 5. Enforcement
27 | Violations of this Code of Conduct may result in actions such as warnings, temporary bans, or permanent exclusion from participation at the discretion of the maintainers.
28 | 
29 | ## Contact Info  
30 | Email: <LPDAAC@usgs.gov>  
31 | Voice: +1-866-573-3222  
32 | Organization: Land Processes Distributed Active Archive Center (LP DAAC)¹  
33 | Website: <https://lpdaac.usgs.gov/>  
34 | Date last modified: 01-22-2025  
35 | 
36 | ¹Work performed under USGS contract G15PD00467 for NASA contract NNG14HH33I.


--------------------------------------------------------------------------------
/r/earthdata_netrc_setup.R:
--------------------------------------------------------------------------------
 1 | # Required packages for this script
 2 | packages = c('sys', 'getPass')
 3 | 
 4 | # Identify missing (not installed) packages
 5 | new.packages = packages[!(packages %in% installed.packages()[,"Package"])]
 6 | 
 7 | # Install missing packages
 8 | if(length(new.packages)) install.packages(new.packages, repos='http://cran.rstudio.com/')
 9 | 
10 | # Load packages into R
11 | library(sys)
12 | library(getPass)
13 | 
14 | # Specify path to user profile 
15 | up <- file.path(Sys.getenv("USERPROFILE"))                            # Retrieve user directory (for netrc file)
16 | 
17 | # Below, HOME and Userprofile directories are set.  
18 | 
19 | if (up == "") {
20 |     up <- Sys.getenv("HOME") 
21 |     Sys.setenv("userprofile" = up)
22 |     if (up == "") {
23 |         cat('USERPROFILE/HOME directories need to be set up. Please type sys.setenv("HOME" = "YOURDIRECTORY") or  sys.setenv("USERPROFILE" = "YOURDIRECTORY") in your console and type your USERPROFILE/HOME direcory instead of "YOURDIRECTORY". Next, run the code chunk again.')
24 |     }
25 | } else {Sys.setenv("HOME" = up)}        
26 | 
27 | netrc_path <- file.path(up, ".netrc", fsep = .Platform$file.sep)    # Path to netrc file
28 | 
29 | # Create a netrc file if one does not exist already
30 | if (file.exists(netrc_path) == FALSE || grepl("urs.earthdata.nasa.gov", readLines(netrc_path)) == FALSE) {
31 |     netrc_conn <- file(netrc_path)
32 |     
33 |     # User will be prompted for NASA Earthdata Login Username and Password below
34 |     writeLines(c("machine urs.earthdata.nasa.gov",
35 |                  sprintf("login %s", getPass(msg = "Enter NASA Earthdata Login Username \n (An account can be Created at urs.earthdata.nasa.gov):")),
36 |                  sprintf("password %s", getPass(msg = "Enter NASA Earthdata Login Password:"))), netrc_conn)
37 |     close(netrc_conn)
38 | }else{
39 |     i <- 0 
40 |     for (f in readLines(netrc_path)){
41 |         i <- i + 1
42 |         if (f =="machine urs.earthdata.nasa.gov"){
43 |             username <- strsplit(readLines(netrc_path)[i+1], " ")[[1]][2]
44 |             un <- getPass(msg = paste0("Is your NASA Earthdata Login Username: ", username, "\n\n Type yes or no."))
45 |             if (tolower(un) == 'yes'){
46 |                 tx <- gsub(readLines(netrc_path)[i+2], sprintf("password %s", getPass(msg = "Enter NASA Earthdata Login Password:")), readLines(netrc_path))
47 |                 writeLines(tx, netrc_path)
48 |                 rm(username, un, tx, f, i)
49 |             }else{
50 |                 user <- gsub(readLines(netrc_path)[i+1], sprintf("login %s", getPass(msg = "Enter NASA Earthdata Login Username:")), readLines(netrc_path))
51 |                 tx <- gsub(readLines(netrc_path)[i+2], sprintf("password %s", getPass(msg = "Enter NASA Earthdata Login Password:")), readLines(netrc_path))
52 |                 writeLines(tx, netrc_path)
53 |                 rm(username, un, user, tx, f, i)
54 |             
55 |             }
56 |             break
57 |         }
58 |     }
59 | }
60 | 
61 | 


--------------------------------------------------------------------------------
/python/scripts/HLS_SuPER/HLS_Su.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | ===============================================================================
 4 | This module contains functions related to searching and preprocessing HLS data.
 5 | 
 6 | -------------------------------------------------------------------------------                        
 7 | Authors: Mahsa Jami, Cole Krehbiel, and Erik Bolch
 8 | Contact: lpdaac@usgs.gov                                                   
 9 | Last Updated: 2024-09-18                                    
10 | ===============================================================================
11 | """
12 | 
13 | # Import necessary packages
14 | import numpy as np
15 | import earthaccess
16 | 
17 | 
18 | # Main function to search and filter HLS data
19 | def hls_search(roi: list, band_dict: dict, dates=None, cloud_cover=None, log=False):
20 |     """
21 |     This function uses earthaccess to search for HLS data using an roi and temporal parameter, filter by cloud cover and delivers a list of results urls for the selected bands.
22 |     """
23 |     # Search for data
24 |     results = earthaccess.search_data(
25 |         short_name=list(band_dict.keys()),  # Band dict contains shortnames as keys
26 |         polygon=roi,
27 |         temporal=dates,
28 |     )
29 | 
30 |     # Filter by cloud cover
31 |     if cloud_cover:
32 |         results = hls_cc_filter(results, cloud_cover)
33 | 
34 |     # Get results urls
35 |     results_urls = [granule.data_links() for granule in results]
36 | 
37 |     # Flatten url list
38 |     # results_urls = [item for sublist in results_urls for item in sublist]
39 | 
40 |     # Filter url list based on selected bands
41 |     selected_results_urls = [
42 |         get_selected_bands_urls(granule_urls, band_dict)
43 |         for granule_urls in results_urls
44 |     ]
45 |     return selected_results_urls
46 | 
47 | 
48 | # Filter earthaccess results based on cloud cover threshold
49 | def hls_cc_filter(results, cc_threshold):
50 |     """
51 |     This function filters a list of earthaccess results based on a cloud cover threshold.
52 |     """
53 |     cc = []
54 |     for result in results:
55 |         # Retrieve Cloud Cover from json, convert to float and place in numpy array
56 |         cc.append(
57 |             float(
58 |                 next(
59 |                     (
60 |                         aa
61 |                         for aa in result["umm"]["AdditionalAttributes"]
62 |                         if aa.get("Name") == "CLOUD_COVERAGE"
63 |                     ),
64 |                     None,
65 |                 )["Values"][0]
66 |             )
67 |         )
68 |     cc = np.array(cc)
69 |     # Find indices based on cloud cover threshold
70 |     cc_indices = np.where(cc <= cc_threshold)
71 |     # Filter results based on indices
72 |     return [results[i] for i in cc_indices[0]]
73 | 
74 | 
75 | # Filter results urls based on selected bands
76 | def get_selected_bands_urls(url_list, band_dict):
77 |     """
78 |     This function filters a list of results urls based on HLS collection and selected bands.
79 |     """
80 |     selected_bands_urls = []
81 |     # Loop through urls
82 |     for url in url_list:
83 |         # Filter bands based on band dictionary
84 |         for collection, nested_dict in band_dict.items():
85 |             if collection in url:
86 |                 for band in nested_dict.values():
87 |                     if band in url:
88 |                         selected_bands_urls.append(url)
89 |     return selected_bands_urls
90 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # HLS-Data-Resources  
 2 | 
 3 | Welcome! This repository provides guides, short how-tos, and tutorials to help users access and work with Harmonized Landsat Sentinel-2 (HLS) data. In the interest of open science this repository has been made public but is still under active development. All Jupyter notebooks and scripts should be functional, however, changes or additions may be made. Contributions from all parties are welcome.  
 4 | 
 5 | ## Resources  
 6 | 
 7 | Below are data use resources available HLS data.  
 8 | 
 9 | |Name|Type/Link|Summary|Services and Tools|
10 | |----|---------|-------|------------------|
11 | | HLS Python Tutorial | [Python Notebook](python/tutorials/HLS_Tutorial.ipynb) | Tutorial demonstrating how to search for, access, and process HLS data in Python | [earthaccess](https://github.com/nsidc/earthaccess) |
12 | |EVI Timeseries with ODC-STAC | [Python Notebook](python/tutorials/EVI_timeseries_with_odc_stac.ipynb) | Tutorial demonstrating how to use the Open Data Cube (ODC) and CMR-STAC to create an EVI time series very efficiently | [CMR STAC API](https://cmr.earthdata.nasa.gov/stac) |
13 | | HLS SuPER Script | [Python Script](python/scripts/HLS_SuPER/) | Find, download, and subset HLS data from a command line executable | [CMR API](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html) |
14 | | HLS Bulk Download Bash Script | [Bash Script](bash/hls-bulk-download/)| Find and download | [CMR API](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html) |
15 | |HLS R Tutorial | [R Markdown](r/HLS_Tutorial.Rmd) | Tutorial demonstrating how to search for, access, and process HLS data in R | [CMR STAC API](https://cmr.earthdata.nasa.gov/stac) |
16 | 
17 | **Additionally, the [LPDAAC-Data-Resources](https://github.com/nasa/LPDAAC-Data-Resources) Repository has general resources associated with datasets hosted by the LP DAAC, as well as links to other repositories for specific datasets such as EMIT, ECOSTRESS, and GEDI.**
18 | 
19 | ## HLS Background
20 | 
21 | The  Harmonized Landsat Sentinel-2 ([HLS](https://lpdaac.usgs.gov/data/get-started-data/collection-overview/missions/harmonized-landsat-sentinel-2-hls-overview/)) project produces seamless, harmonized surface reflectance data from the Operational Land Imager (OLI) and Multi-Spectral Instrument (MSI) aboard Landsat and Sentinel-2 Earth-observing satellites, respectively. The aim is to produce seamless products with normalized parameters, which include atmospheric correction, cloud and cloud-shadow masking, geographic co-registration and common gridding, normalized bidirectional reflectance distribution function, and spectral band adjustment. This will provide global observation of the Earth’s surface every 2-3 days with 30 meter spatial resolution. One of the major applications that will benefit from HLS is agriculture assessment and monitoring, which is used as the use case for this tutorial.
22 | 
23 | ## Prerequisites/Setup Instructions
24 | 
25 | Instructions for setting up a compatible environment for accessing HLS data are linked to below.
26 | 
27 | - [Python set up instructions](https://github.com/nasa/LPDAAC-Data-Resources/blob/main/setup/setup_instructions_python.md)
28 | 
29 | ## Helpful Links  
30 | 
31 | - [LP DAAC Data Resources GitHub](https://github.com/nasa/LPDAAC-Data-Resources)
32 | - [HLSL30 V2 Product Page](https://www.doi.org/10.5067/HLS/HLSL30.002)  
33 | - [HLSS30 V2 Product Page](https://www.doi.org/10.5067/HLS/HLSS30.002)  
34 | - [HLS on Earthdata Search](https://search.earthdata.nasa.gov/search?q=%22HLSS30%22)  
35 | - [HLS V2 User Guide](https://lpdaac.usgs.gov/documents/1326/HLS_User_Guide_V2.pdf)  
36 | - [LP DAAC Data Resources Repository](https://github.com/nasa/LPDAAC-Data-Resources)  
37 | 
38 | ## Contact Info  
39 | 
40 | Email: <LPDAAC@usgs.gov>  
41 | Voice: +1-866-573-3222  
42 | Organization: Land Processes Distributed Active Archive Center (LP DAAC)¹  
43 | Website: <https://lpdaac.usgs.gov/>  
44 | Date last modified: 01-19-2023  
45 | 
46 | ¹Work performed under USGS contract G15PD00467 for NASA contract NNG14HH33I.  
47 | 


--------------------------------------------------------------------------------
/CONTRIBUTE.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # Contributor Code of Conduct
  3 | 
  4 | ## Our Pledge
  5 | 
  6 | We as members, contributors, and leaders pledge to make participation in our
  7 | community a harassment-free experience for everyone, regardless of age, body
  8 | size, visible or invisible disability, ethnicity, sex characteristics, gender
  9 | identity and expression, level of experience, education, socio-economic status,
 10 | nationality, personal appearance, race, caste, color, religion, or sexual
 11 | identity and orientation.
 12 | 
 13 | We pledge to act and interact in ways that contribute to an open, welcoming,
 14 | diverse, inclusive, and healthy community.
 15 | 
 16 | ## Our Standards
 17 | 
 18 | Examples of behavior that contributes to a positive environment for our
 19 | community include:
 20 | 
 21 | * Demonstrating empathy and kindness toward other people
 22 | * Being respectful of differing opinions, viewpoints, and experiences
 23 | * Giving and gracefully accepting constructive feedback
 24 | * Accepting responsibility and apologizing to those affected by our mistakes,
 25 |   and learning from the experience
 26 | * Focusing on what is best not just for us as individuals, but for the overall
 27 |   community
 28 | 
 29 | Examples of unacceptable behavior include:
 30 | 
 31 | * The use of sexualized language or imagery, and sexual attention or advances of
 32 |   any kind
 33 | * Trolling, insulting or derogatory comments, and personal or political attacks
 34 | * Public or private harassment
 35 | * Publishing others' private information, such as a physical or email address,
 36 |   without their explicit permission
 37 | * Other conduct which could reasonably be considered inappropriate in a
 38 |   professional setting
 39 | 
 40 | ## Enforcement Responsibilities
 41 | 
 42 | Community leaders are responsible for clarifying and enforcing our standards of
 43 | acceptable behavior and will take appropriate and fair corrective action in
 44 | response to any behavior that they deem inappropriate, threatening, offensive,
 45 | or harmful.
 46 | 
 47 | Community leaders have the right and responsibility to remove, edit, or reject
 48 | comments, commits, code, wiki edits, issues, and other contributions that are
 49 | not aligned to this Code of Conduct, and will communicate reasons for moderation
 50 | decisions when appropriate.
 51 | 
 52 | ## Scope
 53 | 
 54 | This Code of Conduct applies within all community spaces, and also applies when
 55 | an individual is officially representing the community in public spaces.
 56 | Examples of representing our community include using an official e-mail address,
 57 | posting via an official social media account, or acting as an appointed
 58 | representative at an online or offline event.
 59 | 
 60 | ## Enforcement
 61 | 
 62 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
 63 | reported to the community leaders responsible for enforcement at
 64 | LPDAAC@usgs.gov. All complaints will be reviewed and investigated
 65 | promptly and fairly.
 66 | 
 67 | All community leaders are obligated to respect the privacy and security of the
 68 | reporter of any incident.
 69 | 
 70 | ## Enforcement Guidelines
 71 | 
 72 | Community leaders will follow these Community Impact Guidelines in determining
 73 | the consequences for any action they deem in violation of this Code of Conduct:
 74 | 
 75 | ### 1. Correction
 76 | 
 77 | **Community Impact**: Use of inappropriate language or other behavior deemed
 78 | unprofessional or unwelcome in the community.
 79 | 
 80 | **Consequence**: A private, written warning from community leaders, providing
 81 | clarity around the nature of the violation and an explanation of why the
 82 | behavior was inappropriate. A public apology may be requested.
 83 | 
 84 | ### 2. Warning
 85 | 
 86 | **Community Impact**: A violation through a single incident or series of
 87 | actions.
 88 | 
 89 | **Consequence**: A warning with consequences for continued behavior. No
 90 | interaction with the people involved, including unsolicited interaction with
 91 | those enforcing the Code of Conduct, for a specified period of time. This
 92 | includes avoiding interactions in community spaces as well as external channels
 93 | like social media. Violating these terms may lead to a temporary or permanent
 94 | ban.
 95 | 
 96 | ### 3. Temporary Ban
 97 | 
 98 | **Community Impact**: A serious violation of community standards, including
 99 | sustained inappropriate behavior.
100 | 
101 | **Consequence**: A temporary ban from any sort of interaction or public
102 | communication with the community for a specified period of time. No public or
103 | private interaction with the people involved, including unsolicited interaction
104 | with those enforcing the Code of Conduct, is allowed during this period.
105 | Violating these terms may lead to a permanent ban.
106 | 
107 | ### 4. Permanent Ban
108 | 
109 | **Community Impact**: Demonstrating a pattern of violation of community
110 | standards, including sustained inappropriate behavior, harassment of an
111 | individual, or aggression toward or disparagement of classes of individuals.
112 | 
113 | **Consequence**: A permanent ban from any sort of public interaction within the
114 | community.
115 | 
116 | ## Attribution
117 | 
118 | This Code of Conduct is adapted from the [Zarr Developers][Github], available at
119 | [https://github.com/zarr-developers/.github/blob/main/CODE_OF_CONDUCT.md] and
120 | from the [Contributor Covenant][homepage], version 2.1, available at
121 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
122 | 
123 | Community Impact Guidelines were inspired by
124 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC].
125 | 
126 | For answers to common questions about this code of conduct, see the FAQ at
127 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
128 | [https://www.contributor-covenant.org/translations][translations].
129 | 
130 | [homepage]: https://www.contributor-covenant.org
131 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
132 | [Mozilla CoC]: https://github.com/mozilla/diversity
133 | [FAQ]: https://www.contributor-covenant.org/faq
134 | [translations]: https://www.contributor-covenant.org/translations
135 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
  1 | 02-06-2025
  2 | 
  3 | - Add [EVI_timeseries_with_odc_stac.ipynb](python/tutorials/EVI_timeseries_with_odc_stac.ipynb) to the repository
  4 |   - This notebook demonstrates how to use the Open Data Cube (ODC) STAC API to create an EVI time series.
  5 | 
  6 | 09-17-2024
  7 | 
  8 | - Update [HLS_Tutorial.Rmd](r/tutorials/HLS_Tutorial.Rmd)
  9 |   - Changed collection identifiers to reflect latest changes to the CMR-STAC API. See [this announcement](https://lpdaac.usgs.gov/news/important-update-to-cmr-stac-new-identifier-and-search-parameter-format/) for more information.  
 10 |   - Changed the input bbox formatting. 
 11 | 
 12 | 08-26-2024
 13 | 
 14 | - Update [HLS_Tutorial.Rmd](r/tutorials/HLS_Tutorial.Rmd)
 15 |   - Add a new section for Quality Masking
 16 |   - Improve readability and vectorize some code chunks
 17 | 
 18 | 01-19-2024
 19 | 
 20 | - Added a retry loop to the [HLS_Tutorial.ipynb](https://github.com/nasa/HLS-Data-Resources/blob/main/python/tutorials/HLS_Tutorial.ipynb) to fix a `vsicurl` error users were getting
 21 | - Made minor visual improvements to plots and cell outputs in [HLS_Tutorial.ipynb](https://github.com/nasa/HLS-Data-Resources/blob/main/python/tutorials/HLS_Tutorial.ipynb)
 22 | - Updated readme
 23 | 
 24 | 10-12-2023  
 25 | 
 26 | - Added the HLS Bulk Download Bash script to the repository
 27 |   - Previous changes include:  
 28 | 
 29 |     02-11-2022  
 30 | 
 31 |     - getHLS.sh is updated to fix a bug.
 32 | 
 33 |     08-30-2021
 34 | 
 35 |     - Changed concept ids to query for HLS v2.0
 36 |     - Updated readme dates to query granules from the month of July 2021. HLS v2.0 currently does not have granules available for earlier dates.
 37 | 
 38 | 07-28-2023  
 39 | 
 40 | - Updated the authentication with [`earthaccess`](https://github.com/nsidc/earthaccess)
 41 | 
 42 | - Updated the quality filtering workflow
 43 | - Resolved the error in output creation due to Tiled option
 44 | - Added the Scale_factor and add_offset to the unscaled outputs
 45 | - Updated the Fmask output creation when data is quality filtered
 46 | 
 47 | 06-22-2023
 48 | 
 49 | - Updated instructions
 50 | - Created new .yml file for creating environment
 51 | - Updated some plot widths and limits in the tutorial
 52 | 
 53 | 06-02-2023
 54 | 
 55 | - Reworked the HLS tutorial to include:
 56 |   - The earthaccess for authentication,
 57 |   - The .yml file for environment requirements
 58 |   - Made new visualizations
 59 |   - Changed the ROI to include the larger area with variation in quality info
 60 | -
 61 | 
 62 | 01-11-2023
 63 | 
 64 | - Added The HLS R Tutorial
 65 | 
 66 | 11-02-2021  
 67 | 
 68 | - Geoviews seems to have compatibility issues with Shapely 1.8. Specified Shapely 1.7 in the environment setup instructions.  
 69 | - Remove Python env setup instructions from the tutorial. The setup instructions are now only found in the Readme.  
 70 | 
 71 | 10-25-2021  
 72 | 
 73 | - Updated to access HLS v2.0 data  
 74 | - Changes to NASA’s Earthdata cloud distribution environment configurations had unintended repercussions with GDAL resulting in the inability to access HLS data via the HTTPS URL. Access issues appear to be resolved by the following PRs:  <https://github.com/OSGeo/gdal/issues/4641> and <https://github.com/OSGeo/gdal/pull/4654>>  
 75 | - Updated Python environment requirements to specify GDAL v3.2 - tutorial has been successfully tested using GDAL 3.0, 3.1, and 3.2. Tests with GDAL 3.3 were unsuccessful  
 76 | - Updated visualization libraries (e.g., geoviews) to resolve visualization errors in tutorial
 77 | - Updated markdown formatting within tutorial
 78 | - Updated readme content and format
 79 | 
 80 | 03-29-2021
 81 | 
 82 | - Updated broken links to STAC API spec pages and HLS V1.5 User Guide
 83 | 
 84 | 03-25-2021
 85 | 
 86 | - Added `conda install jupyter notebook --yes` as an instruction to the README and tutorial (issue with `holoviews`/`geoviews` plots not rendering)
 87 | - Added a check during automation (section 6) that will skip a file that has already been processed and is available in the current directory  
 88 | - Updated the way that the crs is passed to `hvplot()`
 89 | - Added `from subprocess import Popen, DEVNULL, STDOUT` to address issue when `_netrc` is not found  
 90 | - Added optional `#gdal.SetConfigOption("GDAL_HTTP_UNSAFESSL", "YES")` call for users that need it  
 91 | - Updated time series with more observations from March 2021  
 92 | 
 93 | 03-22-2021
 94 | 
 95 | - Removed GDAL config option to turn SSL off (not needed)
 96 | - Added `.sortby()` function to the `xarray` data array to assure L30 and S30 observations are ordered chronologically
 97 | - Changed the time period of interest to 09-01-2020 to 03-31-2021
 98 | - Updated the section on generating a `.netrc` or `_netrc` file based on user platform
 99 | 
100 | 03-15-2021
101 | 
102 | - Updated `collections` to product `shortnames` (in place of `concept_id`) to align with latest release of CMR-STAC.
103 | - Updated logic for search responses with no data found (now returns valid response with `numberReturned` = 0).
104 | - Changed all queries to CMR-STAC LPCLOUD Search endpoint from **GET** requests to **POST** requests.
105 | - Combined `collections` call to LP CLOUD Search endpoint into a single POST request with a list of both HLS products.  
106 | - Updated README with link to CHANGELOG.md  
107 | 
108 | 02-12-2021
109 | 
110 | - Replaced `bounding_box` with `bbox` and `concept_id` with `collections` to fix breaking change in CMR-STAC search parameter spec and added CHANGELOG.md to repo  
111 | 
112 | 01-26-2021  
113 | 
114 | - Added Support for HLSL30 V1.5  
115 | - Updated the HLS Script
116 | 
117 | 12-15-2020
118 | 
119 | - Pull request #5: Added functionality to handle files that are unable to be downloaded
120 | - Fixed COG overviews issue and added filtering for empty observations
121 | - Added functionality to handle files that are unable to be downloaded
122 | 
123 | 11-20-2020
124 | 
125 | - Pull request #4: Corrected flipped description of bounding box coordinates
126 | - Corrected flipped description of bounding box coordinates
127 | 
128 | 11-19-2020
129 | 
130 | - Pull request #3: Updated tutorial and readme to reflect cmr-stac updates and peer review suggestions
131 | - Updated tutorial and readme to reflect cmr-stac updates and peer review suggestions
132 | 
133 | 11-17-2020
134 | 
135 | - Merge pull request #2 in LPDUR/hls-tutorial from develop to master
136 | - Updated code to reflect new STAC endpoint and peer review revisions
137 | 
138 | 10-28-2020
139 | 
140 | - Merge pull request #1 in LPDUR/hls-tutorial from develop to master
141 | - Updated tutorial and README from AF review
142 | - Updated tutorial and README from AF review
143 | 
144 | 10-27-2020
145 | 
146 | - updated tutorial and readme from MJ review
147 | 
148 | 10-08-2020
149 | 
150 | - Added additional guidance on setting up netrc file
151 | - Updated tutorial, README, and added html output and requirements file
152 | 
153 | 09-29-2020
154 | 
155 | - Initial Commit
156 | 


--------------------------------------------------------------------------------
/bash/hls-bulk-download/README.md:
--------------------------------------------------------------------------------
  1 | # Download HLS data from LP DAAC Cloud Archive
  2 | 
  3 | The [Harmonized Landsat Sentinel-2 (HLS)](https://lpdaac.usgs.gov/data/get-started-data/collection-overview/missions/harmonized-landsat-sentinel-2-hls-overview/) version 2.0 (v2) collection is archived and distributed by NASA's [Land Processes Distributed Active Archive Center](https://lpdaac.usgs.gov/) (LP DAAC). HLS v2 provide consistent science quality surface reflectance (SR) and top of atmosphere (TOA) brightness data from the Operational Land Imager (OLI) aboard the joint NASA/USGS Landsat 8 satellite ([HLSL30](https://doi.org/10.5067/HLS/HLSL30.002)) and the Multi-Spectral Instrument (MSI) aboard the European Space Agency (ESA) Sentinel-2A and Sentinel-2B satellites ([HLSS30](https://doi.org/10.5067/HLS/HLSS30.002)). The combined measurement enables global observations of the land every 2–3 days at 30 meter (m) spatial resolution. These data are available from [LP DAAC Cumulus cloud archive](https://search.earthdata.nasa.gov/search?q=HLS%20v2.0) as [Cloud Optimized GeoTIFFs](https://www.cogeo.org/) (COG).  
  4 | 
  5 | The `getHLS.sh` script was created to not only bulk download HLS data for a given HLS tile ID and date range (along with other filtering parameters), but also identify and download previously unavailable granules without redownloading previously found granules.
  6 | 
  7 | ## Requirements
  8 | 
  9 | </br>
 10 | 
 11 | > **DISCLAIMER**  
 12 | >
 13 | > - A [Bash shell](https://git-scm.com/download/win) is required for the execution of the `getHLS.sh` script.
 14 | > - The `getHLS.sh` script has been tested on Mac OS, Ubuntu 20.04, and Windows OS.  
 15 | > - **!!!** On Windows OS, [Git Bash](https://git-scm.com/download/win) was the only shell to successfully execute the `getHLS.sh` script. Testing of the script in other common environments with Linux shells (e.g., [Cygwin](https://www.cygwin.com/)) is on-going.
 16 | 
 17 | </br>
 18 | 
 19 | ### 1. NASA Earthdata Login Account
 20 | 
 21 | To download HLS data from any client, users must have a NASA Earthdata Login account. Please visit <https://urs.earthdata.nasa.gov> to register and manage your Earthdata Login account. This account is free to create and only takes a moment to set up.
 22 | 
 23 | ### 2. netrc File
 24 | 
 25 | A netrc file containing your NASA Earthdata Login credentials is needed for the script to authenticate and download HLS data. The file must be placed in the user's `HOME` (Mac or Linux OS) or `USERPROFILE` (Window OS) directory and must be saved as `.netrc` containing the information below:  
 26 | 
 27 | ```text
 28 | machine urs.earthdata.nasa.gov login <EDL Username> password <EDL Password>
 29 | ```
 30 | 
 31 | where `<EDL Username>` is the user's Earthdata Login username and `<EDL Password>` is the user's Earthdata Login password.
 32 | 
 33 | ### 3. Get `getHLS.sh` Script
 34 | 
 35 | [Download](https://github.com/nasa/HLS-Data-Resources/archive/refs/heads/main.zip) or clone the [HLS-Data-Resources Repository](https://github.com/nasa/HLS-Data-Resources.git) to a local directory.
 36 | 
 37 | ### 4. Bash shell and File Permissions
 38 | 
 39 | The `getHLS.sh` script mush be [run from a Bash shell](https://devconnected.com/how-to-run-a-bash-script/). Additionally, the [file permissions](https://www.linux.com/training-tutorials/understanding-linux-file-permissions/) must be set to allow the user to execute the script. The permissions can be set using the Bash shell. From the shell, navigate to the directory containing the `getHLS.sh`.  
 40 | 
 41 | #### EXAMPLE
 42 | 
 43 | ```text
 44 | λ cd ./hls-bulk-download
 45 | 
 46 | λ ls
 47 | getHLS.sh  README.md  tmp.tileid.txt
 48 | ```
 49 | 
 50 | Next, change the permissions so that the user can execute the script. This can be achieved by running the command below.
 51 | 
 52 | ```text
 53 | λ chmod u+rwx getHLS.sh
 54 | ```
 55 | 
 56 | The user may already have permissions to execute. In this case, explicitly setting the permissions will not have a negative impact.
 57 | 
 58 | ## Features
 59 | 
 60 | 1. Query the NASA Common Metadata Repository (CMR) based on HLS tile ID, date range, cloud cover, spatial cover, etc., to get a list of HLS files for downloading  
 61 | 2. Organize the HLS files into subdirectories based on data type (L30/S30), year, tile ID, and granule name  
 62 | 3. Uses [wget](https://www.gnu.org/software/wget/) or [curl](https://curl.se/), depending on which is available on the user's system, to run multiple download processes in parallel (see [Additional Configuration Options](#additional-configuration-options))  
 63 | 4. A second invocation will not download files that have been downloaded before. Only files that were missing and/or modified since the last invocation will be downloaded.  
 64 | 
 65 | ## Script parameters
 66 | 
 67 | The required positional parameters for commandline execution of the `getHLS.sh` script are below. For additional configuration options (i.e., cloud cover and spatial coverage), see the [Additional Configuration Options](#additional-configuration-options) section of this document.  
 68 | 
 69 | ```text
 70 | <tilelist> <date_begin> <date_end> <out_dir>
 71 | ```
 72 | 
 73 | where:  
 74 | 
 75 | - `<tilelist>` is a text file containing the 5-character tile IDs. **NOTE**, each entry **must** be separated by a space (see [tmp.tileid.txt](./tmp.tileid.txt)). **DO NOT** add a new line after a tile ID entry.  
 76 | 
 77 |     **EXAMPLE**
 78 | 
 79 |     ```text
 80 |     λ cat tileid_file.txt
 81 |     37QGC 10SGJ
 82 |     ```
 83 | 
 84 | - `<date_begin>` is the start of the sensing date (yyyy-mm-dd)  
 85 | 
 86 |     **EXAMPLE**
 87 | 
 88 |     ```text
 89 |     2021-07-01
 90 |     ```
 91 | 
 92 | - `<date_end>` is the end of the sensing date - inclusive (yyyy-mm-dd)  
 93 | 
 94 |     **EXAMPLE**
 95 | 
 96 |     ```text
 97 |     2021-07-30
 98 |     ```
 99 | 
100 | - `<out_dir>` is the base directory of output<sup>1</sup>
101 | 
102 |     **EXAMPLE**
103 | 
104 |     ```text
105 |     outdir
106 |     ```
107 | 
108 | <sup>1</sup> The base directory does not have to exist prior to the execution of the script. The directory will be created with subdirectories that bin the data being downloaded.  
109 | 
110 | ## Script Executions
111 | 
112 | To execute the script, the call must take the form below:
113 | 
114 | ```text
115 | λ ./getHLS.sh <tilelist> <date_begin> <date_end> <out_dir>
116 | ```
117 | 
118 | Using the example parameters described above, a fully parameterized call looks like:
119 | 
120 | ```text
121 | λ ./getHLS.sh tileid_file.txt 2021-07-01 2021-07-30 outdir
122 | ```
123 | 
124 | ## Additional Configuration Options
125 | 
126 | The script has three additional parameters that can be modified to either refine the search query further or increase the number of download processes to run. These additional parameter are only configurable **within** the `getHLS.sh` script itself. The parameters are:  
127 | 
128 | - `NP` (line 59) specifies how many download processes to run. The default is 10; can be modified based on the capacity of the local computer.  
129 | - `CLOUD` (line 60) is the maximum amount of cloud cover in %  
130 | - `SPATIAL` (line 61) is the minimum amount of spatial coverage within the tile in %  
131 | 
132 | > **NOTE**  
133 | > To modify the values for `NP`, `CLOUD`, and/or `SPATIAL`, open `getHLS.sh` in a text editor and change the value on the **right** side of the equal (=) sign.
134 | 
135 | ---
136 | 
137 | ## Contact Information
138 | 
139 | **Author:** Dr. Junchang Ju (Made available by NASA's LP DAAC)  
140 | **Contact:** LPDAAC@usgs.gov  
141 | **Date Last Modified:** See [CHANGELOG.md](../../CHANGELOG.md)  
142 | 


--------------------------------------------------------------------------------
/bash/hls-bulk-download/getHLS.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash 
  2 | 
  3 | # A bash script to download HLS data from LP DAAC. It runs on an OS where 
  4 | # bash is available: Linux, Mac, (some Windows as well?). An account on 
  5 | # urs.earthdata.nasa.gov is required. 
  6 | #
  7 | # Features include:
  8 | #   1) Query the DAAC metadata based on tile ID, date range, cloud cover,
  9 | #      spatial cover, etc, to get a list of HLS files for downloading
 10 | #   2) Organize the HLS files into subdirectories based on data type (L30/S30), 
 11 | #      year, tile ID, and granule name
 12 | #   3) Run multiple download processes in parallel 
 13 | #   4) A second invocation won't download files that have been downloaded before,
 14 | #      so similar to rsync.
 15 | #
 16 | # Commandline paramaeters:
 17 | #   $1: a text file of tile IDs
 18 | #   $2: start of the sensing date
 19 | #   $3: end of the sensing date, inclusive
 20 | #   $4: the base directory of output; subdirectories are to be created in it.  
 21 | #
 22 | # Implementation notes: 
 23 | #    1) The metadata query result can be returned in either xml or json format. 
 24 | #       Json format gives the data file paths directly, but the xml format needs a 
 25 | #       second query to find the data file paths.
 26 | #       This script chooses json.
 27 | #    2) The parameter NP in this script specifies how many download processes to run.
 28 | #       The default is 10; can be modifed based on the capacity of the local computer. 
 29 | #       Similarly, CLOUD_COVERAGE and SPATIAL_COVERAGE thresholds  are hard-coded to 
 30 | #	give all the data, but can be adjusted at the beginning of this script..
 31 | #    3) The DAAC script DAACDataDownload.py is not needed. As long as an entry in .netrc
 32 | #       file is set up for urs.earthdata.nasa.gov, wget/curl can be used in place of the 
 33 | #       DAAC script, which is described at 
 34 | # https://git.earthdata.nasa.gov/projects/LPDUR/repos/daac_data_download_python/browse
 35 | #    4) Both wget and curl can download multiple files in one invocation.
 36 | # 	They appear to be have the same speed.
 37 | #    5) Can be slow because of the use of bash and bash subshell.
 38 | #    6) Although the script will skip a file if the existing local copy appears to be 
 39 | #	identical to remote file, the time saving is not much, probably because there are 
 40 | #	so many files in a granule to check (time stamp, length)
 41 | #
 42 | # Junchang Ju. June 5, 2021
 43 | #              July 29, 2021
 44 | 
 45 | if [ $# -ne 4 ]
 46 | then
 47 | 	echo "Usage: $0 <tilelist> <date_begin> <date_end> <out_dir>" >&2
 48 | 	echo "where	<tilelist> is a text file of 5-character tile IDs" >&2
 49 | 	echo "		<date_begin> and <date_end> are in the format 2021-12-31" >&2 
 50 | 	echo "		<out_dir> is the base of output directory. Subdirectories are to be created within it " >&2 
 51 | 	exit 1
 52 | fi
 53 | tilelist=$1
 54 | datebeg=$2
 55 | dateend=$3
 56 | OUTDIR=$4
 57 | 
 58 | ### A few customizable parameter 
 59 | NP=10 		# Run this many download processes by default. 
 60 | CLOUD=100	# Maximum amount of cloud cover in %
 61 | SPATIAL=0	# Minimum amount of spatial cover in %
 62 | 
 63 | 
 64 | ############################### Stop Here! Do Not Enter ##################
 65 | 
 66 | ### earthdata account
 67 | if [ ! -f $HOME/.netrc ]
 68 | then
 69 | 	echo "$HOME/.netrc file unavailable" >&2
 70 | 	echo "Search the web for how to set up .netrc" >&2
 71 | 	exit 1
 72 | else 
 73 | 	if ! grep urs.earthdata.nasa.gov $HOME/.netrc -q
 74 | 	then
 75 | 		echo "urs.earthdata.nasa.gov entry not found in $HOME/.netrc" >&2
 76 | 		exit 1
 77 | 	fi
 78 | fi
 79 | 
 80 | ### Check on date format
 81 | for d in $datebeg $dateend
 82 | do
 83 | 	case $d in
 84 | 	  [12][0-9][0-9][0-9]-[01][0-9]-[0-3][0-9]);;
 85 | 	  *) echo "Given date $d not in the format 2021-12-31" >&2; exit 1;;
 86 |         esac	  
 87 | done
 88 | 
 89 | ### Delete the tailing "/" if there is any.
 90 | OUTDIR=$(echo $OUTDIR | sed 's:/$::')   
 91 | export OUTDIR	# Must export for the subshell
 92 | 
 93 | ### wget/curl availability
 94 | WGET=false
 95 | CURL=false
 96 | which wget >/dev/null 2>&1
 97 | if [ $? -eq 0 ]; then WGET=true; fi 
 98 | which curl >/dev/null 2>&1
 99 | if [ $? -eq 0 ]; then CURL=true; fi 
100 | 
101 | if [ $WGET = false ] && [ $CURL = false ]
102 | then
103 | 	echo "This script needs wget or curl to be installed on your system">&2
104 | 	exit 1
105 | fi 
106 | export WGET CURL #  Must export for the subshell
107 | 
108 | ### Force to use curl for speed comparison 
109 | #WGET=false
110 | 
111 | ### Create a string to name temporary files
112 | fbase=tmp
113 | for p in $1 $2 $3
114 | do
115 | 	base=$(basename $p)
116 | 	fbase=${fbase}_$base
117 | done
118 | 
119 | ### Build up the query.
120 | ### The base for search. Both L30 and S30. Page size 2000 is big enough for a single tile 
121 | ### over the given time period; pagination not needed.
122 | #query="https://cmr.earthdata.nasa.gov/search/granules.json?collection_concept_id=C1711924822-LPCLOUD&collection_concept_id=C1711972753-LPCLOUD&page_size=2000"
123 | query="https://cmr.earthdata.nasa.gov/search/granules.json?collection_concept_id=C2021957295-LPCLOUD&collection_concept_id=C2021957657-LPCLOUD&page_size=2000"
124 | ### Add date range
125 | query="${query}&temporal=${datebeg}T00:00:00Z,${dateend}T23:59:59Z"
126 | 
127 | ### Other possible parameters.
128 | query="${query}&attribute[]=int,SPATIAL_COVERAGE,$SPATIAL,"		# min 
129 | # query="${query}&attribute[]=float,CLOUD_COVERAGE,,$CLOUD" 		# max. There is an issue for data type for CLOUD_COVERAGE
130 | 
131 | ### Add tile ID and begin query
132 | meta=/tmp/${fbase}.down.meta.txt
133 | >$meta
134 | for tile in $(cat $tilelist)
135 | do
136 | 	# A rough check if the tile ID is valid
137 | 	case $tile in
138 | 	  [0-6][0-9][A-Z][A-Z][A-Z]);;
139 | 	  *) echo "Not a valid 5-character tile ID, ignore: $tile" >&2;
140 |              continue;;
141 | 	esac
142 | 
143 | 	query_final="${query}&attribute[]=int,CLOUD_COVERAGE,,$CLOUD" 		# max 
144 | 	if [ $WGET = true ]
145 | 	then
146 | 		wget -q "${query_final}&attribute[]=string,MGRS_TILE_ID,$tile" -O - >>$meta
147 | 	else
148 | 		curl -s "${query_final}&attribute[]=string,MGRS_TILE_ID,$tile" >>$meta
149 | 	fi
150 | done
151 | 
152 | ### Parse metadata for a list of files to download. Export for subshell.
153 | ### Sort file names for humans.
154 | flist=/tmp/${fbase}.down.flist.txt
155 | export flist
156 | 
157 | tr "," "\n" < $meta  | 
158 |   grep https |
159 |   egrep "/HLS.[LS]30." | 
160 |   tr "\"" " " |
161 |   awk '{print $3}' |
162 |   awk -F"/" '{print $NF, $0}' |
163 |   sort -k1,1 |
164 |   awk '{print $2}' >$flist
165 | 
166 | ### A function to download all the files in a granule. The B01 file pathname 
167 | ### of the granule is given.  Save the granule in its own directory.
168 | function download_granule()
169 | {
170 | 	outdir=
171 | 	set -o pipefail
172 | 	trap 'rm -rf $outdir' 1 2 15
173 | 
174 | 	# Example B01 basename: HLS.L30.T18SUJ.2021078T153941.v1.5.B01.tif
175 | 	fullpath=$1
176 | 	B1base=$(basename $fullpath)
177 | 
178 | 	# Granule name and the all the files for this granule
179 | 	granule=$(echo $B1base | awk -F"." '{print $1 "." $2 "." $3 "." $4 "." $5 "." $6}')
180 | 	allfile=/tmp/tmp.files.in.${granule}.txt	# PWD for the later subshell for curl.
181 | 	grep $granule $flist > $allfile 
182 | 
183 | 	# Output directory 
184 | 	set $(echo $B1base | awk -F"." '{ print $2, substr($3,2,5), substr($4,1,4)}')
185 | 	type=$1
186 | 	tileid=$2
187 | 	year=$3
188 | 
189 | 	subdir=$(echo $tileid | awk '{print substr($0,1,2) "/" substr($0,3,1) "/" substr($0,4,1) "/" substr($0,5,1)}')
190 | 	outdir=$OUTDIR/$type/$year/$subdir/$granule
191 | 	mkdir -p $outdir
192 | 
193 | 	# Cookie is needed by curl on my mac at least. Without it, only the jpg and json 
194 | 	# files in lp-prod-public are downloaded, but not the files in /lp-prod-protected/ 
195 | 	# on the DAAC server.
196 | 	cookie=/tmp/tmp.cookie.$granule
197 | 
198 | 	echo "Downloading into $outdir"
199 | 	if [ $WGET = true ]
200 | 	then
201 | 		wget -q -N -i $allfile -P $outdir
202 | 		if [ $? -eq 0 ]
203 | 		then
204 | 			echo "Finished downloading into $outdir"
205 | 		else
206 | 			rm -rf $outdir
207 | 		fi
208 | 	else
209 | 		# Older curl does not have the option for output directory. So use subshell.
210 | 		# And curl does not take a list of URL; bad.
211 | 		# ( cd $outdir && cat $allfile | xargs -n1 curl -n -s -C -  -OL )
212 | 		( cd $outdir && cat $allfile | xargs curl --cookie-jar $cookie -n -s -L -C - --remote-name-all )
213 | 		if [ $? -eq 0 ]
214 | 		then
215 | 			echo "Finished downloading $outdir"
216 | 		else 
217 | 			rm -rf $outdir
218 | 		fi
219 | 		rm $cookie
220 | 	fi
221 | 
222 | 	rm $allfile 
223 | }
224 | export -f download_granule
225 | 
226 | ### Run $NP bash subshells
227 | ng=$(grep B01 $flist | wc -l | awk '{print $1}')
228 | echo "$ng granules to download"
229 | grep B01 $flist | xargs -n1 -P $NP -I% bash -c "download_granule %"  
230 | 
231 | rm -f $meta $flist
232 | exit 0
233 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
  1 |                                 Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/python/scripts/HLS_SuPER/HLS_PER.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | ===============================================================================
  4 | HLS Processing and Exporting Reformatted Data (HLS_PER)
  5 | 
  6 | This module contains functions to conduct subsetting and quality filtering of 
  7 | search results.
  8 | -------------------------------------------------------------------------------
  9 | Authors: Cole Krehbiel, Mahsa Jami, and Erik Bolch
 10 | Last Updated: 2024-09-18
 11 | ===============================================================================
 12 | """
 13 | 
 14 | import os
 15 | import sys
 16 | import logging
 17 | 
 18 | import numpy as np
 19 | from datetime import datetime as dt
 20 | import xarray as xr
 21 | import rioxarray as rxr
 22 | import dask.distributed
 23 | 
 24 | 
 25 | def create_output_name(url, band_dict):
 26 |     """
 27 |     Uses HLS default naming scheme to generate an output name with common band names.
 28 |     This allows for easier stacking of bands from both collections.
 29 |     """
 30 |     # Get Necessary Strings
 31 |     prod = url.split("/")[4].split(".")[0]
 32 |     asset = url.split("/")[-1].split(".")[-2]
 33 |     # Hard-coded one off for Fmask name incase it is not in the band_dict but is needed for masking
 34 |     if asset == "Fmask":
 35 |         output_name = f"{'.'.join(url.split('/')[-1].split('.')[:-2])}.FMASK.subset.tif"
 36 |     else:
 37 |         for key, value in band_dict[prod].items():
 38 |             if value == asset:
 39 |                 output_name = (
 40 |                     f"{'.'.join(url.split('/')[-1].split('.')[:-2])}.{key}.subset.tif"
 41 |                 )
 42 |     return output_name
 43 | 
 44 | 
 45 | def open_hls(url, roi=None, scale=True, chunk_size=dict(band=1, x=512, y=512)):
 46 |     """
 47 |     Generic Function to open an HLS COG and clip to ROI. For consistent scaling, this must be done manually.
 48 |     Some HLS Landsat scenes have the metadata in the wrong location.
 49 |     """
 50 |     # Open using rioxarray
 51 |     da = rxr.open_rasterio(url, chunks=chunk_size, mask_and_scale=False).squeeze(
 52 |         "band", drop=True
 53 |     )
 54 | 
 55 |     # Reproject ROI and Clip if ROI is provided
 56 |     if roi is not None:
 57 |         roi = roi.to_crs(da.spatial_ref.crs_wkt)
 58 |         da = da.rio.clip(roi.geometry.values, roi.crs, all_touched=True)
 59 | 
 60 |     # Apply Scale Factor if desired for non-quality layer
 61 |     if scale and "Fmask" not in url:
 62 |         # Mask Fill Values
 63 |         da = xr.where(da == -9999, np.nan, da)
 64 |         # Scale Data
 65 |         da = da * 0.0001
 66 |         # Remove Scale Factor After Scaling - Prevents Double Scaling
 67 |         da.attrs["scale_factor"] = 1.0
 68 | 
 69 |     # Add Scale Factor to Attributes Manually - This will overwrite/add if the data is missing.
 70 |     if not scale and "Fmask" not in url:
 71 |         da.attrs["scale_factor"] = 0.0001
 72 | 
 73 |     return da
 74 | 
 75 | 
 76 | def create_quality_mask(quality_data, bit_nums: list = [0, 1, 2, 3, 4, 5]):
 77 |     """
 78 |     Uses the Fmask layer and bit numbers to create a binary mask of good pixels.
 79 |     By default, bits 0-5 are used.
 80 |     """
 81 |     mask_array = np.zeros((quality_data.shape[0], quality_data.shape[1]))
 82 |     # Remove/Mask Fill Values and Convert to Integer
 83 |     quality_data = np.nan_to_num(quality_data, 0).astype(np.int8)
 84 |     for bit in bit_nums:
 85 |         # Create a Single Binary Mask Layer
 86 |         mask_temp = np.array(quality_data) & 1 << bit > 0
 87 |         mask_array = np.logical_or(mask_array, mask_temp)
 88 |     return mask_array
 89 | 
 90 | 
 91 | def process_granule(
 92 |     granule_urls,
 93 |     roi,
 94 |     quality_filter,
 95 |     scale,
 96 |     output_dir,
 97 |     band_dict,
 98 |     bit_nums=[0, 1, 2, 3, 4, 5],
 99 |     chunk_size=dict(band=1, x=512, y=512),
100 | ):
101 |     """
102 |     Processes a list of HLS asset urls for a single granule.
103 |     """
104 | 
105 |     # Setup Logging
106 |     logging.basicConfig(
107 |         level=logging.INFO,
108 |         format="%(levelname)s:%(asctime)s ||| %(message)s",
109 |         handlers=[logging.StreamHandler(sys.stdout)],
110 |     )
111 | 
112 |     # Check if all Outputs Exist for a Granule
113 |     if not all(
114 |         os.path.isfile(f"{output_dir}/{create_output_name(url, band_dict)}")
115 |         for url in granule_urls
116 |     ):
117 | 
118 |         # First Handle Quality Layer
119 |         if quality_filter:
120 |             # Generate Quality Layer URL
121 |             split_asset = granule_urls[0].split("/")[-1].split(".")
122 |             split_asset[-2] = "Fmask"
123 |             quality_url = (
124 |                 f"{'/'.join(granule_urls[0].split('/')[:-1])}/{'.'.join(split_asset)}"
125 |             )
126 | 
127 |             # Check if File exists in Output Directory
128 |             output_name = create_output_name(quality_url, band_dict)
129 |             output_file = f"{output_dir}/{output_name}"
130 | 
131 |             # Open Quality Layer
132 |             qa_da = open_hls(quality_url, roi, scale, chunk_size)
133 | 
134 |             # Check if quality asset is already processed
135 |             if not os.path.isfile(output_file):
136 |                 # Write Output
137 |                 qa_da.rio.to_raster(raster_path=output_file, driver="COG")
138 |             else:
139 |                 logging.info(
140 |                     f"Existing file {output_name} found in {output_dir}. Skipping."
141 |                 )
142 | 
143 |             # Remove Quality Layer from Granule Asset List if Present
144 |             granule_urls = [asset for asset in granule_urls if asset != quality_url]
145 | 
146 |             # Create Quality Mask
147 |             qa_mask = create_quality_mask(qa_da, bit_nums=bit_nums)
148 | 
149 |         # Process Remaining Assets
150 | 
151 |         for url in granule_urls:
152 |             # Check if File exists in Output Directory
153 |             output_name = create_output_name(url, band_dict)
154 |             output_file = f"{output_dir}/{output_name}"
155 | 
156 |             # Check if scene is already processed
157 |             if not os.path.isfile(output_file):
158 |                 # Open Asset
159 |                 da = open_hls(url, roi, scale, chunk_size)
160 | 
161 |                 # Apply Quality Mask if Desired
162 |                 if quality_filter:
163 |                     da = da.where(~qa_mask)
164 | 
165 |                 # Write Output
166 |                 da.rio.to_raster(raster_path=output_file, driver="COG")
167 |             else:
168 |                 logging.info(
169 |                     f"Existing file {output_name} found in {output_dir}. Skipping."
170 |                 )
171 |     else:
172 |         logging.info(
173 |             f"All assets related to {granule_urls[0].split('/')[-1]} are already processed, skipping."
174 |         )
175 | 
176 | 
177 | def build_hls_xarray_timeseries(
178 |     hls_cog_list, mask_and_scale=True, chunk_size=dict(band=1, x=512, y=512)
179 | ):
180 |     """
181 |     Builds a single band timeseries using xarray for a list of HLS COGs. Dependent on file naming convention.
182 |     Works on SuPERScript named files. Files need common naming bands corresponding HLSS and HLSL bands,
183 |     e.g. HLSL30 Band 5 (NIR1) and HLSS30 Band 8A (NIR1)
184 |     """
185 |     # Define Band(s)
186 |     bands = [filename.split(".")[6] for filename in hls_cog_list]
187 | 
188 |     # Make sure all files in list are the same band
189 |     if not all(band == bands[0] for band in bands):
190 |         raise ValueError("All listed files must be of the same band.")
191 | 
192 |     band_name = bands[0]
193 | 
194 |     # Create Time Variable
195 |     try:
196 |         time_list = [
197 |             dt.strptime(filename.split(".")[3], "%Y%jT%H%M%S")
198 |             for filename in hls_cog_list
199 |         ]
200 |     except ValueError:
201 |         print("A COG does not have a valid date string in the filename.")
202 | 
203 |     time = xr.Variable("time", time_list)
204 | 
205 |     timeseries_da = xr.concat(
206 |         [
207 |             rxr.open_rasterio(
208 |                 filename, mask_and_scale=mask_and_scale, chunks=chunk_size
209 |             ).squeeze("band", drop=True)
210 |             for filename in hls_cog_list
211 |         ],
212 |         dim=time,
213 |     )
214 |     timeseries_da.name = band_name
215 | 
216 |     return timeseries_da
217 | 
218 | 
219 | def create_timeseries_dataset(hls_file_dir, output_type, output_dir=None):
220 |     """
221 |     Creates an xarray dataset timeseries from a directory of HLS COGs.
222 |     Writes to a netcdf output. Currently only works for HLS SuPER outputs.
223 |     """
224 | 
225 |     # Setup Logging
226 |     logging.basicConfig(
227 |         level=logging.INFO,
228 |         format="%(levelname)s:%(asctime)s ||| %(message)s",
229 |         handlers=[logging.StreamHandler(sys.stdout)],
230 |     )
231 | 
232 |     # List Files in Directory
233 |     all_files = [file for file in os.listdir(hls_file_dir) if file.endswith(".tif")]
234 | 
235 |     # Create Dictionary of Files by Band
236 |     file_dict = {}
237 |     for file in all_files:
238 |         tile = file.split(".")[2]
239 |         band = file.split(".")[6]
240 |         full_path = os.path.join(hls_file_dir, file)
241 |         if tile not in file_dict:
242 |             file_dict[tile] = {}
243 |         if band not in file_dict[tile]:
244 |             file_dict[tile][band] = []
245 |         file_dict[tile][band].append(full_path)
246 | 
247 |     # logging.info(f"{file_dict}")
248 | 
249 |     # Check that all bands within each tile have the same number of observations
250 |     for tile, bands in file_dict.items():
251 |         q_obs = {band: len(files) for band, files in bands.items()}
252 |         if not all(q == list(q_obs.values())[0] for q in q_obs.values()):
253 |             logging.info(
254 |                 f"Not all bands in {tile} have the same number of observations."
255 |             )
256 |             logging.info(f"{q_obs}")
257 | 
258 |     # Loop through each tile and build timeseries output
259 | 
260 |     for tile, bands in file_dict.items():
261 |         dataset = xr.Dataset()
262 | 
263 |         timeseries_dict = {
264 |             band: dask.delayed(build_hls_xarray_timeseries)(files)
265 |             for band, files in bands.items()
266 |         }
267 |         timeseries_dict = dask.compute(timeseries_dict)[0]
268 |         dataset = xr.Dataset(timeseries_dict)
269 | 
270 |         # Set up CF-Compliant Coordinate Attributes
271 |         dataset.attrs["Conventions"] = "CF-1.6"
272 |         dataset.attrs["title"] = "HLS SuPER Timeseries Dataset"
273 |         dataset.attrs["institution"] = "LP DAAC"
274 | 
275 |         dataset.x.attrs["axis"] = "X"
276 |         dataset.x.attrs["standard_name"] = "projection_x_coordinate"
277 |         dataset.x.attrs["long_name"] = "x-coordinate in projected coordinate system"
278 |         dataset.x.attrs["units"] = "m"
279 | 
280 |         dataset.y.attrs["axis"] = "Y"
281 |         dataset.y.attrs["standard_name"] = "projection_y_coordinate"
282 |         dataset.y.attrs["long_name"] = "y-coordinate in projected coordinate system"
283 |         dataset.y.attrs["units"] = "m"
284 | 
285 |         dataset.time.attrs["axis"] = "Z"
286 |         dataset.time.attrs["standard_name"] = "time"
287 |         dataset.time.attrs["long_name"] = "time"
288 | 
289 |         # Get first and last date
290 |         first_date = (
291 |             dataset.time.data[0].astype("M8[ms]").astype(dt).strftime("%Y-%m-%d")
292 |         )
293 |         final_date = (
294 |             dataset.time.data[-1].astype("M8[ms]").astype(dt).strftime("%Y-%m-%d")
295 |         )
296 | 
297 |         # Write Outputs
298 |         # if output_type == "NC4":
299 |         output_path = os.path.join(
300 |             output_dir, f"HLS.{tile}.{first_date}.{final_date}.subset.nc"
301 |         )
302 |         dataset.to_netcdf(output_path)
303 |         # elif output_type == "ZARR":
304 |         #     output_path = os.path.join(output_dir, "hls_timeseries_dataset.zarr")
305 |         #     dataset.to_zarr(output_path)
306 |         logging.info(f"Output saved to {output_path}")
307 | 


--------------------------------------------------------------------------------
/python/scripts/HLS_SuPER/README.md:
--------------------------------------------------------------------------------
  1 | # HLS Subsetting, Processing, and Exporting Reformatted Data Prep Script  
  2 | 
  3 | ---
  4 | 
  5 | ## Objective  
  6 | 
  7 | NASA's Land Processes Distributed Active Archive Center (LP DAAC) archives and distributes Harmonized Landsat Sentinel-2 (HLS) version 2.0 products in the LP DAAC Cumulus cloud archive as Cloud Optimized GeoTIFFs (COG). the HLS_SuPER.py data prep script is a command line-executable Python script that allows users to submit inputs for their desired spatial (GeoJSON, Shapefile, bounding box) region of interest (ROI), time period of interest, and the specific desired product(s) and bands/layers within the HLS products. The script also includes options for cloud screening observations by a user-defined threshold, quality filtering, applying the scale factor to the data, and users can pick between two output file format options:
  8 | 
  9 |   1. COG which returns an output for each source file
 10 |   2. NetCDF4 which creates a single output with variables corresponding to bands and stacking all temporal observations for each band.
 11 |   
 12 | To construct these outputs, the input arguments provided by the user in command line are submitted to NASA's Common Metadata Repository API endpoint via the `earthaccess` Python library to find data. The script then returns a .json containing a nested list of all resulting granules with assets nested within for each HLS observation that intersect the user's input parameters. After outputing this file, it is leveraged to access the cloud-native HLS data for each asset, which are clipped to the ROI provided and exported in the desired output file format. Optionally, data can be quality filtered (see section on quality filtering below) and/or scaled. **This script does not support resampling or reprojection.**
 13 | 
 14 | ### Available Products  
 15 | 
 16 | 1. Daily 30 meter (m) global HLS Sentinel-2 Multi-spectral Instrument Surface Reflectance - [HLSS30.002](https://doi.org/10.5067/HLS/HLSS30.002)  
 17 | 
 18 | 2. Daily 30 meter (m) global HLS Landsat 8 Operational Land Imager Surface Reflectance - [HLSL30.002](https://doi.org/10.5067/HLS/HLSL30.002)  
 19 | 
 20 | > **Note:** On November 2021, this data prep script is updated to processes Version 2.0 daily 30 meter (m) global HLS Sentinel-2 Multi-spectral Instrument Surface Reflectance (HLSS30) data and Version 2.0 daily 30 m global HLS Landsat 8 OLI Surface Reflectance (HLSL30) data.  
 21 | 
 22 | ---
 23 | 
 24 | ## Prerequisites  
 25 | 
 26 | 1. **Earthdata Login account**  
 27 |     - Create an Earthdata Login account (if you don't already have one) at <https://urs.earthdata.nasa.gov/users/new>
 28 |     - Remember your username and password; you will need them to download or access data during the workshop and beyond.
 29 | 2. **A Local Copy of this Repository**
 30 |     - Copy/clone/[download](https://github.com/nasa/HLS-Data-Resources/archive/refs/heads/main.zip) the [HLS-Data-Resources Repository](https://github.com/nasa/HLS-Data-Resources.git). You will need all three of the python scripts downloaded to the same directory on your OS (HLS_Su.py, HLS_PER.py, HLS_SuPER.
 31 | 3. **Compatible Python Environment**
 32 |     - See the [Python Environment Setup](#python-environment-setup) section below.
 33 |     - If you have previously set up the [**lpdaac_vitals** environment](https://github.com/nasa/VITALS/blob/main/setup/setup_instructions.md) for a workshop or content from the [VITALS repository](https://github.com/nasa/VITALS/tree/main), you can use that environment for this script as well. 
 34 | 
 35 | 
 36 | ### Python Environment Setup  
 37 | 
 38 | For local Python environment setup we recommend using [mamba](https://mamba.readthedocs.io/en/latest/) to manage Python packages. To install *mamba*, download [miniforge](https://github.com/conda-forge/miniforge) for your operating system.  If using Windows, be sure to check the box to "Add mamba to my PATH environment variable" to enable use of mamba directly from your command line interface. **Note that this may cause an issue if you have an existing mamba install through Anaconda.**  
 39 | 
 40 | 1. Using your preferred command line interface (command prompt, terminal, cmder, etc.) navigate to your local copy of the repository, then type the following to create a compatible Python environment.
 41 | 
 42 |     For Windows:
 43 | 
 44 |     ```cmd
 45 |     mamba create -n lpdaac_vitals -c conda-forge --yes python=3.10 fiona=1.8.22 gdal hvplot geoviews rioxarray rasterio jupyter geopandas earthaccess jupyter_bokeh h5py h5netcdf spectral scikit-image jupyterlab seaborn dask ray-default
 46 |     ```
 47 | 
 48 |     For MacOSX:
 49 | 
 50 |     ```cmd
 51 |     mamba create -n lpdaac_vitals -c conda-forge --yes python=3.10 gdal=3.7.2 hvplot geoviews rioxarray rasterio geopandas fiona=1.9.4 jupyter earthaccess jupyter_bokeh h5py h5netcdf spectral scikit-image seaborn jupyterlab dask ray-default ray-dashboard
 52 |     ```
 53 | 
 54 | 2. Next, activate the Python Environment that you just created.
 55 | 
 56 |     ```cmd
 57 |     mamba activate lpdaac_vitals 
 58 |     ```
 59 | **Still having trouble getting a compatible Python environment set up? Contact [LP DAAC User Services](https://lpdaac.usgs.gov/lpdaac-contact-us/).**  
 60 | 
 61 | ## Script Execution  
 62 | 
 63 | 1. Once you have completed the prerequisites, open your command line interface navigate to the directory containing the script.
 64 |  
 65 | 2. Ensure your python environment created above is activated.
 66 | 
 67 |     ```cmd
 68 |     mamba activate lpdaac_vitals 
 69 |     ```
 70 | 
 71 | 3.  The script requires an `roi`, which can be either a shapefile, geojson, or list of bbox coordinates (lower left longitude, lower left latitude, upper right longitude, upper right latitude). Other arguments are optional. See below for some examples of how to execute the script.
 72 | 
 73 | ```cmd
 74 | > python HLS_SuPER.py -roi <insert geojson, shapefile, or bounding box coordinates here> -dir <insert directory to save the output files to>
 75 | ```  
 76 | 
 77 | > **Note:** After running the script, it will show inputs then conduct a search for results. A prompt for a **y/n** will appear to proceed with processing. This is to ensure that the user is away of the quantity of results/files that will be processed.
 78 | 
 79 | ### Examples  
 80 | 
 81 | #### Region of interest (```-roi```) specified using a geojson file  
 82 | 
 83 | ```None  
 84 | > python HLS_SuPER.py -roi LA_County.geojson  
 85 | ```  
 86 | 
 87 | #### Region of interest (```-roi```) specified using a bounding box and save outputs to specified directory  
 88 | 
 89 | ```None  
 90 | > python HLS_SuPER.py -dir C:\Users\HLS\ -roi '-122.8,42.1,-120.5,43.1'  
 91 | ```  
 92 | 
 93 | > **Note:** The bounding box is a comma-separated string of LL-Lon, LL-Lat, UR-Lon, UR-Lat.  **Also**, if the first value in your bounding box is negative, you **MUST** use *single* quotations around the bounding box string. If you are using MacOS, you may need to use double quotes followed by single quotes ("'-122.8,42.1,-120.5,43.1'")  
 94 | 
 95 | ## Additional Script Execution Documentation  
 96 | 
 97 | To see the full set of command line arguments and how to use them, type the following in the command prompt:  
 98 | 
 99 | ```None
100 | > python HLS_SuPER.py -h  
101 | 
102 | usage: HLS_SuPER.py [-h] -roi ROI [-dir DIR] [-start START] [-end END]
103 |                     [-prod {HLSS30,HLSL30,both}] [-bands BANDS] [-cc CC]
104 |                     [-qf {True,False}] [-scale {True,False}]
105 |                     [-of {COG,NC4}]  
106 | ...
107 | ```
108 | 
109 | ### Script Arguments  
110 | 
111 | #### -roi ROI  
112 | 
113 | ```None
114 | (Required) Region of Interest (ROI) for spatial subset. Valid inputs are: (1) a geojson or shapefile (absolute path to file required if not in same directory as this script), or (2) bounding box coordinates: 'LowerLeft_lon,LowerLeft_lat,UpperRight_lon,UpperRight_lat' NOTE: Negative coordinates MUST be
115 | written in single quotation marks '-120,43,-118,48'.  
116 | 
117 | Example  
118 | > python HLS_SuPER.py -roi '-120,43,-118,48'  
119 | ```  
120 | 
121 | #### -dir DIR  
122 | 
123 | ```None  
124 | Directory to save output HLS files to. (default: <directory that the script is executed from>)  
125 | 
126 | Example  
127 | > python HLS_SuPER.py -roi '-120,43,-118,48' -dir C:\Users\HLS\  
128 | ```  
129 | 
130 | #### -start START  
131 | 
132 | ```None  
133 | Start date for time period of interest: valid format is yyyy-mm-dd (e.g. 2020-10-20). (default: 2014-04-03)  
134 | 
135 | Example  
136 | > python HLS_SuPER.py -roi '-120,43,-118,48' -dir C:\Users\HLS\ -start 2020-06-02  
137 | ```  
138 | 
139 | #### -end END  
140 | 
141 | ```None  
142 | Start date for time period of interest: valid format is yyyy-mm-dd (e.g. 2020-10-20). (default: current date)  
143 | 
144 | Example  
145 | > python HLS_SuPER.py -roi '-120,43,-118,48' -dir C:\Users\HLS\ -start 2020-06-02 -end 2020-10-24  
146 | ```  
147 | 
148 | #### -prod {HLSS30,HLSL30,both}  
149 | 
150 | ```None  
151 | Desired product(s) to be subset and processed. (default: both)  
152 | 
153 | Example  
154 | > python HLS_SuPER.py -roi '-120,43,-118,48' -dir C:\Users\HLS\ -start 2020-06-02 -end 2020-10-24 -prod both  
155 | ```  
156 | 
157 | #### -bands BANDS  
158 | 
159 | ```None  
160 | Desired layers to be processed. Valid inputs are ALL, COASTAL-AEROSOL, BLUE, GREEN, RED, RED-EDGE1, RED-EDGE2, RED-EDGE3, NIR1, SWIR1, SWIR2, CIRRUS, TIR1, TIR2, WATER-VAPOR, FMASK. To request multiple layers, provide them in comma separated format with no spaces. Unsure of the names for your bands?--check out the README which contains a table of all bands and band names. (default: ALL)  
161 | 
162 | Example  
163 | > python HLS_SuPER.py -roi '-120,43,-118,48' -dir C:\Users\HLS\ -start 2020-06-02 -end 2020-10-24 -prod both -bands RED,GREEN,BLUE,NIR1  
164 | ```  
165 | 
166 | #### -cc CC  
167 | 
168 | ```None  
169 | Maximum cloud cover (percent) allowed for returned observations (e.g. 35). Valid range: 0 to 100 (integers only) (default: 100)  
170 | 
171 | Example  
172 | > python HLS_SuPER.py -roi '-120,43,-118,48' -dir C:\Users\HLS\ -start 2020-06-02 -end 2020-10-24 -prod both -bands RED,GREEN,BLUE,NIR1 -cc 50`  
173 | ```  
174 | 
175 | #### -qf {True,False}  
176 | 
177 | ```None  
178 | Flag to quality filter before exporting output files (see section below for quality filtering performed). (default: True)  
179 | 
180 | Example  
181 | > python HLS_SuPER.py -roi '-120,43,-118,48' -dir C:\Users\HLS\ -start 2020-06-02 -end 2020-10-24 -prod both -bands RED,GREEN,BLUE,NIR1 -cc 50 -qf True  
182 | ```  
183 | 
184 | #### -scale {True,False}  
185 | 
186 | ```None  
187 | Flag to apply scale factor to layers before exporting output files. (default: True)  
188 | 
189 | Example  
190 | > python HLS_SuPER.py -roi '-120,43,-118,48' -dir C:\Users\HLS\ -start 2020-06-02 -end 2020-10-24 -prod both -bands RED,GREEN,BLUE,NIR1 -cc 50 -qf True -scale False  
191 | ```
192 | 
193 | #### -of {COG,NC4}  
194 | 
195 | ```None  
196 | Define the desired output file format (default: COG)  
197 | 
198 | Example  
199 | > python HLS_SuPER.py -roi '-120,43,-118,48' -dir C:\Users\HLS\ -start 2020-06-02 -end 2020-10-24 -prod both -bands RED,GREEN,BLUE,NIR1 -cc 50 -qf True -scale False -of NC4  
200 | ```  
201 | 
202 | ### Quality Filtering  
203 | 
204 | If quality filtering is set to True (default), the following quality filtering will be used:  
205 | 
206 | - Cloud == 0 (No Cloud)  
207 | - Cloud shadow == 0 (No Cloud shadow)  
208 | - Adjacent to cloud/shadow == 0 (No Adjacent to cloud/shadow)
209 | - Snow/ice == 0 (No Snow/ice)
210 | - Water == 0 (No Water)
211 | - aerosol level == Climatology aerosol (No Low, Moderate, and High aerosol level)
212 | 
213 | 
214 | meaning that any pixel that does not meet the criteria outlined above will be removed and set to `_FillValue` in the output files.  
215 | 
216 | The quality table for the HLS `Fmask` can be found in section 6.4 of the [HLS V2.0 User Guide](https://lpdaac.usgs.gov/documents/1118/HLS_User_Guide_V2.pdf).  
217 | 
218 | If you do not want the data to be quality filtered, set argument `qf` to `False`.  
219 | 
220 | ### Output File Formats  
221 | 
222 | Cloud-Optimized GeoTIFF (COG) is the default output file format. If NetCDF-4 (NC4) is selected by the user as the output file format, the script will export a single NC4 file for each HLS tile returned by the query, in the source HLS projection. 
223 | 
224 | #### Output File Names  
225 | 
226 | The standard format  for HLS S30 V2.0 and HLS L30 V2.0 filenames is as follows:  
227 | **ex:** HLS.S30.T17SLU.2020117T160901.v2.0.B8A.tif  
228 | > **HLS.S30/HLS.L30**: Product Short Name  
229 |   **T17SLU**: MGRS Tile ID (T+5-digits)  
230 |   **2020117T160901**: Julian Date and Time of Acquisition (YYYYDDDTHHMMSS)  
231 |   **v2.0**: Product Version  
232 |   **B8A/B05**: Spectral Band  
233 |   **.tif**: Data Format (Cloud Optimized GeoTIFF)  
234 | 
235 | For additional information on HLS naming conventions, be sure to check out the [HLS Overview Page](https://lpdaac.usgs.gov/data/get-started-data/collection-overview/missions/harmonized-landsat-sentinel-2-hls-overview/#hls-naming-conventions).  
236 | 
237 | If you selected COG as the output file format, the output file name will have product specific band names renamed the common names in available bands and include **.subset.tif** at the end of the filename:  
238 | > HLS.S30.T17SLU.2020117T160901.v2.0.NIR1.subset.tif  
239 | 
240 | If you selected nc4 as the output file format, the following naming convention will be used:  
241 | **ex:** HLS.T17SLU.2020-10-24.2020-11-10.subset.nc4  
242 | > HLS.[MGRS Tile ID].[date of first observation in output file].[date of last observation in output file].subset.nc4  
243 | 
244 | ---
245 | 
246 | ## Contact Info  
247 | 
248 | Email: <LPDAAC@usgs.gov>  
249 | Voice: +1-866-573-3222  
250 | Organization: Land Processes Distributed Active Archive Center (LP DAAC)¹  
251 | Website: <https://lpdaac.usgs.gov/>  
252 | Date last modified: 2024-09-18  
253 | 
254 | ¹Work performed under USGS contract 140G0121D0001 for NASA contract NNG14HH33I.  
255 | 


--------------------------------------------------------------------------------
/python/scripts/HLS_SuPER/HLS_SuPER.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | ===============================================================================
  4 | HLS Subsetting, Processing, and Exporting Reformatted Data Prep Script                         
  5 | Authors: Cole Krehbiel, Mahsa Jami, and Erik Bolch
  6 | Contact: lpdaac@usgs.gov
  7 | Last Updated: 2024-09-18  
  8 | ===============================================================================
  9 | """
 10 | 
 11 | # Possible Future Improvements:
 12 | # TODO Improve CF-1.6 NetCDF Compliance
 13 | # TODO Improve behavior around deletion of cogs when a netcdf is requested
 14 | # TODO Add ZARR as output option
 15 | 
 16 | import argparse
 17 | import sys
 18 | import os
 19 | import shutil
 20 | import logging
 21 | import time
 22 | import json
 23 | 
 24 | import earthaccess
 25 | from shapely.geometry import polygon, box
 26 | from shapely.geometry.polygon import orient
 27 | import geopandas as gpd
 28 | from datetime import datetime as dt
 29 | import dask.distributed
 30 | 
 31 | from HLS_Su import hls_search
 32 | from HLS_PER import process_granule, create_timeseries_dataset
 33 | 
 34 | 
 35 | def parse_arguments():
 36 |     """
 37 |     Function to parse command line input arguments.
 38 |     """
 39 |     parser = argparse.ArgumentParser(
 40 |         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
 41 |         description="Performs Spatial/Temporal/Band Subsetting, Processing, and Customized Exporting for HLS V2.0 files",
 42 |     )
 43 | 
 44 |     # roi: Region of interest as shapefile, geojson, or comma separated LL Lon, LL Lat, UR Lon, UR Lat
 45 |     parser.add_argument(
 46 |         "-roi",
 47 |         type=str,
 48 |         required=True,
 49 |         help="(Required) Region of Interest (ROI) for spatial subset. \
 50 |                         Valid inputs are: (1) a geojson or shapefile (absolute path to file required if not in same directory as this script), or \
 51 |                         (2) bounding box coordinates: 'LowerLeft_lon,LowerLeft_lat,UpperRight_lon,UpperRight_lat'\
 52 |                         NOTE: Negative coordinates MUST be written in single quotation marks '-120,43,-118,48'\
 53 |                         NOTE 2: If providing an absolute path with spaces in directory names, please use double quotation marks "
 54 |         " ",
 55 |     )
 56 | 
 57 |     # dir: Directory to save the files to
 58 |     parser.add_argument(
 59 |         "-dir",
 60 |         required=False,
 61 |         help="Directory to export output HLS files to.",
 62 |         default=os.getcwd(),
 63 |     )
 64 | 
 65 |     # start: Start Date
 66 |     parser.add_argument(
 67 |         "-start",
 68 |         required=False,
 69 |         help="Start date for time period of interest: valid format is yyyy-mm-dd (e.g. 2020-10-20).",
 70 |         default="2014-04-03",
 71 |     )
 72 | 
 73 |     # end: End Date
 74 |     parser.add_argument(
 75 |         "-end",
 76 |         required=False,
 77 |         help="Start date for time period of interest: valid format is yyyy-mm-dd (e.g. 2022-10-24).",
 78 |         default=dt.today().strftime("%Y-%m-%d"),
 79 |     )
 80 | 
 81 |     # prod: product(s) desired to be downloaded
 82 |     parser.add_argument(
 83 |         "-prod",
 84 |         choices=["HLSS30", "HLSL30", "both"],
 85 |         required=False,
 86 |         help="Desired product(s) to be subset and processed.",
 87 |         default="both",
 88 |     )
 89 | 
 90 |     # layers: layers desired to be processed within the products selected
 91 |     parser.add_argument(
 92 |         "-bands",
 93 |         required=False,
 94 |         help="Desired layers to be processed. Valid inputs are ALL, COASTAL-AEROSOL, BLUE, GREEN, RED, RED-EDGE1, RED-EDGE2, RED-EDGE3, NIR1, SWIR1, SWIR2, CIRRUS, TIR1, TIR2, WATER-VAPOR, FMASK, VZA, VAA, SZA, SAA. To request multiple layers, provide them in comma separated format with no spaces. Unsure of the names for your bands?--check out the README which contains a table of all bands and band names.",
 95 |         default="ALL",
 96 |     )
 97 | 
 98 |     # cc: maximum cloud cover (%) allowed to be returned (by scene)
 99 |     parser.add_argument(
100 |         "-cc",
101 |         required=False,
102 |         help="Maximum (scene-level) cloud cover (percent) allowed for returned observations (e.g. 35). Valid range: 0 to 100 (integers only)",
103 |         default="100",
104 |     )
105 | 
106 |     # qf: quality filter flag: filter out poor quality data yes/no
107 |     parser.add_argument(
108 |         "-qf",
109 |         choices=["True", "False"],
110 |         required=False,
111 |         help="Flag to quality filter before exporting output files (see README for quality filtering performed).",
112 |         default="True",
113 |     )
114 | 
115 |     # sf: scale factor flag: Scale data or leave unscaled yes/no
116 |     parser.add_argument(
117 |         "-scale",
118 |         choices=["True", "False"],
119 |         required=False,
120 |         help="Flag to apply scale factor to layers before exporting output files. This is generally unecessary as most applications will scale automatically.",
121 |         default="False",
122 |     )
123 | 
124 |     # of: output file format
125 |     parser.add_argument(
126 |         "-of",
127 |         choices=["COG", "NC4", "ZARR"],
128 |         required=False,
129 |         help="Define the desired output file format",
130 |         default="COG",
131 |     )
132 | 
133 |     # chunksize: chunk size for processing with dask
134 |     parser.add_argument(
135 |         "-cs",
136 |         type=str,
137 |         help="Chunksize for processing scenes with dask in format 'band,x,y'. This is used to provide chunk_size argument to rioxarray.open_rasterio to improve processing speed.\
138 |             For example: '1,512,512' (native hls chunk size) provides better performance for ROIs that fall within a single scene, while '1,3600,3600' (full HLS scene) provides better performance for \
139 |             larger ROIs that span multiple scenes. The default is '1,512,512', but this can lead to a very large task list for large ROIs.",
140 |         default="1,512,512",
141 |     )
142 | 
143 |     # logfile: Optional logfile path
144 |     parser.add_argument(
145 |         "-logfile",
146 |         required=False,
147 |         help="Optional path to output logfile. If not provided, logging will only be to the console.",
148 |     )
149 | 
150 |     return parser.parse_args()
151 | 
152 | def ensure_ccw(geom):
153 |     """
154 |     Ensure the exterior ring of the polygon is counterclockwise.
155 |     """
156 |     if geom.exterior.is_ccw:
157 |         return geom  # Already counterclockwise
158 |     else:
159 |         return orient(geom, sign=1.0)  # Make it counterclockwise
160 |     
161 | def format_roi(roi):
162 |     """
163 |     Determines if submitted ROI is a file or bbox coordinates.
164 | 
165 |     If a file, opens a GeoJSON or shapefile and creates a list of polygon vertices in the correct order. If the file has multiple polygons it will use a unary union convex hull of the external bounds.
166 | 
167 |     If bbox coordinates, creates a geodataframe with a single Polygon geometry.
168 | 
169 |     Returns a geopandas dataframe for clipping and a list of vertices for searching.
170 |     """
171 |     if os.path.isfile(roi):  # and roi.endswith(("geojson", "shp")):
172 |         print(roi)
173 |         try:
174 |             # Open ROI if file
175 |             roi = gpd.read_file(roi)
176 |             if len(roi) > 1:
177 |                 # Merge all Polygon geometries and create external boundary
178 |                 logging.info(
179 |                     "Multiple polygons detected. Creating single geometry of external coordinates."
180 |                 )
181 |                 single_geometry = roi.unary_union.convex_hull
182 |                 roi = gpd.GeoDataFrame(geometry=[single_geometry], crs=roi.crs)
183 |                 logging.info(roi)
184 |             # Check if ROI is in Geographic CRS, if not, convert to it
185 |             if roi.crs.is_geographic:
186 |                 roi['geometry'] = roi['geometry'].apply(ensure_ccw)
187 |                 # List Vertices in correct order for search
188 |                 vertices_list = list(roi.geometry[0].exterior.coords)
189 | 
190 |             else:
191 |                 roi_geographic = roi.to_crs("EPSG:4326")
192 |                 logging.info(
193 |                     "Note: ROI submitted is being converted to Geographic CRS (EPSG:4326)"
194 |                 )
195 |                 roi['geometry'] = roi['geometry'].apply(ensure_ccw)
196 |                 vertices_list = list(roi_geographic.geometry[0].exterior.coords)
197 |         except (FileNotFoundError, ValueError):
198 |             sys.exit(
199 |                 f"The GeoJSON/shapefile is either not valid or could not be found.\nPlease double check the name and provide the absolute path to the file or make sure that it is located in {os.getcwd()}"
200 |             )
201 |     else:
202 |         # If bbox coordinates are submitted
203 |         bbox = tuple(map(float, roi.strip("'\"").split(",")))
204 |         print(bbox)
205 | 
206 |         # Convert bbox to a geodataframe for clipping
207 |         roi = gpd.GeoDataFrame(geometry=[box(*bbox)], crs="EPSG:4326")
208 |         roi['geometry'] = roi['geometry'].apply(ensure_ccw) 
209 | 
210 |         vertices_list = list(roi.geometry[0].exterior.coords)
211 | 
212 |     return (roi, vertices_list)
213 | 
214 | 
215 | def format_dates(start, end):
216 |     # Strip Quotes
217 |     start = start.strip("'").strip('"')
218 |     end = end.strip("'").strip('"')
219 |     # Convert to datetime
220 |     try:
221 |         start = dt.strptime(start, "%Y-%m-%d")
222 |         end = dt.strptime(end, "%Y-%m-%d")
223 |     except ValueError:
224 |         sys.exit(
225 |             "A date format is not valid. The valid format is ISO 8601: YYYY-MM-DD (e.g. 2020-10-20)"
226 |         )
227 |     if start > end:
228 |         sys.exit(
229 |             f"The Start Date requested: {start} is after the End Date Requested: {end}."
230 |         )
231 |     else:
232 |         dates = (start.strftime("%Y-%m-%d"), end.strftime("%Y-%m-%d"))
233 |     return dates
234 | 
235 | 
236 | def format_cloud_cover(cc):
237 |     try:
238 |         cc = int(cc.strip("'").strip('"'))
239 |     except ValueError:
240 |         sys.exit(
241 |             f"{cc} is not a valid input for filtering by cloud cover (e.g. 35). Valid range: 0 to 100 (integers only)"
242 |         )
243 | 
244 |     # Validate that cc is in the valid range (0-100)
245 |     if cc < 0 or cc > 100:
246 |         sys.exit(
247 |             f"{cc} is not a valid input option for filtering by cloud cover (e.g. 35). Valid range: 0 to 100 (integers only)"
248 |         )
249 |     return cc
250 | 
251 | 
252 | def str_to_bool(value):
253 |     """
254 |     Converts a string to a boolean.
255 |     Accepts 'True', 'true', '1' as True.
256 |     Accepts 'False', 'false', '0' as False.
257 |     """
258 |     if isinstance(value, str):
259 |         if value.lower() in ("true", "1"):
260 |             return True
261 |         elif value.lower() in ("false", "0"):
262 |             return False
263 |     raise ValueError(f"Cannot convert {value} to boolean.")
264 | 
265 | 
266 | def create_band_dict(prod, bands):
267 |     """
268 |     Creates a dictionary of bands and common band names for each collection requested.
269 |     """
270 |     shortname = {"HLSS30": "HLSS30.v2.0", "HLSL30": "HLSL30.v2.0"}
271 | 
272 |     # Create a dictionary with product name and shortname
273 |     if prod == "both":
274 |         prods = shortname
275 |     else:
276 |         prods = {prod: shortname[prod]}
277 | 
278 |     # Strip spacing, quotes, make all upper case and create a list
279 |     bands = bands.strip(" ").strip("'").strip('"').upper()
280 |     band_list = bands.split(",")
281 | 
282 |     # Create a LUT dict including the HLS product bands mapped to names
283 |     lut = {
284 |         "HLSS30": {
285 |             "COASTAL-AEROSOL": "B01",
286 |             "BLUE": "B02",
287 |             "GREEN": "B03",
288 |             "RED": "B04",
289 |             "RED-EDGE1": "B05",
290 |             "RED-EDGE2": "B06",
291 |             "RED-EDGE3": "B07",
292 |             "NIR-Broad": "B08",
293 |             "NIR1": "B8A",
294 |             "WATER-VAPOR": "B09",
295 |             "CIRRUS": "B10",
296 |             "SWIR1": "B11",
297 |             "SWIR2": "B12",
298 |             "FMASK": "Fmask",
299 |             "VZA": "VZA",
300 |             "VAA": "VAA",
301 |             "SZA": "SZA",
302 |             "SAA": "SAA",
303 |         },
304 |         "HLSL30": {
305 |             "COASTAL-AEROSOL": "B01",
306 |             "BLUE": "B02",
307 |             "GREEN": "B03",
308 |             "RED": "B04",
309 |             "NIR1": "B05",
310 |             "SWIR1": "B06",
311 |             "SWIR2": "B07",
312 |             "CIRRUS": "B09",
313 |             "TIR1": "B10",
314 |             "TIR2": "B11",
315 |             "FMASK": "Fmask",
316 |             "VZA": "VZA",
317 |             "VAA": "VAA",
318 |             "SZA": "SZA",
319 |             "SAA": "SAA",
320 |         },
321 |     }
322 | 
323 |     # List of all available/acceptable band names
324 |     all_bands = [
325 |         "ALL",
326 |         "COASTAL-AEROSOL",
327 |         "BLUE",
328 |         "GREEN",
329 |         "RED",
330 |         "RED-EDGE1",
331 |         "RED-EDGE2",
332 |         "RED-EDGE3",
333 |         "NIR1",
334 |         "SWIR1",
335 |         "SWIR2",
336 |         "CIRRUS",
337 |         "TIR1",
338 |         "TIR2",
339 |         "WATER-VAPOR",
340 |         "FMASK",
341 |         "VZA",
342 |         "VAA",
343 |         "SZA",
344 |         "SAA",
345 |     ]
346 | 
347 |     # Validate that bands are named correctly
348 |     for b in band_list:
349 |         if b not in all_bands:
350 |             sys.exit(
351 |                 f"Band: {b} is not a valid input option. Valid inputs are {all_bands}. To request multiple layers, provide them in comma separated format with no spaces. Unsure of the names for your bands?--check out the README which contains a table of all bands and band names."
352 |             )
353 | 
354 |     # Set up a dictionary of band names and numbers by product
355 |     band_dict = {}
356 |     for p in prods:
357 |         band_dict[p] = {}
358 |         for b in band_list:
359 |             if b == "ALL":
360 |                 band_dict[p] = lut[p]
361 |             else:
362 |                 try:
363 |                     band_dict[p][b] = lut[p][b]
364 |                 except ValueError:
365 |                     print(f"Product {p} does not contain band {b}")
366 |     return band_dict
367 | 
368 | 
369 | def format_chunksize(chunksize):
370 |     """
371 |     Converts comma-separated chunksize string to dictionary.
372 |     """
373 |     keys = ["band", "x", "y"]
374 |     values = list(map(int, chunksize.strip("'\"").split(",")))
375 | 
376 |     if len(values) != len(keys):
377 |         raise ValueError(
378 |             "Chunksize must provide band, x and y (3) values separated by commas."
379 |         )
380 | 
381 |     return dict(zip(keys, values))
382 | 
383 | 
384 | def confirm_action(prompt):
385 |     """
386 |     Prompts the user to confirm an action.
387 |     """
388 |     while True:
389 |         response = input(prompt).lower()
390 |         if response in ["y", "yes"]:
391 |             return True
392 |         elif response in ["n", "no"]:
393 |             return False
394 |         else:
395 |             print("Invalid input. Please enter 'y' or 'n'.")
396 | 
397 | 
398 | def setup_dask_environment():
399 |     """
400 |     Passes RIO environment variables to dask workers for authentication.
401 |     """
402 |     import os
403 |     import rasterio
404 | 
405 |     global env
406 |     env = rasterio.Env(
407 |         GDAL_DISABLE_READDIR_ON_OPEN="EMPTY_DIR",
408 |         GDAL_HTTP_COOKIEFILE=os.path.expanduser("~/cookies.txt"),
409 |         GDAL_HTTP_COOKIEJAR=os.path.expanduser("~/cookies.txt"),
410 |         GDAL_HTTP_MAX_RETRY="10",
411 |         GDAL_HTTP_RETRY_DELAY="0.5",
412 |     )
413 |     env.__enter__()
414 | 
415 | 
416 | def main():
417 |     """
418 |     Main function to run the HLS SuPER script.
419 |     """
420 | 
421 |     # Parse arguments
422 |     args = parse_arguments()
423 | 
424 |     # Configure logging
425 |     log_handlers = [logging.StreamHandler(sys.stdout)]
426 |     if args.logfile:
427 |         log_handlers.append(logging.FileHandler(args.logfile))
428 | 
429 |     logging.basicConfig(
430 |         level=logging.INFO,
431 |         format="%(levelname)s:%(asctime)s ||| %(message)s",
432 |         handlers=log_handlers,
433 |     )
434 | 
435 |     # Handle Login Credentials with earthaccess
436 |     earthaccess.login(persist=True)
437 | 
438 |     # Start Log
439 |     logging.info("HLS SuPER script started")
440 | 
441 |     # Format ROI
442 |     roi, vl = format_roi(args.roi)
443 |     logging.info("Region of Interest formatted successfully")
444 | 
445 |     # Set Output Directory
446 |     if args.dir is not None:
447 |         output_dir = os.path.normpath(args.dir.strip("'").strip('"')) + os.sep
448 |     else:
449 |         # Defaults to the current directory
450 |         output_dir = os.getcwd() + os.sep
451 | 
452 |     logging.info(f"Output directory set to: {output_dir}")
453 | 
454 |     # Format/Validate Dates
455 |     dates = format_dates(args.start, args.end)
456 |     logging.info(f"Date Parameters: {dates}")
457 | 
458 |     # Create Product/Band Dictionary
459 |     band_dict = create_band_dict(args.prod, args.bands)
460 |     logging.info(f"Products/Bands Selected: {band_dict}")
461 | 
462 |     # Format Cloud Cover
463 |     cc = format_cloud_cover(args.cc)
464 |     logging.info(f"Cloud Cover Filter <= {cc}")
465 | 
466 |     # Quality Filtering
467 |     qf = str_to_bool(args.qf)
468 |     logging.info(f"Quality Filtering: {qf}")
469 | 
470 |     # Scale Factor
471 |     scale = str_to_bool(args.scale)
472 |     logging.info(f"Apply Scale Factor: {scale}")
473 | 
474 |     # Chunk Size
475 |     chunk_size = format_chunksize(args.cs)
476 |     logging.info(f"Chunk Size: {chunk_size}")
477 | 
478 |     # Output File Type
479 |     if args.of not in ["COG", "NC4"]:
480 |         sys.exit(
481 |             f"Output format {args.of} is not a valid output format. Please choose from 'COG', 'NC4'."
482 |         )
483 | 
484 |     logging.info(f"Output format: {args.of}")
485 | 
486 |     # Search for Data and Save Results
487 |     results_urls_file = os.path.join(output_dir, "hls_super_results_urls.json")
488 |     use_existing_file = False
489 | 
490 |     if os.path.isfile(results_urls_file):
491 |         logging.info(f"Results url list already exists in {output_dir}.")
492 |         # Confirm if user wants to use existing file.
493 |         if confirm_action(
494 |             f"Do you want to use the existing results file ({results_urls_file})? (y/n)"
495 |         ):
496 |             use_existing_file = True
497 | 
498 |         else:
499 |             if not confirm_action(
500 |                 "Do you want to overwrite the existing results file? (y/n)"
501 |             ):
502 |                 sys.exit(
503 |                     f"Processing aborted. Please move, rename, or remove existing file: {results_urls_file}."
504 |                 )
505 | 
506 |     if use_existing_file:
507 |         logging.info("Using existing results file.")
508 |         with open(results_urls_file, "r") as file:
509 |             results_urls = json.load(file)
510 | 
511 |     else:
512 |         logging.info("Searching for data...")
513 |         results_urls = hls_search(
514 |             roi=vl, band_dict=band_dict, dates=dates, cloud_cover=cc
515 |         )
516 |         logging.info(f"Writing search results to {results_urls_file}")
517 |         with open(results_urls_file, "w") as file:
518 |             json.dump(results_urls, file)
519 | 
520 |     total_assets = sum(len(sublist) for sublist in results_urls)
521 | 
522 |     if cc:
523 |         logging.info(
524 |             f"{len(results_urls)} granules remain after cloud filtering. {total_assets} assets will be processed."
525 |         )
526 |     else:
527 |         logging.info(f"{total_assets} assets will be processed.")
528 | 
529 |     # Confirm Processing
530 |     if not confirm_action("Do you want to proceed with processing? (y/n)"):
531 |         sys.exit("Processing aborted.")
532 | 
533 |     # Initialize Dask Cluster
534 |     client = dask.distributed.Client()
535 | 
536 |     # Setup Dask Environment (GDAL Configs)
537 |     client.run(setup_dask_environment)
538 | 
539 |     logging.info(
540 |         f"Dask environment setup successfully. View dashboard: {client.dashboard_link}."
541 |     )
542 | 
543 |     # Scatter Results Results url
544 |     client.scatter(results_urls)
545 | 
546 |     # If NC4, create a temporary directory to store COGs
547 |     if args.of == "NC4":
548 |         cog_dir = os.path.join(output_dir, "temp")
549 |         if not os.path.exists(cog_dir):
550 |             os.makedirs(cog_dir, exist_ok=True)
551 |         else:
552 |             if not confirm_action(
553 |                 "Temporary directory to store COGs already exists. Use these files to create NC4 outputs? (y/n)"
554 |             ):
555 |                 sys.exit(
556 |                     f"Processing aborted. Please remove existing directory: {cog_dir}."
557 |                 )
558 | 
559 |     else:
560 |         cog_dir = output_dir
561 | 
562 |     # Process Granules
563 |     start_time = time.time()
564 |     logging.info("Processing...")
565 |     tasks = [
566 |         dask.delayed(process_granule)(
567 |             granule_url,
568 |             roi=roi,
569 |             quality_filter=qf,
570 |             scale=scale,
571 |             output_dir=cog_dir,
572 |             band_dict=band_dict,
573 |             bit_nums=[0, 1, 2, 3, 4, 5],
574 |             chunk_size=chunk_size,
575 |         )
576 |         for granule_url in results_urls
577 |     ]
578 |     dask.compute(*tasks)
579 | 
580 |     # Create Timeseries Dataset if NC4
581 |     if args.of == "NC4":
582 |         logging.info("Creating timeseries dataset...")
583 |         create_timeseries_dataset(cog_dir, output_type=args.of, output_dir=output_dir)
584 | 
585 |     # Close Dask Client
586 |     client.close()
587 | 
588 |     # Remove Temporary COGs if NC4
589 |     if args.of == "NC4":
590 |         logging.info("Timeseries Dataset Created. Removing Temporary Files...")
591 |         shutil.rmtree(cog_dir)
592 | 
593 |     # End Timer
594 |     total_time = time.time() - start_time
595 |     logging.info(
596 |         f"Processing complete. Total time: {round(total_time,2)}s, "
597 |     )
598 | 
599 | 
600 | if __name__ == "__main__":
601 |     main()
602 | 


--------------------------------------------------------------------------------
/python/tutorials/EVI_timeseries_with_odc_stac.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Building an EVI Timeseries with ODC STAC\n",
  8 |     "\n",
  9 |     "This tutorial provides an alternate way to work with Harmonized Landsat Sentinel-2 ([HLS](https://lpdaac.usgs.gov/data/get-started-data/collection-overview/missions/harmonized-landsat-sentinel-2-hls-overview/)) data using [CMR-STAC](https://cmr.earthdata.nasa.gov/stac/docs/index.html) coupled and [OpenDataCube](https://www.opendatacube.org/), which uses the [dask](https://www.dask.org/) and [xarray](https://xarray.dev/) libraries to build a timeseries of data. `OpenDataCube` leverages the STAC metadata to provide values to `xarray` so that data can be loaded lazily with all of the dimensions, coordinate reference system and other relevant attributes. This information along with the cloud-optimized geotiff format of HLS Data means that only the necessary portions of the files required for the desired operation need to be read. On top of this, `dask` is used for parallelization. This workflow is a much faster alternative to the methods used in the [HLS Tutorial](https://github.com/nasa/HLS-Data-Resources/blob/main/python/tutorials/HLS_Tutorial.ipynb), but uses higher level Python libraries, making the code less adaptable to other use-cases. Using these additional Python libraries requires additional dependencies from other resources in this repository. Please use the [Python Environment Setup](#python-environment-setup) section below to set up a compatible Python environment.\n",
 10 |     "\n",
 11 |     "## Python Environment Setup\n",
 12 |     "\n",
 13 |     "A compatible python environment can be created by following the [Python Environment setup instructions](https://github.com/nasa/VITALS/blob/main/setup/setup_instructions.md), activating that environment and adding the `pystac-client` and `odc-stac` packages:\n",
 14 |     "\n",
 15 |     "```\n",
 16 |     "mamba activate lpdaac_vitals\n",
 17 |     "```\n",
 18 |     "\n",
 19 |     "```\n",
 20 |     "mamba install -c conda-forge pystac-client odc-stac dask\n",
 21 |     "```\n",
 22 |     "\n",
 23 |     "or if you want to set up a separate environment:\n",
 24 |     "\n",
 25 |     "```\n",
 26 |     "mamba create -n hls_odc -c conda-forge --yes python=3.12 fiona gdal hvplot geoviews rioxarray rasterio jupyter geopandas earthaccess jupyter_bokeh scikit-image jupyterlab dask odc-stac pystac-client\n",
 27 |     "```\n",
 28 |     "\n",
 29 |     "## Tutorial Use Case\n",
 30 |     "\n",
 31 |     "This guide examines changes in enhanced vegetation index ([EVI](https://earthobservatory.nasa.gov/features/MeasuringVegetation/measuring_vegetation_4.php)) over an agricultural region in northern California, the same as the [HLS Tutorial: Getting Started with Cloud-Native HLS Data in Python](https://github.com/nasa/HLS-Data-Resources/blob/main/python/tutorials/HLS_Tutorial.ipynb). The goal of the project is to observe HLS-derived mean EVI over these regions without downloading the entirety of the HLS source data in a cloud-friendly, efficient way. In this notebook we will extract an EVI timeseries from Harmonized Landsat Sentinel-2 ([HLS](https://lpdaac.usgs.gov/data/get-started-data/collection-overview/missions/harmonized-landsat-sentinel-2-hls-overview/)) data. First we will search and find data, then we will lazily load only the necessary subsets of these results directly from the cloud and calculate EVI. \n",
 32 |     "\n",
 33 |     "### Background\n",
 34 |     "\n",
 35 |     "The Harmonized Landsat Sentinel-2 ([HLS](https://lpdaac.usgs.gov/data/get-started-data/collection-overview/missions/harmonized-landsat-sentinel-2-hls-overview/)) project produces seamless, harmonized surface reflectance data from the Operational Land Imager (OLI) and Multi-Spectral Instrument (MSI) aboard Landsat and Sentinel-2 Earth-observing satellites, respectively. The aim is to produce seamless products with normalized parameters, which include atmospheric correction, cloud and cloud-shadow masking, geographic co-registration and common gridding, normalized bidirectional reflectance distribution function, and spectral band adjustment. This will provide global observation of the Earth’s surface every 2-3 days with 30 meter spatial resolution. One of the major applications that will benefit from HLS is agriculture assessment and monitoring, which is used as the use case for this tutorial.\n",
 36 |     "\n",
 37 |     "## Requirements\n",
 38 |     "\n",
 39 |     "- A [NASA Earthdata Login](https://urs.earthdata.nasa.gov/) account is required to download the data used in this tutorial. You can create an account at the link provided.\n",
 40 |     "\n",
 41 |     "## Learning Objectives\n",
 42 |     "\n",
 43 |     "- How to use CMR-STAC to search for HLS data\n",
 44 |     "- How to open and access data using dask and Open Data Cube.\n",
 45 |     "\n",
 46 |     "## Data Used\n",
 47 |     "\n",
 48 |     "- Daily 30 meter (m) global HLS Sentinel-2 Multi-spectral Instrument Surface Reflectance - [HLSS30.002](https://doi.org/10.5067/HLS/HLSS30.002)  \n",
 49 |     "    - The HLSS30 product provides 30 m Nadir normalized Bidirectional Reflectance Distribution Function (BRDF)-Adjusted Reflectance (NBAR) and is derived from Sentinel-2A and Sentinel-2B MSI data products.  \n",
 50 |     "    - Science Dataset (SDS) layers:  \n",
 51 |     "        - B8A (NIR Narrow)  \n",
 52 |     "        - B04 (Red)  \n",
 53 |     "        - B02 (Blue)   \n",
 54 |     "- Daily 30 meter (m) global HLS Landsat-8 OLI Surface Reflectance - [HLSL30.002](https://doi.org/10.5067/HLS/HLSL30.002)  \n",
 55 |     "    - The HLSL30 product provides 30 m Nadir normalized Bidirectional Reflectance Distribution Function (BRDF)-Adjusted Reflectance (NBAR) and is derived from Landsat-8 OLI data products.  \n",
 56 |     "     - Science Dataset (SDS) layers:  \n",
 57 |     "        - B05 (NIR)  \n",
 58 |     "        - B04 (Red)  \n",
 59 |     "        - B02 (Blue)   \n",
 60 |     "\n",
 61 |     "## Tutorial Outline\n",
 62 |     "\n",
 63 |     "1. [**Getting Started**](#getstarted)  \n",
 64 |     "    1.1 Import Packages  \n",
 65 |     "    1.2 EarthData Login\n",
 66 |     "    1.3 Set up Dask Client\n",
 67 |     "2. [**Finding HLS Data**](#find)  \n",
 68 |     "3. [**Opening Data with ODC-STAC**](#odcstac)  \n",
 69 |     "    3.1 Apply Scale Factor\n",
 70 |     "4. [**Calculating EVI**](#calcevi)   \n",
 71 |     "5. [**Quality Filtering**](#qualityfilter)  \n",
 72 |     "6. [**Calculating Statistics**](#stats)  "
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "markdown",
 77 |    "metadata": {},
 78 |    "source": [
 79 |     "## 1. Getting Started<a id=\"getstarted\"></a>\n",
 80 |     "\n",
 81 |     "### 1.1 Import Packages\n",
 82 |     "\n",
 83 |     "Import the required packages."
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": null,
 89 |    "metadata": {},
 90 |    "outputs": [],
 91 |    "source": [
 92 |     "# Load all the libraries\n",
 93 |     "import os\n",
 94 |     "import numpy as np\n",
 95 |     "import dask.distributed\n",
 96 |     "import pystac_client\n",
 97 |     "import geopandas as gpd\n",
 98 |     "import odc.stac\n",
 99 |     "import xarray as xr\n",
100 |     "import rasterio as rio\n",
101 |     "import rioxarray as rxr\n",
102 |     "import earthaccess\n",
103 |     "import hvplot.xarray"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "markdown",
108 |    "metadata": {},
109 |    "source": [
110 |     "### 1.2 Earthdata Login Authentication\n",
111 |     "\n",
112 |     "We will use the `earthaccess` package for authentication. `earthaccess` can either create a a new local `.netrc` file to store credentials or validate that one exists already in you user profile. If you do not have a `.netrc` file, you will be prompted for your credentials and one will be created."
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": null,
118 |    "metadata": {},
119 |    "outputs": [],
120 |    "source": [
121 |     "# Log into earthaccess - ensures creation of .netrc file\n",
122 |     "earthaccess.login(persist=True)"
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "markdown",
127 |    "metadata": {},
128 |    "source": [
129 |     "### 1.3 Set up Dask Client"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "markdown",
134 |    "metadata": {},
135 |    "source": [
136 |     "Here we set up a local `dask` cluster, which will define tasks based on our lazy loaded data and functions, then split those tasks accross our locally available threads or workers to improve process efficiency. You can view the dashboard by clicking the link and see various dashboards and monitor them as you run future cells."
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "code",
141 |    "execution_count": null,
142 |    "metadata": {},
143 |    "outputs": [],
144 |    "source": [
145 |     "# Initialize Dask Client\n",
146 |     "client = dask.distributed.Client()\n",
147 |     "display(client)"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "markdown",
152 |    "metadata": {},
153 |    "source": [
154 |     "### 1.4 Configure GDAL Options and rio environment"
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "code",
159 |    "execution_count": null,
160 |    "metadata": {},
161 |    "outputs": [],
162 |    "source": [
163 |     "# Configure odc.stac rio env - requires a .netrc file, sends info to dask client\n",
164 |     "odc.stac.configure_rio(cloud_defaults=True,\n",
165 |     "                       verbose=True,\n",
166 |     "                       client=client,\n",
167 |     "                       GDAL_DISABLE_READDIR_ON_OPEN='EMPTY_DIR',\n",
168 |     "                       GDAL_HTTP_COOKIEFILE=os.path.expanduser('~/cookies.txt'),\n",
169 |     "                       GDAL_HTTP_COOKIEJAR=os.path.expanduser('~/cookies.txt'))"
170 |    ]
171 |   },
172 |   {
173 |    "cell_type": "markdown",
174 |    "metadata": {},
175 |    "source": [
176 |     "## 2. CMR-STAC Search<a id=\"find\"></a>\n",
177 |     "\n",
178 |     "To find HLS data, we will use the `pystac_client` python library to search [NASA's Common Metadata Repository SpatioTemporal Asset Catalog (CMR-STAC)](https://cmr.earthdata.nasa.gov/stac/docs/index.html#tag/STAC) for HLS data. We will use a geojson file containing our region of interest (ROI) to search for files that intersect. For this use case, our ROI is an agricultural field in Northern California."
179 |    ]
180 |   },
181 |   {
182 |    "cell_type": "code",
183 |    "execution_count": 5,
184 |    "metadata": {},
185 |    "outputs": [],
186 |    "source": [
187 |     "# Open ROI polygon\n",
188 |     "roi = gpd.read_file('../../data/Field_Boundary.geojson')"
189 |    ]
190 |   },
191 |   {
192 |    "cell_type": "markdown",
193 |    "metadata": {},
194 |    "source": [
195 |     "Add the collection, datetime range, results limit, and simplify our ROI to a bounding box and store these as search parameters. After defining these, conduct a stac search using the `LPCLOUD` STAC endpoint and return our query as a list of items."
196 |    ]
197 |   },
198 |   {
199 |    "cell_type": "code",
200 |    "execution_count": 6,
201 |    "metadata": {},
202 |    "outputs": [],
203 |    "source": [
204 |     "catalog = pystac_client.Client.open(\"https://cmr.earthdata.nasa.gov/stac/LPCLOUD\")\n",
205 |     "# Define search parameters\n",
206 |     "search_params = {\n",
207 |     "    \"collections\": [\"HLSS30_2.0\",\"HLSL30_2.0\"],\n",
208 |     "    \"bbox\": tuple(list(roi.total_bounds)),\n",
209 |     "    \"datetime\": \"2021-05-01/2021-09-30\",\n",
210 |     "    \"limit\": 100,\n",
211 |     "}\n",
212 |     "# Perform the search\n",
213 |     "query = catalog.search(**search_params)\n",
214 |     "items = query.items()"
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": null,
220 |    "metadata": {},
221 |    "outputs": [],
222 |    "source": [
223 |     "items = list(query.items())\n",
224 |     "print(f\"Found: {len(items):d} granules\")"
225 |    ]
226 |   },
227 |   {
228 |    "cell_type": "markdown",
229 |    "metadata": {},
230 |    "source": [
231 |     "We can preview what these results look like."
232 |    ]
233 |   },
234 |   {
235 |    "cell_type": "code",
236 |    "execution_count": null,
237 |    "metadata": {},
238 |    "outputs": [],
239 |    "source": [
240 |     "items[0]"
241 |    ]
242 |   },
243 |   {
244 |    "cell_type": "code",
245 |    "execution_count": null,
246 |    "metadata": {},
247 |    "outputs": [],
248 |    "source": [
249 |     "items[3]"
250 |    ]
251 |   },
252 |   {
253 |    "cell_type": "markdown",
254 |    "metadata": {},
255 |    "source": [
256 |     "To stack the data from both Landsat and Sentinel instruments, we need common band names for HLSL30 B5 (NIR) and HLSS30 B8A (NIR). We can simply rename them NIR in the stac results."
257 |    ]
258 |   },
259 |   {
260 |    "cell_type": "code",
261 |    "execution_count": 10,
262 |    "metadata": {},
263 |    "outputs": [],
264 |    "source": [
265 |     "# Rename HLSS B8A and HLSL B05 to common band name\n",
266 |     "for item in items:\n",
267 |     "    if \"HLS.L30\" in item.id:\n",
268 |     "        item.assets[\"NIR\"] = item.assets.pop(\"B05\")\n",
269 |     "    if \"HLS.S30\" in item.id:\n",
270 |     "        item.assets[\"NIR\"] = item.assets.pop(\"B8A\")\n"
271 |    ]
272 |   },
273 |   {
274 |    "cell_type": "code",
275 |    "execution_count": null,
276 |    "metadata": {},
277 |    "outputs": [],
278 |    "source": [
279 |     "# Confirm this changed the stac results\n",
280 |     "items[0]"
281 |    ]
282 |   },
283 |   {
284 |    "cell_type": "markdown",
285 |    "metadata": {},
286 |    "source": [
287 |     "## 3. Opening Data with ODC-STAC<a id=\"odcstac\"></a>\n",
288 |     "\n",
289 |     "Use the `odc.stac.stac_load` function to load the data from the STAC results lazily into a dataset. To do this, we must provide a crs, a list of STAC results, a tuple of the bands we want to load, an expected resolution, and a dask chunk size."
290 |    ]
291 |   },
292 |   {
293 |    "cell_type": "code",
294 |    "execution_count": null,
295 |    "metadata": {},
296 |    "outputs": [],
297 |    "source": [
298 |     "# Set CRS and resolution, open lazily with odc.stac\n",
299 |     "crs = \"utm\"\n",
300 |     "ds = odc.stac.stac_load(\n",
301 |     "    items,\n",
302 |     "    bands=(\"B02\", \"B04\",\"NIR\", \"Fmask\"),\n",
303 |     "    crs=crs,\n",
304 |     "    resolution=30,\n",
305 |     "    chunks={\"band\":1,\"x\":512,\"y\":512},  # If empty, chunks along band dim, \n",
306 |     "    #groupby=\"solar_day\", # This limits to first obs per day\n",
307 |     ")\n",
308 |     "display(ds)"
309 |    ]
310 |   },
311 |   {
312 |    "cell_type": "markdown",
313 |    "metadata": {},
314 |    "source": [
315 |     "We can preview the size and shapes of the data we have loaded using the `geobox` function."
316 |    ]
317 |   },
318 |   {
319 |    "cell_type": "code",
320 |    "execution_count": null,
321 |    "metadata": {},
322 |    "outputs": [],
323 |    "source": [
324 |     "# Show Geobox\n",
325 |     "ds.odc.geobox"
326 |    ]
327 |   },
328 |   {
329 |    "cell_type": "markdown",
330 |    "metadata": {},
331 |    "source": [
332 |     "Next, we will clip the data to our ROI using the `rio.clip` function. This will continue to work lazily on the data, so we have yet to actually load any data."
333 |    ]
334 |   },
335 |   {
336 |    "cell_type": "code",
337 |    "execution_count": 14,
338 |    "metadata": {},
339 |    "outputs": [],
340 |    "source": [
341 |     "# Clip\n",
342 |     "ds = ds.rio.clip(roi.geometry.values, roi.crs, all_touched=True)"
343 |    ]
344 |   },
345 |   {
346 |    "cell_type": "code",
347 |    "execution_count": null,
348 |    "metadata": {},
349 |    "outputs": [],
350 |    "source": [
351 |     "# Show Clipped Geobox\n",
352 |     "ds.odc.geobox"
353 |    ]
354 |   },
355 |   {
356 |    "cell_type": "markdown",
357 |    "metadata": {},
358 |    "source": [
359 |     "The `scale_factor` information in some of the HLSL30 granules are found in the file metadata, but missing from the Band metadata, meaning this isn't applied automatically. Manually scale each of the data arrays by the scale factor."
360 |    ]
361 |   },
362 |   {
363 |    "cell_type": "code",
364 |    "execution_count": 16,
365 |    "metadata": {},
366 |    "outputs": [],
367 |    "source": [
368 |     "# Scale the data\n",
369 |     "ds.NIR.data = 0.0001 * ds.NIR.data\n",
370 |     "ds.B04.data = 0.0001 * ds.B04.data\n",
371 |     "ds.B02.data = 0.0001 * ds.B02.data"
372 |    ]
373 |   },
374 |   {
375 |    "cell_type": "markdown",
376 |    "metadata": {},
377 |    "source": [
378 |     "Now that we have clipped and scaled our data, we can load the minimal subset of data we need to calculate our EVI over our time period of interest."
379 |    ]
380 |   },
381 |   {
382 |    "cell_type": "code",
383 |    "execution_count": null,
384 |    "metadata": {},
385 |    "outputs": [],
386 |    "source": [
387 |     "ds.load()"
388 |    ]
389 |   },
390 |   {
391 |    "cell_type": "markdown",
392 |    "metadata": {},
393 |    "source": [
394 |     "Plot the NIR data to ensure our scaling and clipping worked as expected. We can use the slide bar to scroll through the time dimension showing the "
395 |    ]
396 |   },
397 |   {
398 |    "cell_type": "code",
399 |    "execution_count": null,
400 |    "metadata": {},
401 |    "outputs": [],
402 |    "source": [
403 |     "# Plot to ensure scaling worked\n",
404 |     "ds.NIR.hvplot.image(x=\"x\", y=\"y\", groupby=\"time\", cmap=\"viridis\", width=600, height=500, crs='EPSG:32610', tiles='ESRI', rasterize=True)"
405 |    ]
406 |   },
407 |   {
408 |    "cell_type": "markdown",
409 |    "metadata": {},
410 |    "source": [
411 |     "## 4. Calculate EVI<a id=\"calcevi\"></a>"
412 |    ]
413 |   },
414 |   {
415 |    "cell_type": "markdown",
416 |    "metadata": {},
417 |    "source": [
418 |     "Now we can build an Enhanced Vegetation Index (EVI) dataset using the EVI formula:\n",
419 |     "\n",
420 |     "$$\n",
421 |     "\\text{EVI} = G \\cdot \\frac{\\text{NIR} - \\text{Red}}{\\text{NIR} + C_1 \\cdot \\text{Red} - C_2 \\cdot \\text{Blue} + L}\n",
422 |     "$$\n",
423 |     "\n",
424 |     "**NIR**: Near-Infrared reflectance.  \n",
425 |     "**Red**: Reflectance in the red band.  \n",
426 |     "**Blue**: Reflectance in the blue band, used primarily to correct for aerosol influences.  \n",
427 |     "**G**: Gain factor (often set to 2.5) that scales the index.  \n",
428 |     "**C₁**: Coefficient for the aerosol resistance term using the red band (commonly 6).  \n",
429 |     "**C₂**: Coefficient for the aerosol resistance term using the blue band (commonly 7.5).  \n",
430 |     "**L**: Canopy background adjustment, which helps to minimize soil brightness influences (commonly 1).  \n",
431 |     "\n",
432 |     "We will use the typical values adopted for the MODIS algorithm for G, C1, C2 and L. The output values will range from -1 to 1 and indicate strength of vegetation signal, which correlates with biomass."
433 |    ]
434 |   },
435 |   {
436 |    "cell_type": "code",
437 |    "execution_count": 19,
438 |    "metadata": {},
439 |    "outputs": [],
440 |    "source": [
441 |     "# Calculate EVI\n",
442 |     "evi_ds = 2.5 * ((ds.NIR - ds.B04) / (ds.NIR + 6.0 * ds.B04 - 7.5 * ds.B02 + 1.0))"
443 |    ]
444 |   },
445 |   {
446 |    "cell_type": "code",
447 |    "execution_count": 20,
448 |    "metadata": {},
449 |    "outputs": [],
450 |    "source": [
451 |     "evi_ds = evi_ds.compute()"
452 |    ]
453 |   },
454 |   {
455 |    "cell_type": "markdown",
456 |    "metadata": {},
457 |    "source": [
458 |     "Now lets visualize our EVI over our ROI. We can scroll through the time-series using the slider bar to the right of the figure."
459 |    ]
460 |   },
461 |   {
462 |    "cell_type": "code",
463 |    "execution_count": null,
464 |    "metadata": {},
465 |    "outputs": [],
466 |    "source": [
467 |     "evi_ds.hvplot.image(x=\"x\", y=\"y\", groupby=\"time\", cmap=\"YlGn\", clim=(0, 1), crs='EPSG:32610', tiles='ESRI', rasterize=True)"
468 |    ]
469 |   },
470 |   {
471 |    "cell_type": "markdown",
472 |    "metadata": {},
473 |    "source": [
474 |     "## 5. Quality Masking<a id=\"qualityfilter\"></a>"
475 |    ]
476 |   },
477 |   {
478 |    "cell_type": "markdown",
479 |    "metadata": {},
480 |    "source": [
481 |     "Lastly, we will apply a function to our EVI dataset, which will mask out pixels based on our bit selection. \n",
482 |     "\n",
483 |     "For HLS v2.0 products, all quality information is included in the Fmask layer. This layer includes values corresponding to combinations of bits that represent\n",
484 |     "different quality descriptions at each location within the scene.  Bits are ordered `76543210` and correspond to the following quality descriptions:  \n",
485 |     "\n",
486 |     "|Bit Number|Mask Name|Bit Value|Description|\n",
487 |     "|---|---|---|---|\n",
488 |     "|7-6|Aerosol<br>Level|11<br>10<br>01<br>00|High<br>Medium<br>Low<br>Clear<br>|\n",
489 |     "|5|Water|1<br>0|Yes<br>No|\n",
490 |     "|4|Snow/Ice|1<br>0|Yes<br>No|\n",
491 |     "|3|Cloud<br>Shadow|1<br>0|Yes<br>No|\n",
492 |     "|2|Cloud/Shadow<br>Adjacent|1<br>0|Yes<br>No|\n",
493 |     "|1|Cloud|1<br>0|Yes<br>No|\n",
494 |     "|0|Cirrus|Reserved|NA|\n",
495 |     "\n",
496 |     "For example, an 8bit integer 224 converted to binary is 11100000. This would indicate high aerosol (bits 7-6), and water (bit 5) are present in a pixel with that value."
497 |    ]
498 |   },
499 |   {
500 |    "cell_type": "code",
501 |    "execution_count": null,
502 |    "metadata": {},
503 |    "outputs": [],
504 |    "source": [
505 |     "def create_quality_mask(quality_data, bit_nums: list = [1, 2, 3, 4, 5]):\n",
506 |     "    \"\"\"\n",
507 |     "    Uses the Fmask layer and bit numbers to create a binary mask of good pixels.\n",
508 |     "    By default, bits 1-5 are used.\n",
509 |     "    \"\"\"\n",
510 |     "    mask_array = np.zeros((quality_data.shape[0], quality_data.shape[1]))\n",
511 |     "    # Remove/Mask Fill Values and Convert to Integer\n",
512 |     "    quality_data = np.nan_to_num(quality_data.copy(), nan=255).astype(np.int8)\n",
513 |     "    for bit in bit_nums:\n",
514 |     "        # Create a Single Binary Mask Layer\n",
515 |     "        mask_temp = np.array(quality_data) & 1 << bit > 0\n",
516 |     "        mask_array = np.logical_or(mask_array, mask_temp)\n",
517 |     "    return mask_array"
518 |    ]
519 |   },
520 |   {
521 |    "cell_type": "markdown",
522 |    "metadata": {},
523 |    "source": [
524 |     "Use xarray's `apply_ufunc` to apply a function in a vectorized way to our EVI timeseries dataset. `bit_nums` can be provided as a dict to specify which bits to mask out."
525 |    ]
526 |   },
527 |   {
528 |    "cell_type": "code",
529 |    "execution_count": 23,
530 |    "metadata": {},
531 |    "outputs": [],
532 |    "source": [
533 |     "quality_mask = xr.apply_ufunc(\n",
534 |     "    create_quality_mask,\n",
535 |     "    ds.Fmask,\n",
536 |     "    kwargs={\"bit_nums\": [1,2,3,4,5]},\n",
537 |     "    input_core_dims=[[\"x\", \"y\"]],\n",
538 |     "    output_core_dims=[[\"x\", \"y\"]],\n",
539 |     "    vectorize=True,\n",
540 |     "    dask='parallelized',\n",
541 |     "    output_dtypes=[np.bool],\n",
542 |     ")"
543 |    ]
544 |   },
545 |   {
546 |    "cell_type": "markdown",
547 |    "metadata": {},
548 |    "source": [
549 |     "Now use the `where` function to mask out regions corresponding with the bit numbers and visualize the EVI timeseries again."
550 |    ]
551 |   },
552 |   {
553 |    "cell_type": "code",
554 |    "execution_count": null,
555 |    "metadata": {},
556 |    "outputs": [],
557 |    "source": [
558 |     "evi_ds.where(~quality_mask).hvplot.image(x=\"x\", y=\"y\", groupby=\"time\", cmap=\"YlGn\", clim=(0, 1), crs='EPSG:32610', tiles='ESRI', rasterize=True)"
559 |    ]
560 |   },
561 |   {
562 |    "cell_type": "markdown",
563 |    "metadata": {},
564 |    "source": [
565 |     "## Contact Info  \n",
566 |     "\n",
567 |     "Email: LPDAAC@usgs.gov  \n",
568 |     "Voice: +1-866-573-3222  \n",
569 |     "Organization: Land Processes Distributed Active Archive Center (LP DAAC)¹  \n",
570 |     "Website: <https://lpdaac.usgs.gov/>  \n",
571 |     "\n",
572 |     "¹Work performed under USGS contract G15PD00467 for NASA contract NNG14HH33I. "
573 |    ]
574 |   }
575 |  ],
576 |  "metadata": {
577 |   "kernelspec": {
578 |    "display_name": "lpdaac_vitals",
579 |    "language": "python",
580 |    "name": "python3"
581 |   },
582 |   "language_info": {
583 |    "codemirror_mode": {
584 |     "name": "ipython",
585 |     "version": 3
586 |    },
587 |    "file_extension": ".py",
588 |    "mimetype": "text/x-python",
589 |    "name": "python",
590 |    "nbconvert_exporter": "python",
591 |    "pygments_lexer": "ipython3",
592 |    "version": "3.12.9"
593 |   }
594 |  },
595 |  "nbformat": 4,
596 |  "nbformat_minor": 2
597 | }
598 | 


--------------------------------------------------------------------------------
/r/HLS_Tutorial.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Getting Started with Cloud-Native Harmonized Landsat Sentinel-2 (HLS) Data in R"
  3 | output:
  4 |   html_document:
  5 |     df_print: paged
  6 |     fig_caption: yes
  7 |     theme: paper
  8 |     toc: yes
  9 |     toc_depth: 2
 10 |     toc_float: yes
 11 |   pdf_document:
 12 |     toc: yes
 13 |     toc_depth: '2'
 14 |   word_document:
 15 |     toc: yes
 16 |     toc_depth: '2'
 17 | theme: lumen
 18 | ---
 19 | 
 20 | ```{r setup, include=FALSE}
 21 | knitr::opts_chunk$set(echo = TRUE)
 22 | knitr::opts_chunk$set(comment = NA)
 23 | knitr::opts_knit$set(root.dir = dirname(rprojroot::find_rstudio_root_file()))
 24 | ```
 25 | 
 26 | ------------------------------------------------------------------------
 27 | 
 28 | **This tutorial demonstrates how to work with the HLS Landsat (HLSL30.002) and Sentinel-2 (HLSS30.002) data products in R.** 
 29 | 
 30 | The Harmonized Landsat Sentinel-2 [(HLS)](https://lpdaac.usgs.gov/data/get-started-data/collection-overview/missions/harmonized-landsat-sentinel-2-hls-overview)
 31 | project is a NASA initiative aiming to produce a consistent, harmonized
 32 | surface reflectance product from the Operational Land Imager (OLI) aboard 
 33 | the joint NASA/USGS Landsat 8 and Landsat 9 satellites and the Multi-Spectral 
 34 | Instrument (MSI) aboard Europe’s Copernicus Sentinel-2A and Sentinel-2B 
 35 | satellites. Using sets of algorithms, all the necessary radiometric, spectral,
 36 | geometric, and spatial corrections have been applied to make HLS into seamless
 37 | timeseries that are stackable and comparable.  
 38 | 
 39 | The dense timeseries of HLS data presents unprecedented opportunities to 
 40 | monitor and map land surface dynamics with exceptional spatial and temporal
 41 | detail. The enhanced resolution will provide value in numerous fields, 
 42 | including:  
 43 | 
 44 | - Land cover change detection
 45 | - Agricultural management and monitoring
 46 | - Disaster response and recovery
 47 | - Water resource management
 48 | - Vegetation phenology studies  
 49 | 
 50 | By leveraging the higher temporal resolution of this dataset, researchers and 
 51 | practitioners can gain a deeper understanding of complex environmental 
 52 | processes and make more informed decisions. 
 53 | 
 54 | NASA's Land Processes Distributed Active Archive Center (LP DAAC) archives 
 55 | and distributes HLS products in NASA's Earthdata cloud where each file is
 56 | stored as Cloud Optimized GeoTIFFs (COG). The primary objective of this 
 57 | tutorial is to show how to query and subset HLS data using the NASA CMR-STAC
 58 | application programming interface (API). Using cloud hosted publicly available 
 59 | data from NASA's Earthdata Cloud, you are not restricted by the need to 
 60 | download HLS data files for your research needs anymore. The CMR-STAC API 
 61 | allows you to subset your desired data collection by region, time, and band 
 62 | while Earthdata Cloud allows you to download or stream the desired data to your 
 63 | workspace.  
 64 | 
 65 | ---
 66 | 
 67 | ## Use Case Example
 68 | 
 69 | In this tutorial, we demonstrate how to process and analyze NDVI time series
 70 | data derived from HLS data over a region of interest (ROI). Using a case study,
 71 | we cover the following steps:
 72 | 
 73 | - **Calculate NDVI**: Learn how to compute the Normalized Difference Vegetation
 74 | Index (NDVI).
 75 | - **Quality Masking**: Apply quality masks to filter out low-quality data.
 76 | - **Visualization**: Visualize the NDVI data for better understanding and
 77 | interpretation.
 78 | - **Statistical Analysis**: Calculate and export statistics specific to your
 79 | ROI without downloading the source data files.  
 80 | 
 81 | We use multiple agricultural fields in California’s Central Valley as an 
 82 | example to illustrate how to interact with HLS data effectively.
 83 | 
 84 | ### Products Used:	
 85 | 	
 86 | **1. Daily 30 meter (m) global HLS Sentinel-2 Multi-spectral Instrument Surface Reflectance - [HLSS30.002](https://doi.org/10.5067/HLS/HLSS30.002)**    
 87 |    **Science Dataset (SDS) layers:**    
 88 |     - B8A (NIR Narrow)    
 89 |     - B04 (Red)    
 90 |     - Fmask (Quality)  
 91 | 
 92 | **2. Daily 30 meter (m) global HLS Landsat-8 and 9 OLI Surface Reflectance - [HLSL30.002](https://doi.org/10.5067/HLS/HLSL30.002)**  
 93 |   **Science Dataset (SDS) layers:**  
 94 |     - B05 (NIR)  
 95 |     - B04 (Red)  
 96 |     - Fmask (Quality)  
 97 | 
 98 | ## Topics Covered in This Tutorial
 99 | 
100 | 1.  **Getting Started**\
101 |     1a. Load Required Libraries\
102 |     1b. Set Up the Working Directory\
103 | 2.  **CMR-STAC API: Searching for Items**\
104 |     2a. Collection Query\
105 |     2b. Spatial Query Search Parameter\
106 |     2c. Temporal Query Search Parameter\
107 |     2d. Submit a Query for Our Search Criteria\
108 | 3.  **Accessing and Interacting with HLS Data**\
109 |     3a. Subset by Band\
110 |     3b. Subset HLS COGs Spatially and Stack HLS Data Layers\
111 | 4.  **Processing HLS Data**\
112 |     4a. Calculate NDVI\
113 |     4b. Apply Quality Filtering\
114 |     4c. Visualize Stacked Time Series\
115 |     4d. Export Statistics\
116 |     5.  **Export Output to GeoTIFF**  
117 | 
118 | ## Prerequisites:
119 | 
120 | -   This tutorial can be executed using R with RStudio or in Visual Studio Code
121 | (VS Code).
122 | -   Tested on Windows using R Version 4.0.5 and RStudio version 1.2.5033.
123 | -   A [NASA Earthdata Login](https://urs.earthdata.nasa.gov/) account is 
124 | required to access the data used in this Tutorial. You can create an account
125 | [here](https://urs.earthdata.nasa.gov/users/new).  
126 | 
127 | ## Procedures:
128 | 
129 | ### Getting Started:
130 | 
131 |   - [Clone](https://github.com/nasa/HLS-Data-Resources.git) or 
132 |     [download](https://github.com/nasa/HLS-Data-Resources/archive/refs/heads/main.zip)
133 |     the HLS Data Resources Repository. 
134 | 
135 |   - When you open this Rmarkdown notebook in RStudio, you can click the little 
136 |     green "Play" button in each grey code chunk to execute the code. The result
137 |     can be printed either in the R Console or inline in the RMarkdown notebook,
138 |     depending on your RStudio preferences. 
139 | 
140 | ### Environment Setup: 
141 | 
142 | #### 1. Check the version of R by typing `version` into the console and RStudio by typing `RStudio.Version()` into the console and update them if needed.
143 | 
144 | -   Windows
145 | 
146 |     -   Install and load installr:
147 | 
148 |         -   `install.packages("installr");library(installr)`\
149 | 
150 |     -   Copy/Update the existing packages to the new R installation:
151 | 
152 |         -   `updateR()`
153 | 
154 |     -   Open RStudio, go to Help \> Check for Updates to install newer
155 |         version of RStudio (if available).
156 | 
157 | -   Mac
158 | 
159 |     -   Go to <https://cloud.r-project.org/bin/macosx/>.\
160 |     -   Download the latest release (R-4.4.1.pkg) and finish the
161 |         installation.
162 |     -   Open RStudio, go to Help \> Check for Updates to install newer
163 |         version of RStudio (if available).
164 |     -   To update packages, go to Tools \> Check for Package Updates. If
165 |         updates are available, select All, and click Install Updates.
166 | 
167 | #### 2. Required packages          
168 | 
169 |     -   `rmarkdown`
170 |     -   `rstac`
171 |     -   `terra`
172 |     -   `imager`
173 |     -   `leaflet`  
174 |     -   `dygraphs`
175 |     -   `xts`  
176 |     -   `lubridate`  
177 |     -   `earthdatalogin`
178 |   
179 | Run the cell below to identify any missing packages to install, and then load 
180 | all of the required packages.
181 | 
182 | ```{r, warning = FALSE, message = FALSE}
183 | packages <- c('rmarkdown','earthdatalogin', 'rstac','imager','lubridate','xts','dygraphs','leaflet','terra')              
184 | 
185 | # Identify missing (not installed) packages
186 | new.packages = packages[!(packages %in% installed.packages()[,"Package"])]
187 | 
188 | # Install new (not installed) packages
189 | if(length(new.packages)) install.packages(new.packages, repos='http://cran.rstudio.com/', dependencies = TRUE) else print('All required packages are installed.')
190 | ```
191 | 
192 | # 1. Getting Started
193 | 
194 | ## 1a. Load Required Libraries
195 | 
196 | Next load all packages using `library()` function.
197 | 
198 | ```{r, warning= FALSE, message=FALSE}
199 | invisible(lapply(packages, library, character.only = TRUE))
200 | ```
201 | 
202 | ## 1b. Set Up the Working Directory
203 | 
204 | Create an output directory for the results.
205 | 
206 | ```{r}
207 | # Create an output directory if it doesn't exist
208 | outDir <- file.path("data", "R_Output", fsep="/")
209 | suppressWarnings(dir.create(outDir)) 
210 | ```
211 | 
212 | ## 1c. Set Up the Authentication. 
213 | 
214 | The `earthdatalogin` sets up authentication using environmental variables. 
215 | 
216 | ```{r}
217 | # Authentication is not used needed immediately, but it is good to set up in the beginning.
218 | earthdatalogin::edl_netrc()
219 | ```
220 | 
221 | # 2. CMR-STAC API: Searching for Items
222 | 
223 | We will use the CMR-STAC API search endpoint to query the LPCLOUD catalog for
224 | HLS data by ROI and time period of interest. In order to retrieve STAC Items 
225 | that match your criteria, you need to define your query parameters, which we 
226 | will walk through below.
227 | 
228 | To learn how to navigate through the structure of a CMR-STAC Catalog and
229 | define Search parameters, see [Getting Started with NASA's CMR-STAC
230 | API](https://git.earthdata.nasa.gov/projects/LPDUR/repos/data-discovery---cmr-stac-api/browse).
231 | Information on the specifications for adding parameters to a search query can
232 | be found [here](https://github.com/radiantearth/stac-api-spec/tree/master/item-search#query-parameters-and-fields).
233 | 
234 | For this tutorial, we will use the [rstac](https://brazil-data-cube.github.io/rstac/index.html)
235 | library to interact with the CMR-STAC API. 
236 | 
237 | Define the CMR-STAC endpoint, which we will connect to for our search.
238 | ```{r}
239 | s = stac("https://cmr.earthdata.nasa.gov/stac/LPCLOUD/")
240 | ```
241 | 
242 | ## 2a. Collection Query
243 | 
244 | We will need to assign the lists one or more Collection IDs we want to include
245 | in the search query to a variable. Only STAC Items from the provided 
246 | Collections will be searched. Here, we are interested in both HLS **L30** (Landsata
247 | 8 and 9) and **S30** (Sentinel-2A and B) collections. To learn how to access 
248 | Collection IDs in CMR-STAC visit [Getting Started with NASA's CMR-STAC API](https://git.earthdata.nasa.gov/projects/LPDUR/repos/data-discovery---cmr-stac-api/browse).
249 | 
250 | ```{r}
251 | HLS_col <- list("HLSS30_2.0", "HLSL30_2.0")
252 | ```
253 | 
254 | ## 2b. Spatial Query Search Parameter
255 | 
256 | We can assign our spatial region of interest by loading a GeoJSON file using the 
257 | `terra` package. An example GeoJSON file is supplied in the 'Data' directory of 
258 | the 'hls_tutorial_r' repository named `FieldBoundary.geojson`. Read the file in 
259 | and use it to perform the spatial subset.  
260 | 
261 | ```{r, results= "hide"}
262 | roi <- terra::vect("data/Field_Boundary.geojson")
263 | ```
264 | 
265 | We will use the `leaflet` package to plot the agricultural fields boundary on 
266 | top of a ESRI world imagery basemap.   
267 | 
268 | ```{r}
269 | leaflet() %>% 
270 |   addPolygons(data = roi, fill = FALSE) %>% 
271 |   addProviderTiles(providers$Esri.WorldImagery) %>% 
272 |   addMiniMap(zoomLevelFixed = 5)
273 | ```
274 | 
275 | This map provides a visual representation of our study area. To perfor a 
276 | spatial search using the CMR-STAC we need to define the "bounding box" using
277 | the lower left and upper right coordinates of our area of interest. The `terra`
278 | package offers a convenient solution to get this information. Below, we'll 
279 | extract the extent of our input GeoJSON file and generate the necessary spatial
280 | search parameters for precise data retrieval.
281 | 
282 | ```{r}
283 | roi_extent <- terra::ext(roi)
284 | bbox <- c(roi_extent$xmin, roi_extent$ymin, roi_extent$xmax, roi_extent$ymax)
285 | ```
286 | 
287 | ## 2c. Temporal Query Search Parameter
288 | 
289 | Next, we will need to define a temporal search query parameter. In our example, 
290 | we will set the time period of interest for two months of August and September 2021. 
291 | Note that the temporal ranges should be specified as a pair of date-time values 
292 | separated by comma (,) or forward slash (/). Each date-time value must have the 
293 | format of`YYYY-MM-DDTHH:MM:SSZ` (ISO 8601). Additional information on setting temporal 
294 | searches can be found in the [NASA CMR Documentation](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html#temporal-range-searches).  
295 | 
296 | ```{r}
297 | roi_datetime <- '2021-08-01T00:00:00Z/2021-09-30T23:59:59Z'   # YYYY-MM-DDTHH:MM:SSZ/YYYY-MM-DDTHH:MM:SSZ
298 | ```
299 | 
300 | 
301 | ## 2d. Submit a Query with Our Search Criteria
302 | 
303 | Now we are all prepared to submit a request to the CMR-STAC endpoint! We will 
304 | do this using the `rstac` package. The output will show all the available 
305 | data matching our search criteria based on our datetime and bounding box.  
306 | 
307 | ```{r}
308 | items <- s %>%
309 |     stac_search(collections = HLS_col,
310 |                 bbox = bbox,
311 |                 datetime = roi_datetime,
312 |                 limit = 100) %>%
313 |     post_request()
314 | print(items)
315 | ```
316 | 
317 | Each item returned is a `FeatureCollection`. We can explore an example of what 
318 | is in a feature by selecting the first one.  
319 | 
320 | ```{r}
321 | View(items$features[[1]])
322 | ```
323 | 
324 | We can also view the assets of each item in the response, to see what data 
325 | (bands) can be accessed.  
326 | 
327 | ```{r}
328 | assets <- items_assets(items)
329 | print(assets)
330 | ```
331 | 
332 | Working with the first feature, we can extract the browse image URL and plot it.
333 | 
334 | ```{r}
335 | browse_image_url <- items$features[[1]]$assets$browse$href
336 | 
337 | browse <-load.image(browse_image_url)
338 | plot(browse)
339 | ```
340 | 
341 | Our first view of NASA data from the cloud.
342 | 
343 | # 3. Accessing and Interacting with HLS Data
344 | 
345 | This section will demonstrate how to simplify interacting with our search results,
346 | filter based on cloud cover, select assets (bands), stream the data to memory, 
347 | subset by our region of interest, and create a time-series of NDVI.
348 | 
349 | To make this data easier to work with, we can start to place the information
350 | into a spatial dataframe using the use the `items_as_sf()` function.
351 | 
352 | ```{r}
353 | sf_items <- items_as_sf(items)
354 | View(sf_items)
355 | ```
356 | 
357 | More information can be added to the spatial dataframe by pulling information
358 | from each feature using `sapply` to extract the properties of each feature, then
359 | binding it to our spatial dataframe.
360 | 
361 | ```{r}
362 | # Retrieve Granule ID for each feature
363 | granule_id <- sapply(items$features, function(feature) feature$id)
364 | # Add as first column in sf_items
365 | sf_items <- cbind(granule = granule_id, sf_items)
366 | View(sf_items)
367 | ```
368 | 
369 | ## 3a. Select Bands and Filter by Cloud Cover
370 | 
371 | We can also retrieve the asset URLs for the bands we are interested in. In this
372 | case, our goal is to build an NDVI timeseries from the NIR and Red bands. We 
373 | will also want the quality and Fmask layers. The band number is different for
374 | NIR between the HLSL30 and HLSS30 products. Below you can find the band numbers 
375 | we want to retrieve for each of the two products.   
376 | 
377 | -   HLSS30 (Sentinel 2):
378 | 
379 |     -   "narrow" NIR = B8A
380 |     -   Red = B04
381 |     -   Quality = Fmask
382 | 
383 | -   HLSL30 (Landsat):
384 | 
385 |     -   NIR = B05
386 |     -   Red = B04
387 |     -   Quality = Fmask
388 | 
389 | Additional information on HLS band allocations can be found [here](https://lpdaac.usgs.gov/documents/1118/HLS_User_Guide_V2.pdf).
390 | 
391 | Let's define a function to retrieve the URL for each of these bands and add
392 | them as columns to the dataframe. Afterwards, run it.
393 | 
394 | ```{r}
395 | # Define a function to extract asset urls for selected bands
396 | # # This also includes a check to ensure the correct bands are extracted
397 | # # # depending on the collection (HLSL30 or HLSS30)
398 | extract_asset_urls <- function(feature) {
399 |     collection_id <- feature$collection
400 |     if (collection_id == "HLSS30_2.0") {
401 |         bands = c('B8A','B04','Fmask')
402 |     } else if (collection_id == "HLSL30_2.0") {
403 |         bands = c('B05','B04','Fmask')}
404 |     sapply(bands, function(band) feature$assets[[band]]$href)
405 | }
406 | # Retrieve Asset URLs for each feature using our extract_asset_urls function and transpose them to columns
407 | asset_urls <- t(sapply(items$features, extract_asset_urls))
408 | View(asset_urls)
409 | ```
410 | 
411 | Notice that our function has returned a table of each asset, but that the name 
412 | for the NIR is B8A. This is because Sentinel was the first item in the search 
413 | results. We'll want to rename these for clarity as we add them to the spatial 
414 | dataframe.
415 | 
416 | ```{r}
417 | colnames(asset_urls) <- c('nir', 'red', 'fmask')
418 | sf_items <- cbind(sf_items, asset_urls)
419 | View(sf_items)
420 | ```
421 | 
422 | The 'eo:cloud_cover' column contains a percentage cloud-cover of a scene, which we can use
423 | to filter our results. We'll filter out any scenes with more than 30% cloud cover.
424 | 
425 | ```{r}
426 | # Filter based on cloud cover
427 | sf_items <- sf_items[sf_items$eo.cloud_cover < 30,]
428 | # Reset Row Indices
429 | row.names(sf_items) <- NULL
430 | View(sf_items)
431 | ```
432 | 
433 | ## 3b. Subset HLS COGs Spatially and Stack HLS Data Layers
434 | 
435 | First, set up rgdal configurations to access the cloud assets that we are 
436 | interested in. You can learn more about these configuration options [here](https://trac.osgeo.org/gdal/wiki/ConfigOptions).  
437 | 
438 | ```{r, results= "hide"}
439 | setGDALconfig("GDAL_HTTP_UNSAFESSL", value = "YES")
440 | setGDALconfig("GDAL_HTTP_COOKIEFILE", value = ".rcookies")
441 | setGDALconfig("GDAL_HTTP_COOKIEJAR", value = ".rcookies")
442 | setGDALconfig("GDAL_DISABLE_READDIR_ON_OPEN", value = "EMPTY_DIR")
443 | setGDALconfig("CPL_VSIL_CURL_ALLOWED_EXTENSIONS", value = "TIF")
444 | ```
445 | 
446 | Next, we define a function to open an HLS COG file from Earthdata Cloud and 
447 | crop it to our region of interest. During the cropping phase we want to be sure
448 | to reproject our region of interest to match the CRS of the HLS data.  
449 | 
450 | **One important thing to note about HLS data is that some scenes have the scale 
451 | factor for bands located in the wrong section of the metadata. This means that 
452 | the scale factor is not applied to the data when it is read by `terra::rast`. 
453 | To correct this we will check the metadata for the scale factor and apply it if
454 | necessary.**  
455 | 
456 | ```{r}
457 | # This function reads an HLS scene from a URL, applies the scale factor if necessary, and optionally crops and
458 | # masks the scene based on a polygon. It requries the above GDAL configurations and a .netrc file. A .netrc
459 | # can be created by running `earthdatalogin::edl_netrc()`.
460 | open_hls <- function(url, roi = NULL) {
461 |   # Add VSICURL prefix
462 |   url <- paste0('/vsicurl/', url)
463 |   # Retrieve metadata
464 |   meta <- describe(url)
465 |   # Check if dataset is Quality Layer (Fmask) - no scaling this asset (int8 datatype)
466 |   is_fmask <- any(grep("Fmask", meta))
467 |   # Check if Scale is present in band metadata
468 |   will_autoscale <- any(grep("Scale:", meta))
469 |   # Read the raster
470 |   r <- rast(url)
471 |   # Apply Scale Factor if necessary
472 |   if (!will_autoscale && !is_fmask){
473 |     print(paste("No scale factor found in band metadata. Applying scale factor of 0.0001 to", basename(url)))
474 |     r <- r * 0.0001
475 |   }
476 |   # Crop if roi specified
477 |   if (!is.null(roi)){
478 |     # Reproject roi to match crs of r
479 |     roi_reproj <- project(roi, crs(r))
480 |     r <- mask(crop(r, roi_reproj), roi_reproj)
481 |   }
482 | return(r)
483 | }
484 | ```
485 | 
486 | Let's test this function on the first red band in our spatial dataframe and 
487 | plot the results.
488 | 
489 | ```{r}
490 | # Test opening and crop
491 | red <- open_hls(sf_items$red[1], roi)
492 | plot(red)
493 | ```
494 | 
495 | We can apply this function to all of the red, nir, and fmask scenes in our 
496 | spatial dataframe, using `lapply` to place the results in a list. This cell may
497 | take some time depending on internet speed, as we're loading 72 scenes from the
498 | cloud and cropping them. 
499 | 
500 | ```{r}
501 | red_stack <- lapply(sf_items$red, open_hls, roi = roi)
502 | nir_stack <- lapply(sf_items$nir, open_hls, roi = roi)
503 | fmask_stack <- lapply(sf_items$fmask, open_hls, roi = roi)
504 | ```
505 | 
506 | ------------------------------------------------------------------------
507 | 
508 | # 4. Processing HLS Data
509 | 
510 | Now we can start asking our science questions. First we define the NDVI 
511 | function and then execute it on the data loaded into memory. After that, we can
512 | perform quality filtering to screen out any poor-quality observations.
513 | 
514 | ## 4a. Calculate NDVI
515 | 
516 | Create a function to calculate NDVI.
517 | 
518 | ```{r}
519 | calculate_NDVI <- function(nir, red){
520 |   ndvi <- (nir-red)/(nir+red)
521 |   return(ndvi)
522 | }
523 | ```
524 | 
525 | Now we can calculate NDVI from the list of red and nir rasters.  
526 | 
527 | **Note that we get a warning about the CRS. This is because HLS-Landsat and 
528 | HLS-Sentinel have slightly different WKT strings that define their projections. 
529 | This is a known issue with HLS data. Although we get a warning here, the CRS is
530 | the same.**
531 | 
532 | ```{r}
533 | # Calculate NDVI For All of our Scenes
534 | ndvi_stack <- mapply(calculate_NDVI, nir_stack, red_stack, SIMPLIFY = FALSE)
535 | # Rename the Scenes in our List
536 | names(ndvi_stack) <- sf_items$datetime
537 | # Create a single Rast Object from our list
538 | ndvi_stacks <- terra::rast(ndvi_stack)
539 | ```
540 | 
541 | Now we plot! Let's start with the first item in NDVI time series.  
542 | 
543 | ```{r, warning=FALSE, message=FALSE}
544 | # Create a color palette 
545 | pal <- colorNumeric(terrain.colors(n = 100), c(0,1) ,na.color = "transparent", reverse = TRUE)
546 | 
547 | leaflet() %>% 
548 |     addProviderTiles(providers$Esri.WorldImagery) %>%
549 |     addRasterImage(ndvi_stacks[[1]], color = pal, opacity = 1) %>%
550 |     addPolygons(data = roi, fill = FALSE) %>%
551 |     addMiniMap(zoomLevelFixed = 5) %>%
552 |     leaflet::addLegend(pal = pal, values = c(0,1), title = "NDVI")
553 | 
554 | ```
555 | 
556 | If you get an from this chunk, please update to the latest version of `leaflet`.  
557 | 
558 | ------------------------------------------------------------------------
559 | 
560 | ## 4b. Quality Filtering
561 | 
562 | For HLS v2.0 products, all quality information is included in the Fmask layer. 
563 | This layer includes values corresponding to combinations of bits that represent
564 | different quality descriptions at each location within the scene.  
565 | 
566 | Bits are ordered `76543210` and correspond to the following quality descriptions:  
567 | 
568 | |Bit Number|Mask Name|Bit Value|Description|
569 | |----------|---------|---------|-----------|
570 | |   7-6    | Aerosol |  11     |   High    |
571 | |          | Level   |  10     |   Medium  |
572 | |          |         |  01     |   Low     |
573 | |          |         |  00     |   Clear   |
574 | |----------|---------|---------|-----------|
575 | |    5     | Water   |  1      |    Yes    |
576 | |          |         |  0      |    No     |
577 | |----------|---------|---------|-----------|
578 | |    4     |  Snow/  |  1      |    Yes    |
579 | |          |  Ice    |  0      |    No     |
580 | |----------|---------|---------|-----------|
581 | |    3     | Cloud   |  1      |    Yes    |
582 | |          | Shadow  |  0      |    No     |
583 | |----------|---------|---------|-----------|
584 | |    2     | Cloud/  |  1      |    Yes    |
585 | |          | Shadow  |  0      |    No     |
586 | |          | Adjacent|         |           |
587 | |----------|---------|---------|-----------|
588 | |    1     | Cloud   |  1      |    Yes    |
589 | |          |         |  0      |    No     |
590 | |----------|---------|---------|-----------|
591 | |    0     | Cirrus  |Reserved |    NA     |
592 | |----------|---------|---------|-----------|
593 | 
594 | Open an fmask layer and plot it to see examples of the quality values.  
595 | 
596 | ```{r}
597 | # Test fmask
598 | fmask <- fmask_stack[[23]]
599 | plot(fmask)
600 | ```
601 | 
602 | We can select select the bits we want to mask from our analysis. In this case, 
603 | we will mask pixels identified as clouds, cloud/shadow adjacent, cloud shadow, 
604 | snow/ice, and water. We will ignore the aerosol levels. Create a list of those 
605 | bit numbers:  
606 | 
607 | ```{r}
608 | selected_bit_nums <- c(1,2,3,4,5)
609 | ```
610 | 
611 | Now we can build a function to create a binary mask layer from the Fmask 
612 | layers. This function does a couple things:  
613 | 
614 | 1. Read the Fmask layer
615 | 2. Creates and empty mask Layer
616 | 3. Applies a bitwise AND operation to determine if the fmask value has the 
617 | selected bit numbers number set when converted to binary.
618 | 4. If so, the mask value is set to 1, then the mask is updated using a bitwise
619 | OR operation to update the layer for each selected bit as we loop through the
620 | selected bit numbers.
621 | 5. Lastly, it returns the new mask layer we can use to filter our data.  
622 | 
623 | ```{r}
624 | # Filter based on quality
625 | build_mask <- function(fmask, selected_bit_nums){
626 |   # Create a mask of all zeros
627 |   mask <- rast(fmask, vals=0)
628 |   for (b in selected_bit_nums){
629 |     # Apply Bitwise AND to fmask values and selected bit numbers
630 |     mask_temp <- app(fmask, function(x) bitwAnd(x, bitwShiftL(1,b)) >0)
631 |     # Update Mask to maintain only 1 layer with bitwise OR
632 |     mask <- mask | mask_temp
633 |   }
634 |   return(mask)
635 | }
636 | ```
637 | 
638 | To see this in action, we can apply the function to an Fmask layer and plot the 
639 | results.  
640 | 
641 | ```{r}
642 | qmask <- build_mask(fmask[[1]], selected_bit_nums)
643 | plot(qmask)
644 | ```
645 | 
646 | Like we did before, we can apply this function to our Fmask stack to create a 
647 | list of masks.  
648 | 
649 | ```{r}
650 | # Create List of Masks
651 | qmask_stack <- lapply(fmask_stack, build_mask, selected_bit_nums=selected_bit_nums)
652 | ```
653 | 
654 | After creating a list of masks, we can apply them to our NDVI stack to filter 
655 | out the poor-quality pixels for each scene in the stack, making sure to update
656 | the values masked values to `NA`.  
657 | 
658 | ```{r}
659 | # Apply Mask to NDVI using NA Values
660 | ndvi_masked <- mapply(function(x, y) {
661 |   mask(x, y, maskvalue = TRUE, updatevalue = NA)
662 | }, ndvi_stack, qmask_stack, SIMPLIFY = FALSE)
663 | ```
664 | 
665 | Right now ndvi_masked is a list of `SpatRaster` objects. We can stack it using 
666 | the `rast` function to provide the expected format for the next steps.  
667 | 
668 | ```{r}
669 | ndvi_masked <- rast(ndvi_masked)
670 | ```
671 | 
672 | Let's visualize a single layer of the masked NDVI to ensure our quality 
673 | filtering worked.  
674 | 
675 | ```{r}
676 | plot(ndvi_masked[[23]])
677 | ```
678 | 
679 | We can see that some of the water present has been masked out of the NDVI layer,
680 | which was part of our quality bit selection.  
681 | 
682 | ## 4c.Visualize Stacked Time Series
683 | 
684 | Now we can plot multiple layers to create an interactive NDVI time series map 
685 | with `leaflet`. Click on the dates on the left side to view the layer.  
686 | 
687 | ```{r}
688 | # Create a leaflet map and add the base layer
689 | map <- leaflet() %>%
690 |   addProviderTiles(providers$Esri.WorldImagery) %>%
691 |   addMiniMap(zoomLevelFixed = 5)
692 | 
693 | # Add each layer from our rasterstack
694 | for (i in 1:nlyr(ndvi_masked)) {
695 |   map <- map %>%
696 |     addRasterImage(ndvi_masked[[i]], colors = pal, opacity = 1, group = names(ndvi_masked)[i])
697 | }
698 | 
699 | # Add layer controls and legend
700 | map <- map %>%
701 |   addLayersControl(baseGroups = names(ndvi_masked),
702 |                    options = layersControlOptions(collapsed = FALSE), position = 'topleft') %>%
703 |   addLegend(pal = pal, values = c(0, 1), title = 'NDVI')
704 | # Show map
705 | map
706 | ```
707 | 
708 | Above, the time series show the changes in NDVI over the two months of September 
709 | and August 2021. The NDVI values over these agricultural fields are high and 
710 | stable and then they drastically decrease showing that they are cultivated 
711 | before the send of September.  
712 | 
713 | ## 4d. Calculating and Exporting Statistics
714 | 
715 | We can plot the time series NDVI values as boxplots showing their distribution 
716 | for our farm fields.  
717 | 
718 | ```{r}
719 | # Add Date Only Column
720 | sf_items$date <- sapply(sf_items$datetime, function(x) strsplit(x, "T")[[1]][1])
721 | # Set Plot Margins and Font Sizes
722 | par(mar = c(10, 6, 4, 2), cex.axis = 2, cex.lab = 2, cex.main = 2) # bottom, left, top, right
723 | # Create Boxplot
724 | terra::boxplot(ndvi_masked, col=c('olivedrab3'),  main='NDVI Time Series', ylab='NDVI',
725 |                 names = sf_items$date, las=2)
726 | ```
727 | 
728 | Next, calculate the statistics for each observation using built in statistics 
729 | functions from `terra`.  
730 | 
731 | ```{r}
732 | ndvi_mean <- terra::global(ndvi_masked, 'mean', na.rm=TRUE)
733 | ndvi_max <- terra::global(ndvi_masked, 'max', na.rm=TRUE)
734 | ndvi_min <- terra::global(ndvi_masked, 'min', na.rm=TRUE)
735 | ndvi_sd <- terra::global(ndvi_masked, 'sd', na.rm=TRUE)
736 | ```
737 | 
738 | With these stats, we can create an interactive plot using `dygraphs` library. 
739 | We will leverage the `lubridate` to convert our dates to a more usable format, 
740 | and the `xts` package to transform the dataframe to a format for `dygraph`.
741 | 
742 | ```{r}
743 | stats <- data.frame(
744 |   NDVI_Max = ndvi_max,
745 |   NDVI_Min = ndvi_min,
746 |   NDVI_mean = ndvi_mean,
747 |   NDVI_SD = ndvi_sd
748 | )
749 | stats$Date <- ymd_hms(sf_items$datetime) # convert string to date format (ISO 8601)
750 | variables = xts(x=stats[,-5], order.by=stats$Date) # Choose the cols with the variables
751 | dygraph(variables) %>%
752 |   dyAxis("y",label = "NDVI")
753 | ```
754 | 
755 | If you want to export these statistics, we can do so to a CSV file.  
756 | 
757 | ```{r}
758 | stats_name <- file.path(outDir, "HLS_NDVI_Statistics.csv")
759 | write.csv(stats,stats_name)
760 | ```
761 | 
762 | # 5. Export Output to GeoTIFF
763 | 
764 | Lastly, if you want to capture the final output files locally on your machine, 
765 | you can export them as GeoTIFFs.  
766 | 
767 | ```{r}
768 | for (i in 1:nlyr(ndvi_masked)){
769 |   file_name <- paste0("HLS.", gsub("[:]",".", names(ndvi_masked[[i]])), ".NDVI.tif")
770 |   output_name <- file.path(outDir, file_name)
771 |   terra::writeRaster(ndvi_masked[[i]], output_name, overwrite = TRUE)
772 | }
773 | ```
774 | 
775 | The raster stack object can also be written to the disk.  
776 | 
777 | ```{r, warning=FALSE}
778 | output_name <- file.path(outDir, "HLS_NDVI_stack.tif")
779 | terra::writeRaster(ndvi_masked, filename=output_name, overwrite=TRUE)
780 | ```
781 | 
782 | And we're done! You have successfully analyzed data in the cloud, exporting 
783 | just the information you needed for your area of interest rather than having to
784 | download everything.  
785 | 
786 | ------------------------------------------------------------------------
787 | 
788 | ### Contact Information
789 |   
790 |   Contact: [LPDAAC\@usgs.gov](mailto:LPDAAC@usgs.gov){.email}  
791 |   Voice: +1-866-573-3222  
792 |   Organization: Land Processes Distributed Active Archive Center (LP DAAC)  
793 |   Website: <https://lpdaac.usgs.gov/>                   
794 |   Date last modified: 09-17-2024                
795 |   
796 |   Work performed under USGS contract G0121D0001 for LP DAAC^1^.
797 |   ^1^ LP DAAC Work performed under NASA contract NNG14HH33I.
798 | 


--------------------------------------------------------------------------------