├── odc └── stac │ ├── py.typed │ ├── testing │ ├── __init__.py │ └── stac.py │ ├── bench │ ├── __main__.py │ ├── __init__.py │ ├── _report.py │ ├── _prepare.py │ └── _cli.py │ └── __init__.py ├── tests ├── __init__.py ├── data │ ├── test-product-eo.yml │ ├── test-product-eo3.yml │ ├── lidar_dem.json │ ├── only_crs_proj.json │ ├── S2A_28QCH_20200714_0_L2A.json │ └── LC08_L2SR_081119_20200101_20200823_02_T2.json ├── test-env-py310.yml ├── common │ └── __init__.py ├── notebooks │ ├── bench-runner.py │ └── bench-prep-query.py ├── conftest.py ├── test_load.py ├── test_bench.py └── test_model.py ├── docs ├── _static │ ├── .gitkeep │ └── xr-fixes.css ├── rtd-requirements.txt ├── .gitignore ├── tocs.rst ├── examples.rst ├── index.rst ├── Makefile ├── make.bat ├── api.rst ├── intro.rst ├── stac-best-practice.rst ├── samples │ └── save-cog-from-stac.py ├── stac-vs-odc.rst ├── conf.py └── benchmarking.rst ├── notebooks ├── .gitignore ├── Welcome.md ├── render-nb.sh ├── render-html.sh ├── build.sh ├── Makefile ├── README.md ├── stac-load-S2-deafrica.py ├── stac-load-e84-aws.py └── stac-load-S2-ms.py ├── .devcontainer ├── requirements.txt ├── requirements-dev.txt ├── docker-compose.yml ├── Dockerfile ├── post-create.sh └── devcontainer.json ├── binder ├── apt.txt ├── README.md ├── postBuild ├── _home │ ├── .config │ │ └── dask │ │ │ ├── dask.yaml │ │ │ └── labextension.yaml │ └── .jupyter │ │ └── lab │ │ ├── user-settings │ │ └── @jupyterlab │ │ │ └── shortcuts-extension │ │ │ └── shortcuts.jupyterlab-settings │ │ └── workspaces │ │ └── demo-2a97.jupyterlab-workspace ├── render-nb-pipe.sh ├── start └── environment.yml ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── .github ├── codecov.yml └── workflows │ ├── publish-pypi.yml │ ├── build-binder.yml │ ├── render.yml │ └── main.yml ├── scripts └── notebook_hash.py ├── .gitignore ├── pyproject.toml ├── CHANGELOG.md ├── README.rst └── LICENSE /odc/stac/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/_static/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/rtd-requirements.txt: -------------------------------------------------------------------------------- 1 | .[docs] 2 | -------------------------------------------------------------------------------- /notebooks/.gitignore: -------------------------------------------------------------------------------- 1 | *.ipynb 2 | dbg/* 3 | bk/* 4 | -------------------------------------------------------------------------------- /.devcontainer/requirements.txt: -------------------------------------------------------------------------------- 1 | -e .[test-all,docs] 2 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build/* 2 | _generated/* 3 | _api/* 4 | notebooks 5 | -------------------------------------------------------------------------------- /odc/stac/testing/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities used in tests. 
3 | """ 4 | -------------------------------------------------------------------------------- /binder/apt.txt: -------------------------------------------------------------------------------- 1 | fish 2 | git 3 | tig 4 | htop 5 | jq 6 | silversearcher-ag 7 | graphviz 8 | -------------------------------------------------------------------------------- /odc/stac/bench/__main__.py: -------------------------------------------------------------------------------- 1 | """Run main.""" 2 | 3 | from ._cli import main 4 | 5 | main() 6 | -------------------------------------------------------------------------------- /docs/tocs.rst: -------------------------------------------------------------------------------- 1 | Indices and tables 2 | ================== 3 | 4 | * :ref:`genindex` 5 | * :ref:`modindex` 6 | * :ref:`search` 7 | -------------------------------------------------------------------------------- /binder/README.md: -------------------------------------------------------------------------------- 1 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/opendatacube/odc-stac/develop?urlpath=lab/workspaces/demo) 2 | 3 | Configuration for mybinder launcher. 4 | -------------------------------------------------------------------------------- /.devcontainer/requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pylint >=3 2 | black[jupyter] 3 | isort 4 | pycodestyle 5 | pylint 6 | docutils 7 | autopep8 8 | autoflake 9 | 10 | mypy 11 | types-shapely 12 | types-cachetools 13 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v6.0.0 4 | hooks: 5 | - id: end-of-file-fixer 6 | - id: check-added-large-files 7 | - id: check-merge-conflict 8 | -------------------------------------------------------------------------------- /tests/data/test-product-eo.yml: -------------------------------------------------------------------------------- 1 | name: test_product_eo 2 | metadata_type: eo 3 | metadata: 4 | product: 5 | name: test_product_eo 6 | measurements: 7 | - name: band 8 | dtype: "float32" 9 | nodata: .nan 10 | units: "1" 11 | -------------------------------------------------------------------------------- /tests/data/test-product-eo3.yml: -------------------------------------------------------------------------------- 1 | name: test_product_eo3 2 | metadata_type: eo3 3 | metadata: 4 | product: 5 | name: test_product_eo3 6 | measurements: 7 | - name: band 8 | dtype: int16 9 | nodata: -999 10 | units: "1" 11 | -------------------------------------------------------------------------------- /binder/postBuild: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This runs as a last step of Docker build 4 | echo "+++++++++++++++++++++++++++++++++" 5 | echo "+ post build started +" 6 | echo "+++++++++++++++++++++++++++++++++" 7 | 8 | (cd binder/_home && tar c .) 
| tar x 9 | -------------------------------------------------------------------------------- /notebooks/Welcome.md: -------------------------------------------------------------------------------- 1 | # Sample Notebooks 2 | 3 | - Access Sentinel 2 Data on Planetary Computer, [open](stac-load-S2-ms.ipynb) 4 | - Works on Binder too, but you might need to decrease the resolution to fit into 2 GB of RAM 5 | - Access Sentinel 2 Data from AWS, [open](stac-load-e84-aws.ipynb) 6 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.10" 7 | 8 | python: 9 | install: 10 | - requirements: docs/rtd-requirements.txt 11 | 12 | sphinx: 13 | builder: html 14 | configuration: docs/conf.py 15 | fail_on_warning: true 16 | -------------------------------------------------------------------------------- /binder/_home/.config/dask/dask.yaml: -------------------------------------------------------------------------------- 1 | temporary-directory: /tmp 2 | 3 | distributed: 4 | dashboard: 5 | link: "/user/__JUPYTERHUB_USER__/proxy/{port}/status" 6 | worker: 7 | memory: 8 | target: 0.95 9 | spill: 0.99 10 | pause: 0.99 11 | terminate: 0.99 12 | -------------------------------------------------------------------------------- /binder/_home/.config/dask/labextension.yaml: -------------------------------------------------------------------------------- 1 | labextension: 2 | factory: 3 | module: 'dask.distributed' 4 | class: 'LocalCluster' 5 | args: [] 6 | kwargs: {} 7 | 8 | default: 9 | workers: null 10 | adapt: 11 | null 12 | # minimum: 0 13 | # maximum: 10 14 | initial: [] 15 | -------------------------------------------------------------------------------- /.devcontainer/docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | main: 3 | build: 4 | context: .
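# "context: ." resolves relative to this compose file, so the image builds from the .devcontainer/ folder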
5 | dockerfile: Dockerfile 6 | volumes: 7 | - ..:/workspace:cached 8 | - home_vscode:/home/vscode/ 9 | command: sleep infinity 10 | 11 | environment: 12 | - PYTHONUNBUFFERED=1 13 | volumes: 14 | home_vscode: 15 | -------------------------------------------------------------------------------- /.github/codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | require_ci_to_pass: yes 3 | 4 | coverage: 5 | precision: 2 6 | round: down 7 | range: "60...100" 8 | 9 | status: 10 | project: 11 | default: # This can be anything, but it needs to exist as the name 12 | # basic settings 13 | target: 60% 14 | threshold: 20% 15 | -------------------------------------------------------------------------------- /notebooks/render-nb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | src=$1 4 | dst=${2:-${src%%.py}.ipynb} 5 | 6 | echo "$src -> $dst" 7 | 8 | jupytext $src --to ipynb -o - \ 9 | | jupyter nbconvert \ 10 | --stdin \ 11 | --to notebook \ 12 | --stdout \ 13 | --ExecutePreprocessor.store_widget_state=True \ 14 | --execute > "${dst}" 15 | -------------------------------------------------------------------------------- /notebooks/render-html.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | src=$1 4 | dst=${2:-${src%%.py}.html} 5 | 6 | echo "$src -> $dst" 7 | 8 | jupytext $src --set-kernel "python3" --to ipynb -o - \ 9 | | jupyter nbconvert \ 10 | --stdin \ 11 | --to html \ 12 | --stdout \ 13 | --ExecutePreprocessor.store_widget_state=True \ 14 | --execute > "${dst}" 15 | -------------------------------------------------------------------------------- /binder/render-nb-pipe.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Expects percent script notebook on stdin 4 | # produces rendered ipynb notebook on stdout 5 | 6 | jupytext --from 'py:percent' --to ipynb -o - \ 7 | | jupyter nbconvert \ 8 | --stdin \ 9 | --to notebook \ 10 | --stdout \ 11 | --ExecutePreprocessor.store_widget_state=True \ 12 | --execute 13 | -------------------------------------------------------------------------------- /binder/start: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This runs when binder is launched 4 | 5 | date > .startup.log 6 | jupytext -k python3 $HOME/notebooks/*py 7 | jupytext -s $HOME/notebooks/*{py,md} 8 | 9 | sed -i -e "s|__JUPYTERHUB_USER__|${JUPYTERHUB_USER}|g" $HOME/.config/dask/dask.yaml 10 | sed -i -e "s|__JUPYTERHUB_USER__|${JUPYTERHUB_USER}|g" $HOME/.jupyter/lab/workspaces/demo-* 11 | 12 | exec "$@" 13 | -------------------------------------------------------------------------------- /notebooks/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | set -e 4 | 5 | indir="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P)" 6 | outdir="$(dirname $indir)/docs/notebooks" 7 | 8 | mkdir -p $outdir 9 | 10 | 11 | for infile in $(find $indir -type f -maxdepth 1 -name '*py'); do 12 | outfile="${outdir}/$(basename ${infile%%.py}.ipynb)" 13 | $indir/render-nb.sh $infile $outfile 14 | done 15 | -------------------------------------------------------------------------------- /notebooks/Makefile: -------------------------------------------------------------------------------- 1 | DKR ?= kirillodc/odc-stac-binder:latest 2 | ALL_PY := $(wildcard *.py) 3 | ALL_NB := 
$(patsubst %.py, %.ipynb, $(ALL_PY)) 4 | 5 | all: $(ALL_NB) 6 | 7 | %.ipynb: %.py 8 | @echo $< "=>" $@ 9 | docker run -i --entrypoint ./binder/render-nb-pipe.sh $(DKR) < $< > $@ 10 | 11 | debug: 12 | docker run --rm -ti --entrypoint /bin/bash -v $$(pwd):/home/jovyan/notebooks $(DKR) 13 | 14 | clean: 15 | @echo Removing Rendered Notebooks 16 | rm -f $(ALL_NB) 17 | 18 | .PHONY: debug all clean 19 | -------------------------------------------------------------------------------- /docs/examples.rst: -------------------------------------------------------------------------------- 1 | .. 2 | Note that notebooks/ folder is downloaded by conf.py from a gist for now 3 | 4 | Example Notebooks 5 | ################# 6 | 7 | |Binder| 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | 12 | notebooks/stac-load-e84-aws 13 | notebooks/stac-load-S2-ms 14 | notebooks/stac-load-S2-deafrica 15 | 16 | .. |Binder| image:: https://mybinder.org/badge_logo.svg 17 | :target: https://mybinder.org/v2/gh/opendatacube/odc-stac/develop?urlpath=lab/workspaces/demo 18 | :alt: Run Examples in Binder 19 | -------------------------------------------------------------------------------- /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PY=3.10 2 | FROM mcr.microsoft.com/devcontainers/python:1-$PY 3 | 4 | COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /usr/local/bin/ 5 | 6 | RUN --mount=type=cache,target=/var/cache \ 7 | apt-get update \ 8 | && export DEBIAN_FRONTEND=noninteractive \ 9 | && apt-get -y install --no-install-recommends \ 10 | libgdal-dev \ 11 | gdal-bin \ 12 | netcdf-bin \ 13 | pandoc \ 14 | zip unzip \ 15 | tmux tig jq fzf silversearcher-ag \ 16 | && true 17 | -------------------------------------------------------------------------------- /odc/stac/bench/__init__.py: -------------------------------------------------------------------------------- 1 | """Benchmarking tools.""" 2 | 3 | from ._prepare import SAMPLE_SITES, dump_site 4 | from ._report import load_results 5 | from ._run import ( 6 | BenchLoadParams, 7 | BenchmarkContext, 8 | TimeSample, 9 | collect_context_info, 10 | load_from_json, 11 | run_bench, 12 | ) 13 | 14 | __all__ = ( 15 | "SAMPLE_SITES", 16 | "dump_site", 17 | "BenchLoadParams", 18 | "BenchmarkContext", 19 | "TimeSample", 20 | "collect_context_info", 21 | "load_from_json", 22 | "load_results", 23 | "run_bench", 24 | ) 25 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | odc-stac 2 | ======== 3 | 4 | .. include:: intro.rst 5 | 6 | .. toctree:: 7 | :caption: Introduction 8 | :hidden: 9 | :maxdepth: 2 10 | 11 | intro.rst 12 | 13 | .. toctree:: 14 | :caption: Developer Guide 15 | :hidden: 16 | :maxdepth: 2 17 | 18 | api.rst 19 | examples.rst 20 | benchmarking.rst 21 | 22 | .. toctree:: 23 | :caption: STAC 24 | :hidden: 25 | :maxdepth: 2 26 | 27 | stac-vs-odc.rst 28 | stac-best-practice.rst 29 | 30 | .. 
toctree:: 31 | :caption: Index 32 | :hidden: 33 | :maxdepth: 2 34 | 35 | tocs.rst 36 | -------------------------------------------------------------------------------- /binder/_home/.jupyter/lab/user-settings/@jupyterlab/shortcuts-extension/shortcuts.jupyterlab-settings: -------------------------------------------------------------------------------- 1 | { 2 | "shortcuts": [ 3 | { 4 | "command": "runmenu:run-all", 5 | "keys": [ 6 | "-", 7 | "-" 8 | ], 9 | "selector": "[data-jp-kernel-user]:focus" 10 | }, 11 | { 12 | "command": "kernelmenu:restart-and-clear", 13 | "keys": [ 14 | "0", 15 | "0" 16 | ], 17 | "selector": "[data-jp-kernel-user]:focus" 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /scripts/notebook_hash.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import os.path 3 | 4 | 5 | def compute(folder: str) -> tuple[str, list[str]]: 6 | hash = hashlib.sha256() 7 | paths = [ 8 | os.path.join(folder, file_name) 9 | for file_name in os.listdir(folder) 10 | if os.path.splitext(file_name)[1] == ".py" 11 | ] 12 | paths = sorted(paths, key=str.casefold) 13 | for path in paths: 14 | with open(path, "rb") as file: 15 | bytes = file.read() 16 | hash.update(bytes) 17 | return hash.hexdigest(), paths 18 | 19 | 20 | if __name__ == "__main__": 21 | folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "notebooks")) 22 | hsh, _ = compute(folder) 23 | print(hsh) 24 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | # -W -- treat warnings as errors 7 | SPHINXOPTS ?= -W 8 | SPHINXBUILD ?= sphinx-build 9 | SOURCEDIR = . 10 | BUILDDIR = _build 11 | 12 | # Put it first so that "make" without argument is like "make help". 13 | help: 14 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 15 | 16 | .PHONY: help Makefile 17 | 18 | # Catch-all target: route all unknown targets to Sphinx using the new 19 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
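# For example, "make html" expands to 'sphinx-build -M html . _build -W' and leaves the rendered site in _build/html.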
20 | %: Makefile 21 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 22 | 23 | clean: 24 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 25 | rm -rf _generated _api 26 | -------------------------------------------------------------------------------- /binder/environment.yml: -------------------------------------------------------------------------------- 1 | name: odc-stac 2 | channels: 3 | - conda-forge 4 | 5 | dependencies: 6 | - python =3.10 7 | - pip 8 | 9 | # odc-stac dependencies 10 | - odc-geo >=0.3.2 11 | - rasterio >=1.3.9 12 | - pystac >=1.9.0 # more flexible handling of extension versions 13 | - toolz 14 | - xarray 15 | # for reading with rasterio from s3 16 | - boto3 17 | 18 | # planetary-computer lib for URL signing 19 | - planetary-computer 20 | - pystac-client 21 | 22 | # JupyterLab 23 | - jupytext 24 | - jupyter-server-proxy 25 | - ipykernel 26 | - matplotlib-base 27 | - ipympl 28 | - dask 29 | 30 | # Some Geo libs 31 | - geopandas 32 | - folium 33 | 34 | # conveniences 35 | - autopep8 36 | - black 37 | - isort 38 | - python-dotenv # for notebooks 39 | - jupyterlab_code_formatter 40 | 41 | - pip: 42 | # odc-stac local checkout 43 | - -e ../ 44 | -------------------------------------------------------------------------------- /.devcontainer/post-create.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | main_env="$HOME/envs/main" 4 | 5 | process_main_env() { 6 | declare -l -a opts 7 | local main_py="$main_env/bin/python" 8 | 9 | [ -d "$main_env" ] || { 10 | echo "Creating main virtual environment..." 11 | mkdir -p "$(dirname "$main_env")" 12 | uv venv "$main_env" 13 | } 14 | for req in .devcontainer/requirements*.txt; do 15 | opts+=(-r "$req") 16 | done 17 | source "$main_env"/bin/activate 18 | uv pip install "${opts[@]}" 19 | } 20 | 21 | process_apt() { 22 | local apt_file=".devcontainer/apt.txt" 23 | if [ -e ${apt_file} ]; then 24 | sudo apt-get -y update 25 | awk '{if ($0 ~ /^[[:space:]]*#/) next; sub(/#[^"]*$/, ""); print}' <"${apt_file}" | 26 | xargs sudo apt-get -y install 27 | fi 28 | } 29 | 30 | cd /workspace || exit 1 31 | process_main_env 32 | process_apt 33 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /tests/test-env-py310.yml: -------------------------------------------------------------------------------- 1 | # Conda environment for running tests in odc-stac 2 | # conda env create -f test-env-py310.yml 3 | # conda activate odc-stac-tests-py310 4 | 5 | name: odc-stac-tests-py310 6 | channels: 7 | - conda-forge 8 | - nodefaults 9 | 10 | dependencies: 11 | - python =3.10 12 | - pip 13 | 14 | # odc-stac dependencies 15 | - affine 16 | - jinja2 17 | - numpy 18 | - pandas 19 | - toolz 20 | - odc-geo >=0.4.7 21 | - odc-loader >=0.5.1 22 | - pystac >=1.12.1 23 | - dask 24 | - xarray 25 | - rasterio 26 | 27 | # For mypy 28 | - types-python-dateutil 29 | 30 | # For tests 31 | - pytest 32 | - pytest-httpserver 33 | - pytest-cov 34 | - pytest-timeout 35 | - pytest-vcr 36 | - mock 37 | - deepdiff 38 | - pystac-client >=0.2.0 39 | - geopandas 40 | - stackstac 41 | - zarr 42 | 43 | # for docs 44 | - sphinx 45 | - sphinx_rtd_theme 46 | - sphinx-autodoc-typehints 47 | - nbsphinx 48 | - ipywidgets 49 | - jupytext 50 | 51 | # dev 52 | - autoflake 53 | - black >=25.1.0 54 | - isort 55 | - mypy 56 | - pylint =3 57 | - pip: 58 | - -e ../ 59 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | .. _api-reference: 2 | 3 | API Reference 4 | ############# 5 | 6 | .. highlight:: python 7 | .. py:module:: odc.stac 8 | .. py:module:: odc.stac.bench 9 | 10 | 11 | odc.stac 12 | ******** 13 | 14 | .. currentmodule:: odc.stac 15 | .. autosummary:: 16 | :toctree: _api/ 17 | 18 | load 19 | configure_rio 20 | configure_s3_access 21 | parse_item 22 | parse_items 23 | extract_collection_metadata 24 | output_geobox 25 | 26 | odc.stac.ParsedItem 27 | ******************* 28 | 29 | .. currentmodule:: odc.stac 30 | .. autosummary:: 31 | :toctree: _api/ 32 | 33 | ParsedItem 34 | ParsedItem.assets 35 | ParsedItem.crs 36 | ParsedItem.geoboxes 37 | ParsedItem.image_geometry 38 | ParsedItem.resolve_bands 39 | ParsedItem.safe_geometry 40 | ParsedItem.solar_date_at 41 | ParsedItem.strip 42 | 43 | RasterBandMetadata 44 | RasterCollectionMetadata 45 | RasterLoadParams 46 | RasterSource 47 | 48 | odc.stac.bench 49 | ************** 50 | 51 | .. currentmodule:: odc.stac.bench 52 | .. 
autosummary:: 53 | :toctree: _api/ 54 | 55 | BenchmarkContext 56 | BenchLoadParams 57 | 58 | dump_site 59 | load_from_json 60 | run_bench 61 | load_results 62 | -------------------------------------------------------------------------------- /docs/_static/xr-fixes.css: -------------------------------------------------------------------------------- 1 | /* xarray widget tweaks 2 | 3 | Some styles from default theme interfere, so define more specific rules to 4 | override 5 | */ 6 | 7 | 8 | /* override theme default of 800px which is too narrow I feel*/ 9 | .wy-nav-content { 10 | max-width: 56em; 11 | } 12 | 13 | .rst-content ul.xr-var-list li>* { 14 | margin-top: 0px !important; 15 | margin-bottom: 0px !important; 16 | } 17 | 18 | .rst-content ul.xr-dim-list li { 19 | display: inline-block !important; 20 | padding: 0 !important; 21 | ; 22 | margin: 0 !important; 23 | } 24 | 25 | .rst-content dl.xr-attrs dt, 26 | .rst-content dl.xr-attrs dd { 27 | margin: 0px 0 !important; 28 | font-size: inherit !important; 29 | background: inherit !important; 30 | color: inherit !important; 31 | border-top: none !important; 32 | padding: 0px 10px 0px 0px !important; 33 | float: left !important; 34 | white-space: nowrap !important; 35 | overflow: hidden !important; 36 | text-overflow: ellipsis !important; 37 | } 38 | 39 | .rst-content dl.xr-attrs dt { 40 | font-weight: normal !important; 41 | grid-column: 1 !important; 42 | } 43 | 44 | .rst-content dl.xr-attrs dd { 45 | grid-column: 2 !important; 46 | } 47 | -------------------------------------------------------------------------------- /odc/stac/__init__.py: -------------------------------------------------------------------------------- 1 | """STAC Item -> ODC Dataset[eo3].""" 2 | 3 | from odc.loader import configure_rio, configure_s3_access 4 | from odc.loader.types import RasterBandMetadata, RasterLoadParams, RasterSource 5 | 6 | from ._mdtools import ( 7 | ConversionConfig, 8 | ParsedItem, 9 | extract_collection_metadata, 10 | output_geobox, 11 | parse_item, 12 | parse_items, 13 | ) 14 | from ._stac_load import load 15 | from .model import RasterCollectionMetadata 16 | 17 | stac_load = load 18 | 19 | 20 | __all__ = ( 21 | "ParsedItem", 22 | "RasterBandMetadata", 23 | "RasterCollectionMetadata", 24 | "RasterLoadParams", 25 | "RasterSource", 26 | "ConversionConfig", 27 | "load", 28 | "stac_load", 29 | "configure_rio", 30 | "configure_s3_access", 31 | "parse_item", 32 | "parse_items", 33 | "extract_collection_metadata", 34 | "output_geobox", 35 | ) 36 | 37 | 38 | def __dir__(): 39 | return [*__all__, "__version__"] 40 | 41 | 42 | def __getattr__(name): 43 | # pylint: disable=import-outside-toplevel 44 | if name == "__version__": 45 | from importlib.metadata import version 46 | 47 | return version(__name__) 48 | raise AttributeError(f"module {__name__} has no attribute {name}") 49 | -------------------------------------------------------------------------------- /notebooks/README.md: -------------------------------------------------------------------------------- 1 | # Sample Notebooks 2 | 3 | 4 | ## Developer Notes 5 | 6 | Do not commit `*.ipynb` files here! We use `jupytext` for keeping notebooks in 7 | version control, specifically "py:percent" format. Install `jupytext` into your 8 | jupyterlab environment, then you should be able to "Open With->Notebook" on 9 | these `.py` files. 
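For orientation, a paired py:percent file is plain Python with comment-based cell markers. A minimal sketch is shown below (header abbreviated; `tests/notebooks/bench-runner.py` in this repo shows the full header that jupytext writes):

```python
# ---
# jupyter:
#   jupytext:
#     formats: ipynb,py:percent
# ---

# %% [markdown]
# A markdown cell.

# %%
# A code cell.
print("hello from a py:percent notebook")
```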
10 | 11 | To create a new one, start with a notebook file (`.ipynb`), then use the "Pair 12 | Notebook with percent Script" command (type `Ctrl-Shift-C` when editing a notebook, 13 | then start typing "percent" to fuzzy-find the command). 14 | 15 | 16 | ## Rendered Notebooks 17 | 18 | Notebooks are executed by a GitHub action and the results are uploaded to: 19 | 20 | ``` 21 | s3://datacube-core-deployment/odc-stac/nb/odc-stac-notebooks-{nb_hash}.tar.gz 22 | https://packages.dea.ga.gov.au/odc-stac/nb/odc-stac-notebooks-{nb_hash}.tar.gz 23 | ``` 24 | 25 | Where `{nb_hash}` is a 16-character hash computed from the content of `notebooks/*.py` (see `scripts/notebook_hash.py`). 26 | 27 | By the time changes are merged into the `develop` branch there should be a 28 | pre-rendered notebook archive accessible without authentication via HTTPS. 29 | Building the documentation on the Read the Docs site will use that archive rather than 30 | attempting to run the notebooks directly. 31 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ODC-STAC", 3 | "customizations": { 4 | "vscode": { 5 | "extensions": [ 6 | "ms-python.python", 7 | "ms-python.isort", 8 | "ms-python.pylint", 9 | "ms-python.debugpy", 10 | "ms-python.black-formatter", 11 | "ms-python.mypy-type-checker", 12 | "ms-python.vscode-pylance", 13 | "ms-azuretools.vscode-docker", 14 | "ms-toolsai.jupyter", 15 | "ms-toolsai.jupyterhub", 16 | "ms-toolsai.jupyter-renderers", 17 | "ms-vscode.makefile-tools", 18 | "github.vscode-github-actions", 19 | "foxundermoon.shell-format", 20 | "timonwong.shellcheck", 21 | "streetsidesoftware.code-spell-checker", 22 | "kahole.magit" 23 | ], 24 | "settings": { 25 | "python.pythonPath": "/home/vscode/envs/main/bin/python", 26 | "python.defaultInterpreterPath": "/home/vscode/envs/main/bin/python", 27 | "jupyter.notebookFileRoot": "/workspace/${fileDirname}" 28 | } 29 | } 30 | }, 31 | "features": {}, 32 | "workspaceFolder": "/workspace", 33 | "dockerComposeFile": [ 34 | "docker-compose.yml" 35 | ], 36 | "service": "main", 37 | "postCreateCommand": "bash .devcontainer/post-create.sh || true", 38 | "remoteUser": "vscode" 39 | } 40 | -------------------------------------------------------------------------------- /odc/stac/bench/_report.py: -------------------------------------------------------------------------------- 1 | """Helper methods for benchmark reporting.""" 2 | 3 | import glob 4 | import pickle 5 | from typing import Any, Dict, Iterable, Iterator, Union 6 | 7 | import pandas as pd 8 | 9 | # pylint: disable=unsupported-assignment-operation 10 | 11 | 12 | def load_results( 13 | sources: Union[str, Iterable[str]], 14 | ) -> pd.DataFrame: 15 | """ 16 | Load benchmark run results.
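A usage sketch (assuming pickle files saved from a benchmark run): ``df = load_results("bench-*.pkl")`` yields one row per time sample, with ``submit = t1 - t0`` and ``elapsed = t2 - t0`` columns derived from the raw timestamps.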
17 | 18 | :param sources: A glob pattern or a stream of pickle file paths 19 | :return: Pandas dataframe 20 | """ 21 | 22 | def _stream(paths: Iterable[str]) -> Iterator[Dict[str, Any]]: 23 | for idx, fname in enumerate(paths): 24 | with open(fname, "rb") as src: 25 | dd = pickle.load(src) 26 | ctx = dd["context"] 27 | samples = dd["samples"] 28 | rr = ctx.to_pandas_dict() 29 | 30 | for sample in samples: 31 | t0, t1, t2 = sample 32 | yield {"experiment": idx, **rr, "t0": t0, "t1": t1, "t2": t2} 33 | 34 | if isinstance(sources, str): 35 | # glob 36 | pkl_paths: Iterable[str] = sorted(glob.glob(sources)) 37 | else: 38 | pkl_paths = sources 39 | 40 | xx = pd.DataFrame(list(_stream(pkl_paths))) 41 | xx = xx.set_index("experiment") 42 | xx["submit"] = xx.t1 - xx.t0 43 | xx["elapsed"] = xx.t2 - xx.t0 44 | return xx 45 | -------------------------------------------------------------------------------- /docs/intro.rst: -------------------------------------------------------------------------------- 1 | .. highlight:: python 2 | 3 | Overview 4 | ######## 5 | 6 | Load STAC :py:class:`pystac.Item`\s into :py:class:`xarray.Dataset`. 7 | 8 | .. code-block:: python 9 | 10 | catalog = pystac_client.Client.open(...) 11 | query = catalog.search(...) 12 | xx = odc.stac.load( 13 | query.items(), 14 | bands=["red", "green", "blue"], 15 | resolution=100, 16 | ) 17 | xx.red.plot.imshow(col="time") 18 | 19 | 20 | See :py:func:`odc.stac.load`. 21 | 22 | 23 | Installation 24 | ############ 25 | 26 | Using pip 27 | ********* 28 | 29 | .. code-block:: bash 30 | 31 | pip install odc-stac 32 | 33 | Using Conda 34 | *********** 35 | 36 | .. code-block:: bash 37 | 38 | conda install -c conda-forge odc-stac 39 | 40 | 41 | From unreleased source 42 | ********************** 43 | 44 | Using the latest unreleased code in ``conda`` is also possible. It's best to install 45 | dependencies using conda, then install ``odc-stac`` with pip. A sample 46 | ``environment.yml`` is provided below. 47 | 48 | 49 | .. code-block:: yaml 50 | 51 | channels: 52 | - conda-forge 53 | dependencies: 54 | - odc-geo >=0.1.3 55 | - xarray >=0.20.1 56 | - numpy 57 | - dask 58 | - pandas 59 | - affine 60 | - rasterio 61 | - boto3 62 | - toolz 63 | - pystac 64 | - pystac-client 65 | - pip =20 66 | - pip: 67 | - git+https://github.com/opendatacube/odc-stac/ 68 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pickle 2 | /test_env 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | .mypy_cache/ 7 | dask-worker-space/ 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *,cover 49 | .hypothesis 50 | .pytest_cache 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | 59 | # Sphinx documentation 60 | docs/_build/ 61 | 62 | # PyBuilder 63 | target/ 64 | .idea/ 65 | 66 | # iPython Notebook 67 | .ipynb_checkpoints 68 | 69 | # Mac OS X 70 | .DS_Store 71 | docs/html/ 72 | 73 | # Generated Documentation 74 | generate/ 75 | docs/notebooks/ 76 | 77 | #Local Visual Studio Code configurations 78 | .vscode/ 79 | 80 | # used to cache dev install cache 81 | .run/ 82 | 83 | # emacs virtual env link 84 | .venv 85 | 86 | /notebooks/*html 87 | /notebooks/*ipynb 88 | /notebooks/*tif 89 | /wheels/* 90 | _off_* 91 | tt.py 92 | .cursorignore 93 | -------------------------------------------------------------------------------- /tests/common/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Common test data and utilities for STAC tests. 3 | """ 4 | 5 | from pystac import Item 6 | 7 | from odc.stac._mdtools import RasterBandMetadata 8 | 9 | # fmt: off 10 | S2_ALL_BANDS = { 11 | "B01", "B02", "B03", "B04", "B05", "B06", 12 | "B07", "B08", "B09", "B11", "B12", "B8A", 13 | "AOT", "SCL", "WVP", "visual", 14 | } 15 | # fmt: on 16 | 17 | 18 | STAC_CFG = { 19 | "sentinel-2-l2a": { 20 | "assets": { 21 | "*": RasterBandMetadata("uint16", 0, "1"), 22 | "SCL": RasterBandMetadata("uint8", 0, "1"), 23 | "visual": {"data_type": "uint8", "nodata": 0, "unit": "1"}, 24 | }, 25 | "aliases": { 26 | # Work around duplicate rededge common_name 27 | # by defining custom unique aliases 28 | "rededge1": "B05", 29 | "rededge2": "B06", 30 | "rededge3": "B07", 31 | }, 32 | } 33 | } 34 | 35 | NO_WARN_CFG = {"*": {"warnings": "ignore"}} 36 | 37 | 38 | def mk_stac_item( 39 | _id, datetime="2012-12-12T00:00:00Z", geometry=None, stac_extensions=None, **props 40 | ): 41 | if stac_extensions is None: 42 | stac_extensions = [] 43 | 44 | return Item.from_dict( 45 | { 46 | "type": "Feature", 47 | "stac_version": "1.0.0", 48 | "id": str(_id), 49 | "properties": { 50 | "datetime": datetime, 51 | **props, 52 | }, 53 | "geometry": geometry, 54 | "links": [], 55 | "assets": {}, 56 | "stac_extensions": stac_extensions, 57 | } 58 | ) 59 | -------------------------------------------------------------------------------- /.github/workflows/publish-pypi.yml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | # When a PR is updated, cancel the jobs from the previous version. Merges 7 | # do not define head_ref, so use run_id to never cancel those jobs. 
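# e.g. the group expands to "Publish to PyPI-<head_ref>" on PR events, and to a unique "Publish to PyPI-<run_id>" otherwise.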
8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 10 | cancel-in-progress: true 11 | 12 | jobs: 13 | publish-pypi: 14 | if: | 15 | github.repository == 'opendatacube/odc-stac' 16 | timeout-minutes: 15 17 | runs-on: ubuntu-latest 18 | 19 | steps: 20 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 21 | 22 | - name: Download wheels from artifacts 23 | uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 24 | with: 25 | name: python-wheels 26 | path: ./wheels/clean 27 | 28 | - name: Setup Python 29 | uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 30 | with: 31 | python-version: "3.10" 32 | 33 | - name: Install Twine 34 | run: | 35 | python -m pip install --upgrade pip 36 | python -m pip install --upgrade setuptools 37 | python -m pip install --upgrade \ 38 | toml \ 39 | wheel \ 40 | twine 41 | python -m pip freeze 42 | 43 | - name: Upload to PyPI 44 | env: 45 | TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} 46 | TWINE_USERNAME: __token__ 47 | 48 | run: | 49 | ls wheels/clean/ 50 | twine upload --non-interactive --skip-existing wheels/clean/* 51 | -------------------------------------------------------------------------------- /docs/stac-best-practice.rst: -------------------------------------------------------------------------------- 1 | Best Practices 2 | ############## 3 | 4 | :mod:`odc.stac` can operate on STAC items with only minimal information present, 5 | however the user experience is best when the following information is included: 6 | ``data_type`` and ``nodata`` from `Raster Extension`_, ``proj:{shape,transform,epsg}`` 7 | from `Projection Extension`_. 8 | 9 | For a full list of understood extension elements, see the table below. 10 | 11 | .. list-table:: 12 | 13 | * - `Raster Extension`_ 14 | - 15 | * - ``data_type`` 16 | - used to determine output pixel type 17 | * - ``nodata`` 18 | - used when combining multiple items into one raster plane 19 | * - ``unit`` 20 | - passed on as an attribute 21 | (can be useful for further processing) 22 | * - *[planned]* ``scale``, ``offset`` 23 | - currently ignored, but will be supported in the future 24 | 25 | * - `Projection Extension`_ 26 | - 27 | * - ``proj:shape`` 28 | - contains image size per asset 29 | * - ``proj:transform`` 30 | - contains geo-registration per asset 31 | * - ``proj:epsg`` 32 | - contains native CRS 33 | * - ``proj:wkt2``, ``proj:projjson`` 34 | - can be used instead of ``proj:epsg`` for CRS without EPSG code 35 | * - `Electro Optical Extension`_ 36 | - 37 | * - ``eo:bands.common_name`` 38 | - used to assign an alias for a band 39 | (use ``red`` instead of ``B04``). 40 | 41 | 42 | Assumptions 43 | =========== 44 | 45 | Items from the same collection are assumed to have the same number and names of 46 | bands, and bands are assumed to use the same ``data_type`` across the 47 | collection. 48 | 49 | It is assumed that Assets within a single Item share a common native projection. 50 | 51 | .. _`Raster Extension`: https://github.com/stac-extensions/raster 52 | .. _`Projection Extension`: https://github.com/stac-extensions/projection 53 | ..
_`Electro Optical Extension`: https://github.com/stac-extensions/eo 54 | -------------------------------------------------------------------------------- /.github/workflows/build-binder.yml: -------------------------------------------------------------------------------- 1 | name: Build Binder Image 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - develop 8 | paths: 9 | - 'binder/**' 10 | - '.github/workflows/build-binder.yml' 11 | 12 | # When a PR is updated, cancel the jobs from the previous version. Merges 13 | # do not define head_ref, so use run_id to never cancel those jobs. 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | repo2docker: 20 | timeout-minutes: 30 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 25 | 26 | - name: Build Binder Image 27 | uses: jupyterhub/repo2docker-action@69702685940e406c5bc32bc26395bbacda7ec9d3 # 0.21 28 | id: dkr 29 | with: 30 | IMAGE_NAME: kirillodc/odc-stac-binder 31 | DOCKER_USERNAME: kirillodc 32 | DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} 33 | 34 | - name: Print Notice 35 | uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 36 | env: 37 | DKR: ${{ steps.dkr.outputs.IMAGE_SHA_NAME }} 38 | with: 39 | script: | 40 | const {DKR} = process.env 41 | core.notice(`Built Docker Image: '${DKR}'`) 42 | 43 | - name: Dump conda environment 44 | run: | 45 | echo "# ${DKR}" > /tmp/environment.yaml 46 | docker run --rm \ 47 | --entrypoint /srv/conda/envs/notebook/bin/mamba \ 48 | ${DKR} \ 49 | env export | tee --append /tmp/environment.yaml 50 | env: 51 | DKR: ${{ steps.dkr.outputs.IMAGE_SHA_NAME }} 52 | 53 | - name: Publish environment.yaml artifact 54 | uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 55 | with: 56 | name: environment 57 | path: /tmp/environment.yaml 58 | if-no-files-found: error 59 | -------------------------------------------------------------------------------- /tests/notebooks/bench-runner.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,py:percent 5 | # text_representation: 6 | # extension: .py 7 | # format_name: percent 8 | # format_version: '1.3' 9 | # jupytext_version: 1.13.0 10 | # kernelspec: 11 | # display_name: ODC 12 | # language: python 13 | # name: odc 14 | # --- 15 | 16 | # %% 17 | import json 18 | import time 19 | from timeit import default_timer as t_now 20 | 21 | import datacube 22 | import numpy as np 23 | import odc.stac 24 | import planetary_computer as pc 25 | import pystac.item 26 | import pystac_client 27 | import rioxarray 28 | import stackstac 29 | import xarray as xr 30 | from distributed import Client 31 | from distributed import wait as dask_wait 32 | from odc.stac.bench import ( 33 | BenchLoadParams, 34 | collect_context_info, 35 | load_from_json, 36 | run_bench, 37 | ) 38 | 39 | # generated by bench-prep-query notebook 40 | fname = [ 41 | "site1-20200606-tall-strip-africa.geojson", 42 | "site2-2020_jun_jul-35MNM.geojson", 43 | ][0] 44 | 45 | print(f"Load from file: {fname}") 46 | all_features = json.load(open(fname, "rt")) 47 | print(json.dumps(all_features.get("properties", {}), indent=2)) 48 | 49 | # %% 50 | params = BenchLoadParams( 51 | scenario=fname, 52 | method="stackstac", 53 | bands=("B02", "B03", "B04"), 54 | chunks=(2048, 2048), 55 | patch_url=pc.sign, 56 | 
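# planetary_computer's pc.sign rewrites asset URLs to include short-lived SAS tokens so the rasters can be read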
extra={ 57 | "stackstac": {"dtype": "uint16", "fill_value": 0}, 58 | "odc-stac": {"groupby": "solar_day", "stac_cfg": {"*": {"warnings": "ignore"}}}, 59 | }, 60 | # resolution = (2**5)*10, 61 | # crs="epsg:3857", 62 | ) 63 | 64 | xx = load_from_json( 65 | all_features, 66 | params.with_method("stackstac"), 67 | ) 68 | 69 | yy = load_from_json( 70 | all_features, 71 | params.with_method("odc-stac"), 72 | ) 73 | 74 | assert xx.spec.transform == xx.geobox.transform 75 | assert xx.geobox == yy.geobox 76 | xx 77 | 78 | # %% 79 | client = Client("tcp://127.0.0.1:8786") 80 | if len(client.futures) > 0: 81 | print("Restarting Client") 82 | client.restart() 83 | display(client) 84 | 85 | # %% 86 | rr_odc, results_odc = run_bench(yy, client, ntimes=3) 87 | 88 | # %% 89 | rr_stc, results_stc = run_bench(xx, client, ntimes=3) 90 | -------------------------------------------------------------------------------- /binder/_home/.jupyter/lab/workspaces/demo-2a97.jupyterlab-workspace: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "layout-restorer:data": { 4 | "main": { 5 | "dock": { 6 | "type": "split-area", 7 | "orientation": "vertical", 8 | "sizes": [ 9 | 1.0 10 | ], 11 | "children": [ 12 | { 13 | "type": "tab-area", 14 | "currentIndex": 0, 15 | "widgets": [ 16 | "markdownviewer-widget:notebooks/Welcome.md", 17 | "notebook:notebooks/stac-load-e84-aws.ipynb", 18 | "notebook:notebooks/stac-load-S2-ms.ipynb" 19 | ] 20 | } 21 | ] 22 | }, 23 | "current": "markdownviewer-widget:notebooks/Welcome.md" 24 | }, 25 | "down": { 26 | "size": 0, 27 | "widgets": [] 28 | }, 29 | "left": { 30 | "collapsed": true, 31 | "widgets": [ 32 | "filebrowser", 33 | "running-sessions", 34 | "dask-dashboard-launcher", 35 | "git-sessions", 36 | "@jupyterlab/toc:plugin", 37 | "code-snippet-extension", 38 | "extensionmanager.main-view" 39 | ] 40 | }, 41 | "right": { 42 | "collapsed": true, 43 | "widgets": [ 44 | "jp-property-inspector", 45 | "debugger-sidebar" 46 | ] 47 | }, 48 | "relativeSizes": [ 49 | 0, 50 | 1, 51 | 0 52 | ] 53 | }, 54 | "file-browser-filebrowser:cwd": { 55 | "path": "notebooks" 56 | }, 57 | "markdownviewer-widget:notebooks/Welcome.md": { 58 | "data": { 59 | "path": "notebooks/Welcome.md", 60 | "factory": "Markdown Preview" 61 | } 62 | }, 63 | "notebook:notebooks/stac-load-e84-aws.ipynb": { 64 | "data": { 65 | "path": "notebooks/stac-load-e84-aws.ipynb", 66 | "factory": "Notebook" 67 | } 68 | }, 69 | "notebook:notebooks/stac-load-S2-ms.ipynb": { 70 | "data": { 71 | "path": "notebooks/stac-load-S2-ms.ipynb", 72 | "factory": "Notebook" 73 | } 74 | }, 75 | "dask-dashboard-launcher": { 76 | "url": "/user/__JUPYTERHUB_USER__/proxy/8787", 77 | "cluster": "" 78 | } 79 | }, 80 | "metadata": { 81 | "id": "demo" 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /tests/notebooks/bench-prep-query.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,py:percent 5 | # text_representation: 6 | # extension: .py 7 | # format_name: percent 8 | # format_version: '1.3' 9 | # jupytext_version: 1.13.0 10 | # kernelspec: 11 | # display_name: ODC 12 | # language: python 13 | # name: odc 14 | # --- 15 | 16 | # %% 17 | import json 18 | from timeit import default_timer as t_now 19 | 20 | import geopandas as gpd 21 | import numpy as np 22 | import odc.stac 23 | import planetary_computer as pc 24 | import pystac.item 25 | import pystac_client 26 | 
from pathlib import Path
from dask.utils import format_bytes 27 | from distributed import Client 28 | from distributed import wait as dask_wait 29 | 30 | if "geom_query" in locals(): 31 | bbox = tuple(geom_query.boundingbox) 32 | 33 | mode = "site1-tall" 34 | if mode == "site1-tall": 35 | # mgrs_tiles = ["35MNM", "35LNL", "35LNK", "35LNJ", "35LNH", "35LNG", "35LNF", "35LNE", "35LND"] 36 | bbox = (27.345815, -14.98724, 27.565542, -7.710992) # Narrow/Tall epsg:32735 37 | file_id = "site1-20200606-tall-strip-africa" 38 | datetime = "2020-06-06" 39 | query = {} 40 | elif mode == "site2": 41 | bbox = None 42 | file_id = "site2-2020_jun_jul-35MNM" 43 | datetime = "2020-06/2020-07" 44 | query = { 45 | "s2:mgrs_tile": {"eq": "35MNM"}, 46 | "s2:nodata_pixel_percentage": {"lt": 10}, 47 | } 48 | 49 | 50 | cat = pystac_client.Client.open("https://planetarycomputer.microsoft.com/api/stac/v1") 51 | search = cat.search( 52 | collections=["sentinel-2-l2a"], 53 | datetime=datetime, 54 | query=query, 55 | bbox=bbox, 56 | ) 57 | print("Query API end-point") 58 | all_features = search.item_collection_as_dict() 59 | 60 | all_features["properties"] = dict(url=search.url, query=search._parameters) 61 | all_features["properties"] 62 | 63 | # %% 64 | out_path = Path(f"{file_id}.geojson") 65 | if out_path.exists(): 66 | print(f"File exists, keeping previous version: {out_path}") 67 | else: 68 | print(f"Writing to: {out_path}") 69 | with open(out_path, "wt") as dst: json.dump(all_features, dst) 70 | 71 | # %% 72 | all_items = [pystac.item.Item.from_dict(f) for f in all_features["features"]] 73 | 74 | # %% 75 | gdf = gpd.GeoDataFrame.from_features(all_features, "epsg:4326") 76 | display(set(gdf["s2:mgrs_tile"].values), set(gdf.platform), len(set(gdf.datetime))) 77 | 78 | _map = gdf.explore( 79 | "s2:mgrs_tile", 80 | categorical=True, 81 | tooltip=[ 82 | "s2:mgrs_tile", 83 | "datetime", 84 | "s2:nodata_pixel_percentage", 85 | "eo:cloud_cover", 86 | ], 87 | popup=True, 88 | style_kwds=dict(fillOpacity=0.0, width=2), 89 | name="STAC", 90 | ) 91 | display(_map) 92 | 93 | # %% 94 | display(gdf.head()) 95 | # gdf[gdf['s2:nodata_pixel_percentage']>10].explore() 96 | 97 | # %% 98 | -------------------------------------------------------------------------------- /docs/samples/save-cog-from-stac.py: -------------------------------------------------------------------------------- 1 | """ 2 | Save Landsat 8 pass to GeoTIFF (COG). 3 | 4 | This program captures one pass of Band 4 (red) of Landsat 8 to a single 5 | cloud optimized GeoTIFF image. The produced image is rotated to maximize 6 | the proportion of valid pixels in the result. Data is saved in EPSG:3857 at 7 | native resolution (30m). The produced TIFF is about 4.7GiB.
8 | 9 | Data is sourced from Microsoft Planetary Computer: 10 | 11 | https://planetarycomputer.microsoft.com/ 12 | 13 | Python environment 14 | 15 | ```bash 16 | pip install odc-stac==0.3.0rc1 tqdm planetary_computer pystac-client 17 | ``` 18 | 19 | """ 20 | 21 | import planetary_computer 22 | import pystac_client 23 | from affine import Affine 24 | from dask.utils import format_bytes 25 | from odc.geo import geom 26 | from odc.geo.geobox import GeoBox 27 | from tqdm.auto import tqdm 28 | 29 | from odc.stac import configure_rio 30 | from odc.stac import load as stac_load 31 | 32 | res = 30 # resolution 33 | a = 12.7 # rotation in degrees 34 | band = "SR_B4" 35 | 36 | catalog = pystac_client.Client.open( 37 | "https://planetarycomputer.microsoft.com/api/stac/v1" 38 | ) 39 | 40 | items = catalog.search( 41 | collections=["landsat-8-c2-l2"], 42 | datetime="2021-07-01T08:00:00Z/2021-07-01T09:00:00Z", 43 | bbox=(-180, -50, 180, 50), 44 | ).item_collection() 45 | 46 | # Compute Polygon of the pass in EPSG:3857 47 | ls8_pass = geom.unary_union( 48 | geom.Geometry(item.geometry, "epsg:4326").to_crs("epsg:3857") for item in items 49 | ) 50 | assert ls8_pass is not None 51 | 52 | # Construct rotated GeoBox 53 | # rotate geometry 54 | # construct axis aligned geobox in rotated space 55 | # then rotate geobox the other way 56 | gbox = Affine.rotation(-a) * GeoBox.from_geopolygon( 57 | ls8_pass.transform(Affine.rotation(a)), 58 | resolution=res, 59 | ) 60 | 61 | # Assume COG datasource, disables looking for external files (it's slow in the cloud) 62 | configure_rio(cloud_defaults=True) 63 | 64 | print(f"Loading {band} => {gbox.shape.x:,d}x{gbox.shape.y:,d}") 65 | xx = stac_load( 66 | items, 67 | like=gbox, 68 | bands=[band], 69 | dtype="int16", 70 | nodata=0, 71 | groupby="solar_day", 72 | resampling="average", 73 | pool=4, # Use 4 cores for loading 74 | progress=tqdm, # 75 | patch_url=planetary_computer.sign, 76 | ) 77 | print("Load finished") 78 | 79 | ts = xx.time[0].dt.strftime("%Y%m%d").item() 80 | fname = f"{band}-{ts}-{res}m.tif" 81 | print( 82 | f"Will write image to: '{fname}' Raw Size is: {format_bytes(xx[band].data.size*xx[band].dtype.itemsize)}" 83 | ) 84 | 85 | xx[band].odc.write_cog( 86 | fname, 87 | overwrite=True, 88 | blocksize=2048, 89 | ovr_blocksize=1024, 90 | overview_resampling="average", 91 | intermediate_compression={"compress": "zstd", "zstd_level": 1}, 92 | use_windowed_writes=True, 93 | compress="zstd", 94 | zstd_level=6, 95 | BIGTIFF=True, 96 | SPARSE_OK=True, 97 | NUM_THREADS=4, 98 | ) 99 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "odc-stac" 3 | description = "Tooling for converting STAC metadata to ODC data model" 4 | version = "0.5.0" 5 | authors = [ 6 | {name = "Open Data Cube"} 7 | ] 8 | maintainers = [ 9 | {name = "Open Data Cube"} 10 | ] 11 | readme = "README.rst" 12 | license = {text = "Apache License 2.0"} 13 | requires-python = ">=3.10" 14 | classifiers = [ 15 | "Intended Audience :: Developers", 16 | "Operating System :: OS Independent", 17 | "Programming Language :: Python :: 3.10", 18 | "Programming Language :: Python :: 3.11", 19 | "Programming Language :: Python :: 3.12", 20 | "Programming Language :: Python :: 3.13", 21 | "Topic :: Software Development :: Libraries :: Python Modules", 22 | "Topic :: Scientific/Engineering :: GIS", 23 | "Typing :: Typed" 24 | ] 25 | dependencies = [ 26 | "affine", 27 
| "odc-geo>=0.4.7", 28 | "odc-loader>=0.6.0", 29 | "rasterio>=1.0.0,!=1.3.0,!=1.3.1", 30 | "dask[array]", 31 | "numpy>=1.20.0", 32 | "pandas", 33 | "pystac>=1.0.0,<2", 34 | "toolz", 35 | "typing-extensions", 36 | "xarray>=0.19" 37 | ] 38 | 39 | [project.optional-dependencies] 40 | botocore = ["botocore"] 41 | docs = [ 42 | "sphinx", 43 | "sphinx_rtd_theme", 44 | "nbsphinx", 45 | "sphinx-autodoc-typehints", 46 | "matplotlib-inline", 47 | "pandas", 48 | "distributed", 49 | "ipython", 50 | "ipykernel" 51 | ] 52 | test = [ 53 | "pytest", 54 | "pytest-cov", 55 | "pytest-timeout", 56 | "pystac_client", 57 | "distributed", 58 | "geopandas" 59 | ] 60 | test-all = [ 61 | "pytest", 62 | "pytest-cov", 63 | "pytest-timeout", 64 | "pystac_client", 65 | "distributed", 66 | "geopandas", 67 | "stackstac" 68 | ] 69 | 70 | [project.urls] 71 | Documentation = "https://odc-stac.readthedocs.io/en/latest/" 72 | "Bug Reporting" = "https://github.com/opendatacube/odc-stac/issues" 73 | Homepage = "https://github.com/opendatacube/odc-stac/" 74 | 75 | [build-system] 76 | requires = ["flit_core >=3.2,<4"] 77 | build-backend = "flit_core.buildapi" 78 | 79 | [tool.flit.module] 80 | name = "odc.stac" 81 | 82 | [tool.mypy] 83 | python_version = "3.10" 84 | ignore_missing_imports = true 85 | allow_redefinition = true 86 | enable_error_code = ["explicit-override"] 87 | warn_redundant_casts = true 88 | warn_unused_ignores = true 89 | plugins = "numpy.typing.mypy_plugin" 90 | explicit_package_bases = true 91 | 92 | [tool.coverage.run] 93 | omit = [ 94 | "tests/*", 95 | "*/test_*" 96 | ] 97 | 98 | [tool.isort] 99 | profile = "black" 100 | 101 | [tool.pylint.messages_control] 102 | max-line-length = 120 103 | max-args = 15 104 | max-positional-arguments = 12 105 | disable = [ 106 | "missing-function-docstring", 107 | "invalid-name", 108 | "fixme", 109 | "wrong-import-order", 110 | "duplicate-code", 111 | "ungrouped-imports", 112 | ] 113 | -------------------------------------------------------------------------------- /odc/stac/bench/_prepare.py: -------------------------------------------------------------------------------- 1 | """Utilities for benchmarking.""" 2 | 3 | import json 4 | from pathlib import Path 5 | from typing import Any, Dict 6 | 7 | # pylint: disable=import-outside-toplevel 8 | 9 | SAMPLE_SITES = { 10 | "s2-ms-mosaic": { 11 | "file_id": "s2-ms-mosaic_2020-06-06--P1D", 12 | "api": "https://planetarycomputer.microsoft.com/api/stac/v1", 13 | "search": { 14 | "collections": ["sentinel-2-l2a"], 15 | "datetime": "2020-06-06", 16 | "bbox": [27.345815, -14.98724, 27.565542, -7.710992], 17 | "query": {}, 18 | }, 19 | }, 20 | "s2-ms-deep": { 21 | "file_id": "s2-ms-deep_2020-06--P2M_35MNM", 22 | "api": "https://planetarycomputer.microsoft.com/api/stac/v1", 23 | "search": { 24 | "collections": ["sentinel-2-l2a"], 25 | "datetime": "2020-06/2020-07", 26 | "bbox": None, 27 | "query": { 28 | "s2:mgrs_tile": {"eq": "35MNM"}, 29 | "s2:nodata_pixel_percentage": {"lt": 10}, 30 | }, 31 | }, 32 | }, 33 | } 34 | 35 | 36 | def dump_site(site: Dict[str, Any], overwrite: bool = False) -> Dict[str, Any]: 37 | """ 38 | Prepare input for benchmarking. 39 | 40 | Queries API end-point according to site configuration and dumps result into a geojson file. Site 41 | configuration must include ``file_id:str, api:str, search:Dict[str,Any]``. 42 | 43 | .. 
code-block:: json 44 | 45 | { 46 | "file_id": "ms-s2-long-mosaic_2020-06-06--P1D", 47 | "api": "https://planetarycomputer.microsoft.com/api/stac/v1", 48 | "search": { 49 | "collections": ["sentinel-2-l2a"], 50 | "datetime": "2020-06-06", 51 | "bbox": [ 27.345815, -14.98724, 27.565542, -7.710992], 52 | "query": {} 53 | } 54 | } 55 | 56 | :param site: Definition of the test query 57 | :param overwrite: overwrite existing file 58 | :return: Returns GeoJSON FeatureCollection with extra metadata about the query 59 | """ 60 | import pystac_client 61 | 62 | api = site["api"] 63 | search = site["search"] 64 | 65 | cat = pystac_client.Client.open(api) 66 | search = cat.search(**search) 67 | print(f"Query API end-point: {api}") 68 | all_features = search.item_collection_as_dict() 69 | all_features["properties"] = { 70 | "api": search.url, 71 | "search": search._parameters, # pylint: disable=protected-access 72 | } 73 | 74 | out_path = Path(f"{site['file_id']}.geojson") 75 | if out_path.exists(): 76 | if overwrite: 77 | print(f"Will overwrite: {out_path}") 78 | else: 79 | print(f"File exists, keeping previous version: {out_path}") 80 | return all_features 81 | 82 | print(f"Writing to: {out_path}") 83 | with open(out_path, "wt", encoding="utf8") as dst: 84 | json.dump(all_features, dst) 85 | 86 | return all_features 87 | -------------------------------------------------------------------------------- /tests/data/lidar_dem.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "Feature", 3 | "stac_version": "1.0.0-beta.2", 4 | "id": "lidar_id", 5 | "properties": { 6 | "start_datetime": "2012-01-01T00:00:00Z", 7 | "end_datetime": "2012-01-01T00:00:00Z", 8 | "resolution": 1.0, 9 | "data_type": "float32", 10 | "derived_from": "Category 1 Lidar", 11 | "platform": "Aircraft", 12 | "interpolation_type": "TIN", 13 | "horizontal_datum": "GDA94", 14 | "vertical_datum": "AHD71 - using local Geoid model", 15 | "model_type": "DEM", 16 | "horizontal_accuracy": "+/-0.80 @95% Confidence Interval", 17 | "vertical_accuracy": "+/-0.30 @95% Confidence Interval", 18 | "sensor": "ALS50 (SN101)", 19 | "proj:epsg": 28355, 20 | "proj:shape": [ 21 | 2000, 22 | 2000 23 | ], 24 | "proj:transform": [ 25 | 1.0, 26 | 0.0, 27 | 766000.0, 28 | 0.0, 29 | -1.0, 30 | 6732000.0, 31 | 0.0, 32 | 0.0, 33 | 1.0 34 | ], 35 | "datetime": null 36 | }, 37 | "geometry": { 38 | "type": "Polygon", 39 | "coordinates": [ 40 | [ 41 | [ 42 | 149.74413486135487, 43 | -29.513331085946845 44 | ], 45 | [ 46 | 149.74462179934187, 47 | -29.531360829758146 48 | ], 49 | [ 50 | 149.76523815043552, 51 | -29.53093320024493 52 | ], 53 | [ 54 | 149.7647475661126, 55 | -29.512903768601603 56 | ], 57 | [ 58 | 149.74413486135487, 59 | -29.513331085946845 60 | ] 61 | ] 62 | ] 63 | }, 64 | "links": [ 65 | { 66 | "rel": "root", 67 | "href": "s3://example-bucket/catalog.json", 68 | "type": "application/json" 69 | }, 70 | { 71 | "rel": "collection", 72 | "href": "s3://example-bucket/lidar_collection/collection.json", 73 | "type": "application/json" 74 | }, 75 | { 76 | "rel": "parent", 77 | "href": "s3://example-bucket/lidar_collection/collection.json", 78 | "type": "application/json" 79 | }, 80 | { 81 | "rel": "self", 82 | "href": "s3://example-bucket/lidar_collection/lidar_id.json", 83 | "type": "application/json" 84 | } 85 | ], 86 | "assets": { 87 | "dem": { 88 | "href": "s3://example-bucket/lidar_id.tif", 89 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 90 | "title": "Cloud-Optimized Geotiff" 91 | 
} 92 | }, 93 | "bbox": [ 94 | 149.74413486135487, 95 | -29.531360829758146, 96 | 149.76523815043552, 97 | -29.512903768601603 98 | ], 99 | "stac_extensions": [ 100 | "projection" 101 | ], 102 | "collection": "lidar_collection" 103 | } 104 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 6 | ## [v0.3.5] - 2023-01-18 7 | 8 | - Fix data loading with Dask for collections where items might have "missing" assets 9 | 10 | ## [v0.3.4] - 2022-12-08 11 | 12 | - Implement `fail_on_error=False` option for skipping over errors while loading data 13 | - Maintenance of GitHub Actions 14 | 15 | ## [v0.3.3] - 2022-10-20 16 | 17 | - Fixes to support `xarray >= 2022.10.0` 18 | 19 | ## [v0.3.2] - 2022-09-09 20 | 21 | - Multi-band support when parsing STAC items 22 | - Remove ambiguous alias warnings and errors; instead pick the "best" band for a 23 | given common name based on a simple heuristic (favour single-band assets over 24 | multi-band, use alphabetical order when band count is the same). 25 | - Accept `.` syntax for specifying bands 26 | - Support files with GCP-based geo-reference 27 | - Robust handling of transforms that "break" item geometry, better handling of cases 28 | where item geometry doesn't project cleanly into the destination projection 29 | - Fix error in GDAL environment configuration for the non-Dask case 30 | 31 | ## [v0.3.1] - 2022-06-28 32 | 33 | - Use asset key as a canonical name, fixes Landsat collection parsing 34 | 35 | ## [v0.3.0] - 2022-06-06 36 | 37 | - No longer depend on the `datacube` library 38 | - Significantly smaller set of compulsory dependencies, easier to install/deploy 39 | - Using the `odc-geo` library instead of `datacube` for `GeoBox` and `Geometry` classes 40 | - Can load data into rotated pixel planes ([Example](https://github.com/opendatacube/odc-stac/wiki/Generating-Rotated-Images-to-Save-Space)) 41 | - Arbitrary grouping of STAC items into pixel planes with user-supplied grouping methods or grouping by property name 42 | - Better handling of credentials and other GDAL state in a distributed context 43 | - credentials and GDAL environment configuration were previously part of the global state; that global state is now removed, so you can access collections with different permissions from the same Dask cluster (for example, mixing public and private access).
44 | - Parallelized data loading even when not using Dask 45 | - Progress reporting for non-Dask load with `tqdm` 46 | 47 | ## [v0.2.4] - 2022-01-19 48 | 49 | ### Changed 50 | 51 | - Removed `odc.index.` module 52 | 53 | ## [v0.2.3] - 2022-01-05 54 | 55 | ### Added 56 | 57 | - This CHANGELOG 58 | - `requirements-dev.txt` 59 | - Documentation 60 | - Upload built conda environment as an artifact 61 | - Notebook rendering to GitHub Actions, including hash-based artifact checks 62 | - Initial benchmarking tooling, still in progress 63 | 64 | ### Changed 65 | 66 | - Moved publishing steps into separate workflows 67 | - Deprecated imports from `odc.index.*` 68 | - Removed `.units` attribute from `.time` axis for better inter-op with `.to_zarr`, `.to_netcdf` 69 | 70 | ### Fixed 71 | 72 | - Handling of STAC Items with only partial `proj` data 73 | - Typos in documentation 74 | 75 | ## [v0.2.2] - 2021-10-25 76 | 77 | ### Added 78 | 79 | - Binder launcher to README 80 | - Another USGS STAC example for Landsat SR 81 | - Documentation 82 | 83 | ### Changed 84 | 85 | - Cleaned up test fixtures 86 | - Relaxed `is_raster_data` check 87 | - Force data band decision for explicitly configured bands 88 | - Moved constants into global scope 89 | 90 | ## [v0.2.1] - 2021-10-18 91 | 92 | Initial release as a standalone project. 93 | Previously, this project was part of https://github.com/opendatacube/odc-tools. 94 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | odc.stac 2 | ######## 3 | 4 | |Documentation Status| |Test Status| |Test Coverage| |Binder| |Discord| 5 | 6 | Load STAC items into ``xarray`` Datasets. Process locally or distribute data 7 | loading and computation with Dask_. 8 | 9 | Usage 10 | ##### 11 | 12 | 13 | odc.stac.load 14 | ~~~~~~~~~~~~~ 15 | 16 | .. code-block:: python 17 | 18 | catalog = pystac_client.Client.open(...) 19 | query = catalog.search(...) 20 | xx = odc.stac.load( 21 | query.items(), 22 | bands=["red", "green", "blue"], 23 | ) 24 | xx.red.plot.imshow(col="time") 25 | 26 | For more details see `Documentation`_ and `Sample Notebooks`_, or try it out on Binder_. 27 | 28 | 29 | Installation 30 | ############ 31 | 32 | Using pip 33 | ~~~~~~~~~ 34 | 35 | .. code-block:: bash 36 | 37 | pip install odc-stac 38 | 39 | To install with ``botocore`` support (for working with AWS): 40 | 41 | .. code-block:: bash 42 | 43 | pip install 'odc-stac[botocore]' 44 | 45 | 46 | Using Conda 47 | ~~~~~~~~~~~ 48 | 49 | This package is available on the ``conda-forge`` channel: 50 | 51 | .. code-block:: bash 52 | 53 | conda install -c conda-forge odc-stac 54 | 55 | 56 | From unreleased source 57 | ~~~~~~~~~~~~~~~~~~~~~~ 58 | 59 | To use the development version of ``odc-stac``, install dependencies from ``conda``, then 60 | install ``odc-stac`` with ``pip``. 61 | 62 | A sample ``environment.yml`` is provided below. 63 | 64 | .. code-block:: yaml 65 | 66 | channels: 67 | - conda-forge 68 | dependencies: 69 | - odc-geo 70 | - xarray 71 | - numpy 72 | - dask 73 | - pandas 74 | - affine 75 | - rasterio 76 | - toolz 77 | - pystac 78 | - pystac-client 79 | - pip 80 | - pip: 81 | - "git+https://github.com/opendatacube/odc-stac/" 82 | 83 | Developing 84 | ########## 85 | 86 | To develop ``odc-stac`` locally, it's best to use the provided devcontainer_. 87 | 88 | ..
code-block:: bash 89 | 90 | git clone https://github.com/opendatacube/odc-stac 91 | code odc-stac 92 | 93 | Once running inside the dev container, run tests with pytest_: 94 | 95 | .. code-block:: bash 96 | 97 | pytest 98 | 99 | Linting is provided by mypy_, pylint_, and black_: 100 | 101 | .. code-block:: bash 102 | 103 | black --check . 104 | pylint -v odc 105 | mypy odc 106 | 107 | 108 | .. |Documentation Status| image:: https://readthedocs.org/projects/odc-stac/badge/?version=latest 109 | :target: https://odc-stac.readthedocs.io/en/latest/?badge=latest 110 | :alt: Documentation Status 111 | 112 | .. |Test Status| image:: https://github.com/opendatacube/odc-stac/actions/workflows/main.yml/badge.svg 113 | :target: https://github.com/opendatacube/odc-stac/actions/workflows/main.yml 114 | :alt: Test Status 115 | 116 | .. |Test Coverage| image:: https://codecov.io/gh/opendatacube/odc-stac/branch/develop/graph/badge.svg?token=HQ8nTuZHH5 117 | :target: https://codecov.io/gh/opendatacube/odc-stac 118 | :alt: Test Coverage 119 | 120 | .. |Binder| image:: https://mybinder.org/badge_logo.svg 121 | :target: https://mybinder.org/v2/gh/opendatacube/odc-stac/develop?urlpath=lab/workspaces/demo 122 | :alt: Run Examples in Binder 123 | 124 | .. |Discord| image:: https://img.shields.io/discord/1212501566326571070?label=Discord&logo=discord&logoColor=white&color=7289DA 125 | :target: https://discord.gg/4hhBQVas5U 126 | :alt: Join Discord for support 127 | 128 | .. _Binder: https://mybinder.org/v2/gh/opendatacube/odc-stac/develop?urlpath=lab/workspaces/demo 129 | 130 | .. _pytest: https://docs.pytest.org 131 | 132 | .. _mypy: http://mypy-lang.org/ 133 | 134 | .. _pylint: https://pylint.org/ 135 | 136 | .. _black: https://github.com/psf/black 137 | 138 | .. _`Documentation`: https://odc-stac.readthedocs.io/ 139 | 140 | .. _`Sample Notebooks`: https://odc-stac.readthedocs.io/en/latest/examples.html 141 | 142 | .. _Dask: https://dask.org/ 143 | 144 | .. _devcontainer: https://code.visualstudio.com/docs/devcontainers/containers 145 | -------------------------------------------------------------------------------- /docs/stac-vs-odc.rst: -------------------------------------------------------------------------------- 1 | STAC vs Open Datacube 2 | ##################### 3 | 4 | The `Open Datacube`_ (ODC) project, on which this library is based, started before the `STAC`_ 5 | spec existed. As a result, ODC uses different terminology for otherwise very 6 | similar concepts. 7 | 8 | 9 | .. list-table:: 10 | :header-rows: 1 11 | 12 | * - STAC 13 | - ODC 14 | - Description 15 | * - :py:class:`~pystac.Collection` 16 | - :py:class:`~datacube.model.Product` 17 | - Collection of observations across space and time 18 | * - :py:class:`~pystac.Item` 19 | - :py:class:`~datacube.model.Dataset` 20 | - Single observation (specific time and place), multi-channel 21 | * - :py:class:`~pystac.Asset` 22 | - :py:class:`~datacube.model.Measurement` 23 | - Component of a single observation 24 | * - Band_ 25 | - :py:class:`~datacube.model.Measurement` 26 | - Pixel plane within a multi-plane asset 27 | * - `Common Name`_ 28 | - Alias 29 | - Refer to the same band by different names 30 | 31 | Similarly to STAC, ODC uses several levels of hierarchy to model metadata. At 32 | the highest level there is the *Product*, which is a collection of *Datasets*. Each 33 | *Dataset* contains a set of *Measurements* and related metadata. Finally, a 34 | *Measurement* describes a single plane of pixels captured at roughly the same 35 | time.
Metadata includes the location of the "file" and possibly a location within 36 | that file. 37 | 38 | Multiple Bands per File 39 | ======================= 40 | 41 | Multiple bands in a single file are supported by both ODC and STAC, but the 42 | representation differs. In STAC, another level of hierarchy is added below an 43 | *Asset* via the `bands attribute of the EO extension`_. Resources pointed 44 | to by an *Asset* may contain more than one band of pixels, and an *Asset* 45 | contains descriptions of those bands. In ODC, the *Asset* is not modelled 46 | explicitly; instead, the resource path and the potential location within that 47 | resource are properties of a *Measurement* object. It is common in STAC to have a 48 | one-to-one mapping between band and asset, and in that scenario an ODC *Measurement* and a 49 | STAC *Asset* can be seen as equivalent. 50 | 51 | Geo Referencing Metadata 52 | ======================== 53 | 54 | Precise geo-referencing metadata is stored within the file pointed to by an 55 | *Asset*/*Measurement*, but it can also be recorded within a STAC *Item*/ODC 56 | *Dataset* document. Having geo-referencing information at this level can enable 57 | more efficient data access by providing spatial information without needing to 58 | access the source (data file) itself. 59 | 60 | In STAC, the `Projection Extension`_ is used to bring this metadata from the file to 61 | the *Item* document. In STAC, each band might have a different projection, but in ODC 62 | the projection is a *Dataset*-level property and has to be shared across all 63 | *Measurements*. In ODC, individual bands can be of different resolutions and have 64 | different footprints (usually with a lot of overlap), but **must** be in the 65 | same projection. 66 | 67 | Consistency Assumptions 68 | ======================= 69 | 70 | In STAC, *Collection* is a very loose term; in theory it can point to a very 71 | heterogeneous set of *Items*. In practice, *Items* are typically very similar in 72 | structure; most contain the same set of *Assets* and bands. ODC is stricter 73 | in that regard. An ODC *Product* specifies the expected set of *Measurements* per 74 | *Dataset* as well as some basic common metadata per *Measurement*, specifically the 75 | pixel data type, which is assumed to stay the same across all *Datasets* for a 76 | given *Measurement*. 77 | 78 | The STAC equivalent would be the `Item Assets`_ extension with the `Raster Extension`_ 79 | inside. It describes, at the *Collection* level, the expected structure of the *Items* 80 | contained within. 81 | 82 | 83 | .. _`Open Datacube`: https://www.opendatacube.org/ 84 | .. _`STAC`: https://stacspec.org/ 85 | .. _`Projection Extension`: https://github.com/stac-extensions/projection 86 | .. _`Raster Extension`: https://github.com/stac-extensions/raster 87 | .. _`Item Assets`: https://github.com/stac-extensions/item-assets 88 | .. _Band: https://github.com/stac-extensions/eo#band-object 89 | .. _`Common Name`: https://github.com/stac-extensions/eo#common-band-names 90 | ..
_`bands attribute of the EO extension`: https://github.com/stac-extensions/eo#band-object 91 | -------------------------------------------------------------------------------- /.github/workflows/render.yml: -------------------------------------------------------------------------------- 1 | name: Render Example Notebooks 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | force: 7 | description: 'Force re-rendering of notebooks' 8 | required: false 9 | default: 'false' 10 | type: boolean 11 | push: 12 | paths: 13 | - "notebooks/*py" 14 | - ".github/workflows/render.yml" 15 | 16 | # When a PR is updated, cancel the jobs from the previous version. Merges 17 | # do not define head_ref, so use run_id to never cancel those jobs. 18 | concurrency: 19 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 20 | cancel-in-progress: true 21 | 22 | jobs: 23 | build-binder-env: 24 | timeout-minutes: 15 25 | runs-on: ubuntu-latest 26 | 27 | steps: 28 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 29 | 30 | - uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 31 | id: binder_cache 32 | with: 33 | path: /tmp/binder_env 34 | key: ${{ runner.os }}-binder-env-${{ hashFiles('binder/environment.yml') }} 35 | 36 | - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 37 | if: steps.binder_cache.outputs.cache-hit != 'true' 38 | with: 39 | channels: conda-forge 40 | channel-priority: true 41 | activate-environment: "" 42 | mamba-version: "*" 43 | use-mamba: true 44 | 45 | - name: Dump Conda Environment Info 46 | shell: bash -l {0} 47 | if: steps.binder_cache.outputs.cache-hit != 'true' 48 | run: | 49 | conda info 50 | conda list 51 | mamba --version 52 | conda config --show-sources 53 | conda config --show 54 | printenv | sort 55 | 56 | - name: Build Python Environment for Notebooks 57 | shell: bash -l {0} 58 | if: steps.binder_cache.outputs.cache-hit != 'true' 59 | run: | 60 | cd binder 61 | mamba env create -f environment.yml -p /tmp/binder_env 62 | 63 | - name: Check Python Env 64 | shell: bash -l {0} 65 | if: steps.binder_cache.outputs.cache-hit != 'true' 66 | run: | 67 | mamba env export -p /tmp/binder_env 68 | 69 | render: 70 | timeout-minutes: 15 71 | runs-on: ubuntu-latest 72 | 73 | needs: 74 | - build-binder-env 75 | 76 | steps: 77 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 78 | 79 | - name: Config 80 | id: cfg 81 | run: | 82 | find notebooks/ -maxdepth 1 -name '*.py' -type f | sort -f -d 83 | 84 | nb_dir="docs/notebooks" 85 | nb_hash=$(python scripts/notebook_hash.py) 86 | echo "Notebooks hash: ${nb_hash}" 87 | echo "nb-dir=${nb_dir}" >> $GITHUB_OUTPUT 88 | echo "nb-hash=${nb_hash}" >> $GITHUB_OUTPUT 89 | echo "nb-archive=odc-stac-notebooks-${nb_hash}.tar.gz" >> $GITHUB_OUTPUT 90 | 91 | - uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 92 | id: nb_cache 93 | with: 94 | path: ${{ steps.cfg.outputs.nb-dir }} 95 | key: docs-notebooks-${{ hashFiles('notebooks/*.py') }} 96 | 97 | - name: Get Conda Environment from Cache 98 | if: steps.nb_cache.outputs.cache-hit != 'true' || github.event.inputs.force == 'true' 99 | uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 100 | id: conda_cache 101 | with: 102 | path: /tmp/binder_env 103 | key: ${{ runner.os }}-binder-env-${{ hashFiles('binder/environment.yml') }} 104 | 105 | - name: Update PATH 106 | if: steps.nb_cache.outputs.cache-hit != 'true' || github.event.inputs.force == 'true' 107 | 
shell: bash 108 | run: | 109 | echo "/tmp/binder_env/bin" >> $GITHUB_PATH 110 | 111 | - name: Run Notebooks 112 | if: steps.nb_cache.outputs.cache-hit != 'true' || github.event.inputs.force == 'true' 113 | run: | 114 | nb_dir="${{ steps.cfg.outputs.nb-dir }}" 115 | 116 | mkdir -p $nb_dir 117 | for src in $(find notebooks -type f -maxdepth 1 -name '*py'); do 118 | dst="${nb_dir}/$(basename ${src%%.py}.ipynb)" 119 | echo "$src -> $dst" 120 | ./binder/render-nb-pipe.sh <$src >$dst 121 | done 122 | ls -lh ${nb_dir}/ 123 | 124 | - name: Package Notebooks 125 | run: | 126 | nb_dir="${{ steps.cfg.outputs.nb-dir }}" 127 | nb_hash="${{ steps.cfg.outputs.nb-hash }}" 128 | nb_archive="${{ steps.cfg.outputs.nb-archive }}" 129 | echo "DIR: ${nb_dir}" 130 | echo "NB hash: $nb_hash" 131 | echo "Archive: $nb_archive" 132 | 133 | (cd $nb_dir && tar cvz .) > "${nb_archive}" 134 | ls -lh "${nb_archive}" 135 | tar tzf "${nb_archive}" 136 | 137 | - name: Upload results (artifact) 138 | uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 139 | with: 140 | name: rendered-notebooks 141 | path: docs/notebooks 142 | if-no-files-found: error 143 | -------------------------------------------------------------------------------- /notebooks/stac-load-S2-deafrica.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,py:percent 5 | # text_representation: 6 | # extension: .py 7 | # format_name: percent 8 | # format_version: '1.3' 9 | # jupytext_version: 1.13.6 10 | # kernelspec: 11 | # display_name: 'Python 3.8.12 64-bit (''stac'': conda)' 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | # %% [markdown] 17 | # # Access Sentinel 2 Analysis Ready Data from Digital Earth Africa 18 | # 19 | # [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/opendatacube/odc-stac/develop?labpath=notebooks%2Fstac-load-S2-deafrica.ipynb) 20 | # 21 | # https://explorer.digitalearth.africa/products/s2_l2a 22 | 23 | # %% [markdown] 24 | # ## Import Required Packages 25 | 26 | # %% 27 | from pystac_client import Client 28 | 29 | from odc.stac import configure_rio, stac_load 30 | 31 | # %% [markdown] 32 | # ## Set Collection Configuration 33 | # 34 | # The configuration dictionary is determined from the product's definition, available at https://explorer.digitalearth.africa/products/s2_l2a#definition-doc 35 | # 36 | # All assets except SCL have the same configuration. SCL uses `uint8` rather than `uint16`. 37 | # 38 | # In the configuration, we also supply the aliases for each band. This means we can load data by band name rather than band number. 39 | 40 | # %% 41 | config = { 42 | "s2_l2a": { 43 | "assets": { 44 | "*": { 45 | "data_type": "uint16", 46 | "nodata": 0, 47 | "unit": "1", 48 | }, 49 | "SCL": { 50 | "data_type": "uint8", 51 | "nodata": 0, 52 | "unit": "1", 53 | }, 54 | }, 55 | "aliases": { 56 | "costal_aerosol": "B01", 57 | "blue": "B02", 58 | "green": "B03", 59 | "red": "B04", 60 | "red_edge_1": "B05", 61 | "red_edge_2": "B06", 62 | "red_edge_3": "B07", 63 | "nir": "B08", 64 | "nir_narrow": "B08A", 65 | "water_vapour": "B09", 66 | "swir_1": "B11", 67 | "swir_2": "B12", 68 | "mask": "SCL", 69 | "aerosol_optical_thickness": "AOT", 70 | "scene_average_water_vapour": "WVP", 71 | }, 72 | } 73 | } 74 | 75 | 76 | # %% [markdown] 77 | # ## Set AWS Configuration 78 | # 79 | # Digital Earth Africa data is stored on S3 in Cape Town, Africa. 
To load the data, we must configure rasterio with the appropriate AWS S3 endpoint. This can be done with the `odc.stac.configure_rio` function. Documentation for this function is available at https://odc-stac.readthedocs.io/en/latest/_api/odc.stac.configure_rio.html#odc.stac.configure_rio. 80 | # 81 | # The configuration below must be used when loading any Digital Earth Africa data through the STAC API. 82 | 83 | # %% 84 | configure_rio( 85 | cloud_defaults=True, 86 | aws={"aws_unsigned": True}, 87 | AWS_S3_ENDPOINT="s3.af-south-1.amazonaws.com", 88 | ) 89 | 90 | 91 | # %% [markdown] 92 | # ## Connect to the Digital Earth Africa STAC Catalog 93 | 94 | # %% 95 | # Open the STAC catalogue 96 | catalog = Client.open("https://explorer.digitalearth.africa/stac") 97 | 98 | 99 | # %% [markdown] 100 | # ## Find STAC Items to Load 101 | # 102 | # ### Define query parameters 103 | 104 | # %% 105 | # Set a bounding box 106 | # [xmin, ymin, xmax, ymax] in longitude and latitude 107 | bbox = [37.76, 12.49, 37.77, 12.50] 108 | 109 | # Set a start and end date 110 | start_date = "2020-09-01" 111 | end_date = "2020-12-01" 112 | 113 | # Set the STAC collections 114 | collections = ["s2_l2a"] 115 | 116 | 117 | # %% [markdown] 118 | # ### Construct query and get items from catalog 119 | 120 | # %% 121 | # Build a query with the set parameters 122 | query = catalog.search( 123 | bbox=bbox, collections=collections, datetime=f"{start_date}/{end_date}" 124 | ) 125 | 126 | # Search the STAC catalog for all items matching the query 127 | items = list(query.items()) 128 | print(f"Found: {len(items):d} datasets") 129 | 130 | # %% [markdown] 131 | # ## Load the Data 132 | # 133 | # In this step, we specify the desired coordinate system, resolution (here 20m), and bands to load. We also pass the bounding box to the `stac_load` function to only load the requested data. Since the band aliases are contained in the `config` dictionary, bands can be loaded using these aliases (e.g. `"red"` instead of `"B04"` below). 134 | # 135 | # The data will be lazy-loaded with Dask, meaning that it won't be loaded into memory until necessary, such as when it is displayed. 136 | 137 | # %% 138 | crs = "EPSG:6933" 139 | resolution = 20 140 | 141 | ds = stac_load( 142 | items, 143 | bands=("red", "green", "blue", "nir"), 144 | crs=crs, 145 | resolution=resolution, 146 | chunks={}, 147 | groupby="solar_day", 148 | stac_cfg=config, 149 | bbox=bbox, 150 | ) 151 | 152 | # View the Xarray Dataset 153 | ds 154 | 155 | 156 | # %% [markdown] 157 | # ### Compute a band index 158 | # 159 | # After loading the data, you can perform standard Xarray operations, such as calculating and plotting the normalised difference vegetation index (NDVI). The `.compute()` method triggers Dask to load the data into memory, so running this step may take a few minutes.
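# %% [markdown]
# Before computing the index below, you can optionally mask out `nodata`
# pixels first so they don't skew the result. A minimal sketch, assuming the
# `nodata: 0` value supplied in the collection configuration above:

# %%
valid = (ds.red != 0) & (ds.nir != 0)  # True where both bands hold real data
ndvi_masked = ((ds.nir - ds.red) / (ds.nir + ds.red)).where(valid)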
160 | 161 | # %% 162 | ds["NDVI"] = (ds.nir - ds.red) / (ds.nir + ds.red) 163 | 164 | 165 | ds.NDVI.compute().plot(col="time", col_wrap=6, vmin=0, vmax=1) 166 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """ 2 | Shared pytest fixtures for odc-stac tests 3 | """ 4 | 5 | import json 6 | from pathlib import Path 7 | 8 | import pystac 9 | import pystac.collection 10 | import pystac.item 11 | import pytest 12 | from odc.geo.data import country_geom 13 | 14 | TEST_DATA_FOLDER: Path = Path(__file__).parent.joinpath("data") 15 | PARTIAL_PROJ_STAC: str = "only_crs_proj.json" 16 | GA_LANDSAT_STAC: str = "ga_ls8c_ard_3-1-0_088080_2020-05-25_final.stac-item.json" 17 | SENTINEL_STAC_COLLECTION: str = "sentinel-2-l2a.collection.json" 18 | SENTINEL_STAC: str = "S2A_28QCH_20200714_0_L2A.json" 19 | SENTINEL_STAC_MS: str = "S2B_MSIL2A_20190629T212529_R043_T06VVN_20201006T080531.json" 20 | SENTINEL_STAC_MS_RASTER_EXT: str = ( 21 | "S2B_MSIL2A_20190629T212529_R043_T06VVN_20201006T080531_raster_ext.json" 22 | ) 23 | USGS_LANDSAT_STAC_v1b: str = "LC08_L2SR_081119_20200101_20200823_02_T2.json" 24 | USGS_LANDSAT_STAC_v1: str = "LC08_L2SP_028030_20200114_20200824_02_T1_SR.json" 25 | USGS_LANDSAT_STAC_v1_1_1: str = "LE07_L2SP_044033_20210329_20210424_02_T1_SR.json" 26 | LIDAR_STAC: str = "lidar_dem.json" 27 | BENCH_SITE1: str = "site1-20200606-tall-strip-africa.geojson" 28 | BENCH_SITE2: str = "site2-2020_jun_jul-35MNM.geojson" 29 | 30 | # pylint: disable=redefined-outer-name 31 | 32 | 33 | @pytest.fixture(scope="session") 34 | def test_data_dir(): 35 | return TEST_DATA_FOLDER 36 | 37 | 38 | @pytest.fixture 39 | def partial_proj_stac(): 40 | return pystac.item.Item.from_file(str(TEST_DATA_FOLDER.joinpath(PARTIAL_PROJ_STAC))) 41 | 42 | 43 | @pytest.fixture 44 | def no_bands_stac(partial_proj_stac): 45 | partial_proj_stac.assets.clear() 46 | return partial_proj_stac 47 | 48 | 49 | @pytest.fixture 50 | def usgs_landsat_stac_v1(): 51 | return pystac.item.Item.from_file( 52 | str(TEST_DATA_FOLDER.joinpath(USGS_LANDSAT_STAC_v1)) 53 | ) 54 | 55 | 56 | @pytest.fixture 57 | def usgs_landsat_stac_v1b(): 58 | return pystac.item.Item.from_file( 59 | str(TEST_DATA_FOLDER.joinpath(USGS_LANDSAT_STAC_v1b)) 60 | ) 61 | 62 | 63 | @pytest.fixture 64 | def usgs_landsat_stac_v1_1_1(): 65 | return pystac.item.Item.from_file( 66 | str(TEST_DATA_FOLDER.joinpath(USGS_LANDSAT_STAC_v1_1_1)) 67 | ) 68 | 69 | 70 | @pytest.fixture 71 | def ga_landsat_stac(): 72 | return pystac.item.Item.from_file(str(TEST_DATA_FOLDER.joinpath(GA_LANDSAT_STAC))) 73 | 74 | 75 | @pytest.fixture 76 | def lidar_stac(): 77 | return pystac.item.Item.from_file(str(TEST_DATA_FOLDER.joinpath(LIDAR_STAC))) 78 | 79 | 80 | @pytest.fixture 81 | def sentinel_stac(): 82 | return pystac.item.Item.from_file(str(TEST_DATA_FOLDER.joinpath(SENTINEL_STAC))) 83 | 84 | 85 | @pytest.fixture 86 | def sentinel_stac_ms_json(): 87 | with TEST_DATA_FOLDER.joinpath(SENTINEL_STAC_MS).open("r", encoding="utf") as f: 88 | return json.load(f) 89 | 90 | 91 | @pytest.fixture 92 | def bench_site1(): 93 | with TEST_DATA_FOLDER.joinpath(BENCH_SITE1).open("r", encoding="utf") as f: 94 | return _strip_links(json.load(f)) 95 | 96 | 97 | @pytest.fixture 98 | def bench_site2(): 99 | with TEST_DATA_FOLDER.joinpath(BENCH_SITE2).open("r", encoding="utf") as f: 100 | return _strip_links(json.load(f)) 101 | 102 | 103 | @pytest.fixture 104 | def sentinel_stac_ms(): 105 | return
pystac.item.Item.from_file(str(TEST_DATA_FOLDER.joinpath(SENTINEL_STAC_MS))) 106 | 107 | 108 | @pytest.fixture 109 | def sentinel_stac_ms_no_ext(sentinel_stac_ms_json): 110 | metadata = dict(sentinel_stac_ms_json) 111 | metadata["stac_extensions"] = [] 112 | return pystac.item.Item.from_dict(metadata) 113 | 114 | 115 | @pytest.fixture 116 | def sentinel_stac_ms_with_raster_ext(): 117 | return pystac.item.Item.from_file( 118 | str(TEST_DATA_FOLDER.joinpath(SENTINEL_STAC_MS_RASTER_EXT)) 119 | ) 120 | 121 | 122 | @pytest.fixture 123 | def sentinel_stac_collection(): 124 | return pystac.collection.Collection.from_file( 125 | str(TEST_DATA_FOLDER.joinpath(SENTINEL_STAC_COLLECTION)) 126 | ) 127 | 128 | 129 | @pytest.fixture 130 | def relative_href_only(ga_landsat_stac: pystac.item.Item): 131 | item = pystac.Item.from_dict(ga_landsat_stac.to_dict()) 132 | item = item.make_asset_hrefs_relative() 133 | assert isinstance(item, pystac.Item) 134 | item.remove_links("self") 135 | return item 136 | 137 | 138 | @pytest.fixture 139 | def sample_geojson(): 140 | return { 141 | "type": "FeatureCollection", 142 | "features": [ 143 | { 144 | "type": "Feature", 145 | "properties": {"name": "Kangaroo Island"}, 146 | "geometry": { 147 | "type": "Polygon", 148 | "coordinates": [ 149 | [ 150 | [136.351318359375, -35.78217070326606], 151 | [136.7303466796875, -36.16448788632062], 152 | [137.5323486328125, -36.16005298551352], 153 | [137.8179931640625, -35.933540642493114], 154 | [138.0816650390625, -36.05798104702501], 155 | [138.2025146484375, -35.74205383068035], 156 | [137.5653076171875, -35.46066995149529], 157 | [136.351318359375, -35.78217070326606], 158 | ] 159 | ], 160 | }, 161 | } 162 | ], 163 | } 164 | 165 | 166 | def _strip_links(gjson): 167 | for item in gjson["features"]: 168 | item["links"] = [] 169 | return gjson 170 | 171 | 172 | @pytest.fixture() 173 | def gpd_iso3(): 174 | def _get(iso3: str, crs=None): 175 | return country_geom(iso3.upper(), crs=crs) 176 | 177 | yield _get 178 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | import logging as pylogging 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
14 | # 15 | import os 16 | import sys 17 | from pathlib import Path 18 | 19 | import requests 20 | from sphinx.util import logging 21 | 22 | sys.path.insert(0, os.path.abspath("..")) 23 | from odc.stac import __version__ as _odc_stac_version 24 | from scripts import notebook_hash 25 | 26 | # isort: off 27 | # extra imports to check env 28 | import odc.stac.bench 29 | 30 | 31 | # Workaround for https://github.com/agronholm/sphinx-autodoc-typehints/issues/123 32 | # When this https://github.com/agronholm/sphinx-autodoc-typehints/pull/153 33 | # gets merged, we can remove this 34 | class FilterForIssue123(pylogging.Filter): 35 | def filter(self, record: pylogging.LogRecord) -> bool: 36 | # You probably should make this check more specific by checking 37 | # that the dataclass name is in the message, so that you don't filter out 38 | # other meaningful warnings 39 | return not record.getMessage().startswith("Cannot treat a function") 40 | 41 | 42 | logging.getLogger("sphinx_autodoc_typehints").logger.addFilter(FilterForIssue123()) 43 | # End of a workaround 44 | 45 | 46 | def ensure_notebooks(dst_folder): 47 | """ 48 | Download pre-rendered notebooks from a tar archive 49 | """ 50 | dst_folder = Path(dst_folder) 51 | if dst_folder.exists(): 52 | print(f"Found pre-rendered notebooks in {dst_folder}") 53 | return True 54 | 55 | dst_folder.mkdir() 56 | nb_hash, nb_paths = notebook_hash.compute("../notebooks") 57 | nb_names = [p.rsplit("/", 1)[-1].rsplit(".", 1)[0] + ".ipynb" for p in nb_paths] 58 | 59 | for nb in nb_names: 60 | url = f"https://{nb_hash[:16]}--odc-stac-docs.netlify.app/notebooks/{nb}" 61 | print(f"{url} -> notebooks/{nb}") 62 | rr = requests.get(url, timeout=5) 63 | if not rr: 64 | return False 65 | with open(dst_folder / nb, "wt", encoding="utf") as dst: 66 | dst.write(rr.text) 67 | 68 | return True 69 | 70 | 71 | # working directory is docs/ 72 | # download pre-rendered notebooks unless folder is already populated 73 | if not ensure_notebooks("notebooks"): 74 | notebooks_directory = os.path.abspath("../notebooks") 75 | raise RuntimeError( 76 | "There is no cached version of these notebooks. " 77 | "Build the notebooks before building the documentation. " 78 | f"Notebooks are located in {notebooks_directory}." 79 | ) 80 | 81 | # -- Project information ----------------------------------------------------- 82 | 83 | project = "odc-stac" 84 | copyright = "2021, ODC" 85 | author = "ODC" 86 | 87 | version = ".".join(_odc_stac_version.split(".", 2)[:2]) 88 | # The full version, including alpha/beta/rc tags 89 | release = _odc_stac_version 90 | 91 | 92 | # -- General configuration --------------------------------------------------- 93 | 94 | # Add any Sphinx extension module names here, as strings. They can be 95 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 96 | # ones. 97 | extensions = [ 98 | "sphinx.ext.autodoc", 99 | "sphinx.ext.autosummary", 100 | "sphinx_autodoc_typehints", 101 | "sphinx.ext.viewcode", 102 | "sphinx.ext.intersphinx", 103 | "sphinx.ext.extlinks", 104 | "sphinx.ext.mathjax", 105 | "nbsphinx", 106 | ] 107 | 108 | # Add any paths that contain templates here, relative to this directory. 109 | templates_path = ["_templates"] 110 | 111 | # List of patterns, relative to source directory, that match files and 112 | # directories to ignore when looking for source files. 113 | # This pattern also affects html_static_path and html_extra_path. 114 | exclude_patterns = ["_build"] 115 | 116 | # If true, '()' will be appended to :func: etc.
cross-reference text. 117 | add_function_parentheses = True 118 | 119 | # If true, sectionauthor and moduleauthor directives will be shown in the 120 | # output. They are ignored by default. 121 | # show_authors = False 122 | 123 | # The name of the Pygments (syntax highlighting) style to use. 124 | pygments_style = "friendly" 125 | 126 | autosummary_generate = True 127 | 128 | extlinks = { 129 | "issue": ("https://github.com/opendatacube/odc-stac/issues/%s", "issue %s"), 130 | "pull": ("https://github.com/opendatacube/odc-stac/pulls/%s", "PR %s"), 131 | } 132 | 133 | intersphinx_mapping = { 134 | "python": ("https://docs.python.org/3", None), 135 | "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), 136 | "numpy": ("https://docs.scipy.org/doc/numpy/", None), 137 | "xarray": ("https://xarray.pydata.org/en/stable/", None), 138 | "datacube": ("https://datacube-core.readthedocs.io/en/latest/", None), 139 | "odc-geo": ("https://odc-geo.readthedocs.io/en/latest/", None), 140 | "pystac": ("https://pystac.readthedocs.io/en/latest/", None), 141 | "rasterio": ("https://rasterio.readthedocs.io/en/latest/", None), 142 | } 143 | 144 | # -- Options for HTML output ------------------------------------------------- 145 | 146 | # The theme to use for HTML and HTML Help pages. See the documentation for 147 | # a list of builtin themes. 148 | # 149 | html_theme = "sphinx_rtd_theme" 150 | 151 | html_theme_options = { 152 | "collapse_navigation": False, 153 | "logo_only": True, 154 | } 155 | 156 | # html_logo = '_static/logo.svg' 157 | html_last_updated_fmt = "%b %d, %Y" 158 | html_show_sphinx = False 159 | 160 | 161 | # Add any paths that contain custom static files (such as style sheets) here, 162 | # relative to this directory. They are copied after the builtin static files, 163 | # so a file named "default.css" will overwrite the builtin "default.css". 164 | html_static_path = ["_static"] 165 | 166 | html_css_files = ["xr-fixes.css"] 167 | -------------------------------------------------------------------------------- /notebooks/stac-load-e84-aws.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,py:percent 5 | # text_representation: 6 | # extension: .py 7 | # format_name: percent 8 | # format_version: '1.3' 9 | # jupytext_version: 1.13.8 10 | # kernelspec: 11 | # display_name: Python 3 (ipykernel) 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | # %% [markdown] 17 | # # Access Sentinel 2 Data from AWS 18 | # 19 | # [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/opendatacube/odc-stac/develop?labpath=notebooks%2Fstac-load-e84-aws.ipynb) 20 | # 21 | # https://registry.opendata.aws/sentinel-2-l2a-cogs/ 22 | 23 | # %% 24 | import dask.distributed 25 | import folium 26 | import folium.plugins 27 | import geopandas as gpd 28 | import shapely.geometry 29 | from IPython.display import display 30 | from pystac_client import Client 31 | 32 | from odc.stac import configure_rio, stac_load 33 | 34 | 35 | def convert_bounds(bbox, invert_y=False): 36 | """ 37 | Helper method for changing bounding box representation to leaflet notation 38 | 39 | ``(lon1, lat1, lon2, lat2) -> ((lat1, lon1), (lat2, lon2))`` 40 | """ 41 | x1, y1, x2, y2 = bbox 42 | if invert_y: 43 | y1, y2 = y2, y1 44 | return ((y1, x1), (y2, x2)) 45 | 46 | 47 | # %% [markdown] 48 | # ## Start Dask Client 49 | # 50 | # This step is optional, but it does improve load speed significantly. 
You 51 | # don't have to use Dask, as you can load data directly into memory of the 52 | # notebook. 53 | 54 | # %% 55 | client = dask.distributed.Client() 56 | configure_rio(cloud_defaults=True, aws={"aws_unsigned": True}, client=client) 57 | display(client) 58 | 59 | # %% [markdown] 60 | # ## Find STAC Items to Load 61 | 62 | # %% 63 | km2deg = 1.0 / 111 64 | x, y = (113.887, -25.843) # Center point of a query 65 | r = 100 * km2deg 66 | bbox = (x - r, y - r, x + r, y + r) 67 | 68 | catalog = Client.open("https://earth-search.aws.element84.com/v1/") 69 | 70 | query = catalog.search( 71 | collections=["sentinel-2-l2a"], datetime="2021-09-16", limit=100, bbox=bbox 72 | ) 73 | 74 | items = list(query.items()) 75 | print(f"Found: {len(items):d} datasets") 76 | 77 | # Convert STAC items into a GeoJSON FeatureCollection 78 | stac_json = query.item_collection_as_dict() 79 | 80 | # %% [markdown] 81 | # ## Review Query Result 82 | # 83 | # We'll use a GeoPandas `GeoDataFrame` to make plotting easier. 84 | 85 | # %% 86 | gdf = gpd.GeoDataFrame.from_features(stac_json, "epsg:4326") 87 | 88 | # Compute granule id from components 89 | gdf["granule"] = ( 90 | gdf["mgrs:utm_zone"].apply(lambda x: f"{x:02d}") 91 | + gdf["mgrs:latitude_band"] 92 | + gdf["mgrs:grid_square"] 93 | ) 94 | 95 | fig = gdf.plot( 96 | "granule", 97 | edgecolor="black", 98 | categorical=True, 99 | aspect="equal", 100 | alpha=0.5, 101 | figsize=(6, 12), 102 | legend=True, 103 | legend_kwds={"loc": "upper left", "frameon": False, "ncol": 1}, 104 | ) 105 | _ = fig.set_title("STAC Query Results") 106 | 107 | # %% [markdown] 108 | # ## Plot STAC Items on a Map 109 | 110 | # %% 111 | # https://github.com/python-visualization/folium/issues/1501 112 | from branca.element import Figure 113 | 114 | fig = Figure(width="400px", height="500px") 115 | map1 = folium.Map() 116 | fig.add_child(map1) 117 | 118 | folium.GeoJson( 119 | shapely.geometry.box(*bbox), 120 | style_function=lambda x: dict(fill=False, weight=1, opacity=0.7, color="olive"), 121 | name="Query", 122 | ).add_to(map1) 123 | 124 | gdf.explore( 125 | "granule", 126 | categorical=True, 127 | tooltip=[ 128 | "granule", 129 | "datetime", 130 | "eo:cloud_cover", 131 | ], 132 | popup=True, 133 | style_kwds=dict(fillOpacity=0.1, width=2), 134 | name="STAC", 135 | m=map1, 136 | ) 137 | 138 | map1.fit_bounds(bounds=convert_bounds(gdf.unary_union.bounds)) 139 | display(fig) 140 | 141 | # %% [markdown] 142 | # ## Construct Dask Dataset 143 | # 144 | # Note that even though there are 9 STAC Items on input, there is only one 145 | # timeslice on output. This is because of `groupby="solar_day"`. With that 146 | # setting, `stac_load` will place all items that occurred on the same day (as 147 | # adjusted for the timezone) into one image plane. 148 | 149 | # %% 150 | # Since we will plot it on a map, we need to use the `EPSG:3857` projection 151 | crs = "epsg:3857" 152 | zoom = 2**5 # overview level 5 153 | 154 | xx = stac_load( 155 | items, 156 | bands=("red", "green", "blue"), 157 | crs=crs, 158 | resolution=10 * zoom, 159 | chunks={}, # <-- use Dask 160 | groupby="solar_day", 161 | ) 162 | display(xx) 163 | 164 | # %% [markdown] 165 | # Note that the data is not loaded yet, but we can already review the memory requirement and check the data footprint.
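# %% [markdown]
# For example, a rough estimate of the memory the full load would need.
# A minimal sketch: `.nbytes` is derived from shape and dtype, so it does
# not trigger any reads.

# %%
total_bytes = sum(dv.nbytes for dv in xx.data_vars.values())
print(f"Approximate size fully loaded: {total_bytes / 2**20:.0f} MiB")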
166 | 167 | # %% 168 | xx.odc.geobox 169 | 170 | # %% [markdown] 171 | # ## Load data into local memory 172 | 173 | # %% 174 | # %%time 175 | xx = xx.compute() 176 | 177 | # %% 178 | _ = ( 179 | xx.isel(time=0) 180 | .to_array("band") 181 | .plot.imshow( 182 | col="band", 183 | size=4, 184 | vmin=0, 185 | vmax=4000, 186 | ) 187 | ) 188 | 189 | # %% [markdown] 190 | # ## Load with bounding box 191 | # 192 | # As you can see, `stac_load` returned all the data covered by the STAC items 193 | # returned from the query. This happens by default as `stac_load` has no way of 194 | # knowing what your query was. But it is possible to control what region is 195 | # loaded. There are several mechanisms available, but probably the simplest one is 196 | # to use the `bbox=` parameter (compatible with `stac_client`). 197 | # 198 | # Let's load a small region at native resolution to demonstrate. 199 | 200 | # %% 201 | r = 6.5 * km2deg 202 | small_bbox = (x - r, y - r, x + r, y + r) 203 | 204 | yy = stac_load( 205 | items, 206 | bands=("red", "green", "blue"), 207 | crs=crs, 208 | resolution=10, 209 | chunks={}, # <-- use Dask 210 | groupby="solar_day", 211 | bbox=small_bbox, 212 | ) 213 | display(yy.odc.geobox) 214 | 215 | # %% 216 | yy = yy.compute() 217 | 218 | # %% 219 | _ = ( 220 | yy.isel(time=0) 221 | .to_array("band") 222 | .plot.imshow( 223 | col="band", 224 | size=4, 225 | vmin=0, 226 | vmax=4000, 227 | ) 228 | ) 229 | 230 | # %% [markdown] 231 | # -------------------------------------------------------------- 232 | -------------------------------------------------------------------------------- /notebooks/stac-load-S2-ms.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,py:percent 5 | # text_representation: 6 | # extension: .py 7 | # format_name: percent 8 | # format_version: '1.3' 9 | # jupytext_version: 1.13.8 10 | # kernelspec: 11 | # display_name: Python 3 (ipykernel) 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | # %% [markdown] 17 | # # Access Sentinel 2 Data on Planetary Computer 18 | # 19 | # [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/opendatacube/odc-stac/develop?labpath=notebooks%2Fstac-load-S2-ms.ipynb) 20 | 21 | # %% [markdown] 22 | # ## Setup Instructions 23 | # 24 | # This notebook is meant to run on the Planetary Computer lab hub. 25 | 26 | # %% 27 | import dask.distributed 28 | import dask.utils 29 | import numpy as np 30 | import planetary_computer as pc 31 | import xarray as xr 32 | from IPython.display import display 33 | from pystac_client import Client 34 | 35 | from odc.stac import configure_rio, stac_load 36 | 37 | # %% [markdown] 38 | # ## Start Dask Client 39 | # 40 | # This step is optional, but it does improve load speed significantly. You 41 | # don't have to use Dask, as you can load data directly into memory of the 42 | # notebook. 43 | 44 | # %% 45 | client = dask.distributed.Client() 46 | configure_rio(cloud_defaults=True, client=client) 47 | display(client) 48 | 49 | # %% [markdown] 50 | # ## Query STAC API 51 | # 52 | # Here we are looking for datasets in the `sentinel-2-l2a` collection from June 53 | # 2019 over MGRS tile `06VVN`.
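# %% [markdown]
# (The same search could also be expressed spatially with `bbox=`, as in the
# other notebooks. A commented-out sketch with purely illustrative
# coordinates; `catalog` is opened in the next cell:)

# %%
# query = catalog.search(
#     collections=["sentinel-2-l2a"],
#     datetime="2019-06",
#     bbox=(-147.0, 60.0, -145.0, 61.0),  # hypothetical bounds, not exactly tile 06VVN
# )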
54 | 55 | # %% 56 | catalog = Client.open("https://planetarycomputer.microsoft.com/api/stac/v1") 57 | 58 | query = catalog.search( 59 | collections=["sentinel-2-l2a"], 60 | datetime="2019-06", 61 | query={"s2:mgrs_tile": dict(eq="06VVN")}, 62 | ) 63 | 64 | items = list(query.items()) 65 | print(f"Found: {len(items):d} datasets") 66 | 67 | # %% [markdown] 68 | # ## Lazy load all the bands 69 | # 70 | # We won't use all the bands, but that doesn't matter: bands that we don't 71 | # request won't be loaded. We are "loading" data with Dask, which means that at this 72 | # point no reads are happening just yet. 73 | # 74 | # We have to supply `dtype=` and `nodata=` because items in this collection are missing [raster extension](https://github.com/stac-extensions/raster) metadata. 75 | 76 | # %% 77 | resolution = 10 78 | SHRINK = 4 79 | if client.cluster.workers[0].memory_manager.memory_limit < dask.utils.parse_bytes("4G"): 80 | SHRINK = 8  # running on Binder with 2 GB RAM 81 | 82 | if SHRINK > 1: 83 | resolution = resolution * SHRINK 84 | 85 | xx = stac_load( 86 | items, 87 | chunks={"x": 2048, "y": 2048}, 88 | patch_url=pc.sign, 89 | resolution=resolution, 90 | # force dtype and nodata 91 | dtype="uint16", 92 | nodata=0, 93 | ) 94 | 95 | print(f"Bands: {','.join(list(xx.data_vars))}") 96 | display(xx) 97 | 98 | # %% [markdown] 99 | # By default `stac_load` will return all the data bands using canonical asset 100 | # names. But we can also request a subset of bands by supplying the `bands=` parameter. 101 | # When going this route, you can also use a "common name" to refer to a band. 102 | # 103 | # In this case we request the `red,green,blue,nir` bands, which are common names 104 | # for bands `B04,B03,B02,B08`, and the `SCL` band, which is a canonical name. 105 | 106 | # %% 107 | xx = stac_load( 108 | items, 109 | bands=["red", "green", "blue", "nir", "SCL"], 110 | resolution=resolution, 111 | chunks={"x": 2048, "y": 2048}, 112 | patch_url=pc.sign, 113 | # force dtype and nodata 114 | dtype="uint16", 115 | nodata=0, 116 | ) 117 | 118 | print(f"Bands: {','.join(list(xx.data_vars))}") 119 | display(xx) 120 | 121 | 122 | # %% [markdown] 123 | # ## Do some math with bands 124 | 125 | 126 | # %% 127 | def to_float(xx): 128 | _xx = xx.astype("float32") 129 | nodata = _xx.attrs.pop("nodata", None) 130 | if nodata is None: 131 | return _xx 132 | return _xx.where(xx != nodata) 133 | 134 | 135 | def colorize(xx, colormap): 136 | return xr.DataArray(colormap[xx.data], coords=xx.coords, dims=(*xx.dims, "band")) 137 | 138 | 139 | # %% 140 | # like .astype(float32) but taking care of nodata->NaN mapping 141 | nir = to_float(xx.nir) 142 | red = to_float(xx.red) 143 | ndvi = (nir - red) / ( 144 | nir + red 145 | ) # < This is still a lazy Dask computation (no data loaded yet) 146 | 147 | # Get the 5th time slice: `load->compute->plot` 148 | _ = ndvi.isel(time=4).compute().plot.imshow(size=7, aspect=1.2, interpolation="bicubic") 149 | 150 | # %% [markdown] 151 | # For sample purposes, work with the first 6 observations only 152 | 153 | # %% 154 | xx = xx.isel(time=np.s_[:6]) 155 | 156 | # %% 157 | # fmt: off 158 | scl_colormap = np.array( 159 | [ 160 | [255, 0, 255, 255], # 0 - NODATA 161 | [255, 0, 4, 255], # 1 - Saturated or Defective 162 | [0 , 0, 0, 255], # 2 - Dark Areas 163 | [97 , 97, 97, 255], # 3 - Cloud Shadow 164 | [3 , 139, 80, 255], # 4 - Vegetation 165 | [192, 132, 12, 255], # 5 - Bare Ground 166 | [21 , 103, 141, 255], # 6 - Water 167 | [117, 0, 27, 255], # 7 - Unclassified 168 | [208, 208, 208, 255], # 8 - Cloud 169
| [244, 244, 244, 255], # 9 - Definitely Cloud 170 | [195, 231, 240, 255], # 10 - Thin Cloud 171 | [222, 157, 204, 255], # 11 - Snow or Ice 172 | ], 173 | dtype="uint8", 174 | ) 175 | # fmt: on 176 | 177 | # Load SCL band, then convert to RGB using color scheme above 178 | scl_rgba = colorize(xx.SCL.compute(), scl_colormap) 179 | 180 | # Check we still have geo-registration 181 | scl_rgba.odc.geobox 182 | 183 | # %% 184 | _ = scl_rgba.plot.imshow(col="time", col_wrap=3, size=3, interpolation="antialiased") 185 | 186 | # %% [markdown] 187 | # Let's save the image dated 2019-06-04 to a Cloud Optimized GeoTIFF file. 188 | 189 | # %% 190 | to_save = scl_rgba.isel(time=3) 191 | fname = f"SCL-{to_save.time.dt.strftime('%Y%m%d').item()}.tif" 192 | print(f"Saving to: '{fname}'") 193 | 194 | # %% 195 | to_save.odc.write_cog( 196 | fname, 197 | overwrite=True, 198 | compress="webp", 199 | webp_quality=90, 200 | ) 201 | 202 | # %% [markdown] 203 | # Check the file with `rio info`. 204 | 205 | # %% 206 | # !ls -lh {fname} 207 | # !rio info {fname} | jq . 208 | 209 | # %% [markdown] 210 | # -------------------------------- 211 | -------------------------------------------------------------------------------- /tests/data/only_crs_proj.json: -------------------------------------------------------------------------------- 1 | { 2 | "stac_version": "1.0.0", 3 | "type": "Feature", 4 | "id": "3af1acae-0255-4762-b2b2-f26034cf3ce8", 5 | "properties": { 6 | "title": "LS_FC_PC_3577_-14_-26_20190101", 7 | "platform": "landsat-5,landsat-7,landsat-8", 8 | "instruments": [ 9 | "tm,etm+,oli" 10 | ], 11 | "created": "2020-03-10T05:35:24.063151Z", 12 | "proj:epsg": 4326, 13 | "datetime": "2019-01-01T00:00:00Z", 14 | "cubedash:region_code": "-14_-26" 15 | }, 16 | "geometry": { 17 | "type": "Polygon", 18 | "coordinates": [ 19 | [ 20 | [ 21 | 118.22240593999766, 22 | -22.699975944231888 23 | ], 24 | [ 25 | 118.24427397418242, 26 | -22.515645780963876 27 | ], 28 | [ 29 | 119.22003537123638, 30 | -22.60842512232027 31 | ], 32 | [ 33 | 119.12126684085257, 34 | -23.49827058643962 35 | ], 36 | [ 37 | 118.13807838381156, 38 | -23.404891555616906 39 | ], 40 | [ 41 | 118.22240593999766, 42 | -22.699975944231888 43 | ] 44 | ] 45 | ] 46 | }, 47 | "links": [ 48 | { 49 | "rel": "self", 50 | "href": "https://explorer-aws.dea.ga.gov.au/stac/collections/fc_percentile_albers_annual/items/3af1acae-0255-4762-b2b2-f26034cf3ce8", 51 | "type": "application/json" 52 | }, 53 | { 54 | "rel": "odc_yaml", 55 | "href": "https://explorer-aws.dea.ga.gov.au/dataset/3af1acae-0255-4762-b2b2-f26034cf3ce8.odc-metadata.yaml", 56 | "type": "text/yaml", 57 | "title": "ODC Dataset YAML" 58 | }, 59 | { 60 | "rel": "collection", 61 | "href": "https://explorer-aws.dea.ga.gov.au/stac/collections/fc_percentile_albers_annual" 62 | }, 63 | { 64 | "rel": "product_overview", 65 | "href": "https://explorer-aws.dea.ga.gov.au/product/fc_percentile_albers_annual", 66 | "type": "text/html", 67 | "title": "ODC Product Overview" 68 | }, 69 | { 70 | "rel": "alternative", 71 | "href": "https://explorer-aws.dea.ga.gov.au/dataset/3af1acae-0255-4762-b2b2-f26034cf3ce8", 72 | "type": "text/html", 73 | "title": "ODC Dataset Overview" 74 | }, 75 | { 76 | "rel": "root", 77 | "href": "https://explorer-aws.dea.ga.gov.au/stac" 78 | } 79 | ], 80 | "assets": { 81 | "BS_PC_10": { 82 | "href": "s3://dea-public-data/fractional-cover/fc-percentile/annual/v2.2.0/combined/x_-14/y_-26/2019/LS_FC_PC_3577_-14_-26_20190101_BS_PC_10.tif", 83 | "type": "image/tiff; application=geotiff;
profile=cloud-optimized", 84 | "title": "BS_PC_10", 85 | "eo:bands": [ 86 | { 87 | "name": "BS_PC_10" 88 | } 89 | ], 90 | "roles": [ 91 | "data" 92 | ] 93 | }, 94 | "BS_PC_50": { 95 | "href": "s3://dea-public-data/fractional-cover/fc-percentile/annual/v2.2.0/combined/x_-14/y_-26/2019/LS_FC_PC_3577_-14_-26_20190101_BS_PC_50.tif", 96 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 97 | "title": "BS_PC_50", 98 | "eo:bands": [ 99 | { 100 | "name": "BS_PC_50" 101 | } 102 | ], 103 | "roles": [ 104 | "data" 105 | ] 106 | }, 107 | "BS_PC_90": { 108 | "href": "s3://dea-public-data/fractional-cover/fc-percentile/annual/v2.2.0/combined/x_-14/y_-26/2019/LS_FC_PC_3577_-14_-26_20190101_BS_PC_90.tif", 109 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 110 | "title": "BS_PC_90", 111 | "eo:bands": [ 112 | { 113 | "name": "BS_PC_90" 114 | } 115 | ], 116 | "roles": [ 117 | "data" 118 | ] 119 | }, 120 | "PV_PC_10": { 121 | "href": "s3://dea-public-data/fractional-cover/fc-percentile/annual/v2.2.0/combined/x_-14/y_-26/2019/LS_FC_PC_3577_-14_-26_20190101_PV_PC_10.tif", 122 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 123 | "title": "PV_PC_10", 124 | "eo:bands": [ 125 | { 126 | "name": "PV_PC_10" 127 | } 128 | ], 129 | "roles": [ 130 | "data" 131 | ] 132 | }, 133 | "PV_PC_50": { 134 | "href": "s3://dea-public-data/fractional-cover/fc-percentile/annual/v2.2.0/combined/x_-14/y_-26/2019/LS_FC_PC_3577_-14_-26_20190101_PV_PC_50.tif", 135 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 136 | "title": "PV_PC_50", 137 | "eo:bands": [ 138 | { 139 | "name": "PV_PC_50" 140 | } 141 | ], 142 | "roles": [ 143 | "data" 144 | ] 145 | }, 146 | "PV_PC_90": { 147 | "href": "s3://dea-public-data/fractional-cover/fc-percentile/annual/v2.2.0/combined/x_-14/y_-26/2019/LS_FC_PC_3577_-14_-26_20190101_PV_PC_90.tif", 148 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 149 | "title": "PV_PC_90", 150 | "eo:bands": [ 151 | { 152 | "name": "PV_PC_90" 153 | } 154 | ], 155 | "roles": [ 156 | "data" 157 | ] 158 | }, 159 | "NPV_PC_10": { 160 | "href": "s3://dea-public-data/fractional-cover/fc-percentile/annual/v2.2.0/combined/x_-14/y_-26/2019/LS_FC_PC_3577_-14_-26_20190101_NPV_PC_10.tif", 161 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 162 | "title": "NPV_PC_10", 163 | "eo:bands": [ 164 | { 165 | "name": "NPV_PC_10" 166 | } 167 | ], 168 | "roles": [ 169 | "data" 170 | ] 171 | }, 172 | "NPV_PC_50": { 173 | "href": "s3://dea-public-data/fractional-cover/fc-percentile/annual/v2.2.0/combined/x_-14/y_-26/2019/LS_FC_PC_3577_-14_-26_20190101_NPV_PC_50.tif", 174 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 175 | "title": "NPV_PC_50", 176 | "eo:bands": [ 177 | { 178 | "name": "NPV_PC_50" 179 | } 180 | ], 181 | "roles": [ 182 | "data" 183 | ] 184 | }, 185 | "NPV_PC_90": { 186 | "href": "s3://dea-public-data/fractional-cover/fc-percentile/annual/v2.2.0/combined/x_-14/y_-26/2019/LS_FC_PC_3577_-14_-26_20190101_NPV_PC_90.tif", 187 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 188 | "title": "NPV_PC_90", 189 | "eo:bands": [ 190 | { 191 | "name": "NPV_PC_90" 192 | } 193 | ], 194 | "roles": [ 195 | "data" 196 | ] 197 | } 198 | }, 199 | "bbox": [ 200 | 118.13807838381156, 201 | -23.49827058643962, 202 | 119.22003537123638, 203 | -22.515645780963876 204 | ], 205 | "stac_extensions": [ 206 | "https://stac-extensions.github.io/eo/v1.0.0/schema.json", 207 | 
"https://stac-extensions.github.io/projection/v1.0.0/schema.json" 208 | ], 209 | "collection": "fc_percentile_albers_annual" 210 | } 211 | -------------------------------------------------------------------------------- /odc/stac/testing/stac.py: -------------------------------------------------------------------------------- 1 | """ 2 | Making STAC items for testing. 3 | """ 4 | 5 | from datetime import datetime, timezone 6 | from typing import Any, Generator 7 | 8 | import pystac.asset 9 | import pystac.item 10 | import xarray as xr 11 | from odc.geo.geobox import GeoBox 12 | from odc.loader.types import ( 13 | AuxBandMetadata, 14 | AuxDataSource, 15 | RasterBandMetadata, 16 | RasterGroupMetadata, 17 | RasterSource, 18 | norm_key, 19 | ) 20 | from pystac.extensions.projection import ProjectionExtension 21 | from pystac.extensions.raster import RasterBand, RasterExtension 22 | from toolz import dicttoolz 23 | 24 | from .._mdtools import _group_geoboxes 25 | from ..model import ParsedItem, PropertyLoadRequest, RasterCollectionMetadata 26 | 27 | # pylint: disable=redefined-builtin,too-many-arguments 28 | 29 | STAC_DATE_FMT = "%Y-%m-%dT%H:%M:%S.%fZ" 30 | STAC_DATE_FMT_SHORT = "%Y-%m-%dT%H:%M:%SZ" 31 | 32 | 33 | def _norm_dates(*args): 34 | valid = [a for a in args if a is not None] 35 | valid = [ 36 | datetime.fromisoformat(dt).replace(tzinfo=timezone.utc) 37 | for dt in xr.DataArray(list(valid)) 38 | .astype("datetime64[ns]") 39 | .dt.strftime("%Y-%m-%dT%H:%M:%S.%f") 40 | .values 41 | ] 42 | valid = iter(valid) 43 | return [next(valid) if a else None for a in args] 44 | 45 | 46 | def b_( 47 | name, 48 | geobox=None, 49 | dtype="int16", 50 | nodata=None, 51 | unit="1", 52 | dims=(), 53 | uri=None, 54 | bidx=1, 55 | prefix="http://example.com/items/", 56 | ): 57 | band_key = norm_key(name) 58 | name, _ = band_key 59 | if uri is None: 60 | uri = f"{prefix}{name}.tif" 61 | meta = RasterBandMetadata(dtype, nodata, unit, dims=dims) 62 | return (band_key, RasterSource(uri, bidx, geobox=geobox, meta=meta)) 63 | 64 | 65 | def mk_parsed_item( 66 | bands, 67 | datetime=None, 68 | *, 69 | start_datetime=None, 70 | end_datetime=None, 71 | id="some-item", 72 | collection="some-collection", 73 | href=None, 74 | geometry=None, 75 | props: dict[str, Any] | None = None, 76 | ) -> ParsedItem: 77 | """ 78 | Construct parsed stac item for testing. 
79 | """ 80 | # pylint: disable=redefined-outer-name, too-many-locals 81 | if isinstance(bands, (list, tuple)): 82 | bands = {norm_key(k): v for k, v in bands} 83 | 84 | gboxes = dicttoolz.valmap(lambda b: b.geobox, bands) 85 | gboxes = dicttoolz.valfilter(lambda x: x is not None, gboxes) 86 | gboxes = dicttoolz.keymap(lambda bk: bk[0], gboxes) 87 | 88 | if len(gboxes) == 0: 89 | band2grid = {b: "default" for b, _ in bands} 90 | geobox = None 91 | else: 92 | grids, band2grid = _group_geoboxes(gboxes) 93 | geobox = grids["default"] 94 | 95 | if geometry is None and geobox is not None: 96 | geometry = geobox.geographic_extent 97 | 98 | aliases = {} 99 | if props is None: 100 | props = {} 101 | 102 | # Handle auxiliary bands from props 103 | prop_user_input = [v[1] if isinstance(v, tuple) else k for k, v in props.items()] 104 | prop_requests = PropertyLoadRequest.from_user_input(prop_user_input) 105 | for idx, prop_req in enumerate(prop_requests): 106 | bk = ("_stac_metadata", idx + 1) 107 | # Look up actual value from props dict using prop_req.key 108 | actual_value = props[prop_req.key] 109 | if isinstance(actual_value, tuple): 110 | actual_value, _ = actual_value 111 | 112 | aux_meta = AuxBandMetadata( 113 | prop_req.dtype, 114 | nodata=prop_req.nodata, 115 | units=prop_req.units, 116 | driver_data=prop_req, 117 | ) 118 | aux_source = AuxDataSource( 119 | uri=f"virtual://{bk[0]}/{bk[1]}", 120 | subdataset=None, 121 | meta=aux_meta, 122 | driver_data=actual_value, 123 | ) 124 | bands[bk] = aux_source 125 | aliases[prop_req.output_name] = [bk] 126 | 127 | collection = RasterCollectionMetadata( 128 | collection, 129 | RasterGroupMetadata( 130 | dicttoolz.valmap(lambda b: b.meta, bands), 131 | aliases=aliases, 132 | ), 133 | has_proj=(geobox is not None), 134 | band2grid=band2grid, 135 | ) 136 | datetime, start_datetime, end_datetime = _norm_dates( 137 | datetime, start_datetime, end_datetime 138 | ) 139 | 140 | return ParsedItem( 141 | id, 142 | collection, 143 | bands, 144 | geometry=geometry, 145 | datetime=datetime, 146 | datetime_range=(start_datetime, end_datetime), 147 | href=href, 148 | ) 149 | 150 | 151 | def _add_proj(gbox: GeoBox, xx) -> None: 152 | proj = ProjectionExtension.ext(xx, add_if_missing=True) 153 | proj.shape = list(gbox.shape.yx) 154 | proj.transform = gbox.transform[:6] 155 | crs = gbox.crs 156 | if crs is not None: 157 | epsg = crs.epsg 158 | if epsg is not None: 159 | proj.epsg = epsg 160 | else: 161 | proj.wkt2 = crs.wkt 162 | 163 | 164 | def _extract_props(item: ParsedItem) -> Generator[tuple[str, Any], None, None]: 165 | for k in item.bands: 166 | if k[0] != "_stac_metadata": 167 | continue 168 | b = item[k] 169 | if b.meta is None or b.meta.driver_data is None: 170 | continue 171 | yield b.meta.driver_data.key, b.driver_data 172 | 173 | 174 | def to_stac_item(item: ParsedItem) -> pystac.item.Item: 175 | gg = item.geometry 176 | 177 | props = {} 178 | for n, dt in zip(["start_datetime", "end_datetime"], item.datetime_range): 179 | if dt is not None: 180 | props[n] = dt.strftime(STAC_DATE_FMT) 181 | 182 | props.update(_extract_props(item)) 183 | 184 | xx = pystac.item.Item( 185 | item.id, 186 | geometry=gg.json if gg is not None else None, 187 | bbox=list(gg.boundingbox.bbox) if gg is not None else None, 188 | datetime=item.datetime, 189 | properties=props, 190 | collection=item.collection.name, 191 | ) 192 | 193 | gboxes = item.geoboxes() 194 | if len(gboxes) > 0: 195 | gbox = gboxes[0] 196 | 197 | ProjectionExtension.add_to(xx) 198 | _add_proj(gbox, xx) 199 | 200 
| def _to_raster_band(src: RasterSource) -> RasterBand: 201 | meta = src.meta 202 | assert meta is not None 203 | return RasterBand.create( 204 | data_type=meta.data_type, # type: ignore[arg-type] 205 | nodata=meta.nodata, 206 | unit=meta.units, 207 | ) 208 | 209 | for asset_name, bands in item.assets().items(): 210 | RasterExtension.add_to(xx) 211 | b = bands[0] # all bands should share the same uri 212 | xx.add_asset( 213 | asset_name, 214 | pystac.asset.Asset(b.uri, media_type="image/tiff", roles=["data"]), 215 | ) 216 | RasterExtension.ext(xx.assets[asset_name]).apply( 217 | list(map(_to_raster_band, bands)) 218 | ) 219 | 220 | for asset_name, asset in xx.assets.items(): 221 | bb = item.bands[(asset_name, 1)] 222 | assert isinstance(bb, RasterSource) 223 | if bb.geobox is not None: 224 | assert isinstance(bb.geobox, GeoBox) 225 | _add_proj(bb.geobox, asset) 226 | 227 | if item.href is not None: 228 | xx.set_self_href(item.href) 229 | 230 | return xx 231 | -------------------------------------------------------------------------------- /odc/stac/bench/_cli.py: -------------------------------------------------------------------------------- 1 | """CLI app for benchmarking.""" 2 | 3 | import json 4 | from datetime import datetime 5 | from time import sleep 6 | from typing import Any, Dict, Optional 7 | 8 | import click 9 | import distributed 10 | import rasterio.enums 11 | 12 | from odc.stac.bench import ( 13 | SAMPLE_SITES, 14 | BenchLoadParams, 15 | dump_site, 16 | load_from_json, 17 | load_results, 18 | run_bench, 19 | ) 20 | 21 | # pylint: disable=too-many-arguments,too-many-locals 22 | 23 | RIO_RESAMPLING_NAMES = [it.name for it in rasterio.enums.Resampling] 24 | 25 | 26 | @click.group("odc-stac-bench") 27 | def main() -> None: 28 | """Benchmarking tool for odc.stac.""" 29 | 30 | 31 | @main.command("prepare") 32 | @click.option("--sample-site", type=str, help="Use one of sample sites") 33 | @click.option( 34 | "--list-sample-sites", 35 | is_flag=True, 36 | default=False, 37 | help="Print available sample sites", 38 | ) 39 | @click.option( 40 | "--from-file", 41 | help="From json config file", 42 | type=click.Path(exists=True, dir_okay=False, readable=True), 43 | ) 44 | @click.option("--overwrite", is_flag=True, help="Overwrite output file") 45 | def prepare(sample_site: str, list_sample_sites: bool, from_file, overwrite): 46 | """Prepare benchmarking dataset.""" 47 | if list_sample_sites: 48 | click.echo("Sample sites:") 49 | for site_name in SAMPLE_SITES: 50 | click.echo(f" {site_name}") 51 | return 52 | 53 | site: Optional[Dict[str, Any]] = None 54 | if sample_site is not None: 55 | site = SAMPLE_SITES.get(sample_site, None) 56 | if site is None: 57 | raise click.ClickException(f"No such site: {sample_site}") 58 | print("Site config:") 59 | print("------------------------------------------") 60 | print(json.dumps(site, indent=2)) 61 | print("------------------------------------------") 62 | elif from_file is not None: 63 | with open(from_file, "rt", encoding="utf8") as src: 64 | site = json.load(src) 65 | 66 | if site is None: 67 | raise click.ClickException("Have to supply one of --sample-site or --from-file") 68 | dump_site(site, overwrite=overwrite) 69 | 70 | 71 | @main.command("dask") 72 | @click.option( 73 | "--n-workers", type=int, default=1, help="Number of workers to launch (1)" 74 | ) 75 | @click.option( 76 | "--threads-per-worker", type=int, help="Number of threads per worker (all cpus)" 77 | ) 78 | @click.option("--memory-limit", type=str, help="Configure worker memory 
limit") 79 | def _dask(n_workers, threads_per_worker, memory_limit) -> None: 80 | """Launch local Dask Cluster.""" 81 | client = distributed.Client( 82 | n_workers=n_workers, 83 | threads_per_worker=threads_per_worker, 84 | memory_limit=memory_limit, 85 | ) 86 | info = client.scheduler_info() 87 | print(f"Launched Dask Cluster: {info['address']}") 88 | print(f" --scheduler='{info['address']}'") 89 | while True: 90 | try: 91 | sleep(1) 92 | except KeyboardInterrupt: 93 | print("Terminating") 94 | client.shutdown() 95 | return 96 | 97 | 98 | @main.command("run") 99 | @click.option( 100 | "--config", 101 | "-c", 102 | type=click.Path(exists=True, dir_okay=False, readable=True), 103 | required=False, 104 | help="Experiment configuration in json format", 105 | ) 106 | @click.option( 107 | "--ntimes", "-n", type=int, default=1, help="Configure number of times to run" 108 | ) 109 | @click.option( 110 | "--method", 111 | help="Data loading method", 112 | type=click.Choice(["odc-stac", "stackstac"]), 113 | ) 114 | @click.option("--bands", type=str, help="Comma separated list of bands") 115 | @click.option("--chunks", type=int, help="Chunk size Y,X order", nargs=2) 116 | @click.option("--resolution", type=float, help="Set output resolution") 117 | @click.option("--crs", type=str, help="Set CRS") 118 | @click.option( 119 | "--resampling", 120 | help="Resampling method when changing resolution/projection", 121 | type=click.Choice(RIO_RESAMPLING_NAMES), 122 | ) 123 | @click.option("--show-config", is_flag=True, help="Show configuration only, don't run") 124 | @click.option( 125 | "--scheduler", default="tcp://localhost:8786", help="Dask server to connect to" 126 | ) 127 | @click.argument("site", type=click.Path(exists=True, dir_okay=False, readable=True)) 128 | def run( 129 | site: str, 130 | config, 131 | method: str, 132 | ntimes: int, 133 | bands: str, 134 | chunks, 135 | resolution: float, 136 | crs: str, 137 | resampling: str, 138 | show_config: bool, 139 | scheduler, 140 | ) -> None: 141 | """ 142 | Run data load benchmark using Dask. 143 | 144 | SITE is a GeoJSON file produced by `prepare` step. 
145 | """ 146 | cfg: Optional[BenchLoadParams] = None 147 | if config is not None: 148 | with open(config, "rt", encoding="utf8") as src: 149 | cfg = BenchLoadParams.from_json(src.read()) 150 | else: 151 | cfg = BenchLoadParams( 152 | method="odc-stac", 153 | chunks=(2048, 2048), 154 | extra={ 155 | "stackstac": {"dtype": "uint16", "fill_value": 0}, 156 | "odc-stac": { 157 | "groupby": "solar_day", 158 | "stac_cfg": {"*": {"warnings": "ignore"}}, 159 | }, 160 | }, 161 | ) 162 | 163 | if chunks: 164 | cfg.chunks = chunks 165 | if method is not None: 166 | cfg.method = method 167 | if bands is not None: 168 | cfg.bands = tuple(bands.split(",")) 169 | if resolution is not None: 170 | cfg.resolution = resolution 171 | if crs is not None: 172 | cfg.crs = crs 173 | if resampling is not None: 174 | cfg.resampling = resampling 175 | if not cfg.scenario: 176 | cfg.scenario = site.rsplit(".", 1)[0] 177 | 178 | with open(site, "rt", encoding="utf8") as src: 179 | site_geojson = json.load(src) 180 | 181 | print(f"Loaded: {len(site_geojson['features'])} STAC items from '{site}'") 182 | 183 | print("Will use following load configuration") 184 | print("-" * 60) 185 | print(cfg.to_json(indent=2)) 186 | print("-" * 60) 187 | 188 | if show_config: 189 | return 190 | 191 | print(f"Connecting to Dask Scheduler: {scheduler}") 192 | client = distributed.Client(scheduler) 193 | 194 | print("Constructing Dask graph") 195 | xx = load_from_json(site_geojson, cfg) 196 | print(f"Starting benchmark run ({ntimes} runs)") 197 | print("=" * 60) 198 | 199 | ts = datetime.now().strftime("%Y%m%dT%H%M%S.%f") 200 | results_file = f"{cfg.scenario}_{ts}.pkl" 201 | print(f"Will write results to: {results_file}") 202 | _ = run_bench(xx, client, ntimes=ntimes, results_file=results_file) 203 | print("=" * 60) 204 | print("Finished") 205 | 206 | 207 | @main.command("report") 208 | @click.option( 209 | "--matching", type=str, help="Supply glob pattern instead of individual .pkl files" 210 | ) 211 | @click.option( 212 | "--output", 213 | type=str, 214 | help="File to write CSV data, if not supplied will write to stdout", 215 | ) 216 | @click.argument( 217 | "pkls", type=click.Path(exists=True, dir_okay=False, readable=True), nargs=-1 218 | ) 219 | def report(matching: str, output: str, pkls) -> None: 220 | """ 221 | Collate results of multiple benchmark experiments. 222 | 223 | Read pickle files produced by the `run` command and assemble 224 | them into one CSV file. 
225 | """ 226 | if matching is not None: 227 | data_raw = load_results(matching) 228 | else: 229 | data_raw = load_results(pkls) 230 | 231 | if output is None: 232 | print(data_raw.to_csv()) 233 | else: 234 | data_raw.to_csv(output) 235 | -------------------------------------------------------------------------------- /tests/test_load.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-module-docstring,redefined-builtin 2 | from unittest.mock import MagicMock 3 | 4 | import pystac 5 | import pystac.item 6 | import pytest 7 | import shapely.geometry 8 | from dask.utils import ndeepmap 9 | from odc.geo.geobox import GeoBox 10 | from odc.geo.xr import ODCExtension 11 | from odc.loader import resolve_load_cfg 12 | from odc.loader.types import RasterBandMetadata 13 | 14 | from odc.stac import RasterLoadParams 15 | from odc.stac import load as stac_load 16 | from odc.stac._stac_load import _group_items 17 | from odc.stac.testing.stac import b_, mk_parsed_item, to_stac_item 18 | 19 | 20 | def test_stac_load_smoketest( 21 | sentinel_stac_ms_with_raster_ext: pystac.item.Item, 22 | ) -> None: 23 | item = sentinel_stac_ms_with_raster_ext.clone() 24 | 25 | params = {"crs": "EPSG:3857", "resolution": 100, "align": 0, "chunks": {}} 26 | xx = stac_load([item], "B02", **params) # type: ignore 27 | 28 | assert isinstance(xx.B02.odc, ODCExtension) 29 | assert xx.B02.shape[0] == 1 30 | assert xx.B02.odc.geobox is not None 31 | assert xx.B02.odc.geobox.crs == "EPSG:3857" 32 | assert xx.time.dtype == "datetime64[ns]" 33 | 34 | # Test dc.load name for bands, and alias support 35 | xx = stac_load([item], measurements=["red", "green"], **params) # type: ignore 36 | 37 | assert "red" in xx.data_vars 38 | assert "green" in xx.data_vars 39 | assert xx.red.shape == xx.green.shape 40 | 41 | # Test dc.load name for bands, and alias support 42 | patch_url = MagicMock(return_value="https://example.com/f.tif") 43 | xx = stac_load( 44 | [item], 45 | measurements=["red", "green"], 46 | patch_url=patch_url, 47 | **params, # type: ignore 48 | ) 49 | assert isinstance(xx.odc, ODCExtension) 50 | 51 | # expect patch_url to be called 2 times, 1 for red and 1 for green band 52 | assert patch_url.call_count == 2 53 | 54 | patch_url = MagicMock(return_value="https://example.com/f.tif") 55 | zz = stac_load( 56 | [item], 57 | patch_url=patch_url, 58 | stac_cfg={"*": {"warnings": "ignore"}}, 59 | **params, # type: ignore 60 | ) 61 | assert patch_url.call_count == len(zz.data_vars) 62 | 63 | yy = stac_load( 64 | [item], ["nir"], like=xx, chunks={}, stac_cfg={"*": {"warnings": "ignore"}} 65 | ) 66 | assert yy.odc.geobox == xx.odc.geobox 67 | assert isinstance(xx.odc.geobox, GeoBox) 68 | 69 | yy = stac_load( 70 | [item], 71 | ["nir"], 72 | geobox=xx.odc.geobox, 73 | chunks={}, 74 | stac_cfg={"*": {"warnings": "ignore"}}, 75 | ) 76 | assert yy.odc.geobox == xx.odc.geobox 77 | assert yy.odc.geobox == yy.nir.odc.geobox 78 | 79 | # Check automatic CRS/resolution 80 | yy = stac_load( 81 | [item], 82 | ["nir", "coastal"], 83 | chunks={}, 84 | stac_cfg={"*": {"warnings": "ignore"}}, 85 | ) 86 | assert yy.odc.geobox.crs == "EPSG:32606" 87 | assert yy.odc.geobox.resolution.yx == (-10, 10) 88 | 89 | # test bbox overlaping with lon/lat 90 | with pytest.raises(ValueError): 91 | stac_load([item], ["nir"], bbox=(0, 0, 1, 1), lon=(0, 1), lat=(0, 1), chunks={}) 92 | 93 | # test bbox overlaping with x/y 94 | with pytest.raises(ValueError): 95 | stac_load( 96 | [item], 97 | ["nir"], 98 | 
bbox=(0, 0, 1, 1), 99 | x=(0, 1000), 100 | y=(0, 1000), 101 | chunks={}, 102 | ) 103 | 104 | bbox = (0, 0, 1, 1) 105 | x1, y1, x2, y2 = bbox 106 | 107 | assert ( 108 | stac_load( 109 | [item], 110 | ["nir"], 111 | crs="epsg:3857", 112 | resolution=10, 113 | chunks={}, 114 | lon=(x1, x2), 115 | lat=(y1, y2), 116 | ).nir.odc.geobox 117 | == stac_load( 118 | [item], 119 | ["nir"], 120 | crs="epsg:3857", 121 | resolution=10, 122 | chunks={}, 123 | bbox=bbox, 124 | ).nir.odc.geobox 125 | ) 126 | 127 | geopolygon = shapely.geometry.box(*bbox) 128 | assert ( 129 | stac_load( 130 | [item], 131 | ["nir"], 132 | crs="epsg:3857", 133 | resolution=10, 134 | chunks={}, 135 | lon=(x1, x2), 136 | lat=(y1, y2), 137 | ).nir.odc.geobox 138 | == stac_load( 139 | [item], 140 | ["nir"], 141 | crs="epsg:3857", 142 | resolution=10, 143 | chunks={}, 144 | geopolygon=geopolygon, 145 | ).nir.odc.geobox 146 | ) 147 | 148 | 149 | def test_group_items() -> None: 150 | def _mk(id: str, lon: float, datetime: str): 151 | gbox = GeoBox.from_bbox((lon - 0.1, 0, lon + 0.1, 1), shape=(100, 100)) 152 | return mk_parsed_item([b_("b1", gbox)], datetime=datetime, id=id) 153 | 154 | # check no-op case first 155 | assert _group_items([], [], "time") == [] 156 | assert _group_items([], [], "id") == [] 157 | assert _group_items([], [], "solar_day") == [] 158 | 159 | aa = _mk("a", 15 * 10, "2020-01-02T23:59Z") 160 | b1 = _mk("b1", 15 * 10 + 1, "2020-01-03T00:01Z") 161 | b2 = _mk("b2", 15 * 10 + 2, "2020-01-03T00:01Z") 162 | cc = _mk("c", 0, "2020-01-02T23:59Z") 163 | 164 | def _t(items, groupby, expect, lon=None, preserve_original_order=False) -> None: 165 | stac_items = [to_stac_item(item) for item in items] 166 | rr = ndeepmap( 167 | 2, 168 | lambda idx: items[idx], 169 | _group_items( 170 | stac_items, 171 | items, 172 | groupby, 173 | lon=lon, 174 | preserve_original_order=preserve_original_order, 175 | ), 176 | ) 177 | _expect = ndeepmap(2, lambda item: item.id, expect) 178 | _got = ndeepmap(2, lambda item: item.id, rr) 179 | 180 | assert _expect == _got 181 | 182 | # same order as input 183 | _t([aa, b1, b2], "id", [[aa], [b1], [b2]]) 184 | _t([aa, b2, b1], "id", [[aa], [b2], [b1]]) 185 | _t([b1, aa, b2], "id", [[b1], [aa], [b2]]) 186 | _t([cc, aa, b1, b2], "id", [[cc], [aa], [b1], [b2]]) 187 | 188 | _t([aa, b1, b2], "time", [[aa], [b1, b2]]) 189 | _t([b1, aa, b2], "time", [[aa], [b1, b2]]) 190 | 191 | # order within group is preserved 192 | _t([b2, aa, b1], "time", [[aa], [b2, b1]], preserve_original_order=True) 193 | _t([aa, cc, b1, b2], "time", [[aa, cc], [b1, b2]], preserve_original_order=True) 194 | 195 | _t([aa, b1, b2], "solar_day", [[aa, b1, b2]]) 196 | _t([b1, aa, b2], "solar_day", [[aa, b1, b2]]) 197 | _t([b2, aa, b1], "solar_day", [[aa, b1, b2]]) 198 | _t([aa, b1, b2, cc], "solar_day", [[cc], [aa, b1, b2]]) 199 | 200 | _t([aa, b1, b2, cc], "solar_day", [[aa, cc, b1, b2]], lon=150 + 1) 201 | 202 | # property based 203 | _t([aa, b1], "proj:epsg", [[aa, b1]]) 204 | _t([b1, aa], "proj:epsg", [[aa, b1]]) 205 | _t([aa, b1], "proj:transform", [[aa], [b1]]) 206 | 207 | # custom callback 208 | _t( 209 | [aa, b1, b2, cc], 210 | lambda item, parsed, idx: idx % 2, 211 | [[aa, b2], [b1, cc]], 212 | preserve_original_order=True, 213 | ) 214 | 215 | 216 | def test_resolve_load_cfg() -> None: 217 | rlp = RasterLoadParams 218 | assert resolve_load_cfg({}) == {} 219 | 220 | item = mk_parsed_item( 221 | [ 222 | b_("a", dtype="int8", nodata=-1), 223 | b_("b", dtype="float64", dims=("y", "x", "b")), 224 | ] 225 | ) 226 | 227 | assert 
set(item.collection) == set([("a", 1), ("b", 1)]) 228 | assert item.collection["a"].data_type == "int8" 229 | assert item.collection["b"].data_type == "float64" 230 | 231 | _bands = {n: b for (n, _), b in item.collection.bands.items()} 232 | assert isinstance(_bands["a"], RasterBandMetadata) 233 | assert isinstance(_bands["b"], RasterBandMetadata) 234 | 235 | cfg = resolve_load_cfg(_bands, resampling="average") 236 | assert cfg["a"] == rlp("int8", -1, resampling="average", meta=_bands["a"]) 237 | assert cfg["b"] == rlp( 238 | "float64", None, resampling="average", dims=("y", "x", "b"), meta=_bands["b"] 239 | ) 240 | 241 | cfg = resolve_load_cfg( 242 | _bands, 243 | resampling={"*": "mode", "b": "sum"}, 244 | nodata=-999, 245 | dtype="int64", 246 | ) 247 | assert cfg["a"] == rlp("int64", -999, resampling="mode", meta=_bands["a"]) 248 | assert cfg["b"] == rlp( 249 | "int64", -999, resampling="sum", dims=("y", "x", "b"), meta=_bands["b"] 250 | ) 251 | 252 | cfg = resolve_load_cfg( 253 | _bands, 254 | dtype={"a": "float32"}, 255 | ) 256 | assert cfg["a"] == rlp("float32", -1, meta=_bands["a"]) 257 | assert cfg["b"] == rlp("float64", None, dims=_bands["b"].dims, meta=_bands["b"]) 258 | -------------------------------------------------------------------------------- /docs/benchmarking.rst: -------------------------------------------------------------------------------- 1 | Benchmarking Utilities 2 | ###################### 3 | 4 | Module :py:mod:`odc.stac.bench` provides utilities for benchmarking data loading. It is both a 5 | library that can be used directly from a notebook and a command line application. 6 | 7 | .. code-block:: none 8 | 9 | Usage: python -m odc.stac.bench [OPTIONS] COMMAND [ARGS]... 10 | 11 | Benchmarking tool for odc.stac. 12 | 13 | Options: 14 | --help Show this message and exit. 15 | 16 | Commands: 17 | dask Launch local Dask Cluster. 18 | prepare Prepare benchmarking dataset. 19 | report Collate results of multiple benchmark experiments. 20 | run Run data load benchmark using Dask. 21 | 22 | 23 | Define Test Site 24 | ================ 25 | 26 | To start, define a test site or use one of the pre-configured examples. Site 27 | configuration is a JSON file that describes a STAC API query and some other metadata. Below is the 28 | definition of the ``s2-ms-mosaic`` sample site. 29 | 30 | .. code-block:: json 31 | 32 | { 33 | "file_id": "s2-ms-mosaic_2020-06-06--P1D", 34 | "api": "https://planetarycomputer.microsoft.com/api/stac/v1", 35 | "search": { 36 | "collections": ["sentinel-2-l2a"], 37 | "datetime": "2020-06-06", 38 | "bbox": [ 27.345815, -14.98724, 27.565542, -7.710992] 39 | } 40 | } 41 | 42 | This queries the Planetary Computer STAC API endpoint for the Sentinel-2 collection and stores the 43 | results in a GeoJSON file, ``{file_id}.geojson``. Try it now: 44 | 45 | .. code-block:: bash 46 | 47 | python -m odc.stac.bench prepare --sample-site s2-ms-mosaic 48 | 49 | The command above writes a GeoJSON file to your current directory. We will use this file to run 50 | benchmarks later on.
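The same step can also be driven from Python. Below is a minimal sketch using
:py:data:`~odc.stac.bench.SAMPLE_SITES` and :py:func:`~odc.stac.bench.dump_site`, the same calls
the ``prepare`` command makes under the hood:

.. code-block:: python3

   from odc.stac.bench import SAMPLE_SITES, dump_site

   # Same site definition as used by ``--sample-site s2-ms-mosaic``
   site = SAMPLE_SITES["s2-ms-mosaic"]

   # Run the STAC API query and write ``{file_id}.geojson``
   # to the current directory, same as the CLI does
   dump_site(site, overwrite=True)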
51 | 52 | 53 | Prepare Load Configuration 54 | ========================== 55 | 56 | Let's create a base data-loading configuration file suitable for running benchmarks with the site 57 | configuration produced previously. Save the example below as ``cfg.json``. 58 | 59 | .. code-block:: json 60 | 61 | { 62 | "method": "odc-stac", 63 | "bands": ["B02", "B03", "B04"], 64 | "patch_url": "planetary_computer.sas.sign", 65 | "extra": { 66 | "stackstac": { 67 | "dtype": "uint16", 68 | "fill_value": 0 69 | }, 70 | "odc-stac": { 71 | "groupby": "solar_day", 72 | "stac_cfg": {"*": {"warnings": "ignore"}} 73 | } 74 | } 75 | } 76 | 77 | Making your own is simple: 78 | 79 | 1. Create a :py:class:`~odc.stac.bench.BenchLoadParams` object 80 | 2. Modify configuration options to match your needs 81 | 3. Dump it to JSON 82 | 83 | .. code-block:: python3 84 | 85 | from odc.stac.bench import BenchLoadParams 86 | 87 | params = BenchLoadParams() 88 | params.scenario = "web-zoom-8" 89 | params.bands = ["red", "green", "blue"] 90 | params.crs = "EPSG:3857" 91 | params.resolution = 610 92 | params.chunks = (512, 512) 93 | params.resampling = "bilinear" 94 | 95 | print(params.to_json()) 96 | 97 | 98 | Start Dask Cluster 99 | ================== 100 | 101 | Before we can run the benchmark we need an active Dask cluster. You can connect to a remote 102 | cluster or run a local one. A convenient local Dask cluster launcher is provided; in a separate 103 | shell run this command: 104 | 105 | .. code-block:: none 106 | 107 | > python -m odc.stac.bench dask --memory-limit=8GiB 108 | 109 | GDAL_DISABLE_READDIR_ON_OPEN = EMPTY_DIR 110 | GDAL_HTTP_MAX_RETRY = 10 111 | GDAL_HTTP_RETRY_DELAY = 0.5 112 | GDAL_DATA = /srv/conda/envs/notebook/share/gdal 113 | Launched Dask Cluster: tcp://127.0.0.1:43677 114 | --scheduler='tcp://127.0.0.1:43677' 115 | 116 | This will start a local Dask cluster, configure GDAL on the Dask workers, and print out the address 117 | of the Dask scheduler. Leave this running and take note of the ``--scheduler=...`` option that was 118 | printed; we will use it in the next step.
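The launcher is a thin wrapper around :py:class:`distributed.Client`. A rough sketch of the
equivalent Python setup (the GDAL configuration of the workers is omitted here), should you prefer
to manage the cluster from a notebook:

.. code-block:: python3

   import distributed

   # Roughly what ``python -m odc.stac.bench dask --memory-limit=8GiB`` does,
   # minus the GDAL environment setup on the workers
   client = distributed.Client(n_workers=1, memory_limit="8GiB")

   # This is the value to pass via ``--scheduler=`` in the next step
   print(client.scheduler_info()["address"])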
119 | 120 | 121 | Run Benchmark 122 | ============= 123 | 124 | We are now ready to run benchmarks with the ``run`` command documented below: 125 | 126 | .. code-block:: none 127 | 128 | Usage: python -m odc.stac.bench run [OPTIONS] SITE 129 | 130 | Run data load benchmark using Dask. 131 | 132 | SITE is a GeoJSON file produced by `prepare` step. 133 | 134 | Options: 135 | -c, --config FILE Experiment configuration in json format 136 | -n, --ntimes INTEGER Configure number of times to run 137 | --method [odc-stac|stackstac] Data loading method 138 | --bands TEXT Comma separated list of bands 139 | --chunks INTEGER... Chunk size Y,X order 140 | --resolution FLOAT Set output resolution 141 | --crs TEXT Set CRS 142 | --resampling [nearest|bilinear|cubic|cubic_spline|lanczos|average|mode|gauss|max|min|med|q1|q3|sum|rms] 143 | Resampling method when changing 144 | resolution/projection 145 | --show-config Show configuration only, don't run 146 | --scheduler TEXT Dask server to connect to 147 | --help Show this message and exit. 148 | 149 | 150 | First let's check the configuration; note that we run at reduced resolution for quicker 151 | turnaround (the ``--resolution=80`` option). Command-line arguments take precedence over 152 | configuration parameters supplied in the JSON file. 153 | 154 | .. code-block:: bash 155 | 156 | python -m odc.stac.bench run \ 157 | s2-ms-mosaic_2020-06-06--P1D.geojson \ 158 | --config cfg.json \ 159 | --resolution=80 \ 160 | --show-config 161 | 162 | If the above went well we can start the benchmark: remove the ``--show-config`` option and add the 163 | ``--scheduler=`` option that was printed when we started the Dask cluster. Let's also configure the 164 | number of benchmarking passes with the ``-n 10`` option. 165 | 166 | .. code-block:: bash 167 | 168 | python -m odc.stac.bench run \ 169 | s2-ms-mosaic_2020-06-06--P1D.geojson \ 170 | --config cfg.json \ 171 | --resolution=80 \ 172 | -n 10 \ 173 | --scheduler='tcp://127.0.0.1:43677' 174 | 175 | 176 | .. note:: 177 | 178 | Don't forget to edit the ``--scheduler=`` part of the above command. 179 | 180 | This first prints out the configuration that will be used, 181 | 182 | .. code-block:: none 183 | 184 | Loaded: 9 STAC items from 's2-ms-mosaic_2020-06-06--P1D.geojson' 185 | Will use following load configuration 186 | ------------------------------------------------------------ 187 | { /** NOTE: this section was edited for brevity **/ 188 | "scenario": "s2-ms-mosaic_2020-06-06--P1D", 189 | "method": "odc-stac", 190 | "chunks": [ 2048, 2048 ], 191 | "bands": [ "B02", "B03", "B04" ], 192 | "resolution": 80.0, 193 | "crs": null, 194 | "resampling": null, 195 | "patch_url": "planetary_computer.sas.sign", 196 | "extra": { 197 | "stackstac": { "dtype": "uint16", "fill_value": 0 }, 198 | "odc-stac": { "groupby": "solar_day", "stac_cfg": {"*": {"warnings": "ignore" }}} 199 | } 200 | } 201 | ------------------------------------------------------------ 202 | 203 | 204 | followed by information about the data being loaded and some stats about the Dask cluster on which 205 | the benchmark will run: 206 | 207 | .. code-block:: none 208 | 209 | Connecting to Dask Scheduler: tcp://127.0.0.1:43677 210 | Constructing Dask graph 211 | Starting benchmark run (10 runs) 212 | ============================================================ 213 | Will write results to: s2-ms-mosaic_2020-06-06--P1D_20220104T080235.133458.pkl 214 | method : odc-stac 215 | Scenario : s2-ms-mosaic_2020-06-06--P1D 216 | T.slice : 2020-06-06 217 | Data : 1.3.11373.1374.uint16, 89.42 MiB 218 | Chunks : 1.1.2048.1374 (T.B.Y.X) 219 | GEO : epsg:32735 220 | | 80, 0, 499920| 221 | | 0,-80, 9200080| 222 | Cluster : 1 workers, 4 threads, 8.00 GiB 223 | ------------------------------------------------------------ 224 | 225 | As benchmark runs complete, brief summaries are printed: 226 | 227 | .. code-block:: none 228 | 229 | T.Elapsed : 2.845 seconds 230 | T.Submit : 0.228 seconds 231 | Throughput : 16.480 Mpx/second (overall) 232 | | 4.120 Mpx/second (per thread) 233 | ------------------------------------------------------------ 234 | T.Elapsed : 2.448 seconds 235 | T.Submit : 0.015 seconds 236 | Throughput : 19.152 Mpx/second (overall) 237 | | 4.788 Mpx/second (per thread) 238 | ... continues 239 | 240 | You can terminate early without losing data with ``Ctrl-C``: benchmark results are saved after each 241 | benchmark pass (overwriting the previous save-point) in case there is a crash or some other fatal 242 | error. 243 | 244 | 245 | Review Results 246 | ============== 247 | 248 | To convert benchmark results stored in ``.pkl`` file(s) to CSV use the following: 249 | 250 | .. code-block:: bash 251 | 252 | python -m odc.stac.bench report *.pkl --output results.csv 253 | 254 | The idea is to run benchmarks with different load configurations, for example different chunk 255 | sizes, or to compare the relative costs of resampling modes, and then combine the results into one data table.
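The ``run`` and ``report`` steps are also available as library calls, which is handy inside a
notebook. A minimal sketch, assuming the site and config files from the previous steps, a running
Dask cluster on the default address, and an arbitrary output file name:

.. code-block:: python3

   import json

   import distributed

   from odc.stac.bench import BenchLoadParams, load_from_json, load_results, run_bench

   client = distributed.Client("tcp://localhost:8786")

   with open("cfg.json", "rt", encoding="utf8") as src:
       cfg = BenchLoadParams.from_json(src.read())

   with open("s2-ms-mosaic_2020-06-06--P1D.geojson", "rt", encoding="utf8") as src:
       site = json.load(src)

   xx = load_from_json(site, cfg)  # construct the Dask graph
   ctx, timings = run_bench(xx, client, ntimes=10, results_file="bench-results.pkl")

   # Same as ``report``: collate pickled results into one CSV table
   load_results("bench-results.pkl").to_csv("results.csv")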
256 | -------------------------------------------------------------------------------- /tests/test_bench.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=wrong-import-order,wrong-import-position, 2 | # pylint: disable=redefined-outer-name,missing-function-docstring,missing-module-docstring 3 | import pytest 4 | 5 | distributed = pytest.importorskip("distributed") 6 | 7 | from unittest.mock import MagicMock 8 | 9 | import xarray 10 | import numpy as np 11 | from distributed import Client 12 | from odc.geo.xr import ODCExtension 13 | 14 | from odc.stac.bench import ( 15 | BenchLoadParams, 16 | collect_context_info, 17 | load_from_json, 18 | run_bench, 19 | ) 20 | 21 | CFG = { 22 | "*": { 23 | "warnings": "ignore", 24 | # for every asset in every product default to uint16 with nodata=0 25 | "assets": {"*": {"data_type": "uint16", "nodata": 0}}, 26 | } 27 | } 28 | 29 | 30 | @pytest.fixture 31 | def fake_dask_client(monkeypatch): 32 | cc = MagicMock() 33 | cc.scheduler_info.return_value = { 34 | "type": "Scheduler", 35 | "id": "Scheduler-80d943db-16f6-4476-a51a-64d57a287e9b", 36 | "address": "inproc://10.10.10.10/1281505/1", 37 | "services": {"dashboard": 8787}, 38 | "started": 1638320006.6135786, 39 | "workers": { 40 | "inproc://10.10.10.10/1281505/4": { 41 | "type": "Worker", 42 | "id": 0, 43 | "host": "10.1.1.140", 44 | "resources": {}, 45 | "local_directory": "/tmp/dask-worker-space/worker-uhq1b9bh", 46 | "name": 0, 47 | "nthreads": 2, 48 | "memory_limit": 524288000, 49 | "last_seen": 1638320007.2504623, 50 | "services": {"dashboard": 38439}, 51 | "metrics": { 52 | "executing": 0, 53 | "in_memory": 0, 54 | "ready": 0, 55 | "in_flight": 0, 56 | "bandwidth": {"total": 100000000, "workers": {}, "types": {}}, 57 | "spilled_nbytes": 0, 58 | "cpu": 0.0, 59 | "memory": 145129472, 60 | "time": 1638320007.2390554, 61 | "read_bytes": 0.0, 62 | "write_bytes": 0.0, 63 | "read_bytes_disk": 0.0, 64 | "write_bytes_disk": 0.0, 65 | "num_fds": 82, 66 | }, 67 | "nanny": None, 68 | } 69 | }, 70 | } 71 | cc.cancel.return_value = None 72 | cc.restart.return_value = cc 73 | cc.persist = lambda x: x 74 | cc.compute = lambda x: x 75 | 76 | monkeypatch.setattr(distributed, "wait", MagicMock()) 77 | yield cc 78 | 79 | 80 | @pytest.fixture(scope="module") 81 | def dask_client(): 82 | client = Client( 83 | n_workers=1, 84 | threads_per_worker=2, 85 | memory_limit="500MiB", 86 | local_directory="/tmp/", 87 | memory_target_fraction=False, 88 | memory_spill_fraction=False, 89 | memory_pause_fraction=False, 90 | dashboard_address=None, 91 | processes=False, 92 | ) 93 | yield client 94 | client.shutdown() 95 | del client 96 | 97 | 98 | @pytest.mark.skipif( 99 | not pytest.importorskip("stackstac"), reason="stackstac not installed" 100 | ) 101 | def test_load_from_json_stackstac(fake_dask_client, bench_site1, bench_site2) -> None: 102 | dask_client = fake_dask_client 103 | params = BenchLoadParams( 104 | scenario="test1", 105 | method="stackstac", 106 | bands=("B04", "B02", "B03"), 107 | chunks=(2048, 2048), 108 | resampling="nearest", 109 | extra={ 110 | "odc-stac": {"groupby": "solar_day", "stac_cfg": CFG}, 111 | "stackstac": { 112 | "dtype": "uint16", 113 | "fill_value": np.uint16(0), 114 | "rescale": False, 115 | }, 116 | }, 117 | ) 118 | xx = load_from_json(bench_site1, params) 119 | assert "band" in xx.dims 120 | assert xx.shape == (1, 3, 90978, 10980) 121 | assert xx.dtype == "uint16" 122 | assert xx.spec.epsg == 32735 123 | 124 | yy = load_from_json( 125 | 
bench_site1, params.with_method("odc-stac"), geobox=xx.odc.geobox 126 | ) 127 | 128 | rrx = collect_context_info(dask_client, xx) 129 | rry = collect_context_info(dask_client, yy) 130 | assert rrx.shape == rry.shape 131 | assert rrx == rry 132 | 133 | xx = load_from_json(bench_site2, params) 134 | assert "band" in xx.dims 135 | assert xx.dtype == "uint16" 136 | assert xx.spec.epsg == 32735 137 | 138 | params.crs = "epsg:32736" 139 | xx = load_from_json(bench_site2, params) 140 | assert "band" in xx.dims 141 | assert xx.dtype == "uint16" 142 | assert xx.spec.epsg == 32736 143 | 144 | with pytest.raises(ValueError): 145 | load_from_json(bench_site1, params.with_method("wroNg")) 146 | 147 | 148 | def test_bench_context(fake_dask_client, bench_site1, bench_site2) -> None: 149 | params = BenchLoadParams( 150 | scenario="test1", 151 | method="odc-stac", 152 | bands=("red", "green", "blue"), 153 | chunks=(2048, 2048), 154 | extra={"odc-stac": {"groupby": "solar_day", "stac_cfg": CFG}}, 155 | ) 156 | xx = load_from_json(bench_site1, params) 157 | nt, ny, nx = xx.red.shape 158 | nb = len(xx.data_vars) 159 | 160 | # Check normal case Dataset, with time coords 161 | rr = collect_context_info( 162 | fake_dask_client, xx, method=params.method, scenario="site1" 163 | ) 164 | assert isinstance(xx.odc, ODCExtension) 165 | assert rr.shape == (nt, nb, ny, nx) 166 | assert rr.chunks == (1, 1, 2048, 2048) 167 | assert rr.crs == f"epsg:{xx.odc.geobox.crs.epsg}" 168 | assert rr.crs == xx.odc.geobox.crs 169 | assert rr.nthreads == 2 170 | assert rr.total_ram == 500 * (1 << 20) 171 | 172 | header_txt = rr.render_txt() 173 | assert "T.slice : 2020-06-06" in header_txt 174 | assert f"Data : 1.3.{ny}.{nx}.uint16, 5.58 GiB" in header_txt 175 | 176 | run_txt = rr.render_timing_info((0, 0.1, 30)) 177 | assert isinstance(run_txt, str) 178 | 179 | pd_dict = rr.to_pandas_dict() 180 | assert pd_dict["resolution"] == rr.resolution 181 | assert pd_dict["data"] == f"1.3.{ny}.{nx}.uint16" 182 | assert pd_dict["chunks_x"] == 2048 183 | assert pd_dict["chunks_y"] == 2048 184 | 185 | # Check DataArray case 186 | rr = collect_context_info( 187 | fake_dask_client, xx.red, method="odc-stac", scenario="site1" 188 | ) 189 | assert rr.shape == (nt, 1, ny, nx) 190 | assert rr.crs == xx.odc.geobox.crs 191 | 192 | # Check Dataset with 0 dimension time axis and extras field 193 | rr = collect_context_info( 194 | fake_dask_client, 195 | xx.isel(time=0), 196 | method=params.method, 197 | scenario=params.scenario, 198 | extras={"custom": 2}, 199 | ) 200 | assert rr.extras == {"custom": 2} 201 | assert rr.shape == (1, nb, ny, nx) 202 | 203 | header_txt = rr.render_txt() 204 | assert "GEO : epsg:32735" in header_txt 205 | assert "T.slice : 2020-06-06" in header_txt 206 | 207 | # Check no time info at all 208 | rr = collect_context_info( 209 | fake_dask_client, 210 | xx.isel(time=0, drop=True), 211 | method=params.method, 212 | scenario=params.scenario, 213 | ) 214 | assert rr.shape == (nt, nb, ny, nx) 215 | assert rr.dtype == xx.red.dtype 216 | assert rr.temporal_id == "-" 217 | 218 | # Check wrong type 219 | with pytest.raises(ValueError): 220 | collect_context_info(fake_dask_client, "wrong input type") # type: ignore 221 | 222 | # Check multi-time axis 223 | xx = load_from_json(bench_site2, params) 224 | nt, ny, nx = xx.red.shape 225 | nb = len(xx.data_vars) 226 | 227 | assert nt > 1 228 | 229 | rr = collect_context_info( 230 | fake_dask_client, 231 | xx, 232 | method=params.method, 233 | scenario=params.scenario, 234 | ) 235 | assert 
rr.shape == (nt, nb, ny, nx) 236 | assert rr.temporal_id == "2020-06-01__2020-07-31" 237 | 238 | # Check missing GEO info 239 | no_geo = _strip_geo(xx.red) 240 | assert no_geo.odc.geobox is None or no_geo.odc.geobox.crs is None 241 | with pytest.raises(ValueError): 242 | # no geobox 243 | collect_context_info(fake_dask_client, no_geo) 244 | 245 | 246 | def _strip_geo(xx: xarray.DataArray) -> xarray.DataArray: 247 | no_geo = xx.drop_vars("spatial_ref") 248 | no_geo.attrs.pop("crs", None) 249 | no_geo.attrs.pop("grid_mapping", None) 250 | no_geo.encoding.pop("grid_mapping", None) 251 | no_geo.x.attrs.pop("crs", None) 252 | no_geo.y.attrs.pop("crs", None) 253 | # get rid of cached geobox 254 | no_geo = xarray.DataArray( 255 | no_geo.data, 256 | coords=no_geo.coords, 257 | dims=no_geo.dims, 258 | attrs=no_geo.attrs, 259 | ) 260 | assert no_geo.odc.geobox is None or no_geo.odc.geobox.crs is None 261 | return no_geo 262 | 263 | 264 | def test_run_bench(fake_dask_client, bench_site1, capsys) -> None: 265 | dask_client = fake_dask_client 266 | params = BenchLoadParams( 267 | scenario="test1", 268 | method="odc-stac", 269 | bands=("red", "green", "blue"), 270 | chunks=(2048, 2048), 271 | extra={"odc-stac": {"groupby": "solar_day", "stac_cfg": CFG}}, 272 | ) 273 | xx = load_from_json(bench_site1, params) 274 | 275 | rr, timing = run_bench(xx, dask_client, 10) 276 | 277 | assert rr.scenario == params.scenario 278 | assert rr.method == params.method 279 | assert len(timing) == 10 280 | _io = capsys.readouterr() 281 | assert len(_io.out) > 0 282 | 283 | 284 | def test_bench_params_json() -> None: 285 | params = BenchLoadParams( 286 | scenario="test1", 287 | method="odc-stac", 288 | bands=("red", "green", "blue"), 289 | chunks=(100, 200), 290 | extra={"odc-stac": {"groupby": "solar_day", "stac_cfg": CFG}}, 291 | ) 292 | 293 | assert params == BenchLoadParams.from_json(params.to_json()) 294 | assert params.to_json() == BenchLoadParams.from_json(params.to_json()).to_json() 295 | 296 | # function should round-trip too 297 | params.patch_url = load_from_json 298 | assert params == BenchLoadParams.from_json(params.to_json()) 299 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /tests/test_model.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=redefined-outer-name,missing-module-docstring,missing-function-docstring 2 | # pylint: disable=import-outside-toplevel 3 | import datetime as dt 4 | 5 | import pytest 6 | from dask.base import tokenize 7 | from odc.geo.geobox import GeoBox 8 | from odc.loader.types import ( 9 | AuxDataSource, 10 | BandKey, 11 | RasterBandMetadata, 12 | RasterGroupMetadata, 13 | RasterLoadParams, 14 | RasterSource, 15 | norm_key, 16 | ) 17 | 18 | from odc.stac import ParsedItem, RasterCollectionMetadata 19 | from odc.stac.model import PropertyLoadRequest 20 | from odc.stac.testing.stac import b_, mk_parsed_item 21 | 22 | 23 | def test_band_load_info() -> None: 24 | meta = RasterBandMetadata(data_type="uint16", nodata=13) 25 | band = RasterSource("https://example.com/some.tif", meta=meta) 26 | assert RasterLoadParams.same_as(meta).dtype == "uint16" 27 | assert RasterLoadParams.same_as(band).fill_value == 13 28 | 29 | band = RasterSource("file:///") 30 | assert RasterLoadParams.same_as(band).dtype == "float32" 31 | assert RasterLoadParams().dtype is None 32 | assert RasterLoadParams().nearest is True 33 | assert RasterLoadParams(resampling="average").nearest is False 34 | 35 | 36 | @pytest.mark.parametrize("lon", [0, -179, 179, 10, 23.4]) 37 | def test_mid_longitude(lon: float) -> None: 38 | gbox = GeoBox.from_bbox((lon - 0.1, 0, lon + 0.1, 1), shape=(100, 100)) 39 | xx = mk_parsed_item([b_("b1", gbox)]) 40 | assert xx.geometry is not None 41 | assert xx.geometry.crs == "epsg:4326" 42 | assert xx.mid_longitude == pytest.approx(lon) 43 | 44 | assert mk_parsed_item([]).mid_longitude is None 45 | 46 | 47 | def test_solar_day() -> None: 48 | def _mk(lon: float, datetime): 49 | gbox = GeoBox.from_bbox((lon - 0.1, 0, lon + 0.1, 1), shape=(100, 100)) 50 | return mk_parsed_item([b_("b1", gbox)], datetime=datetime) 51 | 52 | for lon in [0, 1, 2, 3, 14, -1, -14, -3]: 53 | xx = _mk(lon, "2020-01-02T12:13:14Z") 54 | assert xx.mid_longitude == pytest.approx(lon) 55 | assert xx.nominal_datetime == xx.solar_date 56 | 57 | xx = _mk(15.1, "2020-01-02T12:13:14Z") 58 | assert xx.nominal_datetime != xx.solar_date 59 | assert xx.nominal_datetime + dt.timedelta(seconds=3600) == xx.solar_date 60 | assert xx.nominal_datetime + dt.timedelta(seconds=3600) == xx.solar_date_at(20) 61 | 62 | xx = _mk(-15.1, "2020-01-02T12:13:14Z") 63 | assert xx.nominal_datetime != xx.solar_date 64 | assert xx.nominal_datetime - dt.timedelta(seconds=3600) == xx.solar_date 65 | assert xx.nominal_datetime - dt.timedelta(seconds=3600) == xx.solar_date_at(-20) 66 | 67 | xx = mk_parsed_item([b_("b1")], datetime="2000-01-02") 68 | assert xx.geometry is None 69 | assert xx.nominal_datetime == xx.solar_date 70 | 71 | xx = _mk(10, None) 72 | with pytest.raises(ValueError): 73 | _ = xx.solar_date 74 | 75 | 76 | @pytest.fixture() 77 | def collection_ab() -> RasterCollectionMetadata: 78 | return RasterCollectionMetadata( 79 | "ab", 80 | RasterGroupMetadata( 81 | { 82 | ("a", 1): RasterBandMetadata("uint8"), 83 | ("b", 1): RasterBandMetadata("uint16"), 84 | }, 85 | {"A": [("a", 1)], "AA": [("a", 1)], "B": [("b", 1)]}, 86 | ), 87 | has_proj=True, 88 | band2grid={}, 89 | ) 90 | 91 | 92 | @pytest.fixture() 93 | def parsed_item_ab(collection_ab: RasterCollectionMetadata) -> ParsedItem: 94 | def _src(k: BandKey) -> RasterSource | AuxDataSource: 95 | meta = 
collection_ab[k] 96 | if isinstance(meta, RasterBandMetadata): 97 | return RasterSource(f"file:///{k[0]}-{k[1]}.tif", meta=meta) 98 | return AuxDataSource(f"file:///{k[0]}-{k[1]}.aux", meta=meta) 99 | 100 | return ParsedItem( 101 | "item-ab", 102 | collection_ab, 103 | {k: _src(k) for k in collection_ab}, 104 | ) 105 | 106 | 107 | def test_collection(collection_ab: RasterCollectionMetadata) -> None: 108 | xx = collection_ab 109 | 110 | assert xx.canonical_name("b") == "b" 111 | assert xx.canonical_name("B") == "b" 112 | assert xx.canonical_name("AA") == "a" 113 | assert xx.canonical_name("a") == "a" 114 | 115 | assert xx.band_key("B") == ("b", 1) 116 | assert xx.band_key("AA") == ("a", 1) 117 | assert xx["AA"].data_type == "uint8" 118 | assert xx["b"].data_type == "uint16" 119 | assert "b" in xx 120 | assert "b.1" in xx 121 | assert ("b", 1) in xx 122 | assert {} not in xx 123 | assert ("some-random", 1) not in xx 124 | assert "no-such-band" not in xx 125 | 126 | assert xx.resolve_bands("AA")["AA"] == xx["a"] 127 | assert list(xx.resolve_bands(["a", "B"])) == ["a", "B"] 128 | assert xx.resolve_bands(["a", "B"])["B"] is xx["b"] 129 | assert xx.resolve_bands(["a", "B"])["a"] is xx["a"] 130 | assert set(xx) == set([("a", 1), ("b", 1)]) 131 | assert len(xx) == 2 132 | 133 | for k in "a AA A b B".split(" "): 134 | assert xx.band_key(k) in xx.bands 135 | assert xx.canonical_name(k) in ["a", "b"] 136 | assert k in xx 137 | assert isinstance(xx[k], RasterBandMetadata) 138 | assert xx[k] is xx[xx.band_key(k)] 139 | 140 | with pytest.raises(ValueError): 141 | _ = xx.resolve_bands(["xxxxxxxx", "a"]) 142 | 143 | with pytest.raises(KeyError): 144 | _ = xx["no-such-band"] 145 | 146 | 147 | def test_collection_allbands() -> None: 148 | xx = mk_parsed_item([b_("a.1"), b_("a.2"), b_("a.3")]) 149 | md = xx.collection 150 | assert md.all_bands == ["a.1", "a.2", "a.3"] 151 | 152 | md.aliases["AA"] = [("a", 2)] 153 | md.aliases["AAA"] = [("a", 3)] 154 | assert md["AA"] == md["a.2"] 155 | assert md["AAA"] == md["a.3"] 156 | 157 | # expect aliases to be used for all_band when multi-band 158 | # assets have unique aliases 159 | assert md.all_bands == ["a.1", "AA", "AAA"] 160 | assert md.canonical_name("a.2") == "AA" 161 | assert md.canonical_name("AA") == "AA" 162 | assert md.canonical_name("a.3") == "AAA" 163 | assert md.canonical_name("AAA") == "AAA" 164 | 165 | 166 | def test_parsed_item(parsed_item_ab: ParsedItem) -> None: 167 | xx = parsed_item_ab 168 | assert xx["AA"] is not None 169 | assert xx["b"] is not None 170 | assert xx["AA"].meta is not None 171 | assert xx["AA"].meta.data_type == "uint8" 172 | assert xx["b"].meta is not None 173 | assert xx["b"].meta.data_type == "uint16" 174 | 175 | assert xx.resolve_bands("AA")["AA"] == xx["a"] 176 | assert list(xx.resolve_bands(["a", "B"])) == ["a", "B"] 177 | assert xx.resolve_bands(["a", "B"])["B"] is xx["b"] 178 | assert xx.resolve_bands(["a", "B"])["a"] is xx["a"] 179 | assert set(xx) == set([("a", 1), ("b", 1)]) 180 | assert len(xx) == 2 181 | assert len(set([xx, xx, xx])) == 1 182 | assert ("a", 1) in xx 183 | assert ("a", 2) not in xx 184 | assert ("a", 2, 3) not in xx 185 | 186 | for k in "a AA A b B".split(" "): 187 | assert k in xx 188 | assert [k] not in xx 189 | assert f"___{k}___" not in xx 190 | assert isinstance(xx[k], RasterSource) 191 | assert xx[k] is xx.resolve_bands(k)[k] 192 | 193 | assert isinstance(xx["b"], RasterSource) 194 | assert isinstance(xx["b"].strip(), RasterSource) 195 | assert xx["b"].strip().geobox is None 196 | assert 
xx["b"].strip().meta is xx["b"].meta 197 | assert xx["b"].strip().uri == xx["b"].uri 198 | assert xx["b"].strip().band == xx["b"].band 199 | assert xx["b"].strip().subdataset == xx["b"].subdataset 200 | assert xx["b"].strip().driver_data == xx["b"].driver_data 201 | 202 | xx_strip = xx.strip() 203 | assert isinstance(xx_strip["b"], RasterSource) 204 | assert xx_strip["b"].geobox is None 205 | assert xx_strip["b"].meta is xx["b"].meta 206 | assert xx_strip["b"].uri == xx["b"].uri 207 | assert xx_strip["b"].band == xx["b"].band 208 | assert xx_strip["b"].subdataset == xx["b"].subdataset 209 | assert xx_strip["b"].driver_data == xx["b"].driver_data 210 | 211 | 212 | def test_tokenize(parsed_item_ab: ParsedItem) -> None: 213 | assert tokenize(parsed_item_ab.collection) == tokenize(parsed_item_ab.collection) 214 | assert tokenize(parsed_item_ab) == tokenize(parsed_item_ab) 215 | assert tokenize(parsed_item_ab["a"]) == tokenize(parsed_item_ab["a"]) 216 | assert tokenize(parsed_item_ab["a"].meta) == tokenize(parsed_item_ab["a"].meta) 217 | 218 | assert tokenize(RasterLoadParams()) == tokenize(RasterLoadParams()) 219 | assert tokenize(RasterLoadParams("uint8")) == tokenize(RasterLoadParams("uint8")) 220 | assert tokenize(RasterLoadParams("uint8")) != tokenize(RasterLoadParams("uint32")) 221 | 222 | 223 | @pytest.mark.parametrize( 224 | "name, expected", 225 | [ 226 | ("a", ("a", 1)), 227 | ("a.1", ("a", 1)), 228 | ("a.2", ("a", 2)), 229 | (("b", 1), ("b", 1)), 230 | ("foo.tiff", ("foo.tiff", 1)), 231 | ], 232 | ) 233 | def test_normkey(name, expected) -> None: 234 | assert norm_key(name) == expected 235 | 236 | 237 | def test_version() -> None: 238 | from odc.stac import __version__ # pylint: disable=no-name-in-module 239 | 240 | assert __version__ is not None 241 | assert len(__version__.split(".")) == 3 242 | 243 | 244 | def test_property_load_request_basic() -> None: 245 | """Test basic PropertyLoadRequest functionality.""" 246 | # Test with just key 247 | req = PropertyLoadRequest(key="eo:cloud_cover") 248 | assert req.key == "eo:cloud_cover" 249 | assert req.dtype == "float32" # default 250 | assert req.name is None # default 251 | assert req.nodata is None 252 | assert req.units == "1" 253 | assert req.output_name == "eo_cloud_cover" 254 | 255 | # Test with all fields 256 | req = PropertyLoadRequest( 257 | key="eo:cloud_cover", dtype="int16", name="cloud_cover", nodata=-999 258 | ) 259 | assert req.key == "eo:cloud_cover" 260 | assert req.dtype == "int16" 261 | assert req.name == "cloud_cover" 262 | assert req.nodata == -999 263 | assert req.units == "1" 264 | assert req.output_name == "cloud_cover" 265 | 266 | 267 | def test_property_load_request_from_user_input() -> None: 268 | """Test from_user_input method with various inputs.""" 269 | # Test with string inputs 270 | requests = PropertyLoadRequest.from_user_input(["eo:cloud_cover", "eo:platform"]) 271 | assert len(requests) == 2 272 | assert requests[0].key == "eo:cloud_cover" 273 | assert requests[1].key == "eo:platform" 274 | assert all(req.dtype == "float32" for req in requests) 275 | assert all(req.name is None for req in requests) 276 | 277 | # Test with dict inputs 278 | requests = PropertyLoadRequest.from_user_input( 279 | [ 280 | {"key": "eo:cloud_cover", "dtype": "int16", "name": "cloud_cover"}, 281 | {"key": "eo:platform", "name": "satellite"}, 282 | ] 283 | ) 284 | assert len(requests) == 2 285 | assert requests[0].key == "eo:cloud_cover" 286 | assert requests[0].dtype == "int16" 287 | assert requests[0].name == "cloud_cover" 
288 | assert requests[1].key == "eo:platform" 289 | assert requests[1].dtype == "float32" # default 290 | assert requests[1].name == "satellite" 291 | 292 | # Test with mixed inputs 293 | requests = PropertyLoadRequest.from_user_input( 294 | ["eo:cloud_cover", {"key": "eo:platform", "name": "satellite"}] 295 | ) 296 | assert len(requests) == 2 297 | assert requests[0].key == "eo:cloud_cover" 298 | assert requests[0].dtype == "float32" 299 | assert requests[0].name is None 300 | assert requests[1].key == "eo:platform" 301 | assert requests[1].name == "satellite" 302 | 303 | 304 | def test_property_load_request_errors() -> None: 305 | """Test error cases for PropertyLoadRequest.""" 306 | # Test missing key in dict 307 | with pytest.raises(ValueError, match="Dictionary input must contain 'key' field"): 308 | PropertyLoadRequest.from_user_input([{"dtype": "int16"}]) 309 | 310 | # Test invalid input type 311 | with pytest.raises(ValueError, match="Input must be string or dict"): 312 | PropertyLoadRequest.from_user_input([123]) # type: ignore 313 | 314 | # Test empty sequence 315 | requests = PropertyLoadRequest.from_user_input([]) 316 | assert len(requests) == 0 317 | -------------------------------------------------------------------------------- /tests/data/S2A_28QCH_20200714_0_L2A.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "Feature", 3 | "stac_version": "1.0.0-beta.1", 4 | "stac_extensions": [ 5 | "https://stac-extensions.github.io/eo/v1.0.0/schema.json", 6 | "https://stac-extensions.github.io/view/v1.0.0/schema.json", 7 | "https://stac-extensions.github.io/projection/v1.0.0/schema.json" 8 | ], 9 | "id": "S2A_28QCH_20200714_0_L2A", 10 | "bbox": [ 11 | -16.92047119589637, 12 | 20.052963012507906, 13 | -16.72595055093299, 14 | 20.791551352272126 15 | ], 16 | "geometry": { 17 | "type": "Polygon", 18 | "coordinates": [ 19 | [ 20 | [ 21 | -16.911257292502636, 22 | 20.052963012507906 23 | ], 24 | [ 25 | -16.92047119589637, 26 | 20.78948450776572 27 | ], 28 | [ 29 | -16.72595055093299, 30 | 20.791551352272126 31 | ], 32 | [ 33 | -16.911257292502636, 34 | 20.052963012507906 35 | ] 36 | ] 37 | ] 38 | }, 39 | "properties": { 40 | "datetime": "2020-07-14T11:55:52Z", 41 | "platform": "sentinel-2a", 42 | "constellation": "sentinel-2", 43 | "instruments": [ 44 | "MSI" 45 | ], 46 | "gsd": 10, 47 | "data_coverage": 6.85, 48 | "view:off_nadir": 0, 49 | "eo:cloud_cover": 2.36, 50 | "proj:epsg": 32628, 51 | "sentinel:latitude_band": "Q", 52 | "sentinel:grid_square": "CH", 53 | "sentinel:sequence": "0", 54 | "sentinel:product_id": "S2A_MSIL2A_20200714T115221_N0214_R123_T28QCH_20200714T191310" 55 | }, 56 | "collection": "sentinel-s2-l2a-cogs", 57 | "assets": { 58 | "thumbnail": { 59 | "title": "Thumbnail", 60 | "type": "image/png", 61 | "href": "https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/28/Q/CH/2020/7/14/0/preview.jpg" 62 | }, 63 | "overview": { 64 | "title": "True color image", 65 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 66 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/L2A_PVI.tif", 67 | "proj:shape": [ 68 | 343, 69 | 343 70 | ], 71 | "proj:transform": [ 72 | 320.0, 73 | 0.0, 74 | 300000.0, 75 | 0.0, 76 | -320.0, 77 | 2300040.0, 78 | 0.0, 79 | 0.0, 80 | 1.0 81 | ] 82 | }, 83 | "info": { 84 | "title": "Original JSON metadata", 85 | "type": "application/json", 86 | "href": 
"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/28/Q/CH/2020/7/14/0/tileInfo.json" 87 | }, 88 | "metadata": { 89 | "title": "Original XML metadata", 90 | "type": "application/xml", 91 | "href": "https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/28/Q/CH/2020/7/14/0/metadata.xml" 92 | }, 93 | "visual": { 94 | "title": "True color image", 95 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 96 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/TCI.tif", 97 | "proj:shape": [ 98 | 10980, 99 | 10980 100 | ], 101 | "proj:transform": [ 102 | 10.0, 103 | 0.0, 104 | 300000.0, 105 | 0.0, 106 | -10.0, 107 | 2300040.0, 108 | 0.0, 109 | 0.0, 110 | 1.0 111 | ] 112 | }, 113 | "B01": { 114 | "title": "Band 1 (coastal)", 115 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 116 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/B01.tif", 117 | "proj:shape": [ 118 | 1830, 119 | 1830 120 | ], 121 | "proj:transform": [ 122 | 60.0, 123 | 0.0, 124 | 300000.0, 125 | 0.0, 126 | -60.0, 127 | 2300040.0, 128 | 0.0, 129 | 0.0, 130 | 1.0 131 | ] 132 | }, 133 | "B02": { 134 | "title": "Band 2 (blue)", 135 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 136 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/B02.tif", 137 | "proj:shape": [ 138 | 10980, 139 | 10980 140 | ], 141 | "proj:transform": [ 142 | 10.0, 143 | 0.0, 144 | 300000.0, 145 | 0.0, 146 | -10.0, 147 | 2300040.0, 148 | 0.0, 149 | 0.0, 150 | 1.0 151 | ] 152 | }, 153 | "B03": { 154 | "title": "Band 3 (green)", 155 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 156 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/B03.tif", 157 | "proj:shape": [ 158 | 10980, 159 | 10980 160 | ], 161 | "proj:transform": [ 162 | 10.0, 163 | 0.0, 164 | 300000.0, 165 | 0.0, 166 | -10.0, 167 | 2300040.0, 168 | 0.0, 169 | 0.0, 170 | 1.0 171 | ] 172 | }, 173 | "B04": { 174 | "title": "Band 4 (red)", 175 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 176 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/B04.tif", 177 | "proj:shape": [ 178 | 10980, 179 | 10980 180 | ], 181 | "proj:transform": [ 182 | 10.0, 183 | 0.0, 184 | 300000.0, 185 | 0.0, 186 | -10.0, 187 | 2300040.0, 188 | 0.0, 189 | 0.0, 190 | 1.0 191 | ] 192 | }, 193 | "B05": { 194 | "title": "Band 5", 195 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 196 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/B05.tif", 197 | "proj:shape": [ 198 | 5490, 199 | 5490 200 | ], 201 | "proj:transform": [ 202 | 20.0, 203 | 0.0, 204 | 300000.0, 205 | 0.0, 206 | -20.0, 207 | 2300040.0, 208 | 0.0, 209 | 0.0, 210 | 1.0 211 | ] 212 | }, 213 | "B06": { 214 | "title": "Band 6", 215 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 216 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/B06.tif", 217 | "proj:shape": [ 218 | 5490, 219 | 5490 220 | ], 221 | "proj:transform": [ 222 | 20.0, 223 | 0.0, 224 | 300000.0, 225 | 0.0, 226 | -20.0, 227 | 2300040.0, 228 | 0.0, 229 | 0.0, 230 | 1.0 231 | ] 232 | }, 233 | "B07": { 234 | "title": "Band 7", 235 | "type": "image/tiff; 
application=geotiff; profile=cloud-optimized", 236 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/B07.tif", 237 | "proj:shape": [ 238 | 5490, 239 | 5490 240 | ], 241 | "proj:transform": [ 242 | 20.0, 243 | 0.0, 244 | 300000.0, 245 | 0.0, 246 | -20.0, 247 | 2300040.0, 248 | 0.0, 249 | 0.0, 250 | 1.0 251 | ] 252 | }, 253 | "B08": { 254 | "title": "Band 8 (nir)", 255 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 256 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/B08.tif", 257 | "proj:shape": [ 258 | 10980, 259 | 10980 260 | ], 261 | "proj:transform": [ 262 | 10.0, 263 | 0.0, 264 | 300000.0, 265 | 0.0, 266 | -10.0, 267 | 2300040.0, 268 | 0.0, 269 | 0.0, 270 | 1.0 271 | ] 272 | }, 273 | "B8A": { 274 | "title": "Band 8A", 275 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 276 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/B8A.tif", 277 | "proj:shape": [ 278 | 5490, 279 | 5490 280 | ], 281 | "proj:transform": [ 282 | 20.0, 283 | 0.0, 284 | 300000.0, 285 | 0.0, 286 | -20.0, 287 | 2300040.0, 288 | 0.0, 289 | 0.0, 290 | 1.0 291 | ] 292 | }, 293 | "B09": { 294 | "title": "Band 9", 295 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 296 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/B09.tif", 297 | "proj:shape": [ 298 | 1830, 299 | 1830 300 | ], 301 | "proj:transform": [ 302 | 60.0, 303 | 0.0, 304 | 300000.0, 305 | 0.0, 306 | -60.0, 307 | 2300040.0, 308 | 0.0, 309 | 0.0, 310 | 1.0 311 | ] 312 | }, 313 | "B11": { 314 | "title": "Band 11 (swir16)", 315 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 316 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/B11.tif", 317 | "proj:shape": [ 318 | 5490, 319 | 5490 320 | ], 321 | "proj:transform": [ 322 | 20.0, 323 | 0.0, 324 | 300000.0, 325 | 0.0, 326 | -20.0, 327 | 2300040.0, 328 | 0.0, 329 | 0.0, 330 | 1.0 331 | ] 332 | }, 333 | "B12": { 334 | "title": "Band 12 (swir22)", 335 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 336 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/B12.tif", 337 | "proj:shape": [ 338 | 5490, 339 | 5490 340 | ], 341 | "proj:transform": [ 342 | 20.0, 343 | 0.0, 344 | 300000.0, 345 | 0.0, 346 | -20.0, 347 | 2300040.0, 348 | 0.0, 349 | 0.0, 350 | 1.0 351 | ] 352 | }, 353 | "AOT": { 354 | "title": "Aerosol Optical Thickness (AOT)", 355 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 356 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/AOT.tif", 357 | "proj:shape": [ 358 | 1830, 359 | 1830 360 | ], 361 | "proj:transform": [ 362 | 60.0, 363 | 0.0, 364 | 300000.0, 365 | 0.0, 366 | -60.0, 367 | 2300040.0, 368 | 0.0, 369 | 0.0, 370 | 1.0 371 | ] 372 | }, 373 | "WVP": { 374 | "title": "Water Vapour (WVP)", 375 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 376 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/WVP.tif", 377 | "proj:shape": [ 378 | 10980, 379 | 10980 380 | ], 381 | "proj:transform": [ 382 | 10.0, 383 | 0.0, 384 | 300000.0, 385 | 0.0, 386 | -10.0, 387 | 2300040.0, 388 | 0.0, 389 | 
0.0, 390 | 1.0 391 | ] 392 | }, 393 | "SCL": { 394 | "title": "Scene Classification Map (SCL)", 395 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 396 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/SCL.tif", 397 | "proj:shape": [ 398 | 5490, 399 | 5490 400 | ], 401 | "proj:transform": [ 402 | 20.0, 403 | 0.0, 404 | 300000.0, 405 | 0.0, 406 | -20.0, 407 | 2300040.0, 408 | 0.0, 409 | 0.0, 410 | 1.0 411 | ] 412 | } 413 | }, 414 | "links": [ 415 | { 416 | "rel": "self", 417 | "href": "s3://sentinel-cogs/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/S2A_28QCH_20200714_0_L2A.json", 418 | "type": "application/json" 419 | }, 420 | { 421 | "title": "Source STAC Item", 422 | "rel": "derived_from", 423 | "href": "/tmp/tmpj8uymxue/s3:/cirrus-v0-data-1qm7gekzjucbq/sentinel-s2-l2a/2020/S2A_28QCH_20200714_0_L2A/S2A_28QCH_20200714_0_L2A.json", 424 | "type": "application/json" 425 | } 426 | ] 427 | } 428 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Run Code Checks 2 | 3 | on: 4 | pull_request: 5 | push: 6 | 7 | # When a PR is updated, cancel the jobs from the previous version. Merges 8 | # do not define head_ref, so use run_id to never cancel those jobs. 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | build-wheels: 15 | timeout-minutes: 15 16 | runs-on: ubuntu-latest 17 | 18 | steps: 19 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 20 | 21 | - name: Setup Python 22 | uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 23 | with: 24 | python-version: "3.10" 25 | 26 | - name: Install Dependencies 27 | run: | 28 | python -m pip install --upgrade flit twine 29 | python -m pip freeze 30 | 31 | - name: Build Clean Packages 32 | run: | 33 | mkdir -p ./wheels/clean 34 | flit build --format wheel --format sdist 35 | mv dist/* ./wheels/clean/ 36 | find ./wheels/clean -type f 37 | 38 | - name: Check Packaging 39 | run: | 40 | find ./wheels/clean -type f | xargs twine check 41 | 42 | - name: Upload wheels as artifacts 43 | uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 44 | with: 45 | name: python-wheels 46 | path: ./wheels/clean/ 47 | retention-days: 1 48 | 49 | build-test-env-base: 50 | timeout-minutes: 15 51 | runs-on: ubuntu-latest 52 | 53 | steps: 54 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 55 | 56 | - uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 57 | id: conda_cache 58 | with: 59 | path: /tmp/test_env 60 | key: ${{ runner.os }}-test-env-py310-${{ hashFiles('tests/test-env-py310.yml', 'pyproject.toml') }} 61 | 62 | - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 63 | if: steps.conda_cache.outputs.cache-hit != 'true' 64 | with: 65 | miniforge-variant: Miniforge3 66 | miniforge-version: latest 67 | use-mamba: true 68 | 69 | - name: Dump Conda Environment Info 70 | shell: bash -l {0} 71 | if: steps.conda_cache.outputs.cache-hit != 'true' 72 | run: | 73 | conda info 74 | conda list 75 | conda config --show-sources 76 | conda config --show 77 | printenv | sort 78 | 79 | - name: Build Python Environment for Testing 80 | shell: bash -l {0} 81 | if: steps.conda_cache.outputs.cache-hit != 'true' 
82 | run: | 83 | mamba env create -f tests/test-env-py310.yml -p /tmp/test_env 84 | 85 | - name: Check Python Env 86 | shell: bash -l {0} 87 | if: steps.conda_cache.outputs.cache-hit != 'true' 88 | run: | 89 | mamba env export -p /tmp/test_env 90 | 91 | build-binder-env: 92 | timeout-minutes: 15 93 | runs-on: ubuntu-latest 94 | 95 | steps: 96 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 97 | 98 | - uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 99 | id: binder_cache 100 | with: 101 | path: /tmp/binder_env 102 | key: ${{ runner.os }}-binder-env-${{ hashFiles('binder/environment.yml') }} 103 | 104 | - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 105 | if: steps.binder_cache.outputs.cache-hit != 'true' 106 | with: 107 | miniforge-variant: Miniforge3 108 | miniforge-version: latest 109 | use-mamba: true 110 | 111 | - name: Dump Conda Environment Info 112 | shell: bash -l {0} 113 | if: steps.binder_cache.outputs.cache-hit != 'true' 114 | run: | 115 | conda info 116 | conda list 117 | conda config --show-sources 118 | conda config --show 119 | printenv | sort 120 | 121 | - name: Build Python Environment for Notebooks 122 | shell: bash -l {0} 123 | if: steps.binder_cache.outputs.cache-hit != 'true' 124 | run: | 125 | cd binder 126 | mamba env create -f environment.yml -p /tmp/binder_env 127 | 128 | - name: Check Python Env 129 | shell: bash -l {0} 130 | if: steps.binder_cache.outputs.cache-hit != 'true' 131 | run: | 132 | mamba env export -p /tmp/binder_env 133 | 134 | run-black-check: 135 | timeout-minutes: 15 136 | runs-on: ubuntu-latest 137 | needs: 138 | - build-test-env-base 139 | 140 | steps: 141 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 142 | - name: Get Conda Environment from Cache 143 | uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 144 | id: conda_cache 145 | with: 146 | path: /tmp/test_env 147 | key: ${{ runner.os }}-test-env-py310-${{ hashFiles('tests/test-env-py310.yml', 'pyproject.toml') }} 148 | 149 | - name: Update PATH 150 | shell: bash 151 | run: | 152 | echo "/tmp/test_env/bin" >> $GITHUB_PATH 153 | 154 | - name: Check formatting (black) 155 | shell: bash 156 | run: | 157 | black --version 158 | black --check --diff . 159 | 160 | run-pylint: 161 | timeout-minutes: 15 162 | runs-on: ubuntu-latest 163 | needs: 164 | - build-test-env-base 165 | 166 | steps: 167 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 168 | - name: Get Conda Environment from Cache 169 | uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 170 | id: conda_cache 171 | with: 172 | path: /tmp/test_env 173 | key: ${{ runner.os }}-test-env-py310-${{ hashFiles('tests/test-env-py310.yml', 'pyproject.toml') }} 174 | 175 | - name: Update PATH 176 | shell: bash 177 | run: | 178 | echo "/tmp/test_env/bin" >> $GITHUB_PATH 179 | 180 | - name: Install in Edit mode 181 | shell: bash 182 | run: | 183 | pip install -e . 
--no-deps 184 | 185 | - name: Check with pylint 186 | shell: bash 187 | run: | 188 | echo "Running pylint" 189 | pylint --version 190 | echo "-------------------------------------" 191 | pylint -v odc 192 | 193 | run-mypy: 194 | timeout-minutes: 15 195 | runs-on: ubuntu-latest 196 | needs: 197 | - build-test-env-base 198 | 199 | steps: 200 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 201 | - name: Get Conda Environment from Cache 202 | uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 203 | id: conda_cache 204 | with: 205 | path: /tmp/test_env 206 | key: ${{ runner.os }}-test-env-py310-${{ hashFiles('tests/test-env-py310.yml', 'pyproject.toml') }} 207 | 208 | - name: Update PATH 209 | shell: bash 210 | run: | 211 | echo "/tmp/test_env/bin" >> $GITHUB_PATH 212 | 213 | - name: Check with mypy 214 | shell: bash 215 | run: | 216 | python -m mypy --namespace-packages --explicit-package-bases odc 217 | 218 | 219 | test-with-botocore-and-coverage: 220 | timeout-minutes: 15 221 | runs-on: ubuntu-latest 222 | permissions: 223 | id-token: write 224 | needs: 225 | - build-test-env-base 226 | - run-black-check 227 | 228 | steps: 229 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 230 | 231 | - name: Get Conda Environment from Cache 232 | uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 233 | id: conda_cache 234 | with: 235 | path: /tmp/test_env 236 | 237 | key: ${{ runner.os }}-test-env-py310-${{ hashFiles('tests/test-env-py310.yml', 'pyproject.toml') }} 238 | 239 | - name: Update PATH 240 | shell: bash 241 | run: | 242 | echo "/tmp/test_env/bin" >> $GITHUB_PATH 243 | 244 | - name: Install in Edit mode 245 | shell: bash 246 | run: | 247 | pip install -e '.[botocore]' --no-deps 248 | 249 | - name: Run Tests 250 | shell: bash 251 | run: | 252 | echo "Running Tests" 253 | pytest --cov=. 
\ 254 | --cov-report=html \ 255 | --cov-report=xml:coverage.xml \ 256 | --timeout=30 \ 257 | tests odc 258 | 259 | env: 260 | AWS_DEFAULT_REGION: us-west-2 261 | DASK_TEMPORARY_DIRECTORY: /tmp/dask 262 | 263 | - name: Upload Coverage 264 | if: | 265 | github.repository == 'opendatacube/odc-stac' 266 | 267 | uses: codecov/codecov-action@18283e04ce6e62d37312384ff67231eb8fd56d24 # v5.4.3 268 | with: 269 | fail_ci_if_error: false 270 | verbose: false 271 | use_oidc: true 272 | 273 | test-wheels: 274 | timeout-minutes: 15 275 | runs-on: ubuntu-latest 276 | 277 | needs: 278 | - build-test-env-base 279 | - run-black-check 280 | - build-wheels 281 | 282 | steps: 283 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 284 | 285 | - name: Download wheels from artifacts 286 | uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 287 | with: 288 | name: python-wheels 289 | path: ./wheels/clean 290 | 291 | - name: Get Conda Environment from Cache 292 | uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 293 | id: conda_cache 294 | with: 295 | path: /tmp/test_env 296 | key: ${{ runner.os }}-test-env-py310-${{ hashFiles('tests/test-env-py310.yml', 'pyproject.toml') }} 297 | 298 | - name: Update PATH 299 | shell: bash 300 | run: | 301 | echo "/tmp/test_env/bin" >> $GITHUB_PATH 302 | 303 | - name: Install wheels for testing 304 | shell: bash 305 | run: | 306 | which python 307 | 308 | ls -lh wheels/clean 309 | python -m pip install --no-deps wheels/clean/*.whl 310 | python -m pip check || true 311 | 312 | - name: Run Tests 313 | shell: bash 314 | run: | 315 | echo "Running Tests" 316 | pytest --timeout=30 tests odc 317 | 318 | env: 319 | AWS_DEFAULT_REGION: us-west-2 320 | DASK_TEMPORARY_DIRECTORY: /tmp/dask 321 | 322 | build-notebooks: 323 | timeout-minutes: 15 324 | runs-on: ubuntu-latest 325 | 326 | needs: 327 | - build-binder-env 328 | 329 | steps: 330 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 331 | 332 | - uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 333 | id: nb_cache 334 | with: 335 | path: docs/notebooks 336 | key: docs-notebooks-${{ hashFiles('notebooks/*.py') }} 337 | 338 | - name: Get Conda Environment from Cache 339 | if: steps.nb_cache.outputs.cache-hit != 'true' 340 | uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 341 | id: conda_cache 342 | with: 343 | path: /tmp/binder_env 344 | key: ${{ runner.os }}-binder-env-${{ hashFiles('binder/environment.yml') }} 345 | 346 | - name: Update PATH 347 | if: steps.nb_cache.outputs.cache-hit != 'true' 348 | shell: bash 349 | run: | 350 | echo "/tmp/binder_env/bin" >> $GITHUB_PATH 351 | 352 | - name: Run Notebooks 353 | if: steps.nb_cache.outputs.cache-hit != 'true' 354 | run: | 355 | nb_dir=docs/notebooks 356 | mkdir -p $nb_dir 357 | for src in $(find notebooks -maxdepth 1 -type f -name '*.py'); do 358 | dst="$nb_dir/$(basename ${src%%.py}.ipynb)" 359 | echo "$src -> $dst" 360 | ./binder/render-nb-pipe.sh <$src >$dst 361 | done 362 | ls -lh $nb_dir 363 | 364 | check-docs: 365 | timeout-minutes: 15 366 | runs-on: ubuntu-latest 367 | 368 | needs: 369 | - build-test-env-base 370 | - run-black-check 371 | - build-notebooks 372 | 373 | steps: 374 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 375 | 376 | - name: Get Rendered Notebooks 377 | uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 378 | id: nb_cache 379 | with: 380 | path: docs/notebooks 381 | key: 
docs-notebooks-${{ hashFiles('notebooks/*.py') }} 382 | 383 | - name: Get Conda Environment from Cache 384 | uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 385 | id: conda_cache 386 | with: 387 | path: /tmp/test_env 388 | key: ${{ runner.os }}-test-env-py310-${{ hashFiles('tests/test-env-py310.yml', 'pyproject.toml') }} 389 | 390 | - name: Update PATH 391 | shell: bash 392 | run: | 393 | echo "/tmp/test_env/bin" >> $GITHUB_PATH 394 | 395 | - name: Config 396 | id: cfg 397 | run: | 398 | find notebooks/ -maxdepth 1 -name '*.py' -type f | sort -f -d 399 | 400 | nb_dir="docs/notebooks" 401 | nb_hash=$(python scripts/notebook_hash.py) 402 | echo "Notebooks hash: ${nb_hash}" 403 | echo "nb-hash=${nb_hash}" >> $GITHUB_OUTPUT 404 | echo "nb-hash-short=${nb_hash:0:16}" >> $GITHUB_OUTPUT 405 | 406 | - name: Install in Edit mode 407 | shell: bash 408 | run: | 409 | pip install -e . --no-deps 410 | 411 | - name: Build STAC docs 412 | shell: bash 413 | run: | 414 | make -C docs html 415 | 416 | - name: Deploy to Netlify 417 | id: netlify 418 | if: github.event_name == 'pull_request' 419 | uses: nwtgck/actions-netlify@4cbaf4c08f1a7bfa537d6113472ef4424e4eb654 # v3.0.0 420 | with: 421 | production-branch: "develop" 422 | publish-dir: "docs/_build/html" 423 | deploy-message: "Deploy from GitHub Actions" 424 | github-token: ${{ secrets.GITHUB_TOKEN }} 425 | enable-pull-request-comment: true 426 | enable-commit-comment: false 427 | alias: ${{ steps.cfg.outputs.nb-hash-short }} 428 | 429 | env: 430 | NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} 431 | NETLIFY_SITE_ID: ${{ secrets.NETLIFY_SITE_ID }} 432 | 433 | - name: Print Notice 434 | uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 435 | if: github.event_name == 'pull_request' 436 | env: 437 | NETLIFY_URL: ${{ steps.netlify.outputs.deploy-url }} 438 | with: 439 | script: | 440 | const {NETLIFY_URL} = process.env 441 | core.notice(`Published to: ${NETLIFY_URL}`) 442 | -------------------------------------------------------------------------------- /tests/data/LC08_L2SR_081119_20200101_20200823_02_T2.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "Feature", 3 | "stac_version": "1.0.0-beta.2", 4 | "id": "LC08_L2SR_081119_20200101_20200823_02_T2", 5 | "properties": { 6 | "collection": "landsat-c2l2-sr", 7 | "eo:gsd": 30, 8 | "eo:platform": "LANDSAT_8", 9 | "datetime": "2020-01-01T00:00:00Z", 10 | "eo:cloud_cover": 0, 11 | "eo:sun_azimuth": 93.54107327, 12 | "eo:sun_elevation": 22.72192082, 13 | "landsat:cloud_cover_land": 0, 14 | "landsat:wrs_type": "2", 15 | "landsat:wrs_path": "81", 16 | "landsat:wrs_row": "119", 17 | "landsat:scene_id": "LC80811192020001LGN00", 18 | "landsat:collection_category": "T2", 19 | "landsat:collection_number": "02", 20 | "eo:bands": [ 21 | { 22 | "name": "SR_B1", 23 | "common_name": "coastal", 24 | "gsd": 30, 25 | "center_wavelength": 0.44 26 | }, 27 | { 28 | "name": "SR_B2", 29 | "common_name": "blue", 30 | "gsd": 30, 31 | "center_wavelength": 0.48 32 | }, 33 | { 34 | "name": "SR_B3", 35 | "common_name": "green", 36 | "gsd": 30, 37 | "center_wavelength": 0.56 38 | }, 39 | { 40 | "name": "SR_B4", 41 | "common_name": "red", 42 | "gsd": 30, 43 | "center_wavelength": 0.65 44 | }, 45 | { 46 | "name": "SR_B5", 47 | "common_name": "nir08", 48 | "gsd": 30, 49 | "center_wavelength": 0.86 50 | }, 51 | { 52 | "name": "SR_B6", 53 | "common_name": "swir16", 54 | "gsd": 30, 55 | "center_wavelength": 1.6 56 | }, 57 | { 58 | 
"name": "SR_B7", 59 | "common_name": "swir22", 60 | "gsd": 30, 61 | "center_wavelength": 2.2 62 | } 63 | ], 64 | "constellation": "Landsat", 65 | "instruments": [ 66 | "oli", 67 | "tirs" 68 | ], 69 | "view:off_nadir": 0, 70 | "proj:epsg": 3031 71 | }, 72 | "geometry": { 73 | "type": "Polygon", 74 | "coordinates": [ 75 | [ 76 | [ 77 | 106.0673404715087, 78 | -79.61169259634802 79 | ], 80 | [ 81 | 111.50547149149213, 82 | -81.06844416149866 83 | ], 84 | [ 85 | 101.33143658242044, 86 | -81.93331557352852 87 | ], 88 | [ 89 | 97.0135423519546, 90 | -80.34938321953908 91 | ], 92 | [ 93 | 106.0673404715087, 94 | -79.61169259634802 95 | ] 96 | ] 97 | ] 98 | }, 99 | "links": [ 100 | { 101 | "rel": "self", 102 | "href": "./LC08_L2SR_081119_20200101_20200823_02_T2.json", 103 | "type": "application/json" 104 | } 105 | ], 106 | "assets": { 107 | "thumbnail": { 108 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_thumb_small.jpeg", 109 | "type": "image/jpeg", 110 | "title": "Thumbnail image" 111 | }, 112 | "reduced_resolution_browse": { 113 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_thumb_large.jpeg", 114 | "type": "image/jpeg", 115 | "title": "Reduced resolution browse image" 116 | }, 117 | "index": { 118 | "href": "https://landsatlook.usgs.gov/stac-browser/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2", 119 | "type": "text/html", 120 | "title": "HTML index page" 121 | }, 122 | "SR_B1.TIF": { 123 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_SR_B1.TIF", 124 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 125 | "title": "Coastal/Aerosol Band (B1)", 126 | "description": "Collection 2 Level-2 Coastal/Aerosol Band (B1) Surface Reflectance", 127 | "eo:bands": [ 128 | 0 129 | ], 130 | "proj:transform": [ 131 | 1164.15, 132 | 0.0, 133 | 857085.0, 134 | 0.0, 135 | -1150.6499999999999, 136 | -127485.0, 137 | 0.0, 138 | 0.0, 139 | 1.0 140 | ], 141 | "proj:shape": [ 142 | 200, 143 | 200 144 | ] 145 | }, 146 | "SR_B2.TIF": { 147 | "href": "tests/data-files/landsat/LC08_L2SR_081119_20200101_20200823_02_T2_SR_B2_small.TIF", 148 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 149 | "title": "Blue Band (B2)", 150 | "description": "Collection 2 Level-2 Blue Band (B2) Surface Reflectance", 151 | "eo:bands": [ 152 | 1 153 | ], 154 | "proj:transform": [ 155 | 1164.15, 156 | 0.0, 157 | 857085.0, 158 | 0.0, 159 | -1150.6499999999999, 160 | -127485.0, 161 | 0.0, 162 | 0.0, 163 | 1.0 164 | ], 165 | "proj:shape": [ 166 | 200, 167 | 200 168 | ] 169 | }, 170 | "SR_B3.TIF": { 171 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_SR_B3.TIF", 172 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 173 | "title": "Green Band (B3)", 174 | "description": "Collection 2 Level-2 Green Band (B3) Surface Reflectance", 175 | "eo:bands": [ 176 | 2 177 | ], 178 | "proj:transform": [ 179 | 1164.15, 180 | 0.0, 181 | 857085.0, 182 | 0.0, 183 | -1150.6499999999999, 184 | -127485.0, 185 | 0.0, 186 | 0.0, 187 | 1.0 188 | ], 
189 | "proj:shape": [ 190 | 200, 191 | 200 192 | ] 193 | }, 194 | "SR_B4.TIF": { 195 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_SR_B4.TIF", 196 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 197 | "title": "Red Band (B4)", 198 | "description": "Collection 2 Level-2 Red Band (B4) Surface Reflectance", 199 | "eo:bands": [ 200 | 3 201 | ], 202 | "proj:transform": [ 203 | 1164.15, 204 | 0.0, 205 | 857085.0, 206 | 0.0, 207 | -1150.6499999999999, 208 | -127485.0, 209 | 0.0, 210 | 0.0, 211 | 1.0 212 | ], 213 | "proj:shape": [ 214 | 200, 215 | 200 216 | ] 217 | }, 218 | "SR_B5.TIF": { 219 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_SR_B5.TIF", 220 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 221 | "title": "Near Infrared Band 0.8 (B5)", 222 | "description": "Collection 2 Level-2 Near Infrared Band 0.8 (B5) Surface Reflectance", 223 | "eo:bands": [ 224 | 4 225 | ], 226 | "proj:transform": [ 227 | 1164.15, 228 | 0.0, 229 | 857085.0, 230 | 0.0, 231 | -1150.6499999999999, 232 | -127485.0, 233 | 0.0, 234 | 0.0, 235 | 1.0 236 | ], 237 | "proj:shape": [ 238 | 200, 239 | 200 240 | ] 241 | }, 242 | "SR_B6.TIF": { 243 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_SR_B6.TIF", 244 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 245 | "title": "Short-wave Infrared Band 1.6 (B6)", 246 | "description": "Collection 2 Level-2 Short-wave Infrared Band 1.6 (B6) Surface Reflectance", 247 | "eo:bands": [ 248 | 5 249 | ], 250 | "proj:transform": [ 251 | 1164.15, 252 | 0.0, 253 | 857085.0, 254 | 0.0, 255 | -1150.6499999999999, 256 | -127485.0, 257 | 0.0, 258 | 0.0, 259 | 1.0 260 | ], 261 | "proj:shape": [ 262 | 200, 263 | 200 264 | ] 265 | }, 266 | "SR_B7.TIF": { 267 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_SR_B7.TIF", 268 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 269 | "title": "Short-wave Infrared Band 2.2 (B7)", 270 | "description": "Collection 2 Level-2 Short-wave Infrared Band 2.2 (B7) Surface Reflectance", 271 | "eo:bands": [ 272 | 6 273 | ], 274 | "proj:transform": [ 275 | 1164.15, 276 | 0.0, 277 | 857085.0, 278 | 0.0, 279 | -1150.6499999999999, 280 | -127485.0, 281 | 0.0, 282 | 0.0, 283 | 1.0 284 | ], 285 | "proj:shape": [ 286 | 200, 287 | 200 288 | ] 289 | }, 290 | "SR_QA_AEROSOL.TIF": { 291 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_SR_QA_AEROSOL.TIF", 292 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 293 | "title": "Aerosol Quality Analysis Band", 294 | "description": "Collection 2 Level-2 Aerosol Quality Analysis Band (ANG) Surface Reflectance", 295 | "proj:transform": [ 296 | 1164.15, 297 | 0.0, 298 | 857085.0, 299 | 0.0, 300 | -1150.6499999999999, 301 | -127485.0, 302 | 0.0, 303 | 0.0, 304 | 1.0 305 | ], 306 | "proj:shape": [ 307 | 200, 308 | 200 309 | ] 310 | }, 311 | "ANG.txt": { 312 | "href": 
"https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_ANG.txt", 313 | "type": "text/plain", 314 | "title": "Angle Coefficients File", 315 | "description": "Collection 2 Level-2 Angle Coefficients File (ANG) Surface Reflectance" 316 | }, 317 | "MTL.txt": { 318 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_MTL.txt", 319 | "type": "text/plain", 320 | "title": "Product Metadata File", 321 | "description": "Collection 2 Level-2 Product Metadata File (MTL) Surface Reflectance" 322 | }, 323 | "MTL.xml": { 324 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_MTL.xml", 325 | "type": "application/xml", 326 | "title": "Product Metadata File (xml)", 327 | "description": "Collection 2 Level-1 Product Metadata File (xml) Surface Reflectance" 328 | }, 329 | "MTL.json": { 330 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_MTL.json", 331 | "type": "application/json", 332 | "title": "Product Metadata File (json)", 333 | "description": "Collection 2 Level-2 Product Metadata File (json) Surface Reflectance" 334 | }, 335 | "QA_PIXEL.TIF": { 336 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_QA_PIXEL.TIF", 337 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 338 | "title": "Pixel Quality Assessment Band", 339 | "description": "Collection 2 Level-2 Pixel Quality Assessment Band Surface Reflectance", 340 | "proj:transform": [ 341 | 1164.15, 342 | 0.0, 343 | 857085.0, 344 | 0.0, 345 | -1150.6499999999999, 346 | -127485.0, 347 | 0.0, 348 | 0.0, 349 | 1.0 350 | ], 351 | "proj:shape": [ 352 | 200, 353 | 200 354 | ] 355 | }, 356 | "QA_RADSAT.TIF": { 357 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_QA_RADSAT.TIF", 358 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 359 | "title": "Radiometric Saturation Quality Assessment Band", 360 | "description": "Collection 2 Level-2 Radiometric Saturation Quality Assessment Band Surface Reflectance", 361 | "proj:transform": [ 362 | 1164.15, 363 | 0.0, 364 | 857085.0, 365 | 0.0, 366 | -1150.6499999999999, 367 | -127485.0, 368 | 0.0, 369 | 0.0, 370 | 1.0 371 | ], 372 | "proj:shape": [ 373 | 200, 374 | 200 375 | ] 376 | } 377 | }, 378 | "bbox": [ 379 | 97.0135423519546, 380 | -81.93331557352852, 381 | 111.50547149149213, 382 | -79.61169259634802 383 | ], 384 | "stac_extensions": [ 385 | "eo", 386 | "https://landsat.usgs.gov/stac/landsat-extension/schema.json", 387 | "view", 388 | "projection" 389 | ], 390 | "collection": "landsat-c2l2-sr", 391 | "description": "Landsat Collection 2 Level-2 Surface Reflectance Product" 392 | } 393 | --------------------------------------------------------------------------------