├── odc └── stac │ ├── py.typed │ ├── testing │ ├── __init__.py │ └── stac.py │ ├── bench │ ├── __main__.py │ ├── __init__.py │ ├── _report.py │ ├── _prepare.py │ └── _cli.py │ └── __init__.py ├── tests ├── __init__.py ├── data │ ├── test-product-eo.yml │ ├── test-product-eo3.yml │ ├── lidar_dem.json │ ├── only_crs_proj.json │ ├── S2A_28QCH_20200714_0_L2A.json │ └── LC08_L2SR_081119_20200101_20200823_02_T2.json ├── test-env-py310.yml ├── common │ └── __init__.py ├── notebooks │ ├── bench-runner.py │ └── bench-prep-query.py ├── conftest.py ├── test_load.py ├── test_bench.py └── test_model.py ├── docs ├── _static │ ├── .gitkeep │ └── xr-fixes.css ├── rtd-requirements.txt ├── .gitignore ├── tocs.rst ├── examples.rst ├── index.rst ├── Makefile ├── make.bat ├── api.rst ├── intro.rst ├── stac-best-practice.rst ├── samples │ └── save-cog-from-stac.py ├── stac-vs-odc.rst ├── conf.py └── benchmarking.rst ├── notebooks ├── .gitignore ├── Welcome.md ├── render-nb.sh ├── render-html.sh ├── build.sh ├── Makefile ├── README.md ├── stac-load-S2-deafrica.py ├── stac-load-e84-aws.py └── stac-load-S2-ms.py ├── .devcontainer ├── requirements.txt ├── requirements-dev.txt ├── docker-compose.yml ├── Dockerfile ├── post-create.sh └── devcontainer.json ├── binder ├── apt.txt ├── README.md ├── postBuild ├── _home │ ├── .config │ │ └── dask │ │ │ ├── dask.yaml │ │ │ └── labextension.yaml │ └── .jupyter │ │ └── lab │ │ ├── user-settings │ │ └── @jupyterlab │ │ │ └── shortcuts-extension │ │ │ └── shortcuts.jupyterlab-settings │ │ └── workspaces │ │ └── demo-2a97.jupyterlab-workspace ├── render-nb-pipe.sh ├── start └── environment.yml ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── .github ├── codecov.yml └── workflows │ ├── publish-pypi.yml │ ├── build-binder.yml │ ├── render.yml │ └── main.yml ├── scripts └── notebook_hash.py ├── .gitignore ├── pyproject.toml ├── CHANGELOG.md ├── README.rst └── LICENSE /odc/stac/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/_static/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/rtd-requirements.txt: -------------------------------------------------------------------------------- 1 | .[docs] 2 | -------------------------------------------------------------------------------- /notebooks/.gitignore: -------------------------------------------------------------------------------- 1 | *.ipynb 2 | dbg/* 3 | bk/* 4 | -------------------------------------------------------------------------------- /.devcontainer/requirements.txt: -------------------------------------------------------------------------------- 1 | -e .[test-all,docs] 2 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build/* 2 | _generated/* 3 | _api/* 4 | notebooks 5 | -------------------------------------------------------------------------------- /odc/stac/testing/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities used in tests. 
3 | """ 4 | -------------------------------------------------------------------------------- /binder/apt.txt: -------------------------------------------------------------------------------- 1 | fish 2 | git 3 | tig 4 | htop 5 | jq 6 | silversearcher-ag 7 | graphviz 8 | -------------------------------------------------------------------------------- /odc/stac/bench/__main__.py: -------------------------------------------------------------------------------- 1 | """Run main.""" 2 | 3 | from ._cli import main 4 | 5 | main() 6 | -------------------------------------------------------------------------------- /docs/tocs.rst: -------------------------------------------------------------------------------- 1 | Indices and tables 2 | ================== 3 | 4 | * :ref:`genindex` 5 | * :ref:`modindex` 6 | * :ref:`search` 7 | -------------------------------------------------------------------------------- /binder/README.md: -------------------------------------------------------------------------------- 1 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/opendatacube/odc-stac/develop?urlpath=lab/workspaces/demo) 2 | 3 | Configuration for mybinder launcher. 4 | -------------------------------------------------------------------------------- /.devcontainer/requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pylint >=3 2 | black[jupyter] 3 | isort 4 | pycodestyle 5 | pylint 6 | docutils 7 | autopep8 8 | autoflake 9 | 10 | mypy 11 | types-shapely 12 | types-cachetools 13 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v6.0.0 4 | hooks: 5 | - id: end-of-file-fixer 6 | - id: check-added-large-files 7 | - id: check-merge-conflict 8 | -------------------------------------------------------------------------------- /tests/data/test-product-eo.yml: -------------------------------------------------------------------------------- 1 | name: test_product_eo 2 | metadata_type: eo 3 | metadata: 4 | product: 5 | name: test_product_eo 6 | measurements: 7 | - name: band 8 | dtype: "float32" 9 | nodata: .nan 10 | units: "1" 11 | -------------------------------------------------------------------------------- /tests/data/test-product-eo3.yml: -------------------------------------------------------------------------------- 1 | name: test_product_eo3 2 | metadata_type: eo3 3 | metadata: 4 | product: 5 | name: test_product_eo3 6 | measurements: 7 | - name: band 8 | dtype: int16 9 | nodata: -999 10 | units: "1" 11 | -------------------------------------------------------------------------------- /binder/postBuild: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This runs as a last step of Docker build 4 | echo "+++++++++++++++++++++++++++++++++" 5 | echo "+ post build started +" 6 | echo "+++++++++++++++++++++++++++++++++" 7 | 8 | (cd binder/_home && tar c .) 
| tar x 9 | -------------------------------------------------------------------------------- /notebooks/Welcome.md: -------------------------------------------------------------------------------- 1 | # Sample Notebooks 2 | 3 | - Access Sentinel 2 Data on Planetary Computer, [open](stac-load-S2-ms.ipynb) 4 | - Works on Binder too, but you might need to decrease the resolution to fit into 2 GB of RAM 5 | - Access Sentinel 2 Data from AWS, [open](stac-load-e84-aws.ipynb) 6 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.10" 7 | 8 | python: 9 | install: 10 | - requirements: docs/rtd-requirements.txt 11 | 12 | sphinx: 13 | builder: html 14 | configuration: docs/conf.py 15 | fail_on_warning: true 16 | -------------------------------------------------------------------------------- /binder/_home/.config/dask/dask.yaml: -------------------------------------------------------------------------------- 1 | temporary-directory: /tmp 2 | 3 | distributed: 4 | dashboard: 5 | link: "/user/__JUPYTERHUB_USER__/proxy/{port}/status" 6 | worker: 7 | memory: 8 | target: 0.95 9 | spill: 0.99 10 | pause: 0.99 11 | terminate: 0.99 12 | -------------------------------------------------------------------------------- /binder/_home/.config/dask/labextension.yaml: -------------------------------------------------------------------------------- 1 | labextension: 2 | factory: 3 | module: 'dask.distributed' 4 | class: 'LocalCluster' 5 | args: [] 6 | kwargs: {} 7 | 8 | default: 9 | workers: null 10 | adapt: 11 | null 12 | # minimum: 0 13 | # maximum: 10 14 | initial: [] 15 | -------------------------------------------------------------------------------- /.devcontainer/docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | main: 3 | build: 4 | context: .
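# "context: ." resolves relative to this compose file, so the image builds from the .devcontainer/ folder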
5 | dockerfile: Dockerfile 6 | volumes: 7 | - ..:/workspace:cached 8 | - home_vscode:/home/vscode/ 9 | command: sleep infinity 10 | 11 | environment: 12 | - PYTHONUNBUFFERED=1 13 | volumes: 14 | home_vscode: 15 | -------------------------------------------------------------------------------- /.github/codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | require_ci_to_pass: yes 3 | 4 | coverage: 5 | precision: 2 6 | round: down 7 | range: "60...100" 8 | 9 | status: 10 | project: 11 | default: # This can be anything, but it needs to exist as the name 12 | # basic settings 13 | target: 60% 14 | threshold: 20% 15 | -------------------------------------------------------------------------------- /notebooks/render-nb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | src=$1 4 | dst=${2:-${src%%.py}.ipynb} 5 | 6 | echo "$src -> $dst" 7 | 8 | jupytext $src --to ipynb -o - \ 9 | | jupyter nbconvert \ 10 | --stdin \ 11 | --to notebook \ 12 | --stdout \ 13 | --ExecutePreprocessor.store_widget_state=True \ 14 | --execute > "${dst}" 15 | -------------------------------------------------------------------------------- /notebooks/render-html.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | src=$1 4 | dst=${2:-${src%%.py}.html} 5 | 6 | echo "$src -> $dst" 7 | 8 | jupytext $src --set-kernel "python3" --to ipynb -o - \ 9 | | jupyter nbconvert \ 10 | --stdin \ 11 | --to html \ 12 | --stdout \ 13 | --ExecutePreprocessor.store_widget_state=True \ 14 | --execute > "${dst}" 15 | -------------------------------------------------------------------------------- /binder/render-nb-pipe.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Expects percent script notebook on stdin 4 | # produces rendered ipynb notebook on stdout 5 | 6 | jupytext --from 'py:percent' --to ipynb -o - \ 7 | | jupyter nbconvert \ 8 | --stdin \ 9 | --to notebook \ 10 | --stdout \ 11 | --ExecutePreprocessor.store_widget_state=True \ 12 | --execute 13 | -------------------------------------------------------------------------------- /binder/start: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This runs when binder is launched 4 | 5 | date > .startup.log 6 | jupytext -k python3 $HOME/notebooks/*py 7 | jupytext -s $HOME/notebooks/*{py,md} 8 | 9 | sed -i -e "s|__JUPYTERHUB_USER__|${JUPYTERHUB_USER}|g" $HOME/.config/dask/dask.yaml 10 | sed -i -e "s|__JUPYTERHUB_USER__|${JUPYTERHUB_USER}|g" $HOME/.jupyter/lab/workspaces/demo-* 11 | 12 | exec "$@" 13 | -------------------------------------------------------------------------------- /notebooks/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | set -e 4 | 5 | indir="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P)" 6 | outdir="$(dirname $indir)/docs/notebooks" 7 | 8 | mkdir -p $outdir 9 | 10 | 11 | for infile in $(find $indir -type f -maxdepth 1 -name '*py'); do 12 | outfile="${outdir}/$(basename ${infile%%.py}.ipynb)" 13 | $indir/render-nb.sh $infile $outfile 14 | done 15 | -------------------------------------------------------------------------------- /notebooks/Makefile: -------------------------------------------------------------------------------- 1 | DKR ?= kirillodc/odc-stac-binder:latest 2 | ALL_PY := $(wildcard *.py) 3 | ALL_NB := 
$(patsubst %.py, %.ipynb, $(ALL_PY)) 4 | 5 | all: $(ALL_NB) 6 | 7 | %.ipynb: %.py 8 | @echo $< "=>" $@ 9 | docker run -i --entrypoint ./binder/render-nb-pipe.sh $(DKR) < $< > $@ 10 | 11 | debug: 12 | docker run --rm -ti --entrypoint /bin/bash -v $$(pwd):/home/jovyan/notebooks $(DKR) 13 | 14 | clean: 15 | @echo Removing Rendered Notebooks 16 | rm -f $(ALL_NB) 17 | 18 | .PHONY: debug all clean 19 | -------------------------------------------------------------------------------- /docs/examples.rst: -------------------------------------------------------------------------------- 1 | .. 2 | Note that notebooks/ folder is downloaded by conf.py from a gist for now 3 | 4 | Example Notebooks 5 | ################# 6 | 7 | |Binder| 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | 12 | notebooks/stac-load-e84-aws 13 | notebooks/stac-load-S2-ms 14 | notebooks/stac-load-S2-deafrica 15 | 16 | .. |Binder| image:: https://mybinder.org/badge_logo.svg 17 | :target: https://mybinder.org/v2/gh/opendatacube/odc-stac/develop?urlpath=lab/workspaces/demo 18 | :alt: Run Examples in Binder 19 | -------------------------------------------------------------------------------- /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PY=3.10 2 | FROM mcr.microsoft.com/devcontainers/python:1-$PY 3 | 4 | COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /usr/local/bin/ 5 | 6 | RUN --mount=type=cache,target=/var/cache \ 7 | apt-get update \ 8 | && export DEBIAN_FRONTEND=noninteractive \ 9 | && apt-get -y install --no-install-recommends \ 10 | libgdal-dev \ 11 | gdal-bin \ 12 | netcdf-bin \ 13 | pandoc \ 14 | zip unzip \ 15 | tmux tig jq fzf silversearcher-ag \ 16 | && true 17 | -------------------------------------------------------------------------------- /odc/stac/bench/__init__.py: -------------------------------------------------------------------------------- 1 | """Benchmarking tools.""" 2 | 3 | from ._prepare import SAMPLE_SITES, dump_site 4 | from ._report import load_results 5 | from ._run import ( 6 | BenchLoadParams, 7 | BenchmarkContext, 8 | TimeSample, 9 | collect_context_info, 10 | load_from_json, 11 | run_bench, 12 | ) 13 | 14 | __all__ = ( 15 | "SAMPLE_SITES", 16 | "dump_site", 17 | "BenchLoadParams", 18 | "BenchmarkContext", 19 | "TimeSample", 20 | "collect_context_info", 21 | "load_from_json", 22 | "load_results", 23 | "run_bench", 24 | ) 25 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | odc-stac 2 | ======== 3 | 4 | .. include:: intro.rst 5 | 6 | .. toctree:: 7 | :caption: Introduction 8 | :hidden: 9 | :maxdepth: 2 10 | 11 | intro.rst 12 | 13 | .. toctree:: 14 | :caption: Developer Guide 15 | :hidden: 16 | :maxdepth: 2 17 | 18 | api.rst 19 | examples.rst 20 | benchmarking.rst 21 | 22 | .. toctree:: 23 | :caption: STAC 24 | :hidden: 25 | :maxdepth: 2 26 | 27 | stac-vs-odc.rst 28 | stac-best-practice.rst 29 | 30 | .. 
toctree:: 31 | :caption: Index 32 | :hidden: 33 | :maxdepth: 2 34 | 35 | tocs.rst 36 | -------------------------------------------------------------------------------- /binder/_home/.jupyter/lab/user-settings/@jupyterlab/shortcuts-extension/shortcuts.jupyterlab-settings: -------------------------------------------------------------------------------- 1 | { 2 | "shortcuts": [ 3 | { 4 | "command": "runmenu:run-all", 5 | "keys": [ 6 | "-", 7 | "-" 8 | ], 9 | "selector": "[data-jp-kernel-user]:focus" 10 | }, 11 | { 12 | "command": "kernelmenu:restart-and-clear", 13 | "keys": [ 14 | "0", 15 | "0" 16 | ], 17 | "selector": "[data-jp-kernel-user]:focus" 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /scripts/notebook_hash.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import os.path 3 | 4 | 5 | def compute(folder: str) -> tuple[str, list[str]]: 6 | hash = hashlib.sha256() 7 | paths = [ 8 | os.path.join(folder, file_name) 9 | for file_name in os.listdir(folder) 10 | if os.path.splitext(file_name)[1] == ".py" 11 | ] 12 | paths = sorted(paths, key=str.casefold) 13 | for path in paths: 14 | with open(path, "rb") as file: 15 | bytes = file.read() 16 | hash.update(bytes) 17 | return hash.hexdigest(), paths 18 | 19 | 20 | if __name__ == "__main__": 21 | folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "notebooks")) 22 | hsh, _ = compute(folder) 23 | print(hsh) 24 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | # -W -- treat warnings as errors 7 | SPHINXOPTS ?= -W 8 | SPHINXBUILD ?= sphinx-build 9 | SOURCEDIR = . 10 | BUILDDIR = _build 11 | 12 | # Put it first so that "make" without argument is like "make help". 13 | help: 14 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 15 | 16 | .PHONY: help Makefile 17 | 18 | # Catch-all target: route all unknown targets to Sphinx using the new 19 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
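# For example, "make html" expands to 'sphinx-build -M html . _build -W' and leaves the rendered site in _build/html.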
20 | %: Makefile 21 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 22 | 23 | clean: 24 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 25 | rm -rf _generated _api 26 | -------------------------------------------------------------------------------- /binder/environment.yml: -------------------------------------------------------------------------------- 1 | name: odc-stac 2 | channels: 3 | - conda-forge 4 | 5 | dependencies: 6 | - python =3.10 7 | - pip 8 | 9 | # odc-stac dependencies 10 | - odc-geo >=0.3.2 11 | - rasterio >=1.3.9 12 | - pystac >=1.9.0 # more flexible handling of extension versions 13 | - toolz 14 | - xarray 15 | # for reading with rasterio from s3 16 | - boto3 17 | 18 | # planetary-computer lib for URL signing 19 | - planetary-computer 20 | - pystac-client 21 | 22 | # JupyterLab 23 | - jupytext 24 | - jupyter-server-proxy 25 | - ipykernel 26 | - matplotlib-base 27 | - ipympl 28 | - dask 29 | 30 | # Some Geo libs 31 | - geopandas 32 | - folium 33 | 34 | # conveniences 35 | - autopep8 36 | - black 37 | - isort 38 | - python-dotenv # for notebooks 39 | - jupyterlab_code_formatter 40 | 41 | - pip: 42 | # odc-stac local checkout 43 | - -e ../ 44 | -------------------------------------------------------------------------------- /.devcontainer/post-create.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | main_env="$HOME/envs/main" 4 | 5 | process_main_env() { 6 | declare -l -a opts 7 | local main_py="$main_env/bin/python" 8 | 9 | [ -d "$main_env" ] || { 10 | echo "Creating main virtual environment..." 11 | mkdir -p "$(dirname "$main_env")" 12 | uv venv "$main_env" 13 | } 14 | for req in .devcontainer/requirements*.txt; do 15 | opts+=(-r "$req") 16 | done 17 | source "$main_env"/bin/activate 18 | uv pip install "${opts[@]}" 19 | } 20 | 21 | process_apt() { 22 | local apt_file=".devcontainer/apt.txt" 23 | if [ -e ${apt_file} ]; then 24 | sudo apt-get -y update 25 | awk '{if ($0 ~ /^[[:space:]]*#/) next; sub(/#[^"]*$/, ""); print}' <"${apt_file}" | 26 | xargs sudo apt-get -y install 27 | fi 28 | } 29 | 30 | cd /workspace || exit 1 31 | process_main_env 32 | process_apt 33 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /tests/test-env-py310.yml: -------------------------------------------------------------------------------- 1 | # Conda environment for running tests in odc-stac 2 | # conda env create -f test-env-py310.yml 3 | # conda activate odc-stac-tests-py310 4 | 5 | name: odc-stac-tests-py310 6 | channels: 7 | - conda-forge 8 | - nodefaults 9 | 10 | dependencies: 11 | - python =3.10 12 | - pip 13 | 14 | # odc-stac dependencies 15 | - affine 16 | - jinja2 17 | - numpy 18 | - pandas 19 | - toolz 20 | - odc-geo >=0.4.7 21 | - odc-loader >=0.5.1 22 | - pystac >=1.12.1 23 | - dask 24 | - xarray 25 | - rasterio 26 | 27 | # For mypy 28 | - types-python-dateutil 29 | 30 | # For tests 31 | - pytest 32 | - pytest-httpserver 33 | - pytest-cov 34 | - pytest-timeout 35 | - pytest-vcr 36 | - mock 37 | - deepdiff 38 | - pystac-client >=0.2.0 39 | - geopandas 40 | - stackstac 41 | - zarr 42 | 43 | # for docs 44 | - sphinx 45 | - sphinx_rtd_theme 46 | - sphinx-autodoc-typehints 47 | - nbsphinx 48 | - ipywidgets 49 | - jupytext 50 | 51 | # dev 52 | - autoflake 53 | - black >=25.1.0 54 | - isort 55 | - mypy 56 | - pylint =3 57 | - pip: 58 | - -e ../ 59 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | .. _api-reference: 2 | 3 | API Reference 4 | ############# 5 | 6 | .. highlight:: python 7 | .. py:module:: odc.stac 8 | .. py:module:: odc.stac.bench 9 | 10 | 11 | odc.stac 12 | ******** 13 | 14 | .. currentmodule:: odc.stac 15 | .. autosummary:: 16 | :toctree: _api/ 17 | 18 | load 19 | configure_rio 20 | configure_s3_access 21 | parse_item 22 | parse_items 23 | extract_collection_metadata 24 | output_geobox 25 | 26 | odc.stac.ParsedItem 27 | ******************* 28 | 29 | .. currentmodule:: odc.stac 30 | .. autosummary:: 31 | :toctree: _api/ 32 | 33 | ParsedItem 34 | ParsedItem.assets 35 | ParsedItem.crs 36 | ParsedItem.geoboxes 37 | ParsedItem.image_geometry 38 | ParsedItem.resolve_bands 39 | ParsedItem.safe_geometry 40 | ParsedItem.solar_date_at 41 | ParsedItem.strip 42 | 43 | RasterBandMetadata 44 | RasterCollectionMetadata 45 | RasterLoadParams 46 | RasterSource 47 | 48 | odc.stac.bench 49 | ************** 50 | 51 | .. currentmodule:: odc.stac.bench 52 | .. 
autosummary:: 53 | :toctree: _api/ 54 | 55 | BenchmarkContext 56 | BenchLoadParams 57 | 58 | dump_site 59 | load_from_json 60 | run_bench 61 | load_results 62 | -------------------------------------------------------------------------------- /docs/_static/xr-fixes.css: -------------------------------------------------------------------------------- 1 | /* xarray widget tweaks 2 | 3 | Some styles from default theme interfere, so define more specific rules to 4 | override 5 | */ 6 | 7 | 8 | /* override theme default of 800px which is too narrow I feel*/ 9 | .wy-nav-content { 10 | max-width: 56em; 11 | } 12 | 13 | .rst-content ul.xr-var-list li>* { 14 | margin-top: 0px !important; 15 | margin-bottom: 0px !important; 16 | } 17 | 18 | .rst-content ul.xr-dim-list li { 19 | display: inline-block !important; 20 | padding: 0 !important; 21 | ; 22 | margin: 0 !important; 23 | } 24 | 25 | .rst-content dl.xr-attrs dt, 26 | .rst-content dl.xr-attrs dd { 27 | margin: 0px 0 !important; 28 | font-size: inherit !important; 29 | background: inherit !important; 30 | color: inherit !important; 31 | border-top: none !important; 32 | padding: 0px 10px 0px 0px !important; 33 | float: left !important; 34 | white-space: nowrap !important; 35 | overflow: hidden !important; 36 | text-overflow: ellipsis !important; 37 | } 38 | 39 | .rst-content dl.xr-attrs dt { 40 | font-weight: normal !important; 41 | grid-column: 1 !important; 42 | } 43 | 44 | .rst-content dl.xr-attrs dd { 45 | grid-column: 2 !important; 46 | } 47 | -------------------------------------------------------------------------------- /odc/stac/__init__.py: -------------------------------------------------------------------------------- 1 | """STAC Item -> ODC Dataset[eo3].""" 2 | 3 | from odc.loader import configure_rio, configure_s3_access 4 | from odc.loader.types import RasterBandMetadata, RasterLoadParams, RasterSource 5 | 6 | from ._mdtools import ( 7 | ConversionConfig, 8 | ParsedItem, 9 | extract_collection_metadata, 10 | output_geobox, 11 | parse_item, 12 | parse_items, 13 | ) 14 | from ._stac_load import load 15 | from .model import RasterCollectionMetadata 16 | 17 | stac_load = load 18 | 19 | 20 | __all__ = ( 21 | "ParsedItem", 22 | "RasterBandMetadata", 23 | "RasterCollectionMetadata", 24 | "RasterLoadParams", 25 | "RasterSource", 26 | "ConversionConfig", 27 | "load", 28 | "stac_load", 29 | "configure_rio", 30 | "configure_s3_access", 31 | "parse_item", 32 | "parse_items", 33 | "extract_collection_metadata", 34 | "output_geobox", 35 | ) 36 | 37 | 38 | def __dir__(): 39 | return [*__all__, "__version__"] 40 | 41 | 42 | def __getattr__(name): 43 | # pylint: disable=import-outside-toplevel 44 | if name == "__version__": 45 | from importlib.metadata import version 46 | 47 | return version(__name__) 48 | raise AttributeError(f"module {__name__} has no attribute {name}") 49 | -------------------------------------------------------------------------------- /notebooks/README.md: -------------------------------------------------------------------------------- 1 | # Sample Notebooks 2 | 3 | 4 | ## Developer Notes 5 | 6 | Do not commit `*.ipynb` files here! We use `jupytext` for keeping notebooks in 7 | version control, specifically "py:percent" format. Install `jupytext` into your 8 | jupyterlab environment, then you should be able to "Open With->Notebook" on 9 | these `.py` files. 
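For orientation, a paired py:percent file is plain Python with comment-based cell markers. A minimal sketch is shown below (header abbreviated; `tests/notebooks/bench-runner.py` in this repo shows the full header that jupytext writes):

```python
# ---
# jupyter:
#   jupytext:
#     formats: ipynb,py:percent
# ---

# %% [markdown]
# A markdown cell.

# %%
# A code cell.
print("hello from a py:percent notebook")
```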
10 | 11 | To create a new one, start with a notebook file (`.ipynb`), then use the "Pair 12 | Notebook with percent Script" command (type `Ctrl-Shift-C` when editing a notebook, 13 | then start typing "percent" to fuzzy-find the command). 14 | 15 | 16 | ## Rendered Notebooks 17 | 18 | Notebooks are executed by a GitHub action and the results are uploaded to: 19 | 20 | ``` 21 | s3://datacube-core-deployment/odc-stac/nb/odc-stac-notebooks-{nb_hash}.tar.gz 22 | https://packages.dea.ga.gov.au/odc-stac/nb/odc-stac-notebooks-{nb_hash}.tar.gz 23 | ``` 24 | 25 | Where `{nb_hash}` is a 16-character hash computed from the content of `notebooks/*.py` (see `scripts/notebook_hash.py`). 26 | 27 | By the time changes are merged into the `develop` branch there should be a 28 | pre-rendered notebook archive accessible without authentication via HTTPS. 29 | Building the documentation on the Read the Docs site will use that archive rather than 30 | attempting to run the notebooks directly. 31 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ODC-STAC", 3 | "customizations": { 4 | "vscode": { 5 | "extensions": [ 6 | "ms-python.python", 7 | "ms-python.isort", 8 | "ms-python.pylint", 9 | "ms-python.debugpy", 10 | "ms-python.black-formatter", 11 | "ms-python.mypy-type-checker", 12 | "ms-python.vscode-pylance", 13 | "ms-azuretools.vscode-docker", 14 | "ms-toolsai.jupyter", 15 | "ms-toolsai.jupyterhub", 16 | "ms-toolsai.jupyter-renderers", 17 | "ms-vscode.makefile-tools", 18 | "github.vscode-github-actions", 19 | "foxundermoon.shell-format", 20 | "timonwong.shellcheck", 21 | "streetsidesoftware.code-spell-checker", 22 | "kahole.magit" 23 | ], 24 | "settings": { 25 | "python.pythonPath": "/home/vscode/envs/main/bin/python", 26 | "python.defaultInterpreterPath": "/home/vscode/envs/main/bin/python", 27 | "jupyter.notebookFileRoot": "/workspace/${fileDirname}" 28 | } 29 | } 30 | }, 31 | "features": {}, 32 | "workspaceFolder": "/workspace", 33 | "dockerComposeFile": [ 34 | "docker-compose.yml" 35 | ], 36 | "service": "main", 37 | "postCreateCommand": "bash .devcontainer/post-create.sh || true", 38 | "remoteUser": "vscode" 39 | } 40 | -------------------------------------------------------------------------------- /odc/stac/bench/_report.py: -------------------------------------------------------------------------------- 1 | """Helper methods for benchmark reporting.""" 2 | 3 | import glob 4 | import pickle 5 | from typing import Any, Dict, Iterable, Iterator, Union 6 | 7 | import pandas as pd 8 | 9 | # pylint: disable=unsupported-assignment-operation 10 | 11 | 12 | def load_results( 13 | sources: Union[str, Iterable[str]], 14 | ) -> pd.DataFrame: 15 | """ 16 | Load benchmark run results.
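A usage sketch (assuming pickle files saved from a benchmark run): ``df = load_results("bench-*.pkl")`` yields one row per time sample, with ``submit = t1 - t0`` and ``elapsed = t2 - t0`` columns derived from the raw timestamps.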
17 | 18 | :param sources: A glob pattern or a stream of pickle file paths 19 | :return: Pandas dataframe 20 | """ 21 | 22 | def _stream(paths: Iterable[str]) -> Iterator[Dict[str, Any]]: 23 | for idx, fname in enumerate(paths): 24 | with open(fname, "rb") as src: 25 | dd = pickle.load(src) 26 | ctx = dd["context"] 27 | samples = dd["samples"] 28 | rr = ctx.to_pandas_dict() 29 | 30 | for sample in samples: 31 | t0, t1, t2 = sample 32 | yield {"experiment": idx, **rr, "t0": t0, "t1": t1, "t2": t2} 33 | 34 | if isinstance(sources, str): 35 | # glob 36 | pkl_paths: Iterable[str] = sorted(glob.glob(sources)) 37 | else: 38 | pkl_paths = sources 39 | 40 | xx = pd.DataFrame(list(_stream(pkl_paths))) 41 | xx = xx.set_index("experiment") 42 | xx["submit"] = xx.t1 - xx.t0 43 | xx["elapsed"] = xx.t2 - xx.t0 44 | return xx 45 | -------------------------------------------------------------------------------- /docs/intro.rst: -------------------------------------------------------------------------------- 1 | .. highlight:: python 2 | 3 | Overview 4 | ######## 5 | 6 | Load STAC :py:class:`pystac.Item`\s into :py:class:`xarray.Dataset`. 7 | 8 | .. code-block:: python 9 | 10 | catalog = pystac_client.Client.open(...) 11 | query = catalog.search(...) 12 | xx = odc.stac.load( 13 | query.items(), 14 | bands=["red", "green", "blue"], 15 | resolution=100, 16 | ) 17 | xx.red.plot.imshow(col="time") 18 | 19 | 20 | See :py:func:`odc.stac.load`. 21 | 22 | 23 | Installation 24 | ############ 25 | 26 | Using pip 27 | ********* 28 | 29 | .. code-block:: bash 30 | 31 | pip install odc-stac 32 | 33 | Using Conda 34 | *********** 35 | 36 | .. code-block:: bash 37 | 38 | conda install -c conda-forge odc-stac 39 | 40 | 41 | From unreleased source 42 | ********************** 43 | 44 | Using the latest unreleased code in ``conda`` is also possible. It's best to install 45 | dependencies using conda, then install ``odc-stac`` with pip. A sample 46 | ``environment.yml`` is provided below. 47 | 48 | 49 | .. code-block:: yaml 50 | 51 | channels: 52 | - conda-forge 53 | dependencies: 54 | - odc-geo >=0.1.3 55 | - xarray >=0.20.1 56 | - numpy 57 | - dask 58 | - pandas 59 | - affine 60 | - rasterio 61 | - boto3 62 | - toolz 63 | - pystac 64 | - pystac-client 65 | - pip =20 66 | - pip: 67 | - git+https://github.com/opendatacube/odc-stac/ 68 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pickle 2 | /test_env 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | .mypy_cache/ 7 | dask-worker-space/ 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *,cover 49 | .hypothesis 50 | .pytest_cache 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | 59 | # Sphinx documentation 60 | docs/_build/ 61 | 62 | # PyBuilder 63 | target/ 64 | .idea/ 65 | 66 | # iPython Notebook 67 | .ipynb_checkpoints 68 | 69 | # Mac OS X 70 | .DS_Store 71 | docs/html/ 72 | 73 | # Generated Documentation 74 | generate/ 75 | docs/notebooks/ 76 | 77 | #Local Visual Studio Code configurations 78 | .vscode/ 79 | 80 | # used to cache dev install cache 81 | .run/ 82 | 83 | # emacs virtual env link 84 | .venv 85 | 86 | /notebooks/*html 87 | /notebooks/*ipynb 88 | /notebooks/*tif 89 | /wheels/* 90 | _off_* 91 | tt.py 92 | .cursorignore 93 | -------------------------------------------------------------------------------- /tests/common/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Common test data and utilities for STAC tests. 3 | """ 4 | 5 | from pystac import Item 6 | 7 | from odc.stac._mdtools import RasterBandMetadata 8 | 9 | # fmt: off 10 | S2_ALL_BANDS = { 11 | "B01", "B02", "B03", "B04", "B05", "B06", 12 | "B07", "B08", "B09", "B11", "B12", "B8A", 13 | "AOT", "SCL", "WVP", "visual", 14 | } 15 | # fmt: on 16 | 17 | 18 | STAC_CFG = { 19 | "sentinel-2-l2a": { 20 | "assets": { 21 | "*": RasterBandMetadata("uint16", 0, "1"), 22 | "SCL": RasterBandMetadata("uint8", 0, "1"), 23 | "visual": {"data_type": "uint8", "nodata": 0, "unit": "1"}, 24 | }, 25 | "aliases": { 26 | # Work around duplicate rededge common_name 27 | # by defining custom unique aliases 28 | "rededge1": "B05", 29 | "rededge2": "B06", 30 | "rededge3": "B07", 31 | }, 32 | } 33 | } 34 | 35 | NO_WARN_CFG = {"*": {"warnings": "ignore"}} 36 | 37 | 38 | def mk_stac_item( 39 | _id, datetime="2012-12-12T00:00:00Z", geometry=None, stac_extensions=None, **props 40 | ): 41 | if stac_extensions is None: 42 | stac_extensions = [] 43 | 44 | return Item.from_dict( 45 | { 46 | "type": "Feature", 47 | "stac_version": "1.0.0", 48 | "id": str(_id), 49 | "properties": { 50 | "datetime": datetime, 51 | **props, 52 | }, 53 | "geometry": geometry, 54 | "links": [], 55 | "assets": {}, 56 | "stac_extensions": stac_extensions, 57 | } 58 | ) 59 | -------------------------------------------------------------------------------- /.github/workflows/publish-pypi.yml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | # When a PR is updated, cancel the jobs from the previous version. Merges 7 | # do not define head_ref, so use run_id to never cancel those jobs. 
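# e.g. the group expands to "Publish to PyPI-<head_ref>" on PR events, and to a unique "Publish to PyPI-<run_id>" otherwise.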
8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 10 | cancel-in-progress: true 11 | 12 | jobs: 13 | publish-pypi: 14 | if: | 15 | github.repository == 'opendatacube/odc-stac' 16 | timeout-minutes: 15 17 | runs-on: ubuntu-latest 18 | 19 | steps: 20 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 21 | 22 | - name: Download wheels from artifacts 23 | uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 24 | with: 25 | name: python-wheels 26 | path: ./wheels/clean 27 | 28 | - name: Setup Python 29 | uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 30 | with: 31 | python-version: "3.10" 32 | 33 | - name: Install Twine 34 | run: | 35 | python -m pip install --upgrade pip 36 | python -m pip install --upgrade setuptools 37 | python -m pip install --upgrade \ 38 | toml \ 39 | wheel \ 40 | twine 41 | python -m pip freeze 42 | 43 | - name: Upload to PyPI 44 | env: 45 | TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} 46 | TWINE_USERNAME: __token__ 47 | 48 | run: | 49 | ls wheels/clean/ 50 | twine upload --non-interactive --skip-existing wheels/clean/* 51 | -------------------------------------------------------------------------------- /docs/stac-best-practice.rst: -------------------------------------------------------------------------------- 1 | Best Practices 2 | ############## 3 | 4 | :mod:`odc.stac` can operate on STAC items with only minimal information present, 5 | however the user experience is best when the following information is included: 6 | ``data_type`` and ``nodata`` from `Raster Extension`_, ``proj:{shape,transform,epsg}`` 7 | from `Projection Extension`_. 8 | 9 | For a full list of understood extension elements, see the table below. 10 | 11 | .. list-table:: 12 | 13 | * - `Raster Extension`_ 14 | - 15 | * - ``data_type`` 16 | - used to determine output pixel type 17 | * - ``nodata`` 18 | - used when combining multiple items into one raster plane 19 | * - ``unit`` 20 | - passed on as an attribute 21 | (can be useful for further processing) 22 | * - *[planned]* ``scale``, ``offset`` 23 | - currently ignored, but will be supported in the future 24 | 25 | * - `Projection Extension`_ 26 | - 27 | * - ``proj:shape`` 28 | - contains image size per asset 29 | * - ``proj:transform`` 30 | - contains geo-registration per asset 31 | * - ``proj:epsg`` 32 | - contains native CRS 33 | * - ``proj:wkt2``, ``proj:projjson`` 34 | - can be used instead of ``proj:epsg`` for CRS without EPSG code 35 | * - `Electro Optical Extension`_ 36 | - 37 | * - ``eo:bands.common_name`` 38 | - used to assign an alias for a band 39 | (use ``red`` instead of ``B04``). 40 | 41 | 42 | Assumptions 43 | =========== 44 | 45 | Items from the same collection are assumed to have the same number and names of 46 | bands, and bands are assumed to use the same ``data_type`` across the 47 | collection. 48 | 49 | It is assumed that Assets within a single Item share a common native projection. 50 | 51 | .. _`Raster Extension`: https://github.com/stac-extensions/raster 52 | .. _`Projection Extension`: https://github.com/stac-extensions/projection 53 | ..
_`Electro Optical Extension`: https://github.com/stac-extensions/eo 54 | -------------------------------------------------------------------------------- /.github/workflows/build-binder.yml: -------------------------------------------------------------------------------- 1 | name: Build Binder Image 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - develop 8 | paths: 9 | - 'binder/**' 10 | - '.github/workflows/build-binder.yml' 11 | 12 | # When a PR is updated, cancel the jobs from the previous version. Merges 13 | # do not define head_ref, so use run_id to never cancel those jobs. 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | repo2docker: 20 | timeout-minutes: 30 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 25 | 26 | - name: Build Binder Image 27 | uses: jupyterhub/repo2docker-action@69702685940e406c5bc32bc26395bbacda7ec9d3 # 0.21 28 | id: dkr 29 | with: 30 | IMAGE_NAME: kirillodc/odc-stac-binder 31 | DOCKER_USERNAME: kirillodc 32 | DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} 33 | 34 | - name: Print Notice 35 | uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 36 | env: 37 | DKR: ${{ steps.dkr.outputs.IMAGE_SHA_NAME }} 38 | with: 39 | script: | 40 | const {DKR} = process.env 41 | core.notice(`Built Docker Image: '${DKR}'`) 42 | 43 | - name: Dump conda environment 44 | run: | 45 | echo "# ${DKR}" > /tmp/environment.yaml 46 | docker run --rm \ 47 | --entrypoint /srv/conda/envs/notebook/bin/mamba \ 48 | ${DKR} \ 49 | env export | tee --append /tmp/environment.yaml 50 | env: 51 | DKR: ${{ steps.dkr.outputs.IMAGE_SHA_NAME }} 52 | 53 | - name: Publish environment.yaml artifact 54 | uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 55 | with: 56 | name: environment 57 | path: /tmp/environment.yaml 58 | if-no-files-found: error 59 | -------------------------------------------------------------------------------- /tests/notebooks/bench-runner.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,py:percent 5 | # text_representation: 6 | # extension: .py 7 | # format_name: percent 8 | # format_version: '1.3' 9 | # jupytext_version: 1.13.0 10 | # kernelspec: 11 | # display_name: ODC 12 | # language: python 13 | # name: odc 14 | # --- 15 | 16 | # %% 17 | import json 18 | import time 19 | from timeit import default_timer as t_now 20 | 21 | import datacube 22 | import numpy as np 23 | import odc.stac 24 | import planetary_computer as pc 25 | import pystac.item 26 | import pystac_client 27 | import rioxarray 28 | import stackstac 29 | import xarray as xr 30 | from distributed import Client 31 | from distributed import wait as dask_wait 32 | from odc.stac.bench import ( 33 | BenchLoadParams, 34 | collect_context_info, 35 | load_from_json, 36 | run_bench, 37 | ) 38 | 39 | # generated by bench-prep-query notebook 40 | fname = [ 41 | "site1-20200606-tall-strip-africa.geojson", 42 | "site2-2020_jun_jul-35MNM.geojson", 43 | ][0] 44 | 45 | print(f"Load from file: {fname}") 46 | all_features = json.load(open(fname, "rt")) 47 | print(json.dumps(all_features.get("properties", {}), indent=2)) 48 | 49 | # %% 50 | params = BenchLoadParams( 51 | scenario=fname, 52 | method="stackstac", 53 | bands=("B02", "B03", "B04"), 54 | chunks=(2048, 2048), 55 | patch_url=pc.sign, 56 | 
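# planetary_computer's pc.sign rewrites asset URLs to include short-lived SAS tokens so the rasters can be read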
extra={ 57 | "stackstac": {"dtype": "uint16", "fill_value": 0}, 58 | "odc-stac": {"groupby": "solar_day", "stac_cfg": {"*": {"warnings": "ignore"}}}, 59 | }, 60 | # resolution = (2**5)*10, 61 | # crs="epsg:3857", 62 | ) 63 | 64 | xx = load_from_json( 65 | all_features, 66 | params.with_method("stackstac"), 67 | ) 68 | 69 | yy = load_from_json( 70 | all_features, 71 | params.with_method("odc-stac"), 72 | ) 73 | 74 | assert xx.spec.transform == xx.geobox.transform 75 | assert xx.geobox == yy.geobox 76 | xx 77 | 78 | # %% 79 | client = Client("tcp://127.0.0.1:8786") 80 | if len(client.futures) > 0: 81 | print("Restarting Client") 82 | client.restart() 83 | display(client) 84 | 85 | # %% 86 | rr_odc, results_odc = run_bench(yy, client, ntimes=3) 87 | 88 | # %% 89 | rr_stc, results_stc = run_bench(xx, client, ntimes=3) 90 | -------------------------------------------------------------------------------- /binder/_home/.jupyter/lab/workspaces/demo-2a97.jupyterlab-workspace: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "layout-restorer:data": { 4 | "main": { 5 | "dock": { 6 | "type": "split-area", 7 | "orientation": "vertical", 8 | "sizes": [ 9 | 1.0 10 | ], 11 | "children": [ 12 | { 13 | "type": "tab-area", 14 | "currentIndex": 0, 15 | "widgets": [ 16 | "markdownviewer-widget:notebooks/Welcome.md", 17 | "notebook:notebooks/stac-load-e84-aws.ipynb", 18 | "notebook:notebooks/stac-load-S2-ms.ipynb" 19 | ] 20 | } 21 | ] 22 | }, 23 | "current": "markdownviewer-widget:notebooks/Welcome.md" 24 | }, 25 | "down": { 26 | "size": 0, 27 | "widgets": [] 28 | }, 29 | "left": { 30 | "collapsed": true, 31 | "widgets": [ 32 | "filebrowser", 33 | "running-sessions", 34 | "dask-dashboard-launcher", 35 | "git-sessions", 36 | "@jupyterlab/toc:plugin", 37 | "code-snippet-extension", 38 | "extensionmanager.main-view" 39 | ] 40 | }, 41 | "right": { 42 | "collapsed": true, 43 | "widgets": [ 44 | "jp-property-inspector", 45 | "debugger-sidebar" 46 | ] 47 | }, 48 | "relativeSizes": [ 49 | 0, 50 | 1, 51 | 0 52 | ] 53 | }, 54 | "file-browser-filebrowser:cwd": { 55 | "path": "notebooks" 56 | }, 57 | "markdownviewer-widget:notebooks/Welcome.md": { 58 | "data": { 59 | "path": "notebooks/Welcome.md", 60 | "factory": "Markdown Preview" 61 | } 62 | }, 63 | "notebook:notebooks/stac-load-e84-aws.ipynb": { 64 | "data": { 65 | "path": "notebooks/stac-load-e84-aws.ipynb", 66 | "factory": "Notebook" 67 | } 68 | }, 69 | "notebook:notebooks/stac-load-S2-ms.ipynb": { 70 | "data": { 71 | "path": "notebooks/stac-load-S2-ms.ipynb", 72 | "factory": "Notebook" 73 | } 74 | }, 75 | "dask-dashboard-launcher": { 76 | "url": "/user/__JUPYTERHUB_USER__/proxy/8787", 77 | "cluster": "" 78 | } 79 | }, 80 | "metadata": { 81 | "id": "demo" 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /tests/notebooks/bench-prep-query.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,py:percent 5 | # text_representation: 6 | # extension: .py 7 | # format_name: percent 8 | # format_version: '1.3' 9 | # jupytext_version: 1.13.0 10 | # kernelspec: 11 | # display_name: ODC 12 | # language: python 13 | # name: odc 14 | # --- 15 | 16 | # %% 17 | import json 18 | from timeit import default_timer as t_now 19 | 20 | import geopandas as gpd 21 | import numpy as np 22 | import odc.stac 23 | import planetary_computer as pc 24 | import pystac.item 25 | import pystac_client 26 | 
from pathlib import Path
from dask.utils import format_bytes 27 | from distributed import Client 28 | from distributed import wait as dask_wait 29 | 30 | if "geom_query" in locals(): 31 | bbox = tuple(geom_query.boundingbox) 32 | 33 | mode = "site1-tall" 34 | if mode == "site1-tall": 35 | # mgrs_tiles = ["35MNM", "35LNL", "35LNK", "35LNJ", "35LNH", "35LNG", "35LNF", "35LNE", "35LND"] 36 | bbox = (27.345815, -14.98724, 27.565542, -7.710992) # Narrow/Tall epsg:32735 37 | file_id = "site1-20200606-tall-strip-africa" 38 | datetime = "2020-06-06" 39 | query = {} 40 | elif mode == "site2": 41 | bbox = None 42 | file_id = "site2-2020_jun_jul-35MNM" 43 | datetime = "2020-06/2020-07" 44 | query = { 45 | "s2:mgrs_tile": {"eq": "35MNM"}, 46 | "s2:nodata_pixel_percentage": {"lt": 10}, 47 | } 48 | 49 | 50 | cat = pystac_client.Client.open("https://planetarycomputer.microsoft.com/api/stac/v1") 51 | search = cat.search( 52 | collections=["sentinel-2-l2a"], 53 | datetime=datetime, 54 | query=query, 55 | bbox=bbox, 56 | ) 57 | print("Query API end-point") 58 | all_features = search.item_collection_as_dict() 59 | 60 | all_features["properties"] = dict(url=search.url, query=search._parameters) 61 | all_features["properties"] 62 | 63 | # %% 64 | out_path = Path(f"{file_id}.geojson") 65 | if out_path.exists(): 66 | print(f"File exists, keeping previous version: {out_path}") 67 | else: 68 | print(f"Writing to: {out_path}") 69 | with open(out_path, "wt") as dst: json.dump(all_features, dst) 70 | 71 | # %% 72 | all_items = [pystac.item.Item.from_dict(f) for f in all_features["features"]] 73 | 74 | # %% 75 | gdf = gpd.GeoDataFrame.from_features(all_features, "epsg:4326") 76 | display(set(gdf["s2:mgrs_tile"].values), set(gdf.platform), len(set(gdf.datetime))) 77 | 78 | _map = gdf.explore( 79 | "s2:mgrs_tile", 80 | categorical=True, 81 | tooltip=[ 82 | "s2:mgrs_tile", 83 | "datetime", 84 | "s2:nodata_pixel_percentage", 85 | "eo:cloud_cover", 86 | ], 87 | popup=True, 88 | style_kwds=dict(fillOpacity=0.0, width=2), 89 | name="STAC", 90 | ) 91 | display(_map) 92 | 93 | # %% 94 | display(gdf.head()) 95 | # gdf[gdf['s2:nodata_pixel_percentage']>10].explore() 96 | 97 | # %% 98 | -------------------------------------------------------------------------------- /docs/samples/save-cog-from-stac.py: -------------------------------------------------------------------------------- 1 | """ 2 | Save Landsat 8 pass to GeoTIFF (COG). 3 | 4 | This program captures one pass of Band 4 (red) of Landsat 8 to a single 5 | cloud optimized GeoTIFF image. The produced image is rotated to maximize 6 | the proportion of valid pixels in the result. Data is saved in EPSG:3857 at 7 | native resolution (30m). The produced TIFF is about 4.7GiB.
8 | 9 | Data is sourced from Microsoft Planetary Computer: 10 | 11 | https://planetarycomputer.microsoft.com/ 12 | 13 | Python environment 14 | 15 | ```bash 16 | pip install odc-stac==0.3.0rc1 tqdm planetary_computer pystac-client 17 | ``` 18 | 19 | """ 20 | 21 | import planetary_computer 22 | import pystac_client 23 | from affine import Affine 24 | from dask.utils import format_bytes 25 | from odc.geo import geom 26 | from odc.geo.geobox import GeoBox 27 | from tqdm.auto import tqdm 28 | 29 | from odc.stac import configure_rio 30 | from odc.stac import load as stac_load 31 | 32 | res = 30 # resolution 33 | a = 12.7 # rotation in degrees 34 | band = "SR_B4" 35 | 36 | catalog = pystac_client.Client.open( 37 | "https://planetarycomputer.microsoft.com/api/stac/v1" 38 | ) 39 | 40 | items = catalog.search( 41 | collections=["landsat-8-c2-l2"], 42 | datetime="2021-07-01T08:00:00Z/2021-07-01T09:00:00Z", 43 | bbox=(-180, -50, 180, 50), 44 | ).item_collection() 45 | 46 | # Compute Polygon of the pass in EPSG:3857 47 | ls8_pass = geom.unary_union( 48 | geom.Geometry(item.geometry, "epsg:4326").to_crs("epsg:3857") for item in items 49 | ) 50 | assert ls8_pass is not None 51 | 52 | # Construct rotated GeoBox 53 | # rotate geometry 54 | # construct axis aligned geobox in rotated space 55 | # then rotate geobox the other way 56 | gbox = Affine.rotation(-a) * GeoBox.from_geopolygon( 57 | ls8_pass.transform(Affine.rotation(a)), 58 | resolution=res, 59 | ) 60 | 61 | # Assume COG datasource, disables looking for external files (it's slow in the cloud) 62 | configure_rio(cloud_defaults=True) 63 | 64 | print(f"Loading {band} => {gbox.shape.x:,d}x{gbox.shape.y:,d}") 65 | xx = stac_load( 66 | items, 67 | like=gbox, 68 | bands=[band], 69 | dtype="int16", 70 | nodata=0, 71 | groupby="solar_day", 72 | resampling="average", 73 | pool=4, # Use 4 cores for loading 74 | progress=tqdm, # 75 | patch_url=planetary_computer.sign, 76 | ) 77 | print("Load finished") 78 | 79 | ts = xx.time[0].dt.strftime("%Y%m%d").item() 80 | fname = f"{band}-{ts}-{res}m.tif" 81 | print( 82 | f"Will write image to: '{fname}' Raw Size is: {format_bytes(xx[band].data.size*xx[band].dtype.itemsize)}" 83 | ) 84 | 85 | xx[band].odc.write_cog( 86 | fname, 87 | overwrite=True, 88 | blocksize=2048, 89 | ovr_blocksize=1024, 90 | overview_resampling="average", 91 | intermediate_compression={"compress": "zstd", "zstd_level": 1}, 92 | use_windowed_writes=True, 93 | compress="zstd", 94 | zstd_level=6, 95 | BIGTIFF=True, 96 | SPARSE_OK=True, 97 | NUM_THREADS=4, 98 | ) 99 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "odc-stac" 3 | description = "Tooling for converting STAC metadata to ODC data model" 4 | version = "0.5.0" 5 | authors = [ 6 | {name = "Open Data Cube"} 7 | ] 8 | maintainers = [ 9 | {name = "Open Data Cube"} 10 | ] 11 | readme = "README.rst" 12 | license = {text = "Apache License 2.0"} 13 | requires-python = ">=3.10" 14 | classifiers = [ 15 | "Intended Audience :: Developers", 16 | "Operating System :: OS Independent", 17 | "Programming Language :: Python :: 3.10", 18 | "Programming Language :: Python :: 3.11", 19 | "Programming Language :: Python :: 3.12", 20 | "Programming Language :: Python :: 3.13", 21 | "Topic :: Software Development :: Libraries :: Python Modules", 22 | "Topic :: Scientific/Engineering :: GIS", 23 | "Typing :: Typed" 24 | ] 25 | dependencies = [ 26 | "affine", 27 
| "odc-geo>=0.4.7", 28 | "odc-loader>=0.6.0", 29 | "rasterio>=1.0.0,!=1.3.0,!=1.3.1", 30 | "dask[array]", 31 | "numpy>=1.20.0", 32 | "pandas", 33 | "pystac>=1.0.0,<2", 34 | "toolz", 35 | "typing-extensions", 36 | "xarray>=0.19" 37 | ] 38 | 39 | [project.optional-dependencies] 40 | botocore = ["botocore"] 41 | docs = [ 42 | "sphinx", 43 | "sphinx_rtd_theme", 44 | "nbsphinx", 45 | "sphinx-autodoc-typehints", 46 | "matplotlib-inline", 47 | "pandas", 48 | "distributed", 49 | "ipython", 50 | "ipykernel" 51 | ] 52 | test = [ 53 | "pytest", 54 | "pytest-cov", 55 | "pytest-timeout", 56 | "pystac_client", 57 | "distributed", 58 | "geopandas" 59 | ] 60 | test-all = [ 61 | "pytest", 62 | "pytest-cov", 63 | "pytest-timeout", 64 | "pystac_client", 65 | "distributed", 66 | "geopandas", 67 | "stackstac" 68 | ] 69 | 70 | [project.urls] 71 | Documentation = "https://odc-stac.readthedocs.io/en/latest/" 72 | "Bug Reporting" = "https://github.com/opendatacube/odc-stac/issues" 73 | Homepage = "https://github.com/opendatacube/odc-stac/" 74 | 75 | [build-system] 76 | requires = ["flit_core >=3.2,<4"] 77 | build-backend = "flit_core.buildapi" 78 | 79 | [tool.flit.module] 80 | name = "odc.stac" 81 | 82 | [tool.mypy] 83 | python_version = "3.10" 84 | ignore_missing_imports = true 85 | allow_redefinition = true 86 | enable_error_code = ["explicit-override"] 87 | warn_redundant_casts = true 88 | warn_unused_ignores = true 89 | plugins = "numpy.typing.mypy_plugin" 90 | explicit_package_bases = true 91 | 92 | [tool.coverage.run] 93 | omit = [ 94 | "tests/*", 95 | "*/test_*" 96 | ] 97 | 98 | [tool.isort] 99 | profile = "black" 100 | 101 | [tool.pylint.messages_control] 102 | max-line-length = 120 103 | max-args = 15 104 | max-positional-arguments = 12 105 | disable = [ 106 | "missing-function-docstring", 107 | "invalid-name", 108 | "fixme", 109 | "wrong-import-order", 110 | "duplicate-code", 111 | "ungrouped-imports", 112 | ] 113 | -------------------------------------------------------------------------------- /odc/stac/bench/_prepare.py: -------------------------------------------------------------------------------- 1 | """Utilities for benchmarking.""" 2 | 3 | import json 4 | from pathlib import Path 5 | from typing import Any, Dict 6 | 7 | # pylint: disable=import-outside-toplevel 8 | 9 | SAMPLE_SITES = { 10 | "s2-ms-mosaic": { 11 | "file_id": "s2-ms-mosaic_2020-06-06--P1D", 12 | "api": "https://planetarycomputer.microsoft.com/api/stac/v1", 13 | "search": { 14 | "collections": ["sentinel-2-l2a"], 15 | "datetime": "2020-06-06", 16 | "bbox": [27.345815, -14.98724, 27.565542, -7.710992], 17 | "query": {}, 18 | }, 19 | }, 20 | "s2-ms-deep": { 21 | "file_id": "s2-ms-deep_2020-06--P2M_35MNM", 22 | "api": "https://planetarycomputer.microsoft.com/api/stac/v1", 23 | "search": { 24 | "collections": ["sentinel-2-l2a"], 25 | "datetime": "2020-06/2020-07", 26 | "bbox": None, 27 | "query": { 28 | "s2:mgrs_tile": {"eq": "35MNM"}, 29 | "s2:nodata_pixel_percentage": {"lt": 10}, 30 | }, 31 | }, 32 | }, 33 | } 34 | 35 | 36 | def dump_site(site: Dict[str, Any], overwrite: bool = False) -> Dict[str, Any]: 37 | """ 38 | Prepare input for benchmarking. 39 | 40 | Queries API end-point according to site configuration and dumps result into a geojson file. Site 41 | configuration must include ``file_id:str, api:str, search:Dict[str,Any]``. 42 | 43 | .. 
code-block:: json 44 | 45 | { 46 | "file_id": "ms-s2-long-mosaic_2020-06-06--P1D", 47 | "api": "https://planetarycomputer.microsoft.com/api/stac/v1", 48 | "search": { 49 | "collections": ["sentinel-2-l2a"], 50 | "datetime": "2020-06-06", 51 | "bbox": [ 27.345815, -14.98724, 27.565542, -7.710992], 52 | "query": {} 53 | } 54 | } 55 | 56 | :param site: Definition of the test query 57 | :param overwrite: overwrite existing file 58 | :return: Returns GeoJSON FeatureCollection with extra metadata about the query 59 | """ 60 | import pystac_client 61 | 62 | api = site["api"] 63 | search = site["search"] 64 | 65 | cat = pystac_client.Client.open(api) 66 | search = cat.search(**search) 67 | print(f"Query API end-point: {api}") 68 | all_features = search.item_collection_as_dict() 69 | all_features["properties"] = { 70 | "api": search.url, 71 | "search": search._parameters, # pylint: disable=protected-access 72 | } 73 | 74 | out_path = Path(f"{site['file_id']}.geojson") 75 | if out_path.exists(): 76 | if overwrite: 77 | print(f"Will overwrite: {out_path}") 78 | else: 79 | print(f"File exists, keeping previous version: {out_path}") 80 | return all_features 81 | 82 | print(f"Writing to: {out_path}") 83 | with open(out_path, "wt", encoding="utf8") as dst: 84 | json.dump(all_features, dst) 85 | 86 | return all_features 87 | -------------------------------------------------------------------------------- /tests/data/lidar_dem.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "Feature", 3 | "stac_version": "1.0.0-beta.2", 4 | "id": "lidar_id", 5 | "properties": { 6 | "start_datetime": "2012-01-01T00:00:00Z", 7 | "end_datetime": "2012-01-01T00:00:00Z", 8 | "resolution": 1.0, 9 | "data_type": "float32", 10 | "derived_from": "Category 1 Lidar", 11 | "platform": "Aircraft", 12 | "interpolation_type": "TIN", 13 | "horizontal_datum": "GDA94", 14 | "vertical_datum": "AHD71 - using local Geoid model", 15 | "model_type": "DEM", 16 | "horizontal_accuracy": "+/-0.80 @95% Confidence Interval", 17 | "vertical_accuracy": "+/-0.30 @95% Confidence Interval", 18 | "sensor": "ALS50 (SN101)", 19 | "proj:epsg": 28355, 20 | "proj:shape": [ 21 | 2000, 22 | 2000 23 | ], 24 | "proj:transform": [ 25 | 1.0, 26 | 0.0, 27 | 766000.0, 28 | 0.0, 29 | -1.0, 30 | 6732000.0, 31 | 0.0, 32 | 0.0, 33 | 1.0 34 | ], 35 | "datetime": null 36 | }, 37 | "geometry": { 38 | "type": "Polygon", 39 | "coordinates": [ 40 | [ 41 | [ 42 | 149.74413486135487, 43 | -29.513331085946845 44 | ], 45 | [ 46 | 149.74462179934187, 47 | -29.531360829758146 48 | ], 49 | [ 50 | 149.76523815043552, 51 | -29.53093320024493 52 | ], 53 | [ 54 | 149.7647475661126, 55 | -29.512903768601603 56 | ], 57 | [ 58 | 149.74413486135487, 59 | -29.513331085946845 60 | ] 61 | ] 62 | ] 63 | }, 64 | "links": [ 65 | { 66 | "rel": "root", 67 | "href": "s3://example-bucket/catalog.json", 68 | "type": "application/json" 69 | }, 70 | { 71 | "rel": "collection", 72 | "href": "s3://example-bucket/lidar_collection/collection.json", 73 | "type": "application/json" 74 | }, 75 | { 76 | "rel": "parent", 77 | "href": "s3://example-bucket/lidar_collection/collection.json", 78 | "type": "application/json" 79 | }, 80 | { 81 | "rel": "self", 82 | "href": "s3://example-bucket/lidar_collection/lidar_id.json", 83 | "type": "application/json" 84 | } 85 | ], 86 | "assets": { 87 | "dem": { 88 | "href": "s3://example-bucket/lidar_id.tif", 89 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 90 | "title": "Cloud-Optimized Geotiff" 91 | 
} 92 | }, 93 | "bbox": [ 94 | 149.74413486135487, 95 | -29.531360829758146, 96 | 149.76523815043552, 97 | -29.512903768601603 98 | ], 99 | "stac_extensions": [ 100 | "projection" 101 | ], 102 | "collection": "lidar_collection" 103 | } 104 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 6 | ## [v0.3.5] - 2023-01-18 7 | 8 | - Fix data loading with Dask for collections where items might have "missing" assets 9 | 10 | ## [v0.3.4] - 2022-12-08 11 | 12 | - Implement `fail_on_error=False` option for skipping over errors while loading data 13 | - Maintenance of GitHub Actions 14 | 15 | ## [v0.3.3] - 2022-10-20 16 | 17 | - Fixes to support `xarray >= 2022.10.0` 18 | 19 | ## [v0.3.2] - 2022-09-09 20 | 21 | - Multi-band support when parsing STAC items 22 | - Remove ambiguous alias warnings and errors; instead pick the "best" band for a 23 | given common name based on a simple heuristic (favour single-band assets over 24 | multi-band, use alphabetical order when band count is the same). 25 | - Accept `.` syntax for specifying bands 26 | - Support files with GCP-based geo-reference 27 | - Robust handling of transforms that "break" item geometry, better handling of cases 28 | where item geometry doesn't project cleanly into the destination projection 29 | - Fix error in GDAL environment configuration for the non-Dask case 30 | 31 | ## [v0.3.1] - 2022-06-28 32 | 33 | - Use asset key as a canonical name, fixes Landsat collection parsing 34 | 35 | ## [v0.3.0] - 2022-06-06 36 | 37 | - No longer depend on the `datacube` library 38 | - Significantly smaller set of compulsory dependencies, easier to install/deploy 39 | - Using the `odc-geo` library instead of `datacube` for `GeoBox` and `Geometry` classes 40 | - Can load data into rotated pixel planes ([Example](https://github.com/opendatacube/odc-stac/wiki/Generating-Rotated-Images-to-Save-Space)) 41 | - Arbitrary grouping of STAC items into pixel planes with user-supplied grouping methods or grouping by property name 42 | - Better handling of credentials and other GDAL state in a distributed context 43 | - credentials and GDAL environment configuration were previously part of the global state; that global state is now removed, so you can access collections with different permissions from the same Dask cluster (for example, mixing public and private access).
44 | - Parallelized data loading even when not using Dask 45 | - Progress reporting for non-Dask load with `tqdm` 46 | 47 | ## [v0.2.4] - 2022-01-19 48 | 49 | ### Changed 50 | 51 | - Removed `odc.index.` module 52 | 53 | ## [v0.2.3] - 2022-01-05 54 | 55 | ### Added 56 | 57 | - This CHANGELOG 58 | - `requirements-dev.txt` 59 | - Documentation 60 | - Upload built conda environment as an artifact 61 | - Notebook rendering to GitHub Actions, including hash-based artifact checks 62 | - Initial benchmarking tooling, still in progress 63 | 64 | ### Changed 65 | 66 | - Moved publishing steps into separate workflows 67 | - Deprecated imports from `odc.index.*` 68 | - Removed `.units` attribute from `.time` axis for better inter-op with `.to_zarr`, `.to_netcdf` 69 | 70 | ### Fixed 71 | 72 | - Handling of STAC Items with only partial `proj` data 73 | - Typos in documentation 74 | 75 | ## [v0.2.2] - 2021-10-25 76 | 77 | ### Added 78 | 79 | - Binder launcher to README 80 | - Another USGS STAC example for Landsat SR 81 | - Documentation 82 | 83 | ### Changed 84 | 85 | - Cleaned up test fixtures 86 | - Relaxed `is_raster_data` check 87 | - Force data band decision for explicitly configured bands 88 | - Moved constants into global scope 89 | 90 | ## [v0.2.1] - 2021-10-18 91 | 92 | Initial release as a standalone project. 93 | Previously, this project was part of https://github.com/opendatacube/odc-tools. 94 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | odc.stac 2 | ######## 3 | 4 | |Documentation Status| |Test Status| |Test Coverage| |Binder| |Discord| 5 | 6 | Load STAC items into ``xarray`` Datasets. Process locally or distribute data 7 | loading and computation with Dask_. 8 | 9 | Usage 10 | ##### 11 | 12 | 13 | odc.stac.load 14 | ~~~~~~~~~~~~~ 15 | 16 | .. code-block:: python 17 | 18 | catalog = pystac_client.Client.open(...) 19 | query = catalog.search(...) 20 | xx = odc.stac.load( 21 | query.items(), 22 | bands=["red", "green", "blue"], 23 | ) 24 | xx.red.plot.imshow(col="time") 25 | 26 | For more details see `Documentation`_ and `Sample Notebooks`_, or try it out on Binder_. 27 | 28 | 29 | Installation 30 | ############ 31 | 32 | Using pip 33 | ~~~~~~~~~ 34 | 35 | .. code-block:: bash 36 | 37 | pip install odc-stac 38 | 39 | To install with ``botocore`` support (for working with AWS): 40 | 41 | .. code-block:: bash 42 | 43 | pip install 'odc-stac[botocore]' 44 | 45 | 46 | Using Conda 47 | ~~~~~~~~~~~ 48 | 49 | This package is available on the ``conda-forge`` channel: 50 | 51 | .. code-block:: bash 52 | 53 | conda install -c conda-forge odc-stac 54 | 55 | 56 | From unreleased source 57 | ~~~~~~~~~~~~~~~~~~~~~~ 58 | 59 | To use the development version of ``odc-stac``, install dependencies from ``conda``, then 60 | install ``odc-stac`` with ``pip``. 61 | 62 | A sample ``environment.yml`` is provided below. 63 | 64 | .. code-block:: yaml 65 | 66 | channels: 67 | - conda-forge 68 | dependencies: 69 | - odc-geo 70 | - xarray 71 | - numpy 72 | - dask 73 | - pandas 74 | - affine 75 | - rasterio 76 | - toolz 77 | - pystac 78 | - pystac-client 79 | - pip 80 | - pip: 81 | - "git+https://github.com/opendatacube/odc-stac/" 82 | 83 | Developing 84 | ########## 85 | 86 | To develop ``odc-stac`` locally, it's best to use the provided devcontainer_. 87 | 88 | ..
code-block:: bash 89 | 90 | git clone https://github.com/opendatacube/odc-stac 91 | code odc-stac 92 | 93 | Once running inside the dev container, run tests with pytest_: 94 | 95 | .. code-block:: bash 96 | 97 | pytest 98 | 99 | Linting is provided by mypy_, pylint_, and black_: 100 | 101 | .. code-block:: bash 102 | 103 | black --check . 104 | pylint -v odc 105 | mypy odc 106 | 107 | 108 | .. |Documentation Status| image:: https://readthedocs.org/projects/odc-stac/badge/?version=latest 109 | :target: https://odc-stac.readthedocs.io/en/latest/?badge=latest 110 | :alt: Documentation Status 111 | 112 | .. |Test Status| image:: https://github.com/opendatacube/odc-stac/actions/workflows/main.yml/badge.svg 113 | :target: https://github.com/opendatacube/odc-stac/actions/workflows/main.yml 114 | :alt: Test Status 115 | 116 | .. |Test Coverage| image:: https://codecov.io/gh/opendatacube/odc-stac/branch/develop/graph/badge.svg?token=HQ8nTuZHH5 117 | :target: https://codecov.io/gh/opendatacube/odc-stac 118 | :alt: Test Coverage 119 | 120 | .. |Binder| image:: https://mybinder.org/badge_logo.svg 121 | :target: https://mybinder.org/v2/gh/opendatacube/odc-stac/develop?urlpath=lab/workspaces/demo 122 | :alt: Run Examples in Binder 123 | 124 | .. |Discord| image:: https://img.shields.io/discord/1212501566326571070?label=Discord&logo=discord&logoColor=white&color=7289DA 125 | :target: https://discord.gg/4hhBQVas5U 126 | :alt: Join Discord for support 127 | 128 | .. _Binder: https://mybinder.org/v2/gh/opendatacube/odc-stac/develop?urlpath=lab/workspaces/demo 129 | 130 | .. _pytest: https://docs.pytest.org 131 | 132 | .. _mypy: http://mypy-lang.org/ 133 | 134 | .. _pylint: https://pylint.org/ 135 | 136 | .. _black: https://github.com/psf/black 137 | 138 | .. _`Documentation`: https://odc-stac.readthedocs.io/ 139 | 140 | .. _`Sample Notebooks`: https://odc-stac.readthedocs.io/en/latest/examples.html 141 | 142 | .. _Dask: https://dask.org/ 143 | 144 | .. _devcontainer: https://code.visualstudio.com/docs/devcontainers/containers 145 | -------------------------------------------------------------------------------- /docs/stac-vs-odc.rst: -------------------------------------------------------------------------------- 1 | STAC vs Open Datacube 2 | ##################### 3 | 4 | The `Open Datacube`_ (ODC) project, on which this library is based, started before the `STAC`_ 5 | spec existed. As a result, ODC uses different terminology for otherwise very 6 | similar concepts. 7 | 8 | 9 | .. list-table:: 10 | :header-rows: 1 11 | 12 | * - STAC 13 | - ODC 14 | - Description 15 | * - :py:class:`~pystac.Collection` 16 | - :py:class:`~datacube.model.Product` 17 | - Collection of observations across space and time 18 | * - :py:class:`~pystac.Item` 19 | - :py:class:`~datacube.model.Dataset` 20 | - Single observation (specific time and place), multi-channel 21 | * - :py:class:`~pystac.Asset` 22 | - :py:class:`~datacube.model.Measurement` 23 | - Component of a single observation 24 | * - Band_ 25 | - :py:class:`~datacube.model.Measurement` 26 | - Pixel plane within a multi-plane asset 27 | * - `Common Name`_ 28 | - Alias 29 | - Refer to the same band by different names 30 | 31 | Similarly to STAC, ODC uses several levels of hierarchy to model metadata. At 32 | the highest level there is the *Product*, which is a collection of *Datasets*. Each 33 | *Dataset* contains a set of *Measurements* and related metadata. Finally, a 34 | *Measurement* describes a single plane of pixels captured at roughly the same 35 | time.
Metadata includes the location of the "file" and possibly a location within 36 | that file. 37 | 38 | Multiple Bands per File 39 | ======================= 40 | 41 | Multiple bands in a single file are supported by both ODC and STAC, but the 42 | representation differs. In STAC, another level of hierarchy is added below an 43 | *Asset* via the `bands attribute of the EO extension`_. Resources pointed 44 | to by an *Asset* may contain more than one band of pixels, and an *Asset* 45 | contains descriptions of those bands. In ODC, the *Asset* is not modelled 46 | explicitly; instead, the resource path and the potential location within that 47 | resource are properties of a *Measurement* object. It is common in STAC to have a 48 | one-to-one mapping between band and asset, and in that scenario an ODC *Measurement* and a 49 | STAC *Asset* can be seen as equivalent. 50 | 51 | Geo Referencing Metadata 52 | ======================== 53 | 54 | Precise geo-referencing metadata is stored within the file pointed to by an 55 | *Asset*/*Measurement*, but it can also be recorded within a STAC *Item*/ODC 56 | *Dataset* document. Having geo-referencing information at this level can enable 57 | more efficient data access by providing spatial information without needing to 58 | access the source (data file) itself. 59 | 60 | In STAC, the `Projection Extension`_ is used to bring this metadata from the file to 61 | the *Item* document. In STAC, each band might have a different projection, but in ODC 62 | the projection is a *Dataset*-level property and has to be shared across all 63 | *Measurements*. In ODC, individual bands can be of different resolutions and have 64 | different footprints (usually with a lot of overlap), but **must** be in the 65 | same projection. 66 | 67 | Consistency Assumptions 68 | ======================= 69 | 70 | In STAC, *Collection* is a very loose term; in theory it can point to a very 71 | heterogeneous set of *Items*. In practice, *Items* are typically very similar in 72 | structure; most contain the same set of *Assets* and bands. ODC is stricter 73 | in that regard. An ODC *Product* specifies the expected set of *Measurements* per 74 | *Dataset* as well as some basic common metadata per *Measurement*, specifically the 75 | pixel data type, which is assumed to stay the same across all *Datasets* for a 76 | given *Measurement*. 77 | 78 | The STAC equivalent would be the `Item Assets`_ extension with the `Raster Extension`_ 79 | inside. It describes, at the *Collection* level, the expected structure of the *Items* 80 | contained within. 81 | 82 | 83 | .. _`Open Datacube`: https://www.opendatacube.org/ 84 | .. _`STAC`: https://stacspec.org/ 85 | .. _`Projection Extension`: https://github.com/stac-extensions/projection 86 | .. _`Raster Extension`: https://github.com/stac-extensions/raster 87 | .. _`Item Assets`: https://github.com/stac-extensions/item-assets 88 | .. _Band: https://github.com/stac-extensions/eo#band-object 89 | .. _`Common Name`: https://github.com/stac-extensions/eo#common-band-names 90 | ..
_`bands attribute of the EO extension`: https://github.com/stac-extensions/eo#band-object 91 | -------------------------------------------------------------------------------- /.github/workflows/render.yml: -------------------------------------------------------------------------------- 1 | name: Render Example Notebooks 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | force: 7 | description: 'Force re-rendering of notebooks' 8 | required: false 9 | default: 'false' 10 | type: boolean 11 | push: 12 | paths: 13 | - "notebooks/*py" 14 | - ".github/workflows/render.yml" 15 | 16 | # When a PR is updated, cancel the jobs from the previous version. Merges 17 | # do not define head_ref, so use run_id to never cancel those jobs. 18 | concurrency: 19 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 20 | cancel-in-progress: true 21 | 22 | jobs: 23 | build-binder-env: 24 | timeout-minutes: 15 25 | runs-on: ubuntu-latest 26 | 27 | steps: 28 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 29 | 30 | - uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 31 | id: binder_cache 32 | with: 33 | path: /tmp/binder_env 34 | key: ${{ runner.os }}-binder-env-${{ hashFiles('binder/environment.yml') }} 35 | 36 | - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 37 | if: steps.binder_cache.outputs.cache-hit != 'true' 38 | with: 39 | channels: conda-forge 40 | channel-priority: true 41 | activate-environment: "" 42 | mamba-version: "*" 43 | use-mamba: true 44 | 45 | - name: Dump Conda Environment Info 46 | shell: bash -l {0} 47 | if: steps.binder_cache.outputs.cache-hit != 'true' 48 | run: | 49 | conda info 50 | conda list 51 | mamba --version 52 | conda config --show-sources 53 | conda config --show 54 | printenv | sort 55 | 56 | - name: Build Python Environment for Notebooks 57 | shell: bash -l {0} 58 | if: steps.binder_cache.outputs.cache-hit != 'true' 59 | run: | 60 | cd binder 61 | mamba env create -f environment.yml -p /tmp/binder_env 62 | 63 | - name: Check Python Env 64 | shell: bash -l {0} 65 | if: steps.binder_cache.outputs.cache-hit != 'true' 66 | run: | 67 | mamba env export -p /tmp/binder_env 68 | 69 | render: 70 | timeout-minutes: 15 71 | runs-on: ubuntu-latest 72 | 73 | needs: 74 | - build-binder-env 75 | 76 | steps: 77 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 78 | 79 | - name: Config 80 | id: cfg 81 | run: | 82 | find notebooks/ -maxdepth 1 -name '*.py' -type f | sort -f -d 83 | 84 | nb_dir="docs/notebooks" 85 | nb_hash=$(python scripts/notebook_hash.py) 86 | echo "Notebooks hash: ${nb_hash}" 87 | echo "nb-dir=${nb_dir}" >> $GITHUB_OUTPUT 88 | echo "nb-hash=${nb_hash}" >> $GITHUB_OUTPUT 89 | echo "nb-archive=odc-stac-notebooks-${nb_hash}.tar.gz" >> $GITHUB_OUTPUT 90 | 91 | - uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 92 | id: nb_cache 93 | with: 94 | path: ${{ steps.cfg.outputs.nb-dir }} 95 | key: docs-notebooks-${{ hashFiles('notebooks/*.py') }} 96 | 97 | - name: Get Conda Environment from Cache 98 | if: steps.nb_cache.outputs.cache-hit != 'true' || github.event.inputs.force == 'true' 99 | uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 100 | id: conda_cache 101 | with: 102 | path: /tmp/binder_env 103 | key: ${{ runner.os }}-binder-env-${{ hashFiles('binder/environment.yml') }} 104 | 105 | - name: Update PATH 106 | if: steps.nb_cache.outputs.cache-hit != 'true' || github.event.inputs.force == 'true' 107 | 
shell: bash 108 | run: | 109 | echo "/tmp/binder_env/bin" >> $GITHUB_PATH 110 | 111 | - name: Run Notebooks 112 | if: steps.nb_cache.outputs.cache-hit != 'true' || github.event.inputs.force == 'true' 113 | run: | 114 | nb_dir="${{ steps.cfg.outputs.nb-dir }}" 115 | 116 | mkdir -p $nb_dir 117 | for src in $(find notebooks -type f -maxdepth 1 -name '*py'); do 118 | dst="${nb_dir}/$(basename ${src%%.py}.ipynb)" 119 | echo "$src -> $dst" 120 | ./binder/render-nb-pipe.sh <$src >$dst 121 | done 122 | ls -lh ${nb_dir}/ 123 | 124 | - name: Package Notebooks 125 | run: | 126 | nb_dir="${{ steps.cfg.outputs.nb-dir }}" 127 | nb_hash="${{ steps.cfg.outputs.nb-hash }}" 128 | nb_archive="${{ steps.cfg.outputs.nb-archive }}" 129 | echo "DIR: ${nb_dir}" 130 | echo "NB hash: $nb_hash" 131 | echo "Archive: $nb_archive" 132 | 133 | (cd $nb_dir && tar cvz .) > "${nb_archive}" 134 | ls -lh "${nb_archive}" 135 | tar tzf "${nb_archive}" 136 | 137 | - name: Upload results (artifact) 138 | uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 139 | with: 140 | name: rendered-notebooks 141 | path: docs/notebooks 142 | if-no-files-found: error 143 | -------------------------------------------------------------------------------- /notebooks/stac-load-S2-deafrica.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,py:percent 5 | # text_representation: 6 | # extension: .py 7 | # format_name: percent 8 | # format_version: '1.3' 9 | # jupytext_version: 1.13.6 10 | # kernelspec: 11 | # display_name: 'Python 3.8.12 64-bit (''stac'': conda)' 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | # %% [markdown] 17 | # # Access Sentinel 2 Analysis Ready Data from Digital Earth Africa 18 | # 19 | # [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/opendatacube/odc-stac/develop?labpath=notebooks%2Fstac-load-S2-deafrica.ipynb) 20 | # 21 | # https://explorer.digitalearth.africa/products/s2_l2a 22 | 23 | # %% [markdown] 24 | # ## Import Required Packages 25 | 26 | # %% 27 | from pystac_client import Client 28 | 29 | from odc.stac import configure_rio, stac_load 30 | 31 | # %% [markdown] 32 | # ## Set Collection Configuration 33 | # 34 | # The configuration dictionary is determined from the product's definition, available at https://explorer.digitalearth.africa/products/s2_l2a#definition-doc 35 | # 36 | # All assets except SCL have the same configuration. SCL uses `uint8` rather than `uint16`. 37 | # 38 | # In the configuration, we also supply the aliases for each band. This means we can load data by band name rather than band number. 39 | 40 | # %% 41 | config = { 42 | "s2_l2a": { 43 | "assets": { 44 | "*": { 45 | "data_type": "uint16", 46 | "nodata": 0, 47 | "unit": "1", 48 | }, 49 | "SCL": { 50 | "data_type": "uint8", 51 | "nodata": 0, 52 | "unit": "1", 53 | }, 54 | }, 55 | "aliases": { 56 | "costal_aerosol": "B01", 57 | "blue": "B02", 58 | "green": "B03", 59 | "red": "B04", 60 | "red_edge_1": "B05", 61 | "red_edge_2": "B06", 62 | "red_edge_3": "B07", 63 | "nir": "B08", 64 | "nir_narrow": "B08A", 65 | "water_vapour": "B09", 66 | "swir_1": "B11", 67 | "swir_2": "B12", 68 | "mask": "SCL", 69 | "aerosol_optical_thickness": "AOT", 70 | "scene_average_water_vapour": "WVP", 71 | }, 72 | } 73 | } 74 | 75 | 76 | # %% [markdown] 77 | # ## Set AWS Configuration 78 | # 79 | # Digital Earth Africa data is stored on S3 in Cape Town, Africa. 
To load the data, we must configure rasterio with the appropriate AWS S3 endpoint. This can be done with the `odc.stac.configure_rio` function. Documentation for this function is available at https://odc-stac.readthedocs.io/en/latest/_api/odc.stac.configure_rio.html#odc.stac.configure_rio. 80 | # 81 | # The configuration below must be used when loading any Digital Earth Africa data through the STAC API. 82 | 83 | # %% 84 | configure_rio( 85 | cloud_defaults=True, 86 | aws={"aws_unsigned": True}, 87 | AWS_S3_ENDPOINT="s3.af-south-1.amazonaws.com", 88 | ) 89 | 90 | 91 | # %% [markdown] 92 | # ## Connect to the Digital Earth Africa STAC Catalog 93 | 94 | # %% 95 | # Open the STAC catalogue 96 | catalog = Client.open("https://explorer.digitalearth.africa/stac") 97 | 98 | 99 | # %% [markdown] 100 | # ## Find STAC Items to Load 101 | # 102 | # ### Define query parameters 103 | 104 | # %% 105 | # Set a bounding box 106 | # [xmin, ymin, xmax, ymax] in longitude and latitude 107 | bbox = [37.76, 12.49, 37.77, 12.50] 108 | 109 | # Set a start and end date 110 | start_date = "2020-09-01" 111 | end_date = "2020-12-01" 112 | 113 | # Set the STAC collections 114 | collections = ["s2_l2a"] 115 | 116 | 117 | # %% [markdown] 118 | # ### Construct query and get items from catalog 119 | 120 | # %% 121 | # Build a query with the set parameters 122 | query = catalog.search( 123 | bbox=bbox, collections=collections, datetime=f"{start_date}/{end_date}" 124 | ) 125 | 126 | # Search the STAC catalog for all items matching the query 127 | items = list(query.items()) 128 | print(f"Found: {len(items):d} datasets") 129 | 130 | # %% [markdown] 131 | # ## Load the Data 132 | # 133 | # In this step, we specify the desired coordinate system, resolution (here 20m), and bands to load. We also pass the bounding box to the `stac_load` function to only load the requested data. Since the band aliases are contained in the `config` dictionary, bands can be loaded using these aliases (e.g. `"red"` instead of `"B04"` below). 134 | # 135 | # The data will be lazy-loaded with Dask, meaning that it won't be loaded into memory until necessary, such as when it is displayed. 136 | 137 | # %% 138 | crs = "EPSG:6933" 139 | resolution = 20 140 | 141 | ds = stac_load( 142 | items, 143 | bands=("red", "green", "blue", "nir"), 144 | crs=crs, 145 | resolution=resolution, 146 | chunks={}, 147 | groupby="solar_day", 148 | stac_cfg=config, 149 | bbox=bbox, 150 | ) 151 | 152 | # View the Xarray Dataset 153 | ds 154 | 155 | 156 | # %% [markdown] 157 | # ### Compute a band index 158 | # 159 | # After loading the data, you can perform standard Xarray operations, such as calculating and plotting the normalised difference vegetation index (NDVI). The `.compute()` method triggers Dask to load the data into memory, so running this step may take a few minutes.
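# %% [markdown]
# Before computing the index below, you can optionally mask out `nodata`
# pixels first so they don't skew the result. A minimal sketch, assuming the
# `nodata: 0` value supplied in the collection configuration above:

# %%
valid = (ds.red != 0) & (ds.nir != 0)  # True where both bands hold real data
ndvi_masked = ((ds.nir - ds.red) / (ds.nir + ds.red)).where(valid)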
160 | 161 | # %% 162 | ds["NDVI"] = (ds.nir - ds.red) / (ds.nir + ds.red) 163 | 164 | 165 | ds.NDVI.compute().plot(col="time", col_wrap=6, vmin=0, vmax=1) 166 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """ 2 | Shared pytest fixtures for odc-stac tests 3 | """ 4 | 5 | import json 6 | from pathlib import Path 7 | 8 | import pystac 9 | import pystac.collection 10 | import pystac.item 11 | import pytest 12 | from odc.geo.data import country_geom 13 | 14 | TEST_DATA_FOLDER: Path = Path(__file__).parent.joinpath("data") 15 | PARTIAL_PROJ_STAC: str = "only_crs_proj.json" 16 | GA_LANDSAT_STAC: str = "ga_ls8c_ard_3-1-0_088080_2020-05-25_final.stac-item.json" 17 | SENTINEL_STAC_COLLECTION: str = "sentinel-2-l2a.collection.json" 18 | SENTINEL_STAC: str = "S2A_28QCH_20200714_0_L2A.json" 19 | SENTINEL_STAC_MS: str = "S2B_MSIL2A_20190629T212529_R043_T06VVN_20201006T080531.json" 20 | SENTINEL_STAC_MS_RASTER_EXT: str = ( 21 | "S2B_MSIL2A_20190629T212529_R043_T06VVN_20201006T080531_raster_ext.json" 22 | ) 23 | USGS_LANDSAT_STAC_v1b: str = "LC08_L2SR_081119_20200101_20200823_02_T2.json" 24 | USGS_LANDSAT_STAC_v1: str = "LC08_L2SP_028030_20200114_20200824_02_T1_SR.json" 25 | USGS_LANDSAT_STAC_v1_1_1: str = "LE07_L2SP_044033_20210329_20210424_02_T1_SR.json" 26 | LIDAR_STAC: str = "lidar_dem.json" 27 | BENCH_SITE1: str = "site1-20200606-tall-strip-africa.geojson" 28 | BENCH_SITE2: str = "site2-2020_jun_jul-35MNM.geojson" 29 | 30 | # pylint: disable=redefined-outer-name 31 | 32 | 33 | @pytest.fixture(scope="session") 34 | def test_data_dir(): 35 | return TEST_DATA_FOLDER 36 | 37 | 38 | @pytest.fixture 39 | def partial_proj_stac(): 40 | return pystac.item.Item.from_file(str(TEST_DATA_FOLDER.joinpath(PARTIAL_PROJ_STAC))) 41 | 42 | 43 | @pytest.fixture 44 | def no_bands_stac(partial_proj_stac): 45 | partial_proj_stac.assets.clear() 46 | return partial_proj_stac 47 | 48 | 49 | @pytest.fixture 50 | def usgs_landsat_stac_v1(): 51 | return pystac.item.Item.from_file( 52 | str(TEST_DATA_FOLDER.joinpath(USGS_LANDSAT_STAC_v1)) 53 | ) 54 | 55 | 56 | @pytest.fixture 57 | def usgs_landsat_stac_v1b(): 58 | return pystac.item.Item.from_file( 59 | str(TEST_DATA_FOLDER.joinpath(USGS_LANDSAT_STAC_v1b)) 60 | ) 61 | 62 | 63 | @pytest.fixture 64 | def usgs_landsat_stac_v1_1_1(): 65 | return pystac.item.Item.from_file( 66 | str(TEST_DATA_FOLDER.joinpath(USGS_LANDSAT_STAC_v1_1_1)) 67 | ) 68 | 69 | 70 | @pytest.fixture 71 | def ga_landsat_stac(): 72 | return pystac.item.Item.from_file(str(TEST_DATA_FOLDER.joinpath(GA_LANDSAT_STAC))) 73 | 74 | 75 | @pytest.fixture 76 | def lidar_stac(): 77 | return pystac.item.Item.from_file(str(TEST_DATA_FOLDER.joinpath(LIDAR_STAC))) 78 | 79 | 80 | @pytest.fixture 81 | def sentinel_stac(): 82 | return pystac.item.Item.from_file(str(TEST_DATA_FOLDER.joinpath(SENTINEL_STAC))) 83 | 84 | 85 | @pytest.fixture 86 | def sentinel_stac_ms_json(): 87 | with TEST_DATA_FOLDER.joinpath(SENTINEL_STAC_MS).open("r", encoding="utf") as f: 88 | return json.load(f) 89 | 90 | 91 | @pytest.fixture 92 | def bench_site1(): 93 | with TEST_DATA_FOLDER.joinpath(BENCH_SITE1).open("r", encoding="utf") as f: 94 | return _strip_links(json.load(f)) 95 | 96 | 97 | @pytest.fixture 98 | def bench_site2(): 99 | with TEST_DATA_FOLDER.joinpath(BENCH_SITE2).open("r", encoding="utf") as f: 100 | return _strip_links(json.load(f)) 101 | 102 | 103 | @pytest.fixture 104 | def sentinel_stac_ms(): 105 | return
pystac.item.Item.from_file(str(TEST_DATA_FOLDER.joinpath(SENTINEL_STAC_MS))) 106 | 107 | 108 | @pytest.fixture 109 | def sentinel_stac_ms_no_ext(sentinel_stac_ms_json): 110 | metadata = dict(sentinel_stac_ms_json) 111 | metadata["stac_extensions"] = [] 112 | return pystac.item.Item.from_dict(metadata) 113 | 114 | 115 | @pytest.fixture 116 | def sentinel_stac_ms_with_raster_ext(): 117 | return pystac.item.Item.from_file( 118 | str(TEST_DATA_FOLDER.joinpath(SENTINEL_STAC_MS_RASTER_EXT)) 119 | ) 120 | 121 | 122 | @pytest.fixture 123 | def sentinel_stac_collection(): 124 | return pystac.collection.Collection.from_file( 125 | str(TEST_DATA_FOLDER.joinpath(SENTINEL_STAC_COLLECTION)) 126 | ) 127 | 128 | 129 | @pytest.fixture 130 | def relative_href_only(ga_landsat_stac: pystac.item.Item): 131 | item = pystac.Item.from_dict(ga_landsat_stac.to_dict()) 132 | item = item.make_asset_hrefs_relative() 133 | assert isinstance(item, pystac.Item) 134 | item.remove_links("self") 135 | return item 136 | 137 | 138 | @pytest.fixture 139 | def sample_geojson(): 140 | return { 141 | "type": "FeatureCollection", 142 | "features": [ 143 | { 144 | "type": "Feature", 145 | "properties": {"name": "Kangaroo Island"}, 146 | "geometry": { 147 | "type": "Polygon", 148 | "coordinates": [ 149 | [ 150 | [136.351318359375, -35.78217070326606], 151 | [136.7303466796875, -36.16448788632062], 152 | [137.5323486328125, -36.16005298551352], 153 | [137.8179931640625, -35.933540642493114], 154 | [138.0816650390625, -36.05798104702501], 155 | [138.2025146484375, -35.74205383068035], 156 | [137.5653076171875, -35.46066995149529], 157 | [136.351318359375, -35.78217070326606], 158 | ] 159 | ], 160 | }, 161 | } 162 | ], 163 | } 164 | 165 | 166 | def _strip_links(gjson): 167 | for item in gjson["features"]: 168 | item["links"] = [] 169 | return gjson 170 | 171 | 172 | @pytest.fixture() 173 | def gpd_iso3(): 174 | def _get(iso3: str, crs=None): 175 | return country_geom(iso3.upper(), crs=crs) 176 | 177 | yield _get 178 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | import logging as pylogging 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
14 | # 15 | import os 16 | import sys 17 | from pathlib import Path 18 | 19 | import requests 20 | from sphinx.util import logging 21 | 22 | sys.path.insert(0, os.path.abspath("..")) 23 | from odc.stac import __version__ as _odc_stac_version 24 | from scripts import notebook_hash 25 | 26 | # isort: off 27 | # extra imports to check env 28 | import odc.stac.bench 29 | 30 | 31 | # Workaround for https://github.com/agronholm/sphinx-autodoc-typehints/issues/123 32 | # When this https://github.com/agronholm/sphinx-autodoc-typehints/pull/153 33 | # gets merged, we can remove this 34 | class FilterForIssue123(pylogging.Filter): 35 | def filter(self, record: pylogging.LogRecord) -> bool: 36 | # You probably should make this check more specific by checking 37 | # that the dataclass name is in the message, so that you don't filter out 38 | # other meaningful warnings 39 | return not record.getMessage().startswith("Cannot treat a function") 40 | 41 | 42 | logging.getLogger("sphinx_autodoc_typehints").logger.addFilter(FilterForIssue123()) 43 | # End of a workaround 44 | 45 | 46 | def ensure_notebooks(dst_folder): 47 | """ 48 | Download pre-rendered notebooks from a tar archive 49 | """ 50 | dst_folder = Path(dst_folder) 51 | if dst_folder.exists(): 52 | print(f"Found pre-rendered notebooks in {dst_folder}") 53 | return True 54 | 55 | dst_folder.mkdir() 56 | nb_hash, nb_paths = notebook_hash.compute("../notebooks") 57 | nb_names = [p.rsplit("/", 1)[-1].rsplit(".", 1)[0] + ".ipynb" for p in nb_paths] 58 | 59 | for nb in nb_names: 60 | url = f"https://{nb_hash[:16]}--odc-stac-docs.netlify.app/notebooks/{nb}" 61 | print(f"{url} -> notebooks/{nb}") 62 | rr = requests.get(url, timeout=5) 63 | if not rr: 64 | return False 65 | with open(dst_folder / nb, "wt", encoding="utf") as dst: 66 | dst.write(rr.text) 67 | 68 | return True 69 | 70 | 71 | # working directory is docs/ 72 | # download pre-rendered notebooks unless folder is already populated 73 | if not ensure_notebooks("notebooks"): 74 | notebooks_directory = os.path.abspath("../notebooks") 75 | raise RuntimeError( 76 | "There is no cached version of these notebooks. " 77 | "Build the notebooks before building the documentation. " 78 | f"Notebooks are located in {notebooks_directory}." 79 | ) 80 | 81 | # -- Project information ----------------------------------------------------- 82 | 83 | project = "odc-stac" 84 | copyright = "2021, ODC" 85 | author = "ODC" 86 | 87 | version = ".".join(_odc_stac_version.split(".", 2)[:2]) 88 | # The full version, including alpha/beta/rc tags 89 | release = _odc_stac_version 90 | 91 | 92 | # -- General configuration --------------------------------------------------- 93 | 94 | # Add any Sphinx extension module names here, as strings. They can be 95 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 96 | # ones. 97 | extensions = [ 98 | "sphinx.ext.autodoc", 99 | "sphinx.ext.autosummary", 100 | "sphinx_autodoc_typehints", 101 | "sphinx.ext.viewcode", 102 | "sphinx.ext.intersphinx", 103 | "sphinx.ext.extlinks", 104 | "sphinx.ext.mathjax", 105 | "nbsphinx", 106 | ] 107 | 108 | # Add any paths that contain templates here, relative to this directory. 109 | templates_path = ["_templates"] 110 | 111 | # List of patterns, relative to source directory, that match files and 112 | # directories to ignore when looking for source files. 113 | # This pattern also affects html_static_path and html_extra_path. 114 | exclude_patterns = ["_build"] 115 | 116 | # If true, '()' will be appended to :func: etc.
cross-reference text. 117 | add_function_parentheses = True 118 | 119 | # If true, sectionauthor and moduleauthor directives will be shown in the 120 | # output. They are ignored by default. 121 | # show_authors = False 122 | 123 | # The name of the Pygments (syntax highlighting) style to use. 124 | pygments_style = "friendly" 125 | 126 | autosummary_generate = True 127 | 128 | extlinks = { 129 | "issue": ("https://github.com/opendatacube/odc-stac/issues/%s", "issue %s"), 130 | "pull": ("https://github.com/opendatacube/odc-stac/pulls/%s", "PR %s"), 131 | } 132 | 133 | intersphinx_mapping = { 134 | "python": ("https://docs.python.org/3", None), 135 | "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), 136 | "numpy": ("https://docs.scipy.org/doc/numpy/", None), 137 | "xarray": ("https://xarray.pydata.org/en/stable/", None), 138 | "datacube": ("https://datacube-core.readthedocs.io/en/latest/", None), 139 | "odc-geo": ("https://odc-geo.readthedocs.io/en/latest/", None), 140 | "pystac": ("https://pystac.readthedocs.io/en/latest/", None), 141 | "rasterio": ("https://rasterio.readthedocs.io/en/latest/", None), 142 | } 143 | 144 | # -- Options for HTML output ------------------------------------------------- 145 | 146 | # The theme to use for HTML and HTML Help pages. See the documentation for 147 | # a list of builtin themes. 148 | # 149 | html_theme = "sphinx_rtd_theme" 150 | 151 | html_theme_options = { 152 | "collapse_navigation": False, 153 | "logo_only": True, 154 | } 155 | 156 | # html_logo = '_static/logo.svg' 157 | html_last_updated_fmt = "%b %d, %Y" 158 | html_show_sphinx = False 159 | 160 | 161 | # Add any paths that contain custom static files (such as style sheets) here, 162 | # relative to this directory. They are copied after the builtin static files, 163 | # so a file named "default.css" will overwrite the builtin "default.css". 164 | html_static_path = ["_static"] 165 | 166 | html_css_files = ["xr-fixes.css"] 167 | -------------------------------------------------------------------------------- /notebooks/stac-load-e84-aws.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,py:percent 5 | # text_representation: 6 | # extension: .py 7 | # format_name: percent 8 | # format_version: '1.3' 9 | # jupytext_version: 1.13.8 10 | # kernelspec: 11 | # display_name: Python 3 (ipykernel) 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | # %% [markdown] 17 | # # Access Sentinel 2 Data from AWS 18 | # 19 | # [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/opendatacube/odc-stac/develop?labpath=notebooks%2Fstac-load-e84-aws.ipynb) 20 | # 21 | # https://registry.opendata.aws/sentinel-2-l2a-cogs/ 22 | 23 | # %% 24 | import dask.distributed 25 | import folium 26 | import folium.plugins 27 | import geopandas as gpd 28 | import shapely.geometry 29 | from IPython.display import display 30 | from pystac_client import Client 31 | 32 | from odc.stac import configure_rio, stac_load 33 | 34 | 35 | def convert_bounds(bbox, invert_y=False): 36 | """ 37 | Helper method for changing bounding box representation to leaflet notation 38 | 39 | ``(lon1, lat1, lon2, lat2) -> ((lat1, lon1), (lat2, lon2))`` 40 | """ 41 | x1, y1, x2, y2 = bbox 42 | if invert_y: 43 | y1, y2 = y2, y1 44 | return ((y1, x1), (y2, x2)) 45 | 46 | 47 | # %% [markdown] 48 | # ## Start Dask Client 49 | # 50 | # This step is optional, but it does improve load speed significantly. 
You 51 | # don't have to use Dask, as you can load data directly into memory of the 52 | # notebook. 53 | 54 | # %% 55 | client = dask.distributed.Client() 56 | configure_rio(cloud_defaults=True, aws={"aws_unsigned": True}, client=client) 57 | display(client) 58 | 59 | # %% [markdown] 60 | # ## Find STAC Items to Load 61 | 62 | # %% 63 | km2deg = 1.0 / 111 64 | x, y = (113.887, -25.843) # Center point of a query 65 | r = 100 * km2deg 66 | bbox = (x - r, y - r, x + r, y + r) 67 | 68 | catalog = Client.open("https://earth-search.aws.element84.com/v1/") 69 | 70 | query = catalog.search( 71 | collections=["sentinel-2-l2a"], datetime="2021-09-16", limit=100, bbox=bbox 72 | ) 73 | 74 | items = list(query.items()) 75 | print(f"Found: {len(items):d} datasets") 76 | 77 | # Convert STAC items into a GeoJSON FeatureCollection 78 | stac_json = query.item_collection_as_dict() 79 | 80 | # %% [markdown] 81 | # ## Review Query Result 82 | # 83 | # We'll use a GeoPandas `GeoDataFrame` to make plotting easier. 84 | 85 | # %% 86 | gdf = gpd.GeoDataFrame.from_features(stac_json, "epsg:4326") 87 | 88 | # Compute granule id from components 89 | gdf["granule"] = ( 90 | gdf["mgrs:utm_zone"].apply(lambda x: f"{x:02d}") 91 | + gdf["mgrs:latitude_band"] 92 | + gdf["mgrs:grid_square"] 93 | ) 94 | 95 | fig = gdf.plot( 96 | "granule", 97 | edgecolor="black", 98 | categorical=True, 99 | aspect="equal", 100 | alpha=0.5, 101 | figsize=(6, 12), 102 | legend=True, 103 | legend_kwds={"loc": "upper left", "frameon": False, "ncol": 1}, 104 | ) 105 | _ = fig.set_title("STAC Query Results") 106 | 107 | # %% [markdown] 108 | # ## Plot STAC Items on a Map 109 | 110 | # %% 111 | # https://github.com/python-visualization/folium/issues/1501 112 | from branca.element import Figure 113 | 114 | fig = Figure(width="400px", height="500px") 115 | map1 = folium.Map() 116 | fig.add_child(map1) 117 | 118 | folium.GeoJson( 119 | shapely.geometry.box(*bbox), 120 | style_function=lambda x: dict(fill=False, weight=1, opacity=0.7, color="olive"), 121 | name="Query", 122 | ).add_to(map1) 123 | 124 | gdf.explore( 125 | "granule", 126 | categorical=True, 127 | tooltip=[ 128 | "granule", 129 | "datetime", 130 | "eo:cloud_cover", 131 | ], 132 | popup=True, 133 | style_kwds=dict(fillOpacity=0.1, width=2), 134 | name="STAC", 135 | m=map1, 136 | ) 137 | 138 | map1.fit_bounds(bounds=convert_bounds(gdf.unary_union.bounds)) 139 | display(fig) 140 | 141 | # %% [markdown] 142 | # ## Construct Dask Dataset 143 | # 144 | # Note that even though there are 9 STAC Items on input, there is only one 145 | # timeslice on output. This is because of `groupby="solar_day"`. With that 146 | # setting, `stac_load` will place all items that occurred on the same day (as 147 | # adjusted for the timezone) into one image plane. 148 | 149 | # %% 150 | # Since we will plot it on a map, we need to use the `EPSG:3857` projection 151 | crs = "epsg:3857" 152 | zoom = 2**5 # overview level 5 153 | 154 | xx = stac_load( 155 | items, 156 | bands=("red", "green", "blue"), 157 | crs=crs, 158 | resolution=10 * zoom, 159 | chunks={}, # <-- use Dask 160 | groupby="solar_day", 161 | ) 162 | display(xx) 163 | 164 | # %% [markdown] 165 | # Note that the data is not loaded yet, but we can already review the memory requirement and check the data footprint.
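# %% [markdown]
# For example, a rough estimate of the memory the full load would need.
# A minimal sketch: `.nbytes` is derived from shape and dtype, so it does
# not trigger any reads.

# %%
total_bytes = sum(dv.nbytes for dv in xx.data_vars.values())
print(f"Approximate size fully loaded: {total_bytes / 2**20:.0f} MiB")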
166 | 167 | # %% 168 | xx.odc.geobox 169 | 170 | # %% [markdown] 171 | # ## Load data into local memory 172 | 173 | # %% 174 | # %%time 175 | xx = xx.compute() 176 | 177 | # %% 178 | _ = ( 179 | xx.isel(time=0) 180 | .to_array("band") 181 | .plot.imshow( 182 | col="band", 183 | size=4, 184 | vmin=0, 185 | vmax=4000, 186 | ) 187 | ) 188 | 189 | # %% [markdown] 190 | # ## Load with bounding box 191 | # 192 | # As you can see, `stac_load` returned all the data covered by the STAC items 193 | # returned from the query. This happens by default as `stac_load` has no way of 194 | # knowing what your query was. But it is possible to control what region is 195 | # loaded. There are several mechanisms available, but probably the simplest one is 196 | # to use the `bbox=` parameter (compatible with `stac_client`). 197 | # 198 | # Let's load a small region at native resolution to demonstrate. 199 | 200 | # %% 201 | r = 6.5 * km2deg 202 | small_bbox = (x - r, y - r, x + r, y + r) 203 | 204 | yy = stac_load( 205 | items, 206 | bands=("red", "green", "blue"), 207 | crs=crs, 208 | resolution=10, 209 | chunks={}, # <-- use Dask 210 | groupby="solar_day", 211 | bbox=small_bbox, 212 | ) 213 | display(yy.odc.geobox) 214 | 215 | # %% 216 | yy = yy.compute() 217 | 218 | # %% 219 | _ = ( 220 | yy.isel(time=0) 221 | .to_array("band") 222 | .plot.imshow( 223 | col="band", 224 | size=4, 225 | vmin=0, 226 | vmax=4000, 227 | ) 228 | ) 229 | 230 | # %% [markdown] 231 | # -------------------------------------------------------------- 232 | -------------------------------------------------------------------------------- /notebooks/stac-load-S2-ms.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,py:percent 5 | # text_representation: 6 | # extension: .py 7 | # format_name: percent 8 | # format_version: '1.3' 9 | # jupytext_version: 1.13.8 10 | # kernelspec: 11 | # display_name: Python 3 (ipykernel) 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | # %% [markdown] 17 | # # Access Sentinel 2 Data on Planetary Computer 18 | # 19 | # [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/opendatacube/odc-stac/develop?labpath=notebooks%2Fstac-load-S2-ms.ipynb) 20 | 21 | # %% [markdown] 22 | # ## Setup Instructions 23 | # 24 | # This notebook is meant to run on the Planetary Computer lab hub. 25 | 26 | # %% 27 | import dask.distributed 28 | import dask.utils 29 | import numpy as np 30 | import planetary_computer as pc 31 | import xarray as xr 32 | from IPython.display import display 33 | from pystac_client import Client 34 | 35 | from odc.stac import configure_rio, stac_load 36 | 37 | # %% [markdown] 38 | # ## Start Dask Client 39 | # 40 | # This step is optional, but it does improve load speed significantly. You 41 | # don't have to use Dask, as you can load data directly into memory of the 42 | # notebook. 43 | 44 | # %% 45 | client = dask.distributed.Client() 46 | configure_rio(cloud_defaults=True, client=client) 47 | display(client) 48 | 49 | # %% [markdown] 50 | # ## Query STAC API 51 | # 52 | # Here we are looking for datasets in the `sentinel-2-l2a` collection from June 53 | # 2019 over MGRS tile `06VVN`.
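# %% [markdown]
# (The same search could also be expressed spatially with `bbox=`, as in the
# other notebooks. A commented-out sketch with purely illustrative
# coordinates; `catalog` is opened in the next cell:)

# %%
# query = catalog.search(
#     collections=["sentinel-2-l2a"],
#     datetime="2019-06",
#     bbox=(-147.0, 60.0, -145.0, 61.0),  # hypothetical bounds, not exactly tile 06VVN
# )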
54 | 55 | # %% 56 | catalog = Client.open("https://planetarycomputer.microsoft.com/api/stac/v1") 57 | 58 | query = catalog.search( 59 | collections=["sentinel-2-l2a"], 60 | datetime="2019-06", 61 | query={"s2:mgrs_tile": dict(eq="06VVN")}, 62 | ) 63 | 64 | items = list(query.items()) 65 | print(f"Found: {len(items):d} datasets") 66 | 67 | # %% [markdown] 68 | # ## Lazy load all the bands 69 | # 70 | # We won't use all the bands, but that doesn't matter: bands that we don't 71 | # request won't be loaded. We are "loading" data with Dask, which means that at this 72 | # point no reads are happening just yet. 73 | # 74 | # We have to supply `dtype=` and `nodata=` because items in this collection are missing [raster extension](https://github.com/stac-extensions/raster) metadata. 75 | 76 | # %% 77 | resolution = 10 78 | SHRINK = 4 79 | if client.cluster.workers[0].memory_manager.memory_limit < dask.utils.parse_bytes("4G"): 80 | SHRINK = 8  # running on Binder with 2 GB RAM 81 | 82 | if SHRINK > 1: 83 | resolution = resolution * SHRINK 84 | 85 | xx = stac_load( 86 | items, 87 | chunks={"x": 2048, "y": 2048}, 88 | patch_url=pc.sign, 89 | resolution=resolution, 90 | # force dtype and nodata 91 | dtype="uint16", 92 | nodata=0, 93 | ) 94 | 95 | print(f"Bands: {','.join(list(xx.data_vars))}") 96 | display(xx) 97 | 98 | # %% [markdown] 99 | # By default `stac_load` will return all the data bands using canonical asset 100 | # names. But we can also request a subset of bands by supplying the `bands=` parameter. 101 | # When going this route, you can also use a "common name" to refer to a band. 102 | # 103 | # In this case we request the `red,green,blue,nir` bands, which are common names 104 | # for bands `B04,B03,B02,B08`, and the `SCL` band, which is a canonical name. 105 | 106 | # %% 107 | xx = stac_load( 108 | items, 109 | bands=["red", "green", "blue", "nir", "SCL"], 110 | resolution=resolution, 111 | chunks={"x": 2048, "y": 2048}, 112 | patch_url=pc.sign, 113 | # force dtype and nodata 114 | dtype="uint16", 115 | nodata=0, 116 | ) 117 | 118 | print(f"Bands: {','.join(list(xx.data_vars))}") 119 | display(xx) 120 | 121 | 122 | # %% [markdown] 123 | # ## Do some math with bands 124 | 125 | 126 | # %% 127 | def to_float(xx): 128 | _xx = xx.astype("float32") 129 | nodata = _xx.attrs.pop("nodata", None) 130 | if nodata is None: 131 | return _xx 132 | return _xx.where(xx != nodata) 133 | 134 | 135 | def colorize(xx, colormap): 136 | return xr.DataArray(colormap[xx.data], coords=xx.coords, dims=(*xx.dims, "band")) 137 | 138 | 139 | # %% 140 | # like .astype(float32) but taking care of nodata->NaN mapping 141 | nir = to_float(xx.nir) 142 | red = to_float(xx.red) 143 | ndvi = (nir - red) / ( 144 | nir + red 145 | ) # < This is still a lazy Dask computation (no data loaded yet) 146 | 147 | # Get the 5th time slice: `load->compute->plot` 148 | _ = ndvi.isel(time=4).compute().plot.imshow(size=7, aspect=1.2, interpolation="bicubic") 149 | 150 | # %% [markdown] 151 | # For sample purposes, work with the first 6 observations only 152 | 153 | # %% 154 | xx = xx.isel(time=np.s_[:6]) 155 | 156 | # %% 157 | # fmt: off 158 | scl_colormap = np.array( 159 | [ 160 | [255, 0, 255, 255], # 0 - NODATA 161 | [255, 0, 4, 255], # 1 - Saturated or Defective 162 | [0 , 0, 0, 255], # 2 - Dark Areas 163 | [97 , 97, 97, 255], # 3 - Cloud Shadow 164 | [3 , 139, 80, 255], # 4 - Vegetation 165 | [192, 132, 12, 255], # 5 - Bare Ground 166 | [21 , 103, 141, 255], # 6 - Water 167 | [117, 0, 27, 255], # 7 - Unclassified 168 | [208, 208, 208, 255], # 8 - Cloud 169
| [244, 244, 244, 255], # 9 - Definitely Cloud 170 | [195, 231, 240, 255], # 10 - Thin Cloud 171 | [222, 157, 204, 255], # 11 - Snow or Ice 172 | ], 173 | dtype="uint8", 174 | ) 175 | # fmt: on 176 | 177 | # Load SCL band, then convert to RGB using color scheme above 178 | scl_rgba = colorize(xx.SCL.compute(), scl_colormap) 179 | 180 | # Check we still have geo-registration 181 | scl_rgba.odc.geobox 182 | 183 | # %% 184 | _ = scl_rgba.plot.imshow(col="time", col_wrap=3, size=3, interpolation="antialiased") 185 | 186 | # %% [markdown] 187 | # Let's save the image dated 2019-06-04 to a Cloud Optimized GeoTIFF file. 188 | 189 | # %% 190 | to_save = scl_rgba.isel(time=3) 191 | fname = f"SCL-{to_save.time.dt.strftime('%Y%m%d').item()}.tif" 192 | print(f"Saving to: '{fname}'") 193 | 194 | # %% 195 | to_save.odc.write_cog( 196 | fname, 197 | overwrite=True, 198 | compress="webp", 199 | webp_quality=90, 200 | ) 201 | 202 | # %% [markdown] 203 | # Check the file with `rio info`. 204 | 205 | # %% 206 | # !ls -lh {fname} 207 | # !rio info {fname} | jq . 208 | 209 | # %% [markdown] 210 | # -------------------------------- 211 | -------------------------------------------------------------------------------- /tests/data/only_crs_proj.json: -------------------------------------------------------------------------------- 1 | { 2 | "stac_version": "1.0.0", 3 | "type": "Feature", 4 | "id": "3af1acae-0255-4762-b2b2-f26034cf3ce8", 5 | "properties": { 6 | "title": "LS_FC_PC_3577_-14_-26_20190101", 7 | "platform": "landsat-5,landsat-7,landsat-8", 8 | "instruments": [ 9 | "tm,etm+,oli" 10 | ], 11 | "created": "2020-03-10T05:35:24.063151Z", 12 | "proj:epsg": 4326, 13 | "datetime": "2019-01-01T00:00:00Z", 14 | "cubedash:region_code": "-14_-26" 15 | }, 16 | "geometry": { 17 | "type": "Polygon", 18 | "coordinates": [ 19 | [ 20 | [ 21 | 118.22240593999766, 22 | -22.699975944231888 23 | ], 24 | [ 25 | 118.24427397418242, 26 | -22.515645780963876 27 | ], 28 | [ 29 | 119.22003537123638, 30 | -22.60842512232027 31 | ], 32 | [ 33 | 119.12126684085257, 34 | -23.49827058643962 35 | ], 36 | [ 37 | 118.13807838381156, 38 | -23.404891555616906 39 | ], 40 | [ 41 | 118.22240593999766, 42 | -22.699975944231888 43 | ] 44 | ] 45 | ] 46 | }, 47 | "links": [ 48 | { 49 | "rel": "self", 50 | "href": "https://explorer-aws.dea.ga.gov.au/stac/collections/fc_percentile_albers_annual/items/3af1acae-0255-4762-b2b2-f26034cf3ce8", 51 | "type": "application/json" 52 | }, 53 | { 54 | "rel": "odc_yaml", 55 | "href": "https://explorer-aws.dea.ga.gov.au/dataset/3af1acae-0255-4762-b2b2-f26034cf3ce8.odc-metadata.yaml", 56 | "type": "text/yaml", 57 | "title": "ODC Dataset YAML" 58 | }, 59 | { 60 | "rel": "collection", 61 | "href": "https://explorer-aws.dea.ga.gov.au/stac/collections/fc_percentile_albers_annual" 62 | }, 63 | { 64 | "rel": "product_overview", 65 | "href": "https://explorer-aws.dea.ga.gov.au/product/fc_percentile_albers_annual", 66 | "type": "text/html", 67 | "title": "ODC Product Overview" 68 | }, 69 | { 70 | "rel": "alternative", 71 | "href": "https://explorer-aws.dea.ga.gov.au/dataset/3af1acae-0255-4762-b2b2-f26034cf3ce8", 72 | "type": "text/html", 73 | "title": "ODC Dataset Overview" 74 | }, 75 | { 76 | "rel": "root", 77 | "href": "https://explorer-aws.dea.ga.gov.au/stac" 78 | } 79 | ], 80 | "assets": { 81 | "BS_PC_10": { 82 | "href": "s3://dea-public-data/fractional-cover/fc-percentile/annual/v2.2.0/combined/x_-14/y_-26/2019/LS_FC_PC_3577_-14_-26_20190101_BS_PC_10.tif", 83 | "type": "image/tiff; application=geotiff;
profile=cloud-optimized", 84 | "title": "BS_PC_10", 85 | "eo:bands": [ 86 | { 87 | "name": "BS_PC_10" 88 | } 89 | ], 90 | "roles": [ 91 | "data" 92 | ] 93 | }, 94 | "BS_PC_50": { 95 | "href": "s3://dea-public-data/fractional-cover/fc-percentile/annual/v2.2.0/combined/x_-14/y_-26/2019/LS_FC_PC_3577_-14_-26_20190101_BS_PC_50.tif", 96 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 97 | "title": "BS_PC_50", 98 | "eo:bands": [ 99 | { 100 | "name": "BS_PC_50" 101 | } 102 | ], 103 | "roles": [ 104 | "data" 105 | ] 106 | }, 107 | "BS_PC_90": { 108 | "href": "s3://dea-public-data/fractional-cover/fc-percentile/annual/v2.2.0/combined/x_-14/y_-26/2019/LS_FC_PC_3577_-14_-26_20190101_BS_PC_90.tif", 109 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 110 | "title": "BS_PC_90", 111 | "eo:bands": [ 112 | { 113 | "name": "BS_PC_90" 114 | } 115 | ], 116 | "roles": [ 117 | "data" 118 | ] 119 | }, 120 | "PV_PC_10": { 121 | "href": "s3://dea-public-data/fractional-cover/fc-percentile/annual/v2.2.0/combined/x_-14/y_-26/2019/LS_FC_PC_3577_-14_-26_20190101_PV_PC_10.tif", 122 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 123 | "title": "PV_PC_10", 124 | "eo:bands": [ 125 | { 126 | "name": "PV_PC_10" 127 | } 128 | ], 129 | "roles": [ 130 | "data" 131 | ] 132 | }, 133 | "PV_PC_50": { 134 | "href": "s3://dea-public-data/fractional-cover/fc-percentile/annual/v2.2.0/combined/x_-14/y_-26/2019/LS_FC_PC_3577_-14_-26_20190101_PV_PC_50.tif", 135 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 136 | "title": "PV_PC_50", 137 | "eo:bands": [ 138 | { 139 | "name": "PV_PC_50" 140 | } 141 | ], 142 | "roles": [ 143 | "data" 144 | ] 145 | }, 146 | "PV_PC_90": { 147 | "href": "s3://dea-public-data/fractional-cover/fc-percentile/annual/v2.2.0/combined/x_-14/y_-26/2019/LS_FC_PC_3577_-14_-26_20190101_PV_PC_90.tif", 148 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 149 | "title": "PV_PC_90", 150 | "eo:bands": [ 151 | { 152 | "name": "PV_PC_90" 153 | } 154 | ], 155 | "roles": [ 156 | "data" 157 | ] 158 | }, 159 | "NPV_PC_10": { 160 | "href": "s3://dea-public-data/fractional-cover/fc-percentile/annual/v2.2.0/combined/x_-14/y_-26/2019/LS_FC_PC_3577_-14_-26_20190101_NPV_PC_10.tif", 161 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 162 | "title": "NPV_PC_10", 163 | "eo:bands": [ 164 | { 165 | "name": "NPV_PC_10" 166 | } 167 | ], 168 | "roles": [ 169 | "data" 170 | ] 171 | }, 172 | "NPV_PC_50": { 173 | "href": "s3://dea-public-data/fractional-cover/fc-percentile/annual/v2.2.0/combined/x_-14/y_-26/2019/LS_FC_PC_3577_-14_-26_20190101_NPV_PC_50.tif", 174 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 175 | "title": "NPV_PC_50", 176 | "eo:bands": [ 177 | { 178 | "name": "NPV_PC_50" 179 | } 180 | ], 181 | "roles": [ 182 | "data" 183 | ] 184 | }, 185 | "NPV_PC_90": { 186 | "href": "s3://dea-public-data/fractional-cover/fc-percentile/annual/v2.2.0/combined/x_-14/y_-26/2019/LS_FC_PC_3577_-14_-26_20190101_NPV_PC_90.tif", 187 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 188 | "title": "NPV_PC_90", 189 | "eo:bands": [ 190 | { 191 | "name": "NPV_PC_90" 192 | } 193 | ], 194 | "roles": [ 195 | "data" 196 | ] 197 | } 198 | }, 199 | "bbox": [ 200 | 118.13807838381156, 201 | -23.49827058643962, 202 | 119.22003537123638, 203 | -22.515645780963876 204 | ], 205 | "stac_extensions": [ 206 | "https://stac-extensions.github.io/eo/v1.0.0/schema.json", 207 | 
"https://stac-extensions.github.io/projection/v1.0.0/schema.json" 208 | ], 209 | "collection": "fc_percentile_albers_annual" 210 | } 211 | -------------------------------------------------------------------------------- /odc/stac/testing/stac.py: -------------------------------------------------------------------------------- 1 | """ 2 | Making STAC items for testing. 3 | """ 4 | 5 | from datetime import datetime, timezone 6 | from typing import Any, Generator 7 | 8 | import pystac.asset 9 | import pystac.item 10 | import xarray as xr 11 | from odc.geo.geobox import GeoBox 12 | from odc.loader.types import ( 13 | AuxBandMetadata, 14 | AuxDataSource, 15 | RasterBandMetadata, 16 | RasterGroupMetadata, 17 | RasterSource, 18 | norm_key, 19 | ) 20 | from pystac.extensions.projection import ProjectionExtension 21 | from pystac.extensions.raster import RasterBand, RasterExtension 22 | from toolz import dicttoolz 23 | 24 | from .._mdtools import _group_geoboxes 25 | from ..model import ParsedItem, PropertyLoadRequest, RasterCollectionMetadata 26 | 27 | # pylint: disable=redefined-builtin,too-many-arguments 28 | 29 | STAC_DATE_FMT = "%Y-%m-%dT%H:%M:%S.%fZ" 30 | STAC_DATE_FMT_SHORT = "%Y-%m-%dT%H:%M:%SZ" 31 | 32 | 33 | def _norm_dates(*args): 34 | valid = [a for a in args if a is not None] 35 | valid = [ 36 | datetime.fromisoformat(dt).replace(tzinfo=timezone.utc) 37 | for dt in xr.DataArray(list(valid)) 38 | .astype("datetime64[ns]") 39 | .dt.strftime("%Y-%m-%dT%H:%M:%S.%f") 40 | .values 41 | ] 42 | valid = iter(valid) 43 | return [next(valid) if a else None for a in args] 44 | 45 | 46 | def b_( 47 | name, 48 | geobox=None, 49 | dtype="int16", 50 | nodata=None, 51 | unit="1", 52 | dims=(), 53 | uri=None, 54 | bidx=1, 55 | prefix="http://example.com/items/", 56 | ): 57 | band_key = norm_key(name) 58 | name, _ = band_key 59 | if uri is None: 60 | uri = f"{prefix}{name}.tif" 61 | meta = RasterBandMetadata(dtype, nodata, unit, dims=dims) 62 | return (band_key, RasterSource(uri, bidx, geobox=geobox, meta=meta)) 63 | 64 | 65 | def mk_parsed_item( 66 | bands, 67 | datetime=None, 68 | *, 69 | start_datetime=None, 70 | end_datetime=None, 71 | id="some-item", 72 | collection="some-collection", 73 | href=None, 74 | geometry=None, 75 | props: dict[str, Any] | None = None, 76 | ) -> ParsedItem: 77 | """ 78 | Construct parsed stac item for testing. 
79 | """ 80 | # pylint: disable=redefined-outer-name, too-many-locals 81 | if isinstance(bands, (list, tuple)): 82 | bands = {norm_key(k): v for k, v in bands} 83 | 84 | gboxes = dicttoolz.valmap(lambda b: b.geobox, bands) 85 | gboxes = dicttoolz.valfilter(lambda x: x is not None, gboxes) 86 | gboxes = dicttoolz.keymap(lambda bk: bk[0], gboxes) 87 | 88 | if len(gboxes) == 0: 89 | band2grid = {b: "default" for b, _ in bands} 90 | geobox = None 91 | else: 92 | grids, band2grid = _group_geoboxes(gboxes) 93 | geobox = grids["default"] 94 | 95 | if geometry is None and geobox is not None: 96 | geometry = geobox.geographic_extent 97 | 98 | aliases = {} 99 | if props is None: 100 | props = {} 101 | 102 | # Handle auxiliary bands from props 103 | prop_user_input = [v[1] if isinstance(v, tuple) else k for k, v in props.items()] 104 | prop_requests = PropertyLoadRequest.from_user_input(prop_user_input) 105 | for idx, prop_req in enumerate(prop_requests): 106 | bk = ("_stac_metadata", idx + 1) 107 | # Look up actual value from props dict using prop_req.key 108 | actual_value = props[prop_req.key] 109 | if isinstance(actual_value, tuple): 110 | actual_value, _ = actual_value 111 | 112 | aux_meta = AuxBandMetadata( 113 | prop_req.dtype, 114 | nodata=prop_req.nodata, 115 | units=prop_req.units, 116 | driver_data=prop_req, 117 | ) 118 | aux_source = AuxDataSource( 119 | uri=f"virtual://{bk[0]}/{bk[1]}", 120 | subdataset=None, 121 | meta=aux_meta, 122 | driver_data=actual_value, 123 | ) 124 | bands[bk] = aux_source 125 | aliases[prop_req.output_name] = [bk] 126 | 127 | collection = RasterCollectionMetadata( 128 | collection, 129 | RasterGroupMetadata( 130 | dicttoolz.valmap(lambda b: b.meta, bands), 131 | aliases=aliases, 132 | ), 133 | has_proj=(geobox is not None), 134 | band2grid=band2grid, 135 | ) 136 | datetime, start_datetime, end_datetime = _norm_dates( 137 | datetime, start_datetime, end_datetime 138 | ) 139 | 140 | return ParsedItem( 141 | id, 142 | collection, 143 | bands, 144 | geometry=geometry, 145 | datetime=datetime, 146 | datetime_range=(start_datetime, end_datetime), 147 | href=href, 148 | ) 149 | 150 | 151 | def _add_proj(gbox: GeoBox, xx) -> None: 152 | proj = ProjectionExtension.ext(xx, add_if_missing=True) 153 | proj.shape = list(gbox.shape.yx) 154 | proj.transform = gbox.transform[:6] 155 | crs = gbox.crs 156 | if crs is not None: 157 | epsg = crs.epsg 158 | if epsg is not None: 159 | proj.epsg = epsg 160 | else: 161 | proj.wkt2 = crs.wkt 162 | 163 | 164 | def _extract_props(item: ParsedItem) -> Generator[tuple[str, Any], None, None]: 165 | for k in item.bands: 166 | if k[0] != "_stac_metadata": 167 | continue 168 | b = item[k] 169 | if b.meta is None or b.meta.driver_data is None: 170 | continue 171 | yield b.meta.driver_data.key, b.driver_data 172 | 173 | 174 | def to_stac_item(item: ParsedItem) -> pystac.item.Item: 175 | gg = item.geometry 176 | 177 | props = {} 178 | for n, dt in zip(["start_datetime", "end_datetime"], item.datetime_range): 179 | if dt is not None: 180 | props[n] = dt.strftime(STAC_DATE_FMT) 181 | 182 | props.update(_extract_props(item)) 183 | 184 | xx = pystac.item.Item( 185 | item.id, 186 | geometry=gg.json if gg is not None else None, 187 | bbox=list(gg.boundingbox.bbox) if gg is not None else None, 188 | datetime=item.datetime, 189 | properties=props, 190 | collection=item.collection.name, 191 | ) 192 | 193 | gboxes = item.geoboxes() 194 | if len(gboxes) > 0: 195 | gbox = gboxes[0] 196 | 197 | ProjectionExtension.add_to(xx) 198 | _add_proj(gbox, xx) 199 | 200 
| def _to_raster_band(src: RasterSource) -> RasterBand: 201 | meta = src.meta 202 | assert meta is not None 203 | return RasterBand.create( 204 | data_type=meta.data_type, # type: ignore[arg-type] 205 | nodata=meta.nodata, 206 | unit=meta.units, 207 | ) 208 | 209 | for asset_name, bands in item.assets().items(): 210 | RasterExtension.add_to(xx) 211 | b = bands[0] # all bands should share the same uri 212 | xx.add_asset( 213 | asset_name, 214 | pystac.asset.Asset(b.uri, media_type="image/tiff", roles=["data"]), 215 | ) 216 | RasterExtension.ext(xx.assets[asset_name]).apply( 217 | list(map(_to_raster_band, bands)) 218 | ) 219 | 220 | for asset_name, asset in xx.assets.items(): 221 | bb = item.bands[(asset_name, 1)] 222 | assert isinstance(bb, RasterSource) 223 | if bb.geobox is not None: 224 | assert isinstance(bb.geobox, GeoBox) 225 | _add_proj(bb.geobox, asset) 226 | 227 | if item.href is not None: 228 | xx.set_self_href(item.href) 229 | 230 | return xx 231 | -------------------------------------------------------------------------------- /odc/stac/bench/_cli.py: -------------------------------------------------------------------------------- 1 | """CLI app for benchmarking.""" 2 | 3 | import json 4 | from datetime import datetime 5 | from time import sleep 6 | from typing import Any, Dict, Optional 7 | 8 | import click 9 | import distributed 10 | import rasterio.enums 11 | 12 | from odc.stac.bench import ( 13 | SAMPLE_SITES, 14 | BenchLoadParams, 15 | dump_site, 16 | load_from_json, 17 | load_results, 18 | run_bench, 19 | ) 20 | 21 | # pylint: disable=too-many-arguments,too-many-locals 22 | 23 | RIO_RESAMPLING_NAMES = [it.name for it in rasterio.enums.Resampling] 24 | 25 | 26 | @click.group("odc-stac-bench") 27 | def main() -> None: 28 | """Benchmarking tool for odc.stac.""" 29 | 30 | 31 | @main.command("prepare") 32 | @click.option("--sample-site", type=str, help="Use one of sample sites") 33 | @click.option( 34 | "--list-sample-sites", 35 | is_flag=True, 36 | default=False, 37 | help="Print available sample sites", 38 | ) 39 | @click.option( 40 | "--from-file", 41 | help="From json config file", 42 | type=click.Path(exists=True, dir_okay=False, readable=True), 43 | ) 44 | @click.option("--overwrite", is_flag=True, help="Overwrite output file") 45 | def prepare(sample_site: str, list_sample_sites: bool, from_file, overwrite): 46 | """Prepare benchmarking dataset.""" 47 | if list_sample_sites: 48 | click.echo("Sample sites:") 49 | for site_name in SAMPLE_SITES: 50 | click.echo(f" {site_name}") 51 | return 52 | 53 | site: Optional[Dict[str, Any]] = None 54 | if sample_site is not None: 55 | site = SAMPLE_SITES.get(sample_site, None) 56 | if site is None: 57 | raise click.ClickException(f"No such site: {sample_site}") 58 | print("Site config:") 59 | print("------------------------------------------") 60 | print(json.dumps(site, indent=2)) 61 | print("------------------------------------------") 62 | elif from_file is not None: 63 | with open(from_file, "rt", encoding="utf8") as src: 64 | site = json.load(src) 65 | 66 | if site is None: 67 | raise click.ClickException("Have to supply one of --sample-site or --from-file") 68 | dump_site(site, overwrite=overwrite) 69 | 70 | 71 | @main.command("dask") 72 | @click.option( 73 | "--n-workers", type=int, default=1, help="Number of workers to launch (1)" 74 | ) 75 | @click.option( 76 | "--threads-per-worker", type=int, help="Number of threads per worker (all cpus)" 77 | ) 78 | @click.option("--memory-limit", type=str, help="Configure worker memory 
limit") 79 | def _dask(n_workers, threads_per_worker, memory_limit) -> None: 80 | """Launch local Dask Cluster.""" 81 | client = distributed.Client( 82 | n_workers=n_workers, 83 | threads_per_worker=threads_per_worker, 84 | memory_limit=memory_limit, 85 | ) 86 | info = client.scheduler_info() 87 | print(f"Launched Dask Cluster: {info['address']}") 88 | print(f" --scheduler='{info['address']}'") 89 | while True: 90 | try: 91 | sleep(1) 92 | except KeyboardInterrupt: 93 | print("Terminating") 94 | client.shutdown() 95 | return 96 | 97 | 98 | @main.command("run") 99 | @click.option( 100 | "--config", 101 | "-c", 102 | type=click.Path(exists=True, dir_okay=False, readable=True), 103 | required=False, 104 | help="Experiment configuration in json format", 105 | ) 106 | @click.option( 107 | "--ntimes", "-n", type=int, default=1, help="Configure number of times to run" 108 | ) 109 | @click.option( 110 | "--method", 111 | help="Data loading method", 112 | type=click.Choice(["odc-stac", "stackstac"]), 113 | ) 114 | @click.option("--bands", type=str, help="Comma separated list of bands") 115 | @click.option("--chunks", type=int, help="Chunk size Y,X order", nargs=2) 116 | @click.option("--resolution", type=float, help="Set output resolution") 117 | @click.option("--crs", type=str, help="Set CRS") 118 | @click.option( 119 | "--resampling", 120 | help="Resampling method when changing resolution/projection", 121 | type=click.Choice(RIO_RESAMPLING_NAMES), 122 | ) 123 | @click.option("--show-config", is_flag=True, help="Show configuration only, don't run") 124 | @click.option( 125 | "--scheduler", default="tcp://localhost:8786", help="Dask server to connect to" 126 | ) 127 | @click.argument("site", type=click.Path(exists=True, dir_okay=False, readable=True)) 128 | def run( 129 | site: str, 130 | config, 131 | method: str, 132 | ntimes: int, 133 | bands: str, 134 | chunks, 135 | resolution: float, 136 | crs: str, 137 | resampling: str, 138 | show_config: bool, 139 | scheduler, 140 | ) -> None: 141 | """ 142 | Run data load benchmark using Dask. 143 | 144 | SITE is a GeoJSON file produced by `prepare` step. 
145 | """ 146 | cfg: Optional[BenchLoadParams] = None 147 | if config is not None: 148 | with open(config, "rt", encoding="utf8") as src: 149 | cfg = BenchLoadParams.from_json(src.read()) 150 | else: 151 | cfg = BenchLoadParams( 152 | method="odc-stac", 153 | chunks=(2048, 2048), 154 | extra={ 155 | "stackstac": {"dtype": "uint16", "fill_value": 0}, 156 | "odc-stac": { 157 | "groupby": "solar_day", 158 | "stac_cfg": {"*": {"warnings": "ignore"}}, 159 | }, 160 | }, 161 | ) 162 | 163 | if chunks: 164 | cfg.chunks = chunks 165 | if method is not None: 166 | cfg.method = method 167 | if bands is not None: 168 | cfg.bands = tuple(bands.split(",")) 169 | if resolution is not None: 170 | cfg.resolution = resolution 171 | if crs is not None: 172 | cfg.crs = crs 173 | if resampling is not None: 174 | cfg.resampling = resampling 175 | if not cfg.scenario: 176 | cfg.scenario = site.rsplit(".", 1)[0] 177 | 178 | with open(site, "rt", encoding="utf8") as src: 179 | site_geojson = json.load(src) 180 | 181 | print(f"Loaded: {len(site_geojson['features'])} STAC items from '{site}'") 182 | 183 | print("Will use following load configuration") 184 | print("-" * 60) 185 | print(cfg.to_json(indent=2)) 186 | print("-" * 60) 187 | 188 | if show_config: 189 | return 190 | 191 | print(f"Connecting to Dask Scheduler: {scheduler}") 192 | client = distributed.Client(scheduler) 193 | 194 | print("Constructing Dask graph") 195 | xx = load_from_json(site_geojson, cfg) 196 | print(f"Starting benchmark run ({ntimes} runs)") 197 | print("=" * 60) 198 | 199 | ts = datetime.now().strftime("%Y%m%dT%H%M%S.%f") 200 | results_file = f"{cfg.scenario}_{ts}.pkl" 201 | print(f"Will write results to: {results_file}") 202 | _ = run_bench(xx, client, ntimes=ntimes, results_file=results_file) 203 | print("=" * 60) 204 | print("Finished") 205 | 206 | 207 | @main.command("report") 208 | @click.option( 209 | "--matching", type=str, help="Supply glob pattern instead of individual .pkl files" 210 | ) 211 | @click.option( 212 | "--output", 213 | type=str, 214 | help="File to write CSV data, if not supplied will write to stdout", 215 | ) 216 | @click.argument( 217 | "pkls", type=click.Path(exists=True, dir_okay=False, readable=True), nargs=-1 218 | ) 219 | def report(matching: str, output: str, pkls) -> None: 220 | """ 221 | Collate results of multiple benchmark experiments. 222 | 223 | Read pickle files produced by the `run` command and assemble 224 | them into one CSV file. 
225 | """ 226 | if matching is not None: 227 | data_raw = load_results(matching) 228 | else: 229 | data_raw = load_results(pkls) 230 | 231 | if output is None: 232 | print(data_raw.to_csv()) 233 | else: 234 | data_raw.to_csv(output) 235 | -------------------------------------------------------------------------------- /tests/test_load.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-module-docstring,redefined-builtin 2 | from unittest.mock import MagicMock 3 | 4 | import pystac 5 | import pystac.item 6 | import pytest 7 | import shapely.geometry 8 | from dask.utils import ndeepmap 9 | from odc.geo.geobox import GeoBox 10 | from odc.geo.xr import ODCExtension 11 | from odc.loader import resolve_load_cfg 12 | from odc.loader.types import RasterBandMetadata 13 | 14 | from odc.stac import RasterLoadParams 15 | from odc.stac import load as stac_load 16 | from odc.stac._stac_load import _group_items 17 | from odc.stac.testing.stac import b_, mk_parsed_item, to_stac_item 18 | 19 | 20 | def test_stac_load_smoketest( 21 | sentinel_stac_ms_with_raster_ext: pystac.item.Item, 22 | ) -> None: 23 | item = sentinel_stac_ms_with_raster_ext.clone() 24 | 25 | params = {"crs": "EPSG:3857", "resolution": 100, "align": 0, "chunks": {}} 26 | xx = stac_load([item], "B02", **params) # type: ignore 27 | 28 | assert isinstance(xx.B02.odc, ODCExtension) 29 | assert xx.B02.shape[0] == 1 30 | assert xx.B02.odc.geobox is not None 31 | assert xx.B02.odc.geobox.crs == "EPSG:3857" 32 | assert xx.time.dtype == "datetime64[ns]" 33 | 34 | # Test dc.load name for bands, and alias support 35 | xx = stac_load([item], measurements=["red", "green"], **params) # type: ignore 36 | 37 | assert "red" in xx.data_vars 38 | assert "green" in xx.data_vars 39 | assert xx.red.shape == xx.green.shape 40 | 41 | # Test dc.load name for bands, and alias support 42 | patch_url = MagicMock(return_value="https://example.com/f.tif") 43 | xx = stac_load( 44 | [item], 45 | measurements=["red", "green"], 46 | patch_url=patch_url, 47 | **params, # type: ignore 48 | ) 49 | assert isinstance(xx.odc, ODCExtension) 50 | 51 | # expect patch_url to be called 2 times, 1 for red and 1 for green band 52 | assert patch_url.call_count == 2 53 | 54 | patch_url = MagicMock(return_value="https://example.com/f.tif") 55 | zz = stac_load( 56 | [item], 57 | patch_url=patch_url, 58 | stac_cfg={"*": {"warnings": "ignore"}}, 59 | **params, # type: ignore 60 | ) 61 | assert patch_url.call_count == len(zz.data_vars) 62 | 63 | yy = stac_load( 64 | [item], ["nir"], like=xx, chunks={}, stac_cfg={"*": {"warnings": "ignore"}} 65 | ) 66 | assert yy.odc.geobox == xx.odc.geobox 67 | assert isinstance(xx.odc.geobox, GeoBox) 68 | 69 | yy = stac_load( 70 | [item], 71 | ["nir"], 72 | geobox=xx.odc.geobox, 73 | chunks={}, 74 | stac_cfg={"*": {"warnings": "ignore"}}, 75 | ) 76 | assert yy.odc.geobox == xx.odc.geobox 77 | assert yy.odc.geobox == yy.nir.odc.geobox 78 | 79 | # Check automatic CRS/resolution 80 | yy = stac_load( 81 | [item], 82 | ["nir", "coastal"], 83 | chunks={}, 84 | stac_cfg={"*": {"warnings": "ignore"}}, 85 | ) 86 | assert yy.odc.geobox.crs == "EPSG:32606" 87 | assert yy.odc.geobox.resolution.yx == (-10, 10) 88 | 89 | # test bbox overlaping with lon/lat 90 | with pytest.raises(ValueError): 91 | stac_load([item], ["nir"], bbox=(0, 0, 1, 1), lon=(0, 1), lat=(0, 1), chunks={}) 92 | 93 | # test bbox overlaping with x/y 94 | with pytest.raises(ValueError): 95 | stac_load( 96 | [item], 97 | ["nir"], 98 | 
bbox=(0, 0, 1, 1), 99 | x=(0, 1000), 100 | y=(0, 1000), 101 | chunks={}, 102 | ) 103 | 104 | bbox = (0, 0, 1, 1) 105 | x1, y1, x2, y2 = bbox 106 | 107 | assert ( 108 | stac_load( 109 | [item], 110 | ["nir"], 111 | crs="epsg:3857", 112 | resolution=10, 113 | chunks={}, 114 | lon=(x1, x2), 115 | lat=(y1, y2), 116 | ).nir.odc.geobox 117 | == stac_load( 118 | [item], 119 | ["nir"], 120 | crs="epsg:3857", 121 | resolution=10, 122 | chunks={}, 123 | bbox=bbox, 124 | ).nir.odc.geobox 125 | ) 126 | 127 | geopolygon = shapely.geometry.box(*bbox) 128 | assert ( 129 | stac_load( 130 | [item], 131 | ["nir"], 132 | crs="epsg:3857", 133 | resolution=10, 134 | chunks={}, 135 | lon=(x1, x2), 136 | lat=(y1, y2), 137 | ).nir.odc.geobox 138 | == stac_load( 139 | [item], 140 | ["nir"], 141 | crs="epsg:3857", 142 | resolution=10, 143 | chunks={}, 144 | geopolygon=geopolygon, 145 | ).nir.odc.geobox 146 | ) 147 | 148 | 149 | def test_group_items() -> None: 150 | def _mk(id: str, lon: float, datetime: str): 151 | gbox = GeoBox.from_bbox((lon - 0.1, 0, lon + 0.1, 1), shape=(100, 100)) 152 | return mk_parsed_item([b_("b1", gbox)], datetime=datetime, id=id) 153 | 154 | # check no-op case first 155 | assert _group_items([], [], "time") == [] 156 | assert _group_items([], [], "id") == [] 157 | assert _group_items([], [], "solar_day") == [] 158 | 159 | aa = _mk("a", 15 * 10, "2020-01-02T23:59Z") 160 | b1 = _mk("b1", 15 * 10 + 1, "2020-01-03T00:01Z") 161 | b2 = _mk("b2", 15 * 10 + 2, "2020-01-03T00:01Z") 162 | cc = _mk("c", 0, "2020-01-02T23:59Z") 163 | 164 | def _t(items, groupby, expect, lon=None, preserve_original_order=False) -> None: 165 | stac_items = [to_stac_item(item) for item in items] 166 | rr = ndeepmap( 167 | 2, 168 | lambda idx: items[idx], 169 | _group_items( 170 | stac_items, 171 | items, 172 | groupby, 173 | lon=lon, 174 | preserve_original_order=preserve_original_order, 175 | ), 176 | ) 177 | _expect = ndeepmap(2, lambda item: item.id, expect) 178 | _got = ndeepmap(2, lambda item: item.id, rr) 179 | 180 | assert _expect == _got 181 | 182 | # same order as input 183 | _t([aa, b1, b2], "id", [[aa], [b1], [b2]]) 184 | _t([aa, b2, b1], "id", [[aa], [b2], [b1]]) 185 | _t([b1, aa, b2], "id", [[b1], [aa], [b2]]) 186 | _t([cc, aa, b1, b2], "id", [[cc], [aa], [b1], [b2]]) 187 | 188 | _t([aa, b1, b2], "time", [[aa], [b1, b2]]) 189 | _t([b1, aa, b2], "time", [[aa], [b1, b2]]) 190 | 191 | # order within group is preserved 192 | _t([b2, aa, b1], "time", [[aa], [b2, b1]], preserve_original_order=True) 193 | _t([aa, cc, b1, b2], "time", [[aa, cc], [b1, b2]], preserve_original_order=True) 194 | 195 | _t([aa, b1, b2], "solar_day", [[aa, b1, b2]]) 196 | _t([b1, aa, b2], "solar_day", [[aa, b1, b2]]) 197 | _t([b2, aa, b1], "solar_day", [[aa, b1, b2]]) 198 | _t([aa, b1, b2, cc], "solar_day", [[cc], [aa, b1, b2]]) 199 | 200 | _t([aa, b1, b2, cc], "solar_day", [[aa, cc, b1, b2]], lon=150 + 1) 201 | 202 | # property based 203 | _t([aa, b1], "proj:epsg", [[aa, b1]]) 204 | _t([b1, aa], "proj:epsg", [[aa, b1]]) 205 | _t([aa, b1], "proj:transform", [[aa], [b1]]) 206 | 207 | # custom callback 208 | _t( 209 | [aa, b1, b2, cc], 210 | lambda item, parsed, idx: idx % 2, 211 | [[aa, b2], [b1, cc]], 212 | preserve_original_order=True, 213 | ) 214 | 215 | 216 | def test_resolve_load_cfg() -> None: 217 | rlp = RasterLoadParams 218 | assert resolve_load_cfg({}) == {} 219 | 220 | item = mk_parsed_item( 221 | [ 222 | b_("a", dtype="int8", nodata=-1), 223 | b_("b", dtype="float64", dims=("y", "x", "b")), 224 | ] 225 | ) 226 | 227 | assert 
set(item.collection) == set([("a", 1), ("b", 1)]) 228 | assert item.collection["a"].data_type == "int8" 229 | assert item.collection["b"].data_type == "float64" 230 | 231 | _bands = {n: b for (n, _), b in item.collection.bands.items()} 232 | assert isinstance(_bands["a"], RasterBandMetadata) 233 | assert isinstance(_bands["b"], RasterBandMetadata) 234 | 235 | cfg = resolve_load_cfg(_bands, resampling="average") 236 | assert cfg["a"] == rlp("int8", -1, resampling="average", meta=_bands["a"]) 237 | assert cfg["b"] == rlp( 238 | "float64", None, resampling="average", dims=("y", "x", "b"), meta=_bands["b"] 239 | ) 240 | 241 | cfg = resolve_load_cfg( 242 | _bands, 243 | resampling={"*": "mode", "b": "sum"}, 244 | nodata=-999, 245 | dtype="int64", 246 | ) 247 | assert cfg["a"] == rlp("int64", -999, resampling="mode", meta=_bands["a"]) 248 | assert cfg["b"] == rlp( 249 | "int64", -999, resampling="sum", dims=("y", "x", "b"), meta=_bands["b"] 250 | ) 251 | 252 | cfg = resolve_load_cfg( 253 | _bands, 254 | dtype={"a": "float32"}, 255 | ) 256 | assert cfg["a"] == rlp("float32", -1, meta=_bands["a"]) 257 | assert cfg["b"] == rlp("float64", None, dims=_bands["b"].dims, meta=_bands["b"]) 258 | -------------------------------------------------------------------------------- /docs/benchmarking.rst: -------------------------------------------------------------------------------- 1 | Benchmarking Utilities 2 | ###################### 3 | 4 | Module :py:mod:`odc.stac.bench` provides utilities for benchmarking data loading. It is both a 5 | library that can be used directly from a notebook and a command line application. 6 | 7 | .. code-block:: none 8 | 9 | Usage: python -m odc.stac.bench [OPTIONS] COMMAND [ARGS]... 10 | 11 | Benchmarking tool for odc.stac. 12 | 13 | Options: 14 | --help Show this message and exit. 15 | 16 | Commands: 17 | dask Launch local Dask Cluster. 18 | prepare Prepare benchmarking dataset. 19 | report Collate results of multiple benchmark experiments. 20 | run Run data load benchmark using Dask. 21 | 22 | 23 | Define Test Site 24 | ================ 25 | 26 | To start, define a test site or use one of the pre-configured examples. Site 27 | configuration is a JSON file that describes a STAC API query and some other metadata. Below is the 28 | definition of the ``s2-ms-mosaic`` sample site. 29 | 30 | .. code-block:: json 31 | 32 | { 33 | "file_id": "s2-ms-mosaic_2020-06-06--P1D", 34 | "api": "https://planetarycomputer.microsoft.com/api/stac/v1", 35 | "search": { 36 | "collections": ["sentinel-2-l2a"], 37 | "datetime": "2020-06-06", 38 | "bbox": [ 27.345815, -14.98724, 27.565542, -7.710992] 39 | } 40 | } 41 | 42 | This queries the Planetary Computer STAC API endpoint for the Sentinel-2 collection and stores the 43 | results in a GeoJSON file, ``{file_id}.geojson``. Try it now: 44 | 45 | .. code-block:: bash 46 | 47 | python -m odc.stac.bench prepare --sample-site s2-ms-mosaic 48 | 49 | The command above writes a GeoJSON file to your current directory. We will use this file to run 50 | benchmarks later on.
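The same step can also be driven from Python. Below is a minimal sketch using
:py:data:`~odc.stac.bench.SAMPLE_SITES` and :py:func:`~odc.stac.bench.dump_site`, the same calls
the ``prepare`` command makes under the hood:

.. code-block:: python3

   from odc.stac.bench import SAMPLE_SITES, dump_site

   # Same site definition as used by ``--sample-site s2-ms-mosaic``
   site = SAMPLE_SITES["s2-ms-mosaic"]

   # Run the STAC API query and write ``{file_id}.geojson``
   # to the current directory, same as the CLI does
   dump_site(site, overwrite=True)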
51 | 52 | 53 | Prepare Load Configuration 54 | ========================== 55 | 56 | Let's create a base data-loading configuration file suitable for running benchmarks with the site 57 | configuration produced previously. Save the example below as ``cfg.json``. 58 | 59 | .. code-block:: json 60 | 61 | { 62 | "method": "odc-stac", 63 | "bands": ["B02", "B03", "B04"], 64 | "patch_url": "planetary_computer.sas.sign", 65 | "extra": { 66 | "stackstac": { 67 | "dtype": "uint16", 68 | "fill_value": 0 69 | }, 70 | "odc-stac": { 71 | "groupby": "solar_day", 72 | "stac_cfg": {"*": {"warnings": "ignore"}} 73 | } 74 | } 75 | } 76 | 77 | Making your own is simple: 78 | 79 | 1. Create a :py:class:`~odc.stac.bench.BenchLoadParams` object 80 | 2. Modify configuration options to match your needs 81 | 3. Dump it to JSON 82 | 83 | .. code-block:: python3 84 | 85 | from odc.stac.bench import BenchLoadParams 86 | 87 | params = BenchLoadParams() 88 | params.scenario = "web-zoom-8" 89 | params.bands = ["red", "green", "blue"] 90 | params.crs = "EPSG:3857" 91 | params.resolution = 610 92 | params.chunks = (512, 512) 93 | params.resampling = "bilinear" 94 | 95 | print(params.to_json()) 96 | 97 | 98 | Start Dask Cluster 99 | ================== 100 | 101 | Before we can run the benchmark we need an active Dask cluster. You can connect to a remote 102 | cluster or run a local one. A convenient local Dask cluster launcher is provided; in a separate 103 | shell run this command: 104 | 105 | .. code-block:: none 106 | 107 | > python -m odc.stac.bench dask --memory-limit=8GiB 108 | 109 | GDAL_DISABLE_READDIR_ON_OPEN = EMPTY_DIR 110 | GDAL_HTTP_MAX_RETRY = 10 111 | GDAL_HTTP_RETRY_DELAY = 0.5 112 | GDAL_DATA = /srv/conda/envs/notebook/share/gdal 113 | Launched Dask Cluster: tcp://127.0.0.1:43677 114 | --scheduler='tcp://127.0.0.1:43677' 115 | 116 | This will start a local Dask cluster, configure GDAL on the Dask workers, and print out the address 117 | of the Dask scheduler. Leave this running and take note of the ``--scheduler=...`` option that was 118 | printed; we will use it in the next step.
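The launcher is a thin wrapper around :py:class:`distributed.Client`. A rough sketch of the
equivalent Python setup (the GDAL configuration of the workers is omitted here), should you prefer
to manage the cluster from a notebook:

.. code-block:: python3

   import distributed

   # Roughly what ``python -m odc.stac.bench dask --memory-limit=8GiB`` does,
   # minus the GDAL environment setup on the workers
   client = distributed.Client(n_workers=1, memory_limit="8GiB")

   # This is the value to pass via ``--scheduler=`` in the next step
   print(client.scheduler_info()["address"])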
119 | 120 | 121 | Run Benchmark 122 | ============= 123 | 124 | We are now ready to run benchmarks with the ``run`` command documented below: 125 | 126 | .. code-block:: none 127 | 128 | Usage: python -m odc.stac.bench run [OPTIONS] SITE 129 | 130 | Run data load benchmark using Dask. 131 | 132 | SITE is a GeoJSON file produced by `prepare` step. 133 | 134 | Options: 135 | -c, --config FILE Experiment configuration in json format 136 | -n, --ntimes INTEGER Configure number of times to run 137 | --method [odc-stac|stackstac] Data loading method 138 | --bands TEXT Comma separated list of bands 139 | --chunks INTEGER... Chunk size Y,X order 140 | --resolution FLOAT Set output resolution 141 | --crs TEXT Set CRS 142 | --resampling [nearest|bilinear|cubic|cubic_spline|lanczos|average|mode|gauss|max|min|med|q1|q3|sum|rms] 143 | Resampling method when changing 144 | resolution/projection 145 | --show-config Show configuration only, don't run 146 | --scheduler TEXT Dask server to connect to 147 | --help Show this message and exit. 148 | 149 | 150 | First let's check the configuration; note that we run at reduced resolution for quicker 151 | turnaround (the ``--resolution=80`` option). Command-line arguments take precedence over 152 | configuration parameters supplied in the JSON file. 153 | 154 | .. code-block:: bash 155 | 156 | python -m odc.stac.bench run \ 157 | s2-ms-mosaic_2020-06-06--P1D.geojson \ 158 | --config cfg.json \ 159 | --resolution=80 \ 160 | --show-config 161 | 162 | If the above went well we can start the benchmark: remove the ``--show-config`` option and add the 163 | ``--scheduler=`` option that was printed when we started the Dask cluster. Let's also configure the 164 | number of benchmarking passes with the ``-n 10`` option. 165 | 166 | .. code-block:: bash 167 | 168 | python -m odc.stac.bench run \ 169 | s2-ms-mosaic_2020-06-06--P1D.geojson \ 170 | --config cfg.json \ 171 | --resolution=80 \ 172 | -n 10 \ 173 | --scheduler='tcp://127.0.0.1:43677' 174 | 175 | 176 | .. note:: 177 | 178 | Don't forget to edit the ``--scheduler=`` part of the above command. 179 | 180 | This first prints out the configuration that will be used, 181 | 182 | .. code-block:: none 183 | 184 | Loaded: 9 STAC items from 's2-ms-mosaic_2020-06-06--P1D.geojson' 185 | Will use following load configuration 186 | ------------------------------------------------------------ 187 | { /** NOTE: this section was edited for brevity **/ 188 | "scenario": "s2-ms-mosaic_2020-06-06--P1D", 189 | "method": "odc-stac", 190 | "chunks": [ 2048, 2048 ], 191 | "bands": [ "B02", "B03", "B04" ], 192 | "resolution": 80.0, 193 | "crs": null, 194 | "resampling": null, 195 | "patch_url": "planetary_computer.sas.sign", 196 | "extra": { 197 | "stackstac": { "dtype": "uint16", "fill_value": 0 }, 198 | "odc-stac": { "groupby": "solar_day", "stac_cfg": {"*": {"warnings": "ignore" }}} 199 | } 200 | } 201 | ------------------------------------------------------------ 202 | 203 | 204 | followed by information about the data being loaded and some stats about the Dask cluster on which 205 | the benchmark will run: 206 | 207 | .. code-block:: none 208 | 209 | Connecting to Dask Scheduler: tcp://127.0.0.1:43677 210 | Constructing Dask graph 211 | Starting benchmark run (10 runs) 212 | ============================================================ 213 | Will write results to: s2-ms-mosaic_2020-06-06--P1D_20220104T080235.133458.pkl 214 | method : odc-stac 215 | Scenario : s2-ms-mosaic_2020-06-06--P1D 216 | T.slice : 2020-06-06 217 | Data : 1.3.11373.1374.uint16, 89.42 MiB 218 | Chunks : 1.1.2048.1374 (T.B.Y.X) 219 | GEO : epsg:32735 220 | | 80, 0, 499920| 221 | | 0,-80, 9200080| 222 | Cluster : 1 workers, 4 threads, 8.00 GiB 223 | ------------------------------------------------------------ 224 | 225 | As benchmark runs complete, brief summaries are printed: 226 | 227 | .. code-block:: none 228 | 229 | T.Elapsed : 2.845 seconds 230 | T.Submit : 0.228 seconds 231 | Throughput : 16.480 Mpx/second (overall) 232 | | 4.120 Mpx/second (per thread) 233 | ------------------------------------------------------------ 234 | T.Elapsed : 2.448 seconds 235 | T.Submit : 0.015 seconds 236 | Throughput : 19.152 Mpx/second (overall) 237 | | 4.788 Mpx/second (per thread) 238 | ... continues 239 | 240 | You can terminate early without losing data with ``Ctrl-C``: benchmark results are saved after each 241 | benchmark pass (overwriting the previous save-point) in case there is a crash or some other fatal 242 | error. 243 | 244 | 245 | Review Results 246 | ============== 247 | 248 | To convert benchmark results stored in ``.pkl`` file(s) to CSV use the following: 249 | 250 | .. code-block:: bash 251 | 252 | python -m odc.stac.bench report *.pkl --output results.csv 253 | 254 | The idea is to run benchmarks with different load configurations, for example different chunk 255 | sizes, or to compare the relative costs of resampling modes, and then combine the results into one data table.
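The ``run`` and ``report`` steps are also available as library calls, which is handy inside a
notebook. A minimal sketch, assuming the site and config files from the previous steps, a running
Dask cluster on the default address, and an arbitrary output file name:

.. code-block:: python3

   import json

   import distributed

   from odc.stac.bench import BenchLoadParams, load_from_json, load_results, run_bench

   client = distributed.Client("tcp://localhost:8786")

   with open("cfg.json", "rt", encoding="utf8") as src:
       cfg = BenchLoadParams.from_json(src.read())

   with open("s2-ms-mosaic_2020-06-06--P1D.geojson", "rt", encoding="utf8") as src:
       site = json.load(src)

   xx = load_from_json(site, cfg)  # construct the Dask graph
   ctx, timings = run_bench(xx, client, ntimes=10, results_file="bench-results.pkl")

   # Same as ``report``: collate pickled results into one CSV table
   load_results("bench-results.pkl").to_csv("results.csv")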
256 | -------------------------------------------------------------------------------- /tests/test_bench.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=wrong-import-order,wrong-import-position, 2 | # pylint: disable=redefined-outer-name,missing-function-docstring,missing-module-docstring 3 | import pytest 4 | 5 | distributed = pytest.importorskip("distributed") 6 | 7 | from unittest.mock import MagicMock 8 | 9 | import xarray 10 | import numpy as np 11 | from distributed import Client 12 | from odc.geo.xr import ODCExtension 13 | 14 | from odc.stac.bench import ( 15 | BenchLoadParams, 16 | collect_context_info, 17 | load_from_json, 18 | run_bench, 19 | ) 20 | 21 | CFG = { 22 | "*": { 23 | "warnings": "ignore", 24 | # for every asset in every product default to uint16 with nodata=0 25 | "assets": {"*": {"data_type": "uint16", "nodata": 0}}, 26 | } 27 | } 28 | 29 | 30 | @pytest.fixture 31 | def fake_dask_client(monkeypatch): 32 | cc = MagicMock() 33 | cc.scheduler_info.return_value = { 34 | "type": "Scheduler", 35 | "id": "Scheduler-80d943db-16f6-4476-a51a-64d57a287e9b", 36 | "address": "inproc://10.10.10.10/1281505/1", 37 | "services": {"dashboard": 8787}, 38 | "started": 1638320006.6135786, 39 | "workers": { 40 | "inproc://10.10.10.10/1281505/4": { 41 | "type": "Worker", 42 | "id": 0, 43 | "host": "10.1.1.140", 44 | "resources": {}, 45 | "local_directory": "/tmp/dask-worker-space/worker-uhq1b9bh", 46 | "name": 0, 47 | "nthreads": 2, 48 | "memory_limit": 524288000, 49 | "last_seen": 1638320007.2504623, 50 | "services": {"dashboard": 38439}, 51 | "metrics": { 52 | "executing": 0, 53 | "in_memory": 0, 54 | "ready": 0, 55 | "in_flight": 0, 56 | "bandwidth": {"total": 100000000, "workers": {}, "types": {}}, 57 | "spilled_nbytes": 0, 58 | "cpu": 0.0, 59 | "memory": 145129472, 60 | "time": 1638320007.2390554, 61 | "read_bytes": 0.0, 62 | "write_bytes": 0.0, 63 | "read_bytes_disk": 0.0, 64 | "write_bytes_disk": 0.0, 65 | "num_fds": 82, 66 | }, 67 | "nanny": None, 68 | } 69 | }, 70 | } 71 | cc.cancel.return_value = None 72 | cc.restart.return_value = cc 73 | cc.persist = lambda x: x 74 | cc.compute = lambda x: x 75 | 76 | monkeypatch.setattr(distributed, "wait", MagicMock()) 77 | yield cc 78 | 79 | 80 | @pytest.fixture(scope="module") 81 | def dask_client(): 82 | client = Client( 83 | n_workers=1, 84 | threads_per_worker=2, 85 | memory_limit="500MiB", 86 | local_directory="/tmp/", 87 | memory_target_fraction=False, 88 | memory_spill_fraction=False, 89 | memory_pause_fraction=False, 90 | dashboard_address=None, 91 | processes=False, 92 | ) 93 | yield client 94 | client.shutdown() 95 | del client 96 | 97 | 98 | @pytest.mark.skipif( 99 | not pytest.importorskip("stackstac"), reason="stackstac not installed" 100 | ) 101 | def test_load_from_json_stackstac(fake_dask_client, bench_site1, bench_site2) -> None: 102 | dask_client = fake_dask_client 103 | params = BenchLoadParams( 104 | scenario="test1", 105 | method="stackstac", 106 | bands=("B04", "B02", "B03"), 107 | chunks=(2048, 2048), 108 | resampling="nearest", 109 | extra={ 110 | "odc-stac": {"groupby": "solar_day", "stac_cfg": CFG}, 111 | "stackstac": { 112 | "dtype": "uint16", 113 | "fill_value": np.uint16(0), 114 | "rescale": False, 115 | }, 116 | }, 117 | ) 118 | xx = load_from_json(bench_site1, params) 119 | assert "band" in xx.dims 120 | assert xx.shape == (1, 3, 90978, 10980) 121 | assert xx.dtype == "uint16" 122 | assert xx.spec.epsg == 32735 123 | 124 | yy = load_from_json( 125 | 
bench_site1, params.with_method("odc-stac"), geobox=xx.odc.geobox 126 | ) 127 | 128 | rrx = collect_context_info(dask_client, xx) 129 | rry = collect_context_info(dask_client, yy) 130 | assert rrx.shape == rry.shape 131 | assert rrx == rry 132 | 133 | xx = load_from_json(bench_site2, params) 134 | assert "band" in xx.dims 135 | assert xx.dtype == "uint16" 136 | assert xx.spec.epsg == 32735 137 | 138 | params.crs = "epsg:32736" 139 | xx = load_from_json(bench_site2, params) 140 | assert "band" in xx.dims 141 | assert xx.dtype == "uint16" 142 | assert xx.spec.epsg == 32736 143 | 144 | with pytest.raises(ValueError): 145 | load_from_json(bench_site1, params.with_method("wroNg")) 146 | 147 | 148 | def test_bench_context(fake_dask_client, bench_site1, bench_site2) -> None: 149 | params = BenchLoadParams( 150 | scenario="test1", 151 | method="odc-stac", 152 | bands=("red", "green", "blue"), 153 | chunks=(2048, 2048), 154 | extra={"odc-stac": {"groupby": "solar_day", "stac_cfg": CFG}}, 155 | ) 156 | xx = load_from_json(bench_site1, params) 157 | nt, ny, nx = xx.red.shape 158 | nb = len(xx.data_vars) 159 | 160 | # Check normal case Dataset, with time coords 161 | rr = collect_context_info( 162 | fake_dask_client, xx, method=params.method, scenario="site1" 163 | ) 164 | assert isinstance(xx.odc, ODCExtension) 165 | assert rr.shape == (nt, nb, ny, nx) 166 | assert rr.chunks == (1, 1, 2048, 2048) 167 | assert rr.crs == f"epsg:{xx.odc.geobox.crs.epsg}" 168 | assert rr.crs == xx.odc.geobox.crs 169 | assert rr.nthreads == 2 170 | assert rr.total_ram == 500 * (1 << 20) 171 | 172 | header_txt = rr.render_txt() 173 | assert "T.slice : 2020-06-06" in header_txt 174 | assert f"Data : 1.3.{ny}.{nx}.uint16, 5.58 GiB" in header_txt 175 | 176 | run_txt = rr.render_timing_info((0, 0.1, 30)) 177 | assert isinstance(run_txt, str) 178 | 179 | pd_dict = rr.to_pandas_dict() 180 | assert pd_dict["resolution"] == rr.resolution 181 | assert pd_dict["data"] == f"1.3.{ny}.{nx}.uint16" 182 | assert pd_dict["chunks_x"] == 2048 183 | assert pd_dict["chunks_y"] == 2048 184 | 185 | # Check DataArray case 186 | rr = collect_context_info( 187 | fake_dask_client, xx.red, method="odc-stac", scenario="site1" 188 | ) 189 | assert rr.shape == (nt, 1, ny, nx) 190 | assert rr.crs == xx.odc.geobox.crs 191 | 192 | # Check Dataset with 0 dimension time axis and extras field 193 | rr = collect_context_info( 194 | fake_dask_client, 195 | xx.isel(time=0), 196 | method=params.method, 197 | scenario=params.scenario, 198 | extras={"custom": 2}, 199 | ) 200 | assert rr.extras == {"custom": 2} 201 | assert rr.shape == (1, nb, ny, nx) 202 | 203 | header_txt = rr.render_txt() 204 | assert "GEO : epsg:32735" in header_txt 205 | assert "T.slice : 2020-06-06" in header_txt 206 | 207 | # Check no time info at all 208 | rr = collect_context_info( 209 | fake_dask_client, 210 | xx.isel(time=0, drop=True), 211 | method=params.method, 212 | scenario=params.scenario, 213 | ) 214 | assert rr.shape == (nt, nb, ny, nx) 215 | assert rr.dtype == xx.red.dtype 216 | assert rr.temporal_id == "-" 217 | 218 | # Check wrong type 219 | with pytest.raises(ValueError): 220 | collect_context_info(fake_dask_client, "wrong input type") # type: ignore 221 | 222 | # Check multi-time axis 223 | xx = load_from_json(bench_site2, params) 224 | nt, ny, nx = xx.red.shape 225 | nb = len(xx.data_vars) 226 | 227 | assert nt > 1 228 | 229 | rr = collect_context_info( 230 | fake_dask_client, 231 | xx, 232 | method=params.method, 233 | scenario=params.scenario, 234 | ) 235 | assert 
rr.shape == (nt, nb, ny, nx) 236 | assert rr.temporal_id == "2020-06-01__2020-07-31" 237 | 238 | # Check missing GEO info 239 | no_geo = _strip_geo(xx.red) 240 | assert no_geo.odc.geobox is None or no_geo.odc.geobox.crs is None 241 | with pytest.raises(ValueError): 242 | # no geobox 243 | collect_context_info(fake_dask_client, no_geo) 244 | 245 | 246 | def _strip_geo(xx: xarray.DataArray) -> xarray.DataArray: 247 | no_geo = xx.drop_vars("spatial_ref") 248 | no_geo.attrs.pop("crs", None) 249 | no_geo.attrs.pop("grid_mapping", None) 250 | no_geo.encoding.pop("grid_mapping", None) 251 | no_geo.x.attrs.pop("crs", None) 252 | no_geo.y.attrs.pop("crs", None) 253 | # get rid of cached geobox 254 | no_geo = xarray.DataArray( 255 | no_geo.data, 256 | coords=no_geo.coords, 257 | dims=no_geo.dims, 258 | attrs=no_geo.attrs, 259 | ) 260 | assert no_geo.odc.geobox is None or no_geo.odc.geobox.crs is None 261 | return no_geo 262 | 263 | 264 | def test_run_bench(fake_dask_client, bench_site1, capsys) -> None: 265 | dask_client = fake_dask_client 266 | params = BenchLoadParams( 267 | scenario="test1", 268 | method="odc-stac", 269 | bands=("red", "green", "blue"), 270 | chunks=(2048, 2048), 271 | extra={"odc-stac": {"groupby": "solar_day", "stac_cfg": CFG}}, 272 | ) 273 | xx = load_from_json(bench_site1, params) 274 | 275 | rr, timing = run_bench(xx, dask_client, 10) 276 | 277 | assert rr.scenario == params.scenario 278 | assert rr.method == params.method 279 | assert len(timing) == 10 280 | _io = capsys.readouterr() 281 | assert len(_io.out) > 0 282 | 283 | 284 | def test_bench_params_json() -> None: 285 | params = BenchLoadParams( 286 | scenario="test1", 287 | method="odc-stac", 288 | bands=("red", "green", "blue"), 289 | chunks=(100, 200), 290 | extra={"odc-stac": {"groupby": "solar_day", "stac_cfg": CFG}}, 291 | ) 292 | 293 | assert params == BenchLoadParams.from_json(params.to_json()) 294 | assert params.to_json() == BenchLoadParams.from_json(params.to_json()).to_json() 295 | 296 | # function should round-trip too 297 | params.patch_url = load_from_json 298 | assert params == BenchLoadParams.from_json(params.to_json()) 299 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /tests/test_model.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=redefined-outer-name,missing-module-docstring,missing-function-docstring 2 | # pylint: disable=import-outside-toplevel 3 | import datetime as dt 4 | 5 | import pytest 6 | from dask.base import tokenize 7 | from odc.geo.geobox import GeoBox 8 | from odc.loader.types import ( 9 | AuxDataSource, 10 | BandKey, 11 | RasterBandMetadata, 12 | RasterGroupMetadata, 13 | RasterLoadParams, 14 | RasterSource, 15 | norm_key, 16 | ) 17 | 18 | from odc.stac import ParsedItem, RasterCollectionMetadata 19 | from odc.stac.model import PropertyLoadRequest 20 | from odc.stac.testing.stac import b_, mk_parsed_item 21 | 22 | 23 | def test_band_load_info() -> None: 24 | meta = RasterBandMetadata(data_type="uint16", nodata=13) 25 | band = RasterSource("https://example.com/some.tif", meta=meta) 26 | assert RasterLoadParams.same_as(meta).dtype == "uint16" 27 | assert RasterLoadParams.same_as(band).fill_value == 13 28 | 29 | band = RasterSource("file:///") 30 | assert RasterLoadParams.same_as(band).dtype == "float32" 31 | assert RasterLoadParams().dtype is None 32 | assert RasterLoadParams().nearest is True 33 | assert RasterLoadParams(resampling="average").nearest is False 34 | 35 | 36 | @pytest.mark.parametrize("lon", [0, -179, 179, 10, 23.4]) 37 | def test_mid_longitude(lon: float) -> None: 38 | gbox = GeoBox.from_bbox((lon - 0.1, 0, lon + 0.1, 1), shape=(100, 100)) 39 | xx = mk_parsed_item([b_("b1", gbox)]) 40 | assert xx.geometry is not None 41 | assert xx.geometry.crs == "epsg:4326" 42 | assert xx.mid_longitude == pytest.approx(lon) 43 | 44 | assert mk_parsed_item([]).mid_longitude is None 45 | 46 | 47 | def test_solar_day() -> None: 48 | def _mk(lon: float, datetime): 49 | gbox = GeoBox.from_bbox((lon - 0.1, 0, lon + 0.1, 1), shape=(100, 100)) 50 | return mk_parsed_item([b_("b1", gbox)], datetime=datetime) 51 | 52 | for lon in [0, 1, 2, 3, 14, -1, -14, -3]: 53 | xx = _mk(lon, "2020-01-02T12:13:14Z") 54 | assert xx.mid_longitude == pytest.approx(lon) 55 | assert xx.nominal_datetime == xx.solar_date 56 | 57 | xx = _mk(15.1, "2020-01-02T12:13:14Z") 58 | assert xx.nominal_datetime != xx.solar_date 59 | assert xx.nominal_datetime + dt.timedelta(seconds=3600) == xx.solar_date 60 | assert xx.nominal_datetime + dt.timedelta(seconds=3600) == xx.solar_date_at(20) 61 | 62 | xx = _mk(-15.1, "2020-01-02T12:13:14Z") 63 | assert xx.nominal_datetime != xx.solar_date 64 | assert xx.nominal_datetime - dt.timedelta(seconds=3600) == xx.solar_date 65 | assert xx.nominal_datetime - dt.timedelta(seconds=3600) == xx.solar_date_at(-20) 66 | 67 | xx = mk_parsed_item([b_("b1")], datetime="2000-01-02") 68 | assert xx.geometry is None 69 | assert xx.nominal_datetime == xx.solar_date 70 | 71 | xx = _mk(10, None) 72 | with pytest.raises(ValueError): 73 | _ = xx.solar_date 74 | 75 | 76 | @pytest.fixture() 77 | def collection_ab() -> RasterCollectionMetadata: 78 | return RasterCollectionMetadata( 79 | "ab", 80 | RasterGroupMetadata( 81 | { 82 | ("a", 1): RasterBandMetadata("uint8"), 83 | ("b", 1): RasterBandMetadata("uint16"), 84 | }, 85 | {"A": [("a", 1)], "AA": [("a", 1)], "B": [("b", 1)]}, 86 | ), 87 | has_proj=True, 88 | band2grid={}, 89 | ) 90 | 91 | 92 | @pytest.fixture() 93 | def parsed_item_ab(collection_ab: RasterCollectionMetadata) -> ParsedItem: 94 | def _src(k: BandKey) -> RasterSource | AuxDataSource: 95 | meta = 
collection_ab[k] 96 | if isinstance(meta, RasterBandMetadata): 97 | return RasterSource(f"file:///{k[0]}-{k[1]}.tif", meta=meta) 98 | return AuxDataSource(f"file:///{k[0]}-{k[1]}.aux", meta=meta) 99 | 100 | return ParsedItem( 101 | "item-ab", 102 | collection_ab, 103 | {k: _src(k) for k in collection_ab}, 104 | ) 105 | 106 | 107 | def test_collection(collection_ab: RasterCollectionMetadata) -> None: 108 | xx = collection_ab 109 | 110 | assert xx.canonical_name("b") == "b" 111 | assert xx.canonical_name("B") == "b" 112 | assert xx.canonical_name("AA") == "a" 113 | assert xx.canonical_name("a") == "a" 114 | 115 | assert xx.band_key("B") == ("b", 1) 116 | assert xx.band_key("AA") == ("a", 1) 117 | assert xx["AA"].data_type == "uint8" 118 | assert xx["b"].data_type == "uint16" 119 | assert "b" in xx 120 | assert "b.1" in xx 121 | assert ("b", 1) in xx 122 | assert {} not in xx 123 | assert ("some-random", 1) not in xx 124 | assert "no-such-band" not in xx 125 | 126 | assert xx.resolve_bands("AA")["AA"] == xx["a"] 127 | assert list(xx.resolve_bands(["a", "B"])) == ["a", "B"] 128 | assert xx.resolve_bands(["a", "B"])["B"] is xx["b"] 129 | assert xx.resolve_bands(["a", "B"])["a"] is xx["a"] 130 | assert set(xx) == set([("a", 1), ("b", 1)]) 131 | assert len(xx) == 2 132 | 133 | for k in "a AA A b B".split(" "): 134 | assert xx.band_key(k) in xx.bands 135 | assert xx.canonical_name(k) in ["a", "b"] 136 | assert k in xx 137 | assert isinstance(xx[k], RasterBandMetadata) 138 | assert xx[k] is xx[xx.band_key(k)] 139 | 140 | with pytest.raises(ValueError): 141 | _ = xx.resolve_bands(["xxxxxxxx", "a"]) 142 | 143 | with pytest.raises(KeyError): 144 | _ = xx["no-such-band"] 145 | 146 | 147 | def test_collection_allbands() -> None: 148 | xx = mk_parsed_item([b_("a.1"), b_("a.2"), b_("a.3")]) 149 | md = xx.collection 150 | assert md.all_bands == ["a.1", "a.2", "a.3"] 151 | 152 | md.aliases["AA"] = [("a", 2)] 153 | md.aliases["AAA"] = [("a", 3)] 154 | assert md["AA"] == md["a.2"] 155 | assert md["AAA"] == md["a.3"] 156 | 157 | # expect aliases to be used for all_band when multi-band 158 | # assets have unique aliases 159 | assert md.all_bands == ["a.1", "AA", "AAA"] 160 | assert md.canonical_name("a.2") == "AA" 161 | assert md.canonical_name("AA") == "AA" 162 | assert md.canonical_name("a.3") == "AAA" 163 | assert md.canonical_name("AAA") == "AAA" 164 | 165 | 166 | def test_parsed_item(parsed_item_ab: ParsedItem) -> None: 167 | xx = parsed_item_ab 168 | assert xx["AA"] is not None 169 | assert xx["b"] is not None 170 | assert xx["AA"].meta is not None 171 | assert xx["AA"].meta.data_type == "uint8" 172 | assert xx["b"].meta is not None 173 | assert xx["b"].meta.data_type == "uint16" 174 | 175 | assert xx.resolve_bands("AA")["AA"] == xx["a"] 176 | assert list(xx.resolve_bands(["a", "B"])) == ["a", "B"] 177 | assert xx.resolve_bands(["a", "B"])["B"] is xx["b"] 178 | assert xx.resolve_bands(["a", "B"])["a"] is xx["a"] 179 | assert set(xx) == set([("a", 1), ("b", 1)]) 180 | assert len(xx) == 2 181 | assert len(set([xx, xx, xx])) == 1 182 | assert ("a", 1) in xx 183 | assert ("a", 2) not in xx 184 | assert ("a", 2, 3) not in xx 185 | 186 | for k in "a AA A b B".split(" "): 187 | assert k in xx 188 | assert [k] not in xx 189 | assert f"___{k}___" not in xx 190 | assert isinstance(xx[k], RasterSource) 191 | assert xx[k] is xx.resolve_bands(k)[k] 192 | 193 | assert isinstance(xx["b"], RasterSource) 194 | assert isinstance(xx["b"].strip(), RasterSource) 195 | assert xx["b"].strip().geobox is None 196 | assert 
xx["b"].strip().meta is xx["b"].meta 197 | assert xx["b"].strip().uri == xx["b"].uri 198 | assert xx["b"].strip().band == xx["b"].band 199 | assert xx["b"].strip().subdataset == xx["b"].subdataset 200 | assert xx["b"].strip().driver_data == xx["b"].driver_data 201 | 202 | xx_strip = xx.strip() 203 | assert isinstance(xx_strip["b"], RasterSource) 204 | assert xx_strip["b"].geobox is None 205 | assert xx_strip["b"].meta is xx["b"].meta 206 | assert xx_strip["b"].uri == xx["b"].uri 207 | assert xx_strip["b"].band == xx["b"].band 208 | assert xx_strip["b"].subdataset == xx["b"].subdataset 209 | assert xx_strip["b"].driver_data == xx["b"].driver_data 210 | 211 | 212 | def test_tokenize(parsed_item_ab: ParsedItem) -> None: 213 | assert tokenize(parsed_item_ab.collection) == tokenize(parsed_item_ab.collection) 214 | assert tokenize(parsed_item_ab) == tokenize(parsed_item_ab) 215 | assert tokenize(parsed_item_ab["a"]) == tokenize(parsed_item_ab["a"]) 216 | assert tokenize(parsed_item_ab["a"].meta) == tokenize(parsed_item_ab["a"].meta) 217 | 218 | assert tokenize(RasterLoadParams()) == tokenize(RasterLoadParams()) 219 | assert tokenize(RasterLoadParams("uint8")) == tokenize(RasterLoadParams("uint8")) 220 | assert tokenize(RasterLoadParams("uint8")) != tokenize(RasterLoadParams("uint32")) 221 | 222 | 223 | @pytest.mark.parametrize( 224 | "name, expected", 225 | [ 226 | ("a", ("a", 1)), 227 | ("a.1", ("a", 1)), 228 | ("a.2", ("a", 2)), 229 | (("b", 1), ("b", 1)), 230 | ("foo.tiff", ("foo.tiff", 1)), 231 | ], 232 | ) 233 | def test_normkey(name, expected) -> None: 234 | assert norm_key(name) == expected 235 | 236 | 237 | def test_version() -> None: 238 | from odc.stac import __version__ # pylint: disable=no-name-in-module 239 | 240 | assert __version__ is not None 241 | assert len(__version__.split(".")) == 3 242 | 243 | 244 | def test_property_load_request_basic() -> None: 245 | """Test basic PropertyLoadRequest functionality.""" 246 | # Test with just key 247 | req = PropertyLoadRequest(key="eo:cloud_cover") 248 | assert req.key == "eo:cloud_cover" 249 | assert req.dtype == "float32" # default 250 | assert req.name is None # default 251 | assert req.nodata is None 252 | assert req.units == "1" 253 | assert req.output_name == "eo_cloud_cover" 254 | 255 | # Test with all fields 256 | req = PropertyLoadRequest( 257 | key="eo:cloud_cover", dtype="int16", name="cloud_cover", nodata=-999 258 | ) 259 | assert req.key == "eo:cloud_cover" 260 | assert req.dtype == "int16" 261 | assert req.name == "cloud_cover" 262 | assert req.nodata == -999 263 | assert req.units == "1" 264 | assert req.output_name == "cloud_cover" 265 | 266 | 267 | def test_property_load_request_from_user_input() -> None: 268 | """Test from_user_input method with various inputs.""" 269 | # Test with string inputs 270 | requests = PropertyLoadRequest.from_user_input(["eo:cloud_cover", "eo:platform"]) 271 | assert len(requests) == 2 272 | assert requests[0].key == "eo:cloud_cover" 273 | assert requests[1].key == "eo:platform" 274 | assert all(req.dtype == "float32" for req in requests) 275 | assert all(req.name is None for req in requests) 276 | 277 | # Test with dict inputs 278 | requests = PropertyLoadRequest.from_user_input( 279 | [ 280 | {"key": "eo:cloud_cover", "dtype": "int16", "name": "cloud_cover"}, 281 | {"key": "eo:platform", "name": "satellite"}, 282 | ] 283 | ) 284 | assert len(requests) == 2 285 | assert requests[0].key == "eo:cloud_cover" 286 | assert requests[0].dtype == "int16" 287 | assert requests[0].name == "cloud_cover" 
288 | assert requests[1].key == "eo:platform" 289 | assert requests[1].dtype == "float32" # default 290 | assert requests[1].name == "satellite" 291 | 292 | # Test with mixed inputs 293 | requests = PropertyLoadRequest.from_user_input( 294 | ["eo:cloud_cover", {"key": "eo:platform", "name": "satellite"}] 295 | ) 296 | assert len(requests) == 2 297 | assert requests[0].key == "eo:cloud_cover" 298 | assert requests[0].dtype == "float32" 299 | assert requests[0].name is None 300 | assert requests[1].key == "eo:platform" 301 | assert requests[1].name == "satellite" 302 | 303 | 304 | def test_property_load_request_errors() -> None: 305 | """Test error cases for PropertyLoadRequest.""" 306 | # Test missing key in dict 307 | with pytest.raises(ValueError, match="Dictionary input must contain 'key' field"): 308 | PropertyLoadRequest.from_user_input([{"dtype": "int16"}]) 309 | 310 | # Test invalid input type 311 | with pytest.raises(ValueError, match="Input must be string or dict"): 312 | PropertyLoadRequest.from_user_input([123]) # type: ignore 313 | 314 | # Test empty sequence 315 | requests = PropertyLoadRequest.from_user_input([]) 316 | assert len(requests) == 0 317 | -------------------------------------------------------------------------------- /tests/data/S2A_28QCH_20200714_0_L2A.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "Feature", 3 | "stac_version": "1.0.0-beta.1", 4 | "stac_extensions": [ 5 | "https://stac-extensions.github.io/eo/v1.0.0/schema.json", 6 | "https://stac-extensions.github.io/view/v1.0.0/schema.json", 7 | "https://stac-extensions.github.io/projection/v1.0.0/schema.json" 8 | ], 9 | "id": "S2A_28QCH_20200714_0_L2A", 10 | "bbox": [ 11 | -16.92047119589637, 12 | 20.052963012507906, 13 | -16.72595055093299, 14 | 20.791551352272126 15 | ], 16 | "geometry": { 17 | "type": "Polygon", 18 | "coordinates": [ 19 | [ 20 | [ 21 | -16.911257292502636, 22 | 20.052963012507906 23 | ], 24 | [ 25 | -16.92047119589637, 26 | 20.78948450776572 27 | ], 28 | [ 29 | -16.72595055093299, 30 | 20.791551352272126 31 | ], 32 | [ 33 | -16.911257292502636, 34 | 20.052963012507906 35 | ] 36 | ] 37 | ] 38 | }, 39 | "properties": { 40 | "datetime": "2020-07-14T11:55:52Z", 41 | "platform": "sentinel-2a", 42 | "constellation": "sentinel-2", 43 | "instruments": [ 44 | "MSI" 45 | ], 46 | "gsd": 10, 47 | "data_coverage": 6.85, 48 | "view:off_nadir": 0, 49 | "eo:cloud_cover": 2.36, 50 | "proj:epsg": 32628, 51 | "sentinel:latitude_band": "Q", 52 | "sentinel:grid_square": "CH", 53 | "sentinel:sequence": "0", 54 | "sentinel:product_id": "S2A_MSIL2A_20200714T115221_N0214_R123_T28QCH_20200714T191310" 55 | }, 56 | "collection": "sentinel-s2-l2a-cogs", 57 | "assets": { 58 | "thumbnail": { 59 | "title": "Thumbnail", 60 | "type": "image/png", 61 | "href": "https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/28/Q/CH/2020/7/14/0/preview.jpg" 62 | }, 63 | "overview": { 64 | "title": "True color image", 65 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 66 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/L2A_PVI.tif", 67 | "proj:shape": [ 68 | 343, 69 | 343 70 | ], 71 | "proj:transform": [ 72 | 320.0, 73 | 0.0, 74 | 300000.0, 75 | 0.0, 76 | -320.0, 77 | 2300040.0, 78 | 0.0, 79 | 0.0, 80 | 1.0 81 | ] 82 | }, 83 | "info": { 84 | "title": "Original JSON metadata", 85 | "type": "application/json", 86 | "href": 
"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/28/Q/CH/2020/7/14/0/tileInfo.json" 87 | }, 88 | "metadata": { 89 | "title": "Original XML metadata", 90 | "type": "application/xml", 91 | "href": "https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/28/Q/CH/2020/7/14/0/metadata.xml" 92 | }, 93 | "visual": { 94 | "title": "True color image", 95 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 96 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/TCI.tif", 97 | "proj:shape": [ 98 | 10980, 99 | 10980 100 | ], 101 | "proj:transform": [ 102 | 10.0, 103 | 0.0, 104 | 300000.0, 105 | 0.0, 106 | -10.0, 107 | 2300040.0, 108 | 0.0, 109 | 0.0, 110 | 1.0 111 | ] 112 | }, 113 | "B01": { 114 | "title": "Band 1 (coastal)", 115 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 116 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/B01.tif", 117 | "proj:shape": [ 118 | 1830, 119 | 1830 120 | ], 121 | "proj:transform": [ 122 | 60.0, 123 | 0.0, 124 | 300000.0, 125 | 0.0, 126 | -60.0, 127 | 2300040.0, 128 | 0.0, 129 | 0.0, 130 | 1.0 131 | ] 132 | }, 133 | "B02": { 134 | "title": "Band 2 (blue)", 135 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 136 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/B02.tif", 137 | "proj:shape": [ 138 | 10980, 139 | 10980 140 | ], 141 | "proj:transform": [ 142 | 10.0, 143 | 0.0, 144 | 300000.0, 145 | 0.0, 146 | -10.0, 147 | 2300040.0, 148 | 0.0, 149 | 0.0, 150 | 1.0 151 | ] 152 | }, 153 | "B03": { 154 | "title": "Band 3 (green)", 155 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 156 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/B03.tif", 157 | "proj:shape": [ 158 | 10980, 159 | 10980 160 | ], 161 | "proj:transform": [ 162 | 10.0, 163 | 0.0, 164 | 300000.0, 165 | 0.0, 166 | -10.0, 167 | 2300040.0, 168 | 0.0, 169 | 0.0, 170 | 1.0 171 | ] 172 | }, 173 | "B04": { 174 | "title": "Band 4 (red)", 175 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 176 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/B04.tif", 177 | "proj:shape": [ 178 | 10980, 179 | 10980 180 | ], 181 | "proj:transform": [ 182 | 10.0, 183 | 0.0, 184 | 300000.0, 185 | 0.0, 186 | -10.0, 187 | 2300040.0, 188 | 0.0, 189 | 0.0, 190 | 1.0 191 | ] 192 | }, 193 | "B05": { 194 | "title": "Band 5", 195 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 196 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/B05.tif", 197 | "proj:shape": [ 198 | 5490, 199 | 5490 200 | ], 201 | "proj:transform": [ 202 | 20.0, 203 | 0.0, 204 | 300000.0, 205 | 0.0, 206 | -20.0, 207 | 2300040.0, 208 | 0.0, 209 | 0.0, 210 | 1.0 211 | ] 212 | }, 213 | "B06": { 214 | "title": "Band 6", 215 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 216 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/B06.tif", 217 | "proj:shape": [ 218 | 5490, 219 | 5490 220 | ], 221 | "proj:transform": [ 222 | 20.0, 223 | 0.0, 224 | 300000.0, 225 | 0.0, 226 | -20.0, 227 | 2300040.0, 228 | 0.0, 229 | 0.0, 230 | 1.0 231 | ] 232 | }, 233 | "B07": { 234 | "title": "Band 7", 235 | "type": "image/tiff; 
application=geotiff; profile=cloud-optimized", 236 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/B07.tif", 237 | "proj:shape": [ 238 | 5490, 239 | 5490 240 | ], 241 | "proj:transform": [ 242 | 20.0, 243 | 0.0, 244 | 300000.0, 245 | 0.0, 246 | -20.0, 247 | 2300040.0, 248 | 0.0, 249 | 0.0, 250 | 1.0 251 | ] 252 | }, 253 | "B08": { 254 | "title": "Band 8 (nir)", 255 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 256 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/B08.tif", 257 | "proj:shape": [ 258 | 10980, 259 | 10980 260 | ], 261 | "proj:transform": [ 262 | 10.0, 263 | 0.0, 264 | 300000.0, 265 | 0.0, 266 | -10.0, 267 | 2300040.0, 268 | 0.0, 269 | 0.0, 270 | 1.0 271 | ] 272 | }, 273 | "B8A": { 274 | "title": "Band 8A", 275 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 276 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/B8A.tif", 277 | "proj:shape": [ 278 | 5490, 279 | 5490 280 | ], 281 | "proj:transform": [ 282 | 20.0, 283 | 0.0, 284 | 300000.0, 285 | 0.0, 286 | -20.0, 287 | 2300040.0, 288 | 0.0, 289 | 0.0, 290 | 1.0 291 | ] 292 | }, 293 | "B09": { 294 | "title": "Band 9", 295 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 296 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/B09.tif", 297 | "proj:shape": [ 298 | 1830, 299 | 1830 300 | ], 301 | "proj:transform": [ 302 | 60.0, 303 | 0.0, 304 | 300000.0, 305 | 0.0, 306 | -60.0, 307 | 2300040.0, 308 | 0.0, 309 | 0.0, 310 | 1.0 311 | ] 312 | }, 313 | "B11": { 314 | "title": "Band 11 (swir16)", 315 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 316 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/B11.tif", 317 | "proj:shape": [ 318 | 5490, 319 | 5490 320 | ], 321 | "proj:transform": [ 322 | 20.0, 323 | 0.0, 324 | 300000.0, 325 | 0.0, 326 | -20.0, 327 | 2300040.0, 328 | 0.0, 329 | 0.0, 330 | 1.0 331 | ] 332 | }, 333 | "B12": { 334 | "title": "Band 12 (swir22)", 335 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 336 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/B12.tif", 337 | "proj:shape": [ 338 | 5490, 339 | 5490 340 | ], 341 | "proj:transform": [ 342 | 20.0, 343 | 0.0, 344 | 300000.0, 345 | 0.0, 346 | -20.0, 347 | 2300040.0, 348 | 0.0, 349 | 0.0, 350 | 1.0 351 | ] 352 | }, 353 | "AOT": { 354 | "title": "Aerosol Optical Thickness (AOT)", 355 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 356 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/AOT.tif", 357 | "proj:shape": [ 358 | 1830, 359 | 1830 360 | ], 361 | "proj:transform": [ 362 | 60.0, 363 | 0.0, 364 | 300000.0, 365 | 0.0, 366 | -60.0, 367 | 2300040.0, 368 | 0.0, 369 | 0.0, 370 | 1.0 371 | ] 372 | }, 373 | "WVP": { 374 | "title": "Water Vapour (WVP)", 375 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 376 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/WVP.tif", 377 | "proj:shape": [ 378 | 10980, 379 | 10980 380 | ], 381 | "proj:transform": [ 382 | 10.0, 383 | 0.0, 384 | 300000.0, 385 | 0.0, 386 | -10.0, 387 | 2300040.0, 388 | 0.0, 389 | 
0.0, 390 | 1.0 391 | ] 392 | }, 393 | "SCL": { 394 | "title": "Scene Classification Map (SCL)", 395 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 396 | "href": "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/SCL.tif", 397 | "proj:shape": [ 398 | 5490, 399 | 5490 400 | ], 401 | "proj:transform": [ 402 | 20.0, 403 | 0.0, 404 | 300000.0, 405 | 0.0, 406 | -20.0, 407 | 2300040.0, 408 | 0.0, 409 | 0.0, 410 | 1.0 411 | ] 412 | } 413 | }, 414 | "links": [ 415 | { 416 | "rel": "self", 417 | "href": "s3://sentinel-cogs/sentinel-s2-l2a-cogs/2020/S2A_28QCH_20200714_0_L2A/S2A_28QCH_20200714_0_L2A.json", 418 | "type": "application/json" 419 | }, 420 | { 421 | "title": "Source STAC Item", 422 | "rel": "derived_from", 423 | "href": "/tmp/tmpj8uymxue/s3:/cirrus-v0-data-1qm7gekzjucbq/sentinel-s2-l2a/2020/S2A_28QCH_20200714_0_L2A/S2A_28QCH_20200714_0_L2A.json", 424 | "type": "application/json" 425 | } 426 | ] 427 | } 428 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Run Code Checks 2 | 3 | on: 4 | pull_request: 5 | push: 6 | 7 | # When a PR is updated, cancel the jobs from the previous version. Merges 8 | # do not define head_ref, so use run_id to never cancel those jobs. 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | build-wheels: 15 | timeout-minutes: 15 16 | runs-on: ubuntu-latest 17 | 18 | steps: 19 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 20 | 21 | - name: Setup Python 22 | uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 23 | with: 24 | python-version: "3.10" 25 | 26 | - name: Install Dependencies 27 | run: | 28 | python -m pip install --upgrade flit twine 29 | python -m pip freeze 30 | 31 | - name: Build Clean Packages 32 | run: | 33 | mkdir -p ./wheels/clean 34 | flit build --format wheel --format sdist 35 | mv dist/* ./wheels/clean/ 36 | find ./wheels/clean -type f 37 | 38 | - name: Check Packaging 39 | run: | 40 | find ./wheels/clean -type f | xargs twine check 41 | 42 | - name: Upload wheels as artifacts 43 | uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 44 | with: 45 | name: python-wheels 46 | path: ./wheels/clean/ 47 | retention-days: 1 48 | 49 | build-test-env-base: 50 | timeout-minutes: 15 51 | runs-on: ubuntu-latest 52 | 53 | steps: 54 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 55 | 56 | - uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 57 | id: conda_cache 58 | with: 59 | path: /tmp/test_env 60 | key: ${{ runner.os }}-test-env-py310-${{ hashFiles('tests/test-env-py310.yml', 'pyproject.toml') }} 61 | 62 | - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 63 | if: steps.conda_cache.outputs.cache-hit != 'true' 64 | with: 65 | miniforge-variant: Miniforge3 66 | miniforge-version: latest 67 | use-mamba: true 68 | 69 | - name: Dump Conda Environment Info 70 | shell: bash -l {0} 71 | if: steps.conda_cache.outputs.cache-hit != 'true' 72 | run: | 73 | conda info 74 | conda list 75 | conda config --show-sources 76 | conda config --show 77 | printenv | sort 78 | 79 | - name: Build Python Environment for Testing 80 | shell: bash -l {0} 81 | if: steps.conda_cache.outputs.cache-hit != 'true' 
82 | run: | 83 | mamba env create -f tests/test-env-py310.yml -p /tmp/test_env 84 | 85 | - name: Check Python Env 86 | shell: bash -l {0} 87 | if: steps.conda_cache.outputs.cache-hit != 'true' 88 | run: | 89 | mamba env export -p /tmp/test_env 90 | 91 | build-binder-env: 92 | timeout-minutes: 15 93 | runs-on: ubuntu-latest 94 | 95 | steps: 96 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 97 | 98 | - uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 99 | id: binder_cache 100 | with: 101 | path: /tmp/binder_env 102 | key: ${{ runner.os }}-binder-env-${{ hashFiles('binder/environment.yml') }} 103 | 104 | - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 105 | if: steps.binder_cache.outputs.cache-hit != 'true' 106 | with: 107 | miniforge-variant: Miniforge3 108 | miniforge-version: latest 109 | use-mamba: true 110 | 111 | - name: Dump Conda Environment Info 112 | shell: bash -l {0} 113 | if: steps.binder_cache.outputs.cache-hit != 'true' 114 | run: | 115 | conda info 116 | conda list 117 | conda config --show-sources 118 | conda config --show 119 | printenv | sort 120 | 121 | - name: Build Python Environment for Notebooks 122 | shell: bash -l {0} 123 | if: steps.binder_cache.outputs.cache-hit != 'true' 124 | run: | 125 | cd binder 126 | mamba env create -f environment.yml -p /tmp/binder_env 127 | 128 | - name: Check Python Env 129 | shell: bash -l {0} 130 | if: steps.binder_cache.outputs.cache-hit != 'true' 131 | run: | 132 | mamba env export -p /tmp/binder_env 133 | 134 | run-black-check: 135 | timeout-minutes: 15 136 | runs-on: ubuntu-latest 137 | needs: 138 | - build-test-env-base 139 | 140 | steps: 141 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 142 | - name: Get Conda Environment from Cache 143 | uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 144 | id: conda_cache 145 | with: 146 | path: /tmp/test_env 147 | key: ${{ runner.os }}-test-env-py310-${{ hashFiles('tests/test-env-py310.yml', 'pyproject.toml') }} 148 | 149 | - name: Update PATH 150 | shell: bash 151 | run: | 152 | echo "/tmp/test_env/bin" >> $GITHUB_PATH 153 | 154 | - name: Check formatting (black) 155 | shell: bash 156 | run: | 157 | black --version 158 | black --check --diff . 159 | 160 | run-pylint: 161 | timeout-minutes: 15 162 | runs-on: ubuntu-latest 163 | needs: 164 | - build-test-env-base 165 | 166 | steps: 167 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 168 | - name: Get Conda Environment from Cache 169 | uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 170 | id: conda_cache 171 | with: 172 | path: /tmp/test_env 173 | key: ${{ runner.os }}-test-env-py310-${{ hashFiles('tests/test-env-py310.yml', 'pyproject.toml') }} 174 | 175 | - name: Update PATH 176 | shell: bash 177 | run: | 178 | echo "/tmp/test_env/bin" >> $GITHUB_PATH 179 | 180 | - name: Install in Edit mode 181 | shell: bash 182 | run: | 183 | pip install -e . 
--no-deps 184 | 185 | - name: Check with pylint 186 | shell: bash 187 | run: | 188 | echo "Running pylint" 189 | pylint --version 190 | echo "-------------------------------------" 191 | pylint -v odc 192 | 193 | run-mypy: 194 | timeout-minutes: 15 195 | runs-on: ubuntu-latest 196 | needs: 197 | - build-test-env-base 198 | 199 | steps: 200 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 201 | - name: Get Conda Environment from Cache 202 | uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 203 | id: conda_cache 204 | with: 205 | path: /tmp/test_env 206 | key: ${{ runner.os }}-test-env-py310-${{ hashFiles('tests/test-env-py310.yml', 'pyproject.toml') }} 207 | 208 | - name: Update PATH 209 | shell: bash 210 | run: | 211 | echo "/tmp/test_env/bin" >> $GITHUB_PATH 212 | 213 | - name: Check with mypy 214 | shell: bash 215 | run: | 216 | python -m mypy --namespace-packages --explicit-package-bases odc 217 | 218 | 219 | test-with-botocore-and-coverage: 220 | timeout-minutes: 15 221 | runs-on: ubuntu-latest 222 | permissions: 223 | id-token: write 224 | needs: 225 | - build-test-env-base 226 | - run-black-check 227 | 228 | steps: 229 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 230 | 231 | - name: Get Conda Environment from Cache 232 | uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 233 | id: conda_cache 234 | with: 235 | path: /tmp/test_env 236 | 237 | key: ${{ runner.os }}-test-env-py310-${{ hashFiles('tests/test-env-py310.yml', 'pyproject.toml') }} 238 | 239 | - name: Update PATH 240 | shell: bash 241 | run: | 242 | echo "/tmp/test_env/bin" >> $GITHUB_PATH 243 | 244 | - name: Install in Edit mode 245 | shell: bash 246 | run: | 247 | pip install -e '.[botocore]' --no-deps 248 | 249 | - name: Run Tests 250 | shell: bash 251 | run: | 252 | echo "Running Tests" 253 | pytest --cov=. 
\ 254 | --cov-report=html \ 255 | --cov-report=xml:coverage.xml \ 256 | --timeout=30 \ 257 | tests odc 258 | 259 | env: 260 | AWS_DEFAULT_REGION: us-west-2 261 | DASK_TEMPORARY_DIRECTORY: /tmp/dask 262 | 263 | - name: Upload Coverage 264 | if: | 265 | github.repository == 'opendatacube/odc-stac' 266 | 267 | uses: codecov/codecov-action@18283e04ce6e62d37312384ff67231eb8fd56d24 # v5.4.3 268 | with: 269 | fail_ci_if_error: false 270 | verbose: false 271 | use_oidc: true 272 | 273 | test-wheels: 274 | timeout-minutes: 15 275 | runs-on: ubuntu-latest 276 | 277 | needs: 278 | - build-test-env-base 279 | - run-black-check 280 | - build-wheels 281 | 282 | steps: 283 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 284 | 285 | - name: Download wheels from artifacts 286 | uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 287 | with: 288 | name: python-wheels 289 | path: ./wheels/clean 290 | 291 | - name: Get Conda Environment from Cache 292 | uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 293 | id: conda_cache 294 | with: 295 | path: /tmp/test_env 296 | key: ${{ runner.os }}-test-env-py310-${{ hashFiles('tests/test-env-py310.yml', 'pyproject.toml') }} 297 | 298 | - name: Update PATH 299 | shell: bash 300 | run: | 301 | echo "/tmp/test_env/bin" >> $GITHUB_PATH 302 | 303 | - name: Install wheels for testing 304 | shell: bash 305 | run: | 306 | which python 307 | 308 | ls -lh wheels/clean 309 | python -m pip install --no-deps wheels/clean/*.whl 310 | python -m pip check || true 311 | 312 | - name: Run Tests 313 | shell: bash 314 | run: | 315 | echo "Running Tests" 316 | pytest --timeout=30 tests odc 317 | 318 | env: 319 | AWS_DEFAULT_REGION: us-west-2 320 | DASK_TEMPORARY_DIRECTORY: /tmp/dask 321 | 322 | build-notebooks: 323 | timeout-minutes: 15 324 | runs-on: ubuntu-latest 325 | 326 | needs: 327 | - build-binder-env 328 | 329 | steps: 330 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 331 | 332 | - uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 333 | id: nb_cache 334 | with: 335 | path: docs/notebooks 336 | key: docs-notebooks-${{ hashFiles('notebooks/*.py') }} 337 | 338 | - name: Get Conda Environment from Cache 339 | if: steps.nb_cache.outputs.cache-hit != 'true' 340 | uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 341 | id: conda_cache 342 | with: 343 | path: /tmp/binder_env 344 | key: ${{ runner.os }}-binder-env-${{ hashFiles('binder/environment.yml') }} 345 | 346 | - name: Update PATH 347 | if: steps.nb_cache.outputs.cache-hit != 'true' 348 | shell: bash 349 | run: | 350 | echo "/tmp/binder_env/bin" >> $GITHUB_PATH 351 | 352 | - name: Run Notebooks 353 | if: steps.nb_cache.outputs.cache-hit != 'true' 354 | run: | 355 | nb_dir=docs/notebooks 356 | mkdir -p $nb_dir 357 | for src in $(find notebooks -maxdepth 1 -type f -name '*.py'); do 358 | dst="$nb_dir/$(basename ${src%%.py}.ipynb)" 359 | echo "$src -> $dst" 360 | ./binder/render-nb-pipe.sh <$src >$dst 361 | done 362 | ls -lh $nb_dir 363 | 364 | check-docs: 365 | timeout-minutes: 15 366 | runs-on: ubuntu-latest 367 | 368 | needs: 369 | - build-test-env-base 370 | - run-black-check 371 | - build-notebooks 372 | 373 | steps: 374 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 375 | 376 | - name: Get Rendered Notebooks 377 | uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 378 | id: nb_cache 379 | with: 380 | path: docs/notebooks 381 | key: 
docs-notebooks-${{ hashFiles('notebooks/*.py') }} 382 | 383 | - name: Get Conda Environment from Cache 384 | uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 385 | id: conda_cache 386 | with: 387 | path: /tmp/test_env 388 | key: ${{ runner.os }}-test-env-py310-${{ hashFiles('tests/test-env-py310.yml', 'pyproject.toml') }} 389 | 390 | - name: Update PATH 391 | shell: bash 392 | run: | 393 | echo "/tmp/test_env/bin" >> $GITHUB_PATH 394 | 395 | - name: Config 396 | id: cfg 397 | run: | 398 | find notebooks/ -maxdepth 1 -name '*.py' -type f | sort -f -d 399 | 400 | nb_dir="docs/notebooks" 401 | nb_hash=$(python scripts/notebook_hash.py) 402 | echo "Notebooks hash: ${nb_hash}" 403 | echo "nb-hash=${nb_hash}" >> $GITHUB_OUTPUT 404 | echo "nb-hash-short=${nb_hash:0:16}" >> $GITHUB_OUTPUT 405 | 406 | - name: Install in Edit mode 407 | shell: bash 408 | run: | 409 | pip install -e . --no-deps 410 | 411 | - name: Build STAC docs 412 | shell: bash 413 | run: | 414 | make -C docs html 415 | 416 | - name: Deploy to Netlify 417 | id: netlify 418 | if: github.event_name == 'pull_request' 419 | uses: nwtgck/actions-netlify@4cbaf4c08f1a7bfa537d6113472ef4424e4eb654 # v3.0.0 420 | with: 421 | production-branch: "develop" 422 | publish-dir: "docs/_build/html" 423 | deploy-message: "Deploy from GitHub Actions" 424 | github-token: ${{ secrets.GITHUB_TOKEN }} 425 | enable-pull-request-comment: true 426 | enable-commit-comment: false 427 | alias: ${{ steps.cfg.outputs.nb-hash-short }} 428 | 429 | env: 430 | NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} 431 | NETLIFY_SITE_ID: ${{ secrets.NETLIFY_SITE_ID }} 432 | 433 | - name: Print Notice 434 | uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 435 | if: github.event_name == 'pull_request' 436 | env: 437 | NETLIFY_URL: ${{ steps.netlify.outputs.deploy-url }} 438 | with: 439 | script: | 440 | const {NETLIFY_URL} = process.env 441 | core.notice(`Published to: ${NETLIFY_URL}`) 442 | -------------------------------------------------------------------------------- /tests/data/LC08_L2SR_081119_20200101_20200823_02_T2.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "Feature", 3 | "stac_version": "1.0.0-beta.2", 4 | "id": "LC08_L2SR_081119_20200101_20200823_02_T2", 5 | "properties": { 6 | "collection": "landsat-c2l2-sr", 7 | "eo:gsd": 30, 8 | "eo:platform": "LANDSAT_8", 9 | "datetime": "2020-01-01T00:00:00Z", 10 | "eo:cloud_cover": 0, 11 | "eo:sun_azimuth": 93.54107327, 12 | "eo:sun_elevation": 22.72192082, 13 | "landsat:cloud_cover_land": 0, 14 | "landsat:wrs_type": "2", 15 | "landsat:wrs_path": "81", 16 | "landsat:wrs_row": "119", 17 | "landsat:scene_id": "LC80811192020001LGN00", 18 | "landsat:collection_category": "T2", 19 | "landsat:collection_number": "02", 20 | "eo:bands": [ 21 | { 22 | "name": "SR_B1", 23 | "common_name": "coastal", 24 | "gsd": 30, 25 | "center_wavelength": 0.44 26 | }, 27 | { 28 | "name": "SR_B2", 29 | "common_name": "blue", 30 | "gsd": 30, 31 | "center_wavelength": 0.48 32 | }, 33 | { 34 | "name": "SR_B3", 35 | "common_name": "green", 36 | "gsd": 30, 37 | "center_wavelength": 0.56 38 | }, 39 | { 40 | "name": "SR_B4", 41 | "common_name": "red", 42 | "gsd": 30, 43 | "center_wavelength": 0.65 44 | }, 45 | { 46 | "name": "SR_B5", 47 | "common_name": "nir08", 48 | "gsd": 30, 49 | "center_wavelength": 0.86 50 | }, 51 | { 52 | "name": "SR_B6", 53 | "common_name": "swir16", 54 | "gsd": 30, 55 | "center_wavelength": 1.6 56 | }, 57 | { 58 | 
"name": "SR_B7", 59 | "common_name": "swir22", 60 | "gsd": 30, 61 | "center_wavelength": 2.2 62 | } 63 | ], 64 | "constellation": "Landsat", 65 | "instruments": [ 66 | "oli", 67 | "tirs" 68 | ], 69 | "view:off_nadir": 0, 70 | "proj:epsg": 3031 71 | }, 72 | "geometry": { 73 | "type": "Polygon", 74 | "coordinates": [ 75 | [ 76 | [ 77 | 106.0673404715087, 78 | -79.61169259634802 79 | ], 80 | [ 81 | 111.50547149149213, 82 | -81.06844416149866 83 | ], 84 | [ 85 | 101.33143658242044, 86 | -81.93331557352852 87 | ], 88 | [ 89 | 97.0135423519546, 90 | -80.34938321953908 91 | ], 92 | [ 93 | 106.0673404715087, 94 | -79.61169259634802 95 | ] 96 | ] 97 | ] 98 | }, 99 | "links": [ 100 | { 101 | "rel": "self", 102 | "href": "./LC08_L2SR_081119_20200101_20200823_02_T2.json", 103 | "type": "application/json" 104 | } 105 | ], 106 | "assets": { 107 | "thumbnail": { 108 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_thumb_small.jpeg", 109 | "type": "image/jpeg", 110 | "title": "Thumbnail image" 111 | }, 112 | "reduced_resolution_browse": { 113 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_thumb_large.jpeg", 114 | "type": "image/jpeg", 115 | "title": "Reduced resolution browse image" 116 | }, 117 | "index": { 118 | "href": "https://landsatlook.usgs.gov/stac-browser/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2", 119 | "type": "text/html", 120 | "title": "HTML index page" 121 | }, 122 | "SR_B1.TIF": { 123 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_SR_B1.TIF", 124 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 125 | "title": "Coastal/Aerosol Band (B1)", 126 | "description": "Collection 2 Level-2 Coastal/Aerosol Band (B1) Surface Reflectance", 127 | "eo:bands": [ 128 | 0 129 | ], 130 | "proj:transform": [ 131 | 1164.15, 132 | 0.0, 133 | 857085.0, 134 | 0.0, 135 | -1150.6499999999999, 136 | -127485.0, 137 | 0.0, 138 | 0.0, 139 | 1.0 140 | ], 141 | "proj:shape": [ 142 | 200, 143 | 200 144 | ] 145 | }, 146 | "SR_B2.TIF": { 147 | "href": "tests/data-files/landsat/LC08_L2SR_081119_20200101_20200823_02_T2_SR_B2_small.TIF", 148 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 149 | "title": "Blue Band (B2)", 150 | "description": "Collection 2 Level-2 Blue Band (B2) Surface Reflectance", 151 | "eo:bands": [ 152 | 1 153 | ], 154 | "proj:transform": [ 155 | 1164.15, 156 | 0.0, 157 | 857085.0, 158 | 0.0, 159 | -1150.6499999999999, 160 | -127485.0, 161 | 0.0, 162 | 0.0, 163 | 1.0 164 | ], 165 | "proj:shape": [ 166 | 200, 167 | 200 168 | ] 169 | }, 170 | "SR_B3.TIF": { 171 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_SR_B3.TIF", 172 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 173 | "title": "Green Band (B3)", 174 | "description": "Collection 2 Level-2 Green Band (B3) Surface Reflectance", 175 | "eo:bands": [ 176 | 2 177 | ], 178 | "proj:transform": [ 179 | 1164.15, 180 | 0.0, 181 | 857085.0, 182 | 0.0, 183 | -1150.6499999999999, 184 | -127485.0, 185 | 0.0, 186 | 0.0, 187 | 1.0 188 | ], 
189 | "proj:shape": [ 190 | 200, 191 | 200 192 | ] 193 | }, 194 | "SR_B4.TIF": { 195 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_SR_B4.TIF", 196 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 197 | "title": "Red Band (B4)", 198 | "description": "Collection 2 Level-2 Red Band (B4) Surface Reflectance", 199 | "eo:bands": [ 200 | 3 201 | ], 202 | "proj:transform": [ 203 | 1164.15, 204 | 0.0, 205 | 857085.0, 206 | 0.0, 207 | -1150.6499999999999, 208 | -127485.0, 209 | 0.0, 210 | 0.0, 211 | 1.0 212 | ], 213 | "proj:shape": [ 214 | 200, 215 | 200 216 | ] 217 | }, 218 | "SR_B5.TIF": { 219 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_SR_B5.TIF", 220 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 221 | "title": "Near Infrared Band 0.8 (B5)", 222 | "description": "Collection 2 Level-2 Near Infrared Band 0.8 (B5) Surface Reflectance", 223 | "eo:bands": [ 224 | 4 225 | ], 226 | "proj:transform": [ 227 | 1164.15, 228 | 0.0, 229 | 857085.0, 230 | 0.0, 231 | -1150.6499999999999, 232 | -127485.0, 233 | 0.0, 234 | 0.0, 235 | 1.0 236 | ], 237 | "proj:shape": [ 238 | 200, 239 | 200 240 | ] 241 | }, 242 | "SR_B6.TIF": { 243 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_SR_B6.TIF", 244 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 245 | "title": "Short-wave Infrared Band 1.6 (B6)", 246 | "description": "Collection 2 Level-2 Short-wave Infrared Band 1.6 (B6) Surface Reflectance", 247 | "eo:bands": [ 248 | 5 249 | ], 250 | "proj:transform": [ 251 | 1164.15, 252 | 0.0, 253 | 857085.0, 254 | 0.0, 255 | -1150.6499999999999, 256 | -127485.0, 257 | 0.0, 258 | 0.0, 259 | 1.0 260 | ], 261 | "proj:shape": [ 262 | 200, 263 | 200 264 | ] 265 | }, 266 | "SR_B7.TIF": { 267 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_SR_B7.TIF", 268 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 269 | "title": "Short-wave Infrared Band 2.2 (B7)", 270 | "description": "Collection 2 Level-2 Short-wave Infrared Band 2.2 (B7) Surface Reflectance", 271 | "eo:bands": [ 272 | 6 273 | ], 274 | "proj:transform": [ 275 | 1164.15, 276 | 0.0, 277 | 857085.0, 278 | 0.0, 279 | -1150.6499999999999, 280 | -127485.0, 281 | 0.0, 282 | 0.0, 283 | 1.0 284 | ], 285 | "proj:shape": [ 286 | 200, 287 | 200 288 | ] 289 | }, 290 | "SR_QA_AEROSOL.TIF": { 291 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_SR_QA_AEROSOL.TIF", 292 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 293 | "title": "Aerosol Quality Analysis Band", 294 | "description": "Collection 2 Level-2 Aerosol Quality Analysis Band (ANG) Surface Reflectance", 295 | "proj:transform": [ 296 | 1164.15, 297 | 0.0, 298 | 857085.0, 299 | 0.0, 300 | -1150.6499999999999, 301 | -127485.0, 302 | 0.0, 303 | 0.0, 304 | 1.0 305 | ], 306 | "proj:shape": [ 307 | 200, 308 | 200 309 | ] 310 | }, 311 | "ANG.txt": { 312 | "href": 
"https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_ANG.txt", 313 | "type": "text/plain", 314 | "title": "Angle Coefficients File", 315 | "description": "Collection 2 Level-2 Angle Coefficients File (ANG) Surface Reflectance" 316 | }, 317 | "MTL.txt": { 318 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_MTL.txt", 319 | "type": "text/plain", 320 | "title": "Product Metadata File", 321 | "description": "Collection 2 Level-2 Product Metadata File (MTL) Surface Reflectance" 322 | }, 323 | "MTL.xml": { 324 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_MTL.xml", 325 | "type": "application/xml", 326 | "title": "Product Metadata File (xml)", 327 | "description": "Collection 2 Level-1 Product Metadata File (xml) Surface Reflectance" 328 | }, 329 | "MTL.json": { 330 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_MTL.json", 331 | "type": "application/json", 332 | "title": "Product Metadata File (json)", 333 | "description": "Collection 2 Level-2 Product Metadata File (json) Surface Reflectance" 334 | }, 335 | "QA_PIXEL.TIF": { 336 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_QA_PIXEL.TIF", 337 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 338 | "title": "Pixel Quality Assessment Band", 339 | "description": "Collection 2 Level-2 Pixel Quality Assessment Band Surface Reflectance", 340 | "proj:transform": [ 341 | 1164.15, 342 | 0.0, 343 | 857085.0, 344 | 0.0, 345 | -1150.6499999999999, 346 | -127485.0, 347 | 0.0, 348 | 0.0, 349 | 1.0 350 | ], 351 | "proj:shape": [ 352 | 200, 353 | 200 354 | ] 355 | }, 356 | "QA_RADSAT.TIF": { 357 | "href": "https://landsatlook.usgs.gov/data/collection02/level-2/standard/oli-tirs/2020/081/119/LC08_L2SR_081119_20200101_20200823_02_T2/LC08_L2SR_081119_20200101_20200823_02_T2_QA_RADSAT.TIF", 358 | "type": "image/tiff; application=geotiff; profile=cloud-optimized", 359 | "title": "Radiometric Saturation Quality Assessment Band", 360 | "description": "Collection 2 Level-2 Radiometric Saturation Quality Assessment Band Surface Reflectance", 361 | "proj:transform": [ 362 | 1164.15, 363 | 0.0, 364 | 857085.0, 365 | 0.0, 366 | -1150.6499999999999, 367 | -127485.0, 368 | 0.0, 369 | 0.0, 370 | 1.0 371 | ], 372 | "proj:shape": [ 373 | 200, 374 | 200 375 | ] 376 | } 377 | }, 378 | "bbox": [ 379 | 97.0135423519546, 380 | -81.93331557352852, 381 | 111.50547149149213, 382 | -79.61169259634802 383 | ], 384 | "stac_extensions": [ 385 | "eo", 386 | "https://landsat.usgs.gov/stac/landsat-extension/schema.json", 387 | "view", 388 | "projection" 389 | ], 390 | "collection": "landsat-c2l2-sr", 391 | "description": "Landsat Collection 2 Level-2 Surface Reflectance Product" 392 | } 393 | --------------------------------------------------------------------------------