├── .codespellignore
├── .github
│   ├── dependabot.yml
│   ├── pull_request_template.md
│   └── workflows
│       ├── continuous-integration.yml
│       └── release.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yaml
├── CHANGELOG.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── codecov.yml
├── docs
│   ├── Makefile
│   ├── make.bat
│   ├── requirements.txt
│   └── source
│       ├── api.rst
│       ├── conf.py
│       ├── generated
│       │   └── stactask.rst
│       ├── index.rst
│       └── usage.rst
├── pyproject.toml
├── stactask
│   ├── __init__.py
│   ├── asset_io.py
│   ├── config.py
│   ├── exceptions.py
│   ├── logging.py
│   ├── py.typed
│   ├── task.py
│   └── utils.py
└── tests
    ├── __init__.py
    ├── conftest.py
    ├── fixtures
    │   └── sentinel2-l2a-j2k-payload.json
    ├── tasks.py
    ├── test_task.py
    ├── test_task_download.py
    └── test_utils.py
/.codespellignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stac-utils/stac-task/f544899e49c6f8487deaaca9e4272ed71b6071fd/.codespellignore -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: "/" 5 | schedule: 6 | interval: weekly 7 | - package-ecosystem: pip 8 | directory: "/" 9 | schedule: 10 | interval: weekly 11 | versioning-strategy: increase-if-necessary 12 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | **Related Issue(s):** 2 | 3 | - # 4 | 5 | 6 | **Proposed Changes:** 7 | 8 | 1. 9 | 2. 10 | 11 | **PR Checklist:** 12 | 13 | - [ ] I have added my changes to the [CHANGELOG](https://github.com/stac-utils/stac-task/blob/main/CHANGELOG.md) **or** a CHANGELOG entry is not required. 
14 | -------------------------------------------------------------------------------- /.github/workflows/continuous-integration.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | jobs: 10 | standard: 11 | name: Lint and test 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | python-version: 16 | - "3.9" 17 | - "3.10" 18 | - "3.11" 19 | - "3.12" 20 | steps: 21 | - uses: actions/checkout@v4 22 | - uses: actions/setup-python@v5 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | cache: pip 26 | - name: Install dependencies for linting 27 | run: pip install '.[dev]' 28 | - name: Lint 29 | run: pre-commit run --all-files 30 | - name: Install dependencies for testing 31 | run: pip install '.[test]' 32 | - name: Test 33 | run: pytest 34 | codecov: 35 | name: Codecov 36 | needs: 37 | - standard 38 | runs-on: ubuntu-latest 39 | steps: 40 | - uses: actions/checkout@v4 41 | - uses: actions/setup-python@v5 42 | with: 43 | python-version: "3.12" 44 | cache: pip 45 | - name: Install 46 | run: pip install '.[test]' 47 | - name: Test 48 | run: pytest --cov=stactask 49 | - name: Upload coverage to Codecov 50 | uses: codecov/codecov-action@v4 51 | with: 52 | token: ${{ secrets.CODECOV_TOKEN }} 53 | file: ./coverage.xml 54 | fail_ci_if_error: false 55 | docs: 56 | name: Docs 57 | needs: 58 | - standard 59 | runs-on: ubuntu-latest 60 | steps: 61 | - uses: actions/checkout@v4 62 | - uses: actions/setup-python@v5 63 | with: 64 | python-version: "3.11" 65 | cache: pip 66 | - name: Install 67 | run: pip install . -r docs/requirements.txt 68 | - name: Make docs 69 | run: cd docs && make html 70 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - "*" 7 | 8 | jobs: 9 | release: 10 | name: release 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | 15 | - name: Set up Python 3.x 16 | uses: actions/setup-python@v5 17 | with: 18 | python-version: "3.x" 19 | 20 | - name: Install release dependencies 21 | run: | 22 | python -m pip install --upgrade pip 23 | pip install build twine 24 | 25 | - name: Build and publish package 26 | env: 27 | TWINE_USERNAME: ${{ secrets.PYPI_STACUTILS_USERNAME }} 28 | TWINE_PASSWORD: ${{ secrets.PYPI_STACUTILS_PASSWORD }} 29 | run: | 30 | python -m build 31 | twine upload dist/* 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/build/ 73 | docs/source/generated 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # poetry 99 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 100 | # This is especially recommended for binary packages to ensure reproducibility, and is more 101 | # commonly ignored for libraries. 102 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 103 | #poetry.lock 104 | 105 | # pdm 106 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 107 | #pdm.lock 108 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 109 | # in version control. 110 | # https://pdm.fming.dev/#use-with-ide 111 | .pdm.toml 112 | 113 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 114 | __pypackages__/ 115 | 116 | # Celery stuff 117 | celerybeat-schedule 118 | celerybeat.pid 119 | 120 | # SageMath parsed files 121 | *.sage.py 122 | 123 | # Environments 124 | .env 125 | .venv 126 | env/ 127 | venv/ 128 | ENV/ 129 | env.bak/ 130 | venv.bak/ 131 | 132 | # Spyder project settings 133 | .spyderproject 134 | .spyproject 135 | 136 | # Rope project settings 137 | .ropeproject 138 | 139 | # mkdocs documentation 140 | /site 141 | 142 | # mypy 143 | .mypy_cache/ 144 | .dmypy.json 145 | dmypy.json 146 | 147 | # Pyre type checker 148 | .pyre/ 149 | 150 | # pytype static type analyzer 151 | .pytype/ 152 | 153 | # Cython debug symbols 154 | cython_debug/ 155 | 156 | # PyCharm 157 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 158 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 159 | # and can be added to the global gitignore or merged into this file. For a more nuclear 160 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
161 | #.idea/ 162 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Configuration file for pre-commit (https://pre-commit.com/). 2 | # Please run `pre-commit run --all-files` when adding or changing entries. 3 | 4 | repos: 5 | - repo: https://github.com/pre-commit/pre-commit-hooks 6 | rev: v4.5.0 7 | hooks: 8 | - id: trailing-whitespace 9 | - id: end-of-file-fixer 10 | - id: check-yaml 11 | - id: check-added-large-files 12 | - repo: https://github.com/codespell-project/codespell 13 | rev: v2.2.6 14 | hooks: 15 | - id: codespell 16 | args: [--ignore-words=.codespellignore] 17 | types_or: [jupyter, markdown, python, shell] 18 | - repo: https://github.com/psf/black 19 | rev: 24.2.0 20 | hooks: 21 | - id: black 22 | - repo: https://github.com/pre-commit/mirrors-mypy 23 | rev: v1.9.0 24 | hooks: 25 | - id: mypy 26 | additional_dependencies: 27 | - pytest 28 | - types-setuptools == 65.7.0.3 29 | - repo: https://github.com/astral-sh/ruff-pre-commit 30 | rev: v0.4.1 31 | hooks: 32 | - id: ruff 33 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: "ubuntu-22.04" 5 | tools: 6 | python: "3.11" 7 | 8 | python: 9 | install: 10 | - requirements: docs/requirements.txt 11 | 12 | sphinx: 13 | configuration: docs/source/conf.py 14 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) 6 | and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 7 | 8 | ## [0.6.1] 9 | 10 | ### Added 11 | 12 | - ([#167](https://github.com/stac-utils/stac-task/pull/167)) Adds workflow-level 13 | options to the ProcessDefinition object in a new `workflow_options` field. They are 14 | combined with each task's options, giving precedence to the task options on conflict. 15 | - ([#167](https://github.com/stac-utils/stac-task/pull/167)) Adds a `workflow_options` 16 | property to the `Task` class that returns the `workflow_options` dictionary from the 17 | `ProcessDefinition` object. 18 | - ([#167](https://github.com/stac-utils/stac-task/pull/167)) Adds a `task_options` 19 | property to the `Task` class that returns the task options from the `tasks` dictionary 20 | in the `ProcessDefinition` object. 21 | 22 | ### Deprecated 23 | 24 | - ([#166](https://github.com/stac-utils/stac-task/pull/166)) Bare `ProcessDefinition` 25 | objects are deprecated in favor of arrays of `ProcessDefinition` objects. 26 | 27 | ## [0.6.0] 28 | 29 | ### ⚠️ Breaking Change 30 | 31 | - ([#147](https://github.com/stac-utils/stac-task/pull/147)) Moved 32 | `Task.validate` from class method to instance method, making other instance 33 | convenience methods (e.g., `self.parameters`) available to implementers. 34 | 35 | ## [0.5.1] - 2024-05-23 36 | 37 | ### Added 38 | 39 | - download_item_assets and download_items_assets methods now accept a parameter `file_name` for configuring the filename to save the STAC Item as. If unset, it defaults to `item.json` and if set to `None` the filename is inferred from the ID. 
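  A short sketch of the new parameter in use (the `task` and `item` variables are illustrative):

  ```python
  # default: the downloaded STAC Item is saved as item.json
  item = task.download_item_assets(item)

  # save the Item under an explicit filename instead
  item = task.download_item_assets(item, file_name="source-item.json")

  # file_name=None: the filename is inferred from the Item ID
  item = task.download_item_assets(item, file_name=None)
  ```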
40 | 41 | ## [0.5.0] - 2024-05-08 42 | 43 | ### Deprecated 44 | 45 | - Support for Python 3.8 has been removed. 46 | - CLI flags `--skip-upload` and `--skip-validation` deprecated in favor of `--upload/--no-upload` and `--validate/--no-validate` 47 | - Task constructor arguments `skip_upload` and `skip_validation` deprecated in favor of `upload` and `validate` 48 | 49 | ### Fixed 50 | 51 | - Several CLI arguments were missing `help` descriptions 52 | 53 | ### Changed 54 | 55 | - Replaced the use of fsspec with stac-asset for downloading Item Assets 56 | - `--local` flag no longer turns off validation 57 | - The `processing:software` field is no longer added to Items by default. This is 58 | because the intention of the STAC Processing Extension is to add metadata about the 59 | processing of the data, whereas stactask is frequently used only for processing 60 | metadata. Users wishing to retain this field can call the method `Task.add_software_version_to_item(item)` on the resulting item to add it. 61 | - Task logging now identifies the task instance that is logging, e.g., 62 | `INFO:my-task-name:[my-collection/workflow-my-workflow/task-1] Task did a thing.` 63 | - Collection assignment now assigns the first matching collection expression, rather 64 | than the last. 65 | 66 | ### Added 67 | 68 | - Property `collection_mapping` to `Task` class to retrieve the collection mappings 69 | from upload_options 70 | - Utils method `find_collection` to allow the retrieval of the collection name for 71 | an Item dict 72 | - Task method `upload_local_item_assets_to_s3(item)` to upload all local assets to S3 73 | - Added support for using stdin and stdout as input and output for tasks, e.g., `cat in.json | src/mytask/mytask.py run --local | tee out.json` 74 | 75 | ## [v0.4.2] - 2024-03-08 76 | 77 | ### Added 78 | 79 | - ([#92](https://github.com/stac-utils/stac-task/pull/92)) Task.upload_item_assets_to_s3 and asset_io.upload_item_assets_to_s3 support explicitly specifying the boto3utils.s3 object. 80 | 81 | ## [v0.4.1] - 2024-03-06 82 | 83 | ### Fixed 84 | 85 | - ([#90](https://github.com/stac-utils/stac-task/pull/90)) Block asset_io 86 | module from reaching out to upstream stac APIs (especially on NASA Wednesdays) 87 | via `transform_hrefs=False` 88 | 89 | ## [v0.4.0] - 2024-02-14 90 | 91 | ### Fixed 92 | 93 | - ([#86](https://github.com/stac-utils/stac-task/pull/86)) Guard cleanup of workdir to ensure task was actually created. 94 | 95 | ### Added 96 | 97 | - ([#72](https://github.com/stac-utils/stac-task/pull/72)) Given that `_get_file` is part of the `AsyncFileSystem` spec, this 98 | adds the synchronous `get_file` as a way to retrieve files if `_get_file` is 99 | not found. 100 | - ([#77](https://github.com/stac-utils/stac-task/pull/77)) Added option `keep_original_filenames` to download routines to 101 | support legacy applications dependent on filename specifics. 102 | 103 | ## [v0.3.0] - 2023-12-20 104 | 105 | ### Changed 106 | 107 | - handler now explicitly performs workdir cleanup 108 | - workdir cleanup is correctly defensive and logs errors 109 | 110 | ## [v0.2.0] - 2023-11-16 111 | 112 | ### Changed 113 | 114 | - Ensure `workdir` is an absolute path 115 | ([#54](https://github.com/stac-utils/stac-task/pull/54)). 116 | - When a `workdir` is set for a `Task` the `workdir` will no longer be removed 117 | by default ([#51](https://github.com/stac-utils/stac-task/pull/51)).
That is, 118 | the `save_workdir` argument to `Task` constructor now defaults to `None`, and 119 | if left as `None` the default behavior is now conditional on whether or not a 120 | `workdir` is specified. 121 | 122 | - If `workdir` is `None`, a temp directory will be created and `save_workdir` 123 | will default to `False` (remove working directory). 124 | - If a `workdir` is specified, then `save_workdir` will default to `True` 125 | (keep working directory). 126 | 127 | In either case, an explicit `True` or `False` value for `save_workdir` will 128 | take precedence. 129 | 130 | ## [v0.1.1] - 2023-07-12 131 | 132 | ### Fixed 133 | 134 | - Typing ([#11](https://github.com/stac-utils/stac-task/pull/11), [#25](https://github.com/stac-utils/stac-task/pull/25)) 135 | - Removed console scripts ([#18](https://github.com/stac-utils/stac-task/pull/18)) 136 | 137 | ## [v0.1.0] - 2022-10-31 138 | 139 | Initial release. 140 | 141 | [unreleased]: 142 | [0.6.1]: 143 | [0.6.0]: 144 | [0.5.1]: 145 | [0.5.0]: 146 | [v0.4.2]: 147 | [v0.4.1]: 148 | [v0.4.0]: 149 | [v0.3.0]: 150 | [v0.2.0]: 151 | [v0.1.1]: 152 | [v0.1.0]: 153 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. 
For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2022 Element 84 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include stactask/py.typed 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # STAC Task (stac-task) 3 | 4 | [![Build Status](https://github.com/stac-utils/stac-task/workflows/CI/badge.svg?branch=main)](https://github.com/stac-utils/stac-task/actions/workflows/continuous-integration.yml) 5 | [![PyPI version](https://badge.fury.io/py/stac-task.svg)](https://badge.fury.io/py/stac-task) 6 | [![Documentation Status](https://readthedocs.org/projects/stac-task/badge/?version=latest)](https://stac-task.readthedocs.io/en/latest/?badge=latest) 7 | [![codecov](https://codecov.io/gh/stac-utils/stac-task/branch/main/graph/badge.svg)](https://codecov.io/gh/stac-utils/stac-task) 8 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 9 | 10 | - [Quickstart for Creating New Tasks](#quickstart-for-creating-new-tasks) 11 | - [Task Input](#task-input) 12 | - [ProcessDefinition Object](#processdefinition-object) 13 | - [UploadOptions Object](#uploadoptions-object) 14 | - [path\_template](#path_template) 15 | - [collections](#collections) 16 | - [tasks](#tasks) 17 | - [TaskConfig Object](#taskconfig-object) 18 | - [workflow_options](#workflow_options) 19 | - [Full ProcessDefinition Example](#full-processdefinition-example) 20 | - [Migration](#migration) 21 | - [0.4.x -\> 0.5.x](#04x---05x) 22 | - [0.5.x -\> 0.6.0](#05x---060) 23 | - [Development](#development) 24 | - [Contributing](#contributing) 25 | 26 | This Python library consists of the Task class, which is used to create custom tasks 27 | based on a "STAC In, STAC Out" approach. The Task class acts as a wrapper around custom 28 | code and provides several convenience methods for modifying STAC Items, creating derived 29 | Items, and providing a CLI. 30 | 31 | This library is based on a [branch of cirrus-lib](https://github.com/cirrus-geo/cirrus-lib/tree/features/task-class) 32 | but aims to be more generic. 33 | 34 | ## Quickstart for Creating New Tasks 35 | 36 | ```python 37 | from typing import Any 38 | 39 | from stactask import Task, DownloadConfig 40 | 41 | class MyTask(Task): 42 | name = "my-task" 43 | description = "this task does it all" 44 | 45 | def validate(self) -> bool: 46 | return len(self.items) == 1 47 | 48 | def process(self, **kwargs: Any) -> list[dict[str, Any]]: 49 | item = self.items[0] 50 | 51 | # download a datafile 52 | item = self.download_item_assets( 53 | item, 54 | config=DownloadConfig(include=['data']) 55 | ) 56 | 57 | # operate on the local file to create a new asset, then upload it to S3 58 | item = self.upload_item_assets_to_s3(item) 59 | 60 | # this task returns a single item 61 | return [item.to_dict(include_self_link=True, transform_hrefs=False)] 62 | ``` 63 | 
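To expose a task like this as a command-line program or an AWS Lambda function, it can be wired up as follows (a minimal sketch; the module name and layout are illustrative):

```python
# mytask.py -- defines MyTask as in the quickstart above
from typing import Any

# Lambda entry point: delegate to the Task's built-in handler
def handler(event: dict[str, Any], context: Any) -> dict[str, Any]:
    return MyTask.handler(event)

# CLI entry point, e.g. `python mytask.py run --local input.json`
if __name__ == "__main__":
    MyTask.cli()
```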
64 | ## Task Input 65 | 66 | Task input is often referred to as a 'payload'. 67 | 68 | | Field Name | Type | Description | 69 | | ---------- | ------------------------- | --------------------------------------------------- | 70 | | type | string | Must be FeatureCollection | 71 | | features | [Item] | An array of STAC Items | 72 | | process | [`ProcessDefinition`] | An array of `ProcessDefinition` objects. | 73 | | ~~process~~ | ~~`ProcessDefinition`~~ | **DEPRECATED** A `ProcessDefinition` object | 74 | 75 | ### ProcessDefinition Object 76 | 77 | A Task can be provided additional configuration via the 'process' field in the input 78 | payload. 79 | 80 | | Field Name | Type | Description | 81 | | ---------------- | ------------------ | ------------------------------------------------------------------------ | 82 | | description | string | Description of the process configuration | 83 | | upload_options | `UploadOptions` | An `UploadOptions` object | 84 | | tasks | Map | Dictionary of task configurations. | 85 | | ~~tasks~~ | ~~[`TaskConfig`]~~ | **DEPRECATED** A list of `TaskConfig` objects. | 86 | | workflow_options | Map | Dictionary of configuration options applied to all tasks in the workflow | 87 | 88 | 89 | #### UploadOptions Object 90 | 91 | Options used when uploading Item assets to a remote server can be specified in an 92 | 'upload_options' field in the `ProcessDefinition` object. 93 | 94 | | Field Name | Type | Description | 95 | | ------------- | ------------- | --------------------------------------------------------------------------------------- | 96 | | path_template | string | **REQUIRED** A string template for specifying the location of uploaded assets | 97 | | public_assets | [str] | A list of asset keys that should be marked as public when uploaded | 98 | | headers | Map | A set of key, value headers to send when uploading data to s3 | 99 | | collections | Map | A mapping of output collection name to a JSONPath pattern (for matching Items) | 100 | | s3_urls | bool | Controls whether the final published URLs should be s3 (s3://*bucket*/*key*) or https URLs | 101 | 102 | ##### path_template 103 | 104 | The 'path_template' string is a way to control the output location of uploaded assets 105 | from a STAC Item using metadata from the Item itself. The template can contain fixed 106 | strings along with variables used for substitution. See [the PySTAC documentation for 107 | `LayoutTemplate`](https://pystac.readthedocs.io/en/stable/api/layout.html#pystac.layout.LayoutTemplate) 108 | for a list of supported template variables and their meaning. 109 | 110 | ##### collections 111 | 112 | The 'collections' dictionary provides a collection ID and JSONPath pattern for matching 113 | against STAC Items. At the end of processing, before the final STAC Items are returned, 114 | the Task class can be used to assign all of the Items to specific collection IDs. For 115 | each Item the JSONPath pattern for all collections will be compared. The first match 116 | will cause the Item's Collection ID to be set to the provided value. 117 | 118 | For example: 119 | 120 | ```json 121 | "collections": { 122 | "landsat-c2l2": "$[?(@.id =~ 'LC08.*')]" 123 | } 124 | ``` 125 | 126 | In this example, the task will set any STAC Items that have an ID beginning with "LC08" 127 | to the `landsat-c2l2` collection. 128 | 129 | See [JSONPath Online Evaluator](https://jsonpath.com) to experiment with JSONPath and 130 | [regex101](https://regex101.com) to experiment with regex. 131 | 132 | #### tasks 133 | 134 | The 'tasks' field is a dictionary with an optional key for each task. If present, it 135 | contains a dictionary that is converted to a set of keywords and passed to the Task's 136 | `process` function. The documentation for each Task will provide the list of available 137 | parameters.
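Inside a task, these values arrive as ordinary keyword arguments to `process`. A minimal sketch (the task name and parameter are illustrative):

```python
from typing import Any

from stactask import Task

class TaskA(Task):
    name = "task-a"  # matches the key in the payload's "tasks" dictionary
    description = "Example task"

    def process(self, param1: str = "default", **kwargs: Any) -> list[dict[str, Any]]:
        # param1 is supplied from the payload's "tasks" dictionary
        return [item.to_dict(transform_hrefs=False) for item in self.items]
```

For example, given the following 'tasks' configuration: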
138 | 139 | ```json 140 | { 141 | "tasks": { 142 | "task-a": { 143 | "param1": "value1" 144 | }, 145 | "task-c": { 146 | "param2": "value2" 147 | } 148 | } 149 | } 150 | ``` 151 | 152 | In the example above, a task named `task-a` would have `param1=value1` passed as a 153 | keyword, while `task-c` would have `param2=value2` passed. If there were a `task-b` to 154 | be run, it would not be passed any keywords. 155 | 156 | #### TaskConfig Object 157 | 158 | **DEPRECATED** The 'tasks' field _should_ be a dictionary of parameters, with task names 159 | as keys. See [tasks](#tasks) for more information. `TaskConfig` objects are supported 160 | for backwards compatibility. 161 | 162 | | Field Name | Type | Description | 163 | | ---------- | ------------- | ----------------------------------------------------------------------------------- | 164 | | name | str | **REQUIRED** Name of the task | 165 | | parameters | Map | Dictionary of keyword parameters that will be passed to the Task `process` function | 166 | 167 | #### workflow_options 168 | 169 | The 'workflow_options' field is a dictionary of options that apply to all tasks in the 170 | workflow. The 'workflow_options' dictionary is combined with each task's option 171 | dictionary. If a key in the 'workflow_options' dictionary conflicts with a key in a 172 | task's option dictionary, the task option value takes precedence. 173 | 174 | ### Full ProcessDefinition Example 175 | 176 | ```json 177 | { 178 | "description": "My process configuration", 179 | "upload_options": { 180 | "path_template": "s3://my-bucket/${collection}/${year}/${month}/${day}/${id}", 181 | "collections": { 182 | "landsat-c2l2": "$[?(@.id =~ 'LC08.*')]" 183 | } 184 | }, 185 | "tasks": { 186 | "task-name": { 187 | "param": "value" 188 | } 189 | } 190 | } 191 | ``` 192 | 193 | ## Migration 194 | 195 | ### 0.4.x -> 0.5.x 196 | 197 | In 0.5.0, the previous use of fsspec to download Item Assets has been replaced with the 198 | stac-asset library. This has necessitated a change in the parameters that the download 199 | methods accept. 200 | 201 | The primary change is that the Task methods `download_item_assets` and 202 | `download_items_assets` (items plural) now accept fewer explicit and implicit (kwargs) 203 | parameters. 204 | 205 | Previously, the methods looked like: 206 | 207 | ```python 208 | def download_item_assets( 209 | self, 210 | item: Item, 211 | path_template: str = "${collection}/${id}", 212 | keep_original_filenames: bool = False, 213 | **kwargs: Any, 214 | ) -> Item: 215 | ``` 216 | 217 | but now look like: 218 | 219 | ```python 220 | def download_item_assets( 221 | self, 222 | item: Item, 223 | path_template: str = "${collection}/${id}", 224 | config: Optional[DownloadConfig] = None, 225 | ) -> Item: 226 | ``` 227 | 228 | Similarly, the `asset_io` package methods were previously: 229 | 230 | ```python 231 | async def download_item_assets( 232 | item: Item, 233 | assets: Optional[list[str]] = None, 234 | save_item: bool = True, 235 | overwrite: bool = False, 236 | path_template: str = "${collection}/${id}", 237 | absolute_path: bool = False, 238 | keep_original_filenames: bool = False, 239 | **kwargs: Any, 240 | ) -> Item: 241 | ``` 242 | 243 | and are now: 244 | 245 | ```python 246 | async def download_item_assets( 247 | item: Item, 248 | path_template: str = "${collection}/${id}", 249 | config: Optional[DownloadConfig] = None, 250 | ) -> Item: 251 | ``` 252 | 
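At a call site, the migration looks roughly like this (a sketch; the asset key and requester-pays flag are illustrative):

```python
from stactask import DownloadConfig

# 0.4.x (fsspec-based): options were passed as keyword arguments
# item = task.download_item_assets(item, assets=["data"], requester_pays=True)

# 0.5.x (stac-asset-based): options are gathered into a DownloadConfig
item = task.download_item_assets(
    item,
    config=DownloadConfig(include=["data"], s3_requester_pays=True),
)
```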
253 | Additionally, `kwargs` keys were set to pass configuration through to fsspec. The most 254 | common parameter was `requester_pays`, to set the Requester Pays flag in AWS S3 255 | requests. 256 | 257 | Many of these parameters can be directly translated into configuration passed in a 258 | `DownloadConfig` object, which is just a wrapper over the `stac_asset.Config` object. 259 | 260 | Migration of these various parameters to `DownloadConfig` is as follows: 261 | 262 | - `assets`: set `include` 263 | - `requester_pays`: set `s3_requester_pays` = True 264 | - `keep_original_filenames`: set `file_name_strategy` to 265 | `FileNameStrategy.FILE_NAME` if True or `FileNameStrategy.KEY` if False 266 | - `overwrite`: set `overwrite` 267 | - `save_item`: none, Item is always saved 268 | - `absolute_path`: none. To create or retrieve the Asset hrefs as absolute paths, use 269 | either `Item#make_all_asset_hrefs_absolute()` or `Asset#get_absolute_href()` 270 | 271 | ### 0.5.x -> 0.6.0 272 | 273 | Previously, the `validate` method was a _classmethod_, validating the payload argument 274 | passed. This has now been made an instance method, which validates the `self._payload` 275 | copy of the payload, from which the `Task` instance is constructed. This is 276 | behaviorally the same, in that construction will fail if validation fails, but allows 277 | implementers to utilize the instance method's convenience functions. 278 | 279 | Previous implementations of `validate` would have been similar to this: 280 | 281 | ```python 282 | @classmethod 283 | def validate(cls, payload: dict[str, Any]) -> bool: 284 | # Check The Things™ 285 | return isinstance(payload, dict) 286 | ``` 287 | 288 | And will now need to be updated to this form: 289 | 290 | ```python 291 | def validate(self) -> bool: 292 | # Check The Things™ 293 | return isinstance(self._payload, dict) 294 | ``` 295 | 296 | ## Development 297 | 298 | Clone, install in editable mode with development and test requirements, and install the 299 | **pre-commit** hooks: 300 | 301 | ```shell 302 | git clone https://github.com/stac-utils/stac-task 303 | cd stac-task 304 | pip install -e '.[dev,test]' 305 | pre-commit install 306 | ``` 307 | 308 | To run the tests: 309 | 310 | ```shell 311 | pytest 312 | ``` 313 | 314 | To lint all the files: 315 | 316 | ```shell 317 | pre-commit run --all-files 318 | ``` 319 | 320 | ## Contributing 321 | 322 | Use GitHub [issues](https://github.com/stac-utils/stac-task/issues) and [pull 323 | requests](https://github.com/stac-utils/stac-task/pulls). 324 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: "5" 9 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. 
$(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx~=7.3.7 2 | sphinx-rtd-theme~=2.0.0 3 | -------------------------------------------------------------------------------- /docs/source/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | .. autosummary:: 5 | :toctree: generated 6 | :recursive: 7 | 8 | stactask 9 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | 3 | import os 4 | import sys 5 | 6 | sys.path.insert( 7 | 0, os.path.abspath("../../src") 8 | ) # Source code dir relative to this file 9 | 10 | # -- Project information 11 | 12 | project = "STAC Task" 13 | copyright = "2021, Element 84, Inc." 14 | 15 | release = "0.6" 16 | version = "0.6.1" 17 | 18 | # -- General configuration 19 | 20 | extensions = [ 21 | "sphinx.ext.duration", 22 | "sphinx.ext.doctest", 23 | "sphinx.ext.autodoc", 24 | "sphinx.ext.autosummary", 25 | "sphinx.ext.intersphinx", 26 | ] 27 | 28 | intersphinx_mapping = { 29 | "python": ("https://docs.python.org/3/", None), 30 | "sphinx": ("https://www.sphinx-doc.org/en/master/", None), 31 | } 32 | intersphinx_disabled_domains = ["std"] 33 | 34 | autosummary_generate = True # Turn on sphinx.ext.autosummary 35 | 36 | templates_path = ["_templates"] 37 | 38 | # -- Options for HTML output 39 | 40 | html_theme = "sphinx_rtd_theme" 41 | 42 | # -- Options for EPUB output 43 | epub_show_urls = "footnote" 44 | -------------------------------------------------------------------------------- /docs/source/generated/stactask.rst: -------------------------------------------------------------------------------- 1 | stactask 2 | ======== 3 | 4 | .. automodule:: stactask 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | .. rubric:: Modules 25 | 26 | .. 
autosummary:: 27 | :toctree: 28 | :recursive: 29 | 30 | stactask.asset_io 31 | stactask.exceptions 32 | stactask.task 33 | stactask.utils 34 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to STAC Task's documentation! 2 | ===================================== 3 | 4 | **STAC Task** is a Python library for geospatial processing pipeline developers 5 | that provides a standardized way of writing code. 6 | 7 | Check out the :doc:`usage` section for further information, including 8 | how to :ref:`install <installation>` the project. 9 | 10 | .. note:: 11 | 12 | This project is under active development. 13 | 14 | Contents 15 | -------- 16 | 17 | .. toctree:: 18 | 19 | usage 20 | api 21 | -------------------------------------------------------------------------------- /docs/source/usage.rst: -------------------------------------------------------------------------------- 1 | Usage 2 | ===== 3 | 4 | .. _installation: 5 | .. _a_simple_task_definition: 6 | .. _cli_usage: 7 | .. _api_usage: 8 | 9 | Installation 10 | ------------ 11 | 12 | To use STAC Task, first install it using pip: 13 | 14 | .. code-block:: console 15 | 16 | (.venv) $ pip install stactask 17 | 18 | 19 | A simple task definition 20 | ------------------------ 21 | 22 | The intended use of stac-task is for a developer to extend the `stactask.Task` class. A trivial 23 | example class is shown below, which accepts a payload containing a field `item_id` and 24 | generates a STAC Item with this id. 25 | 26 | .. code-block:: python 27 | 28 | #!/usr/bin/env python 29 | 30 | import logging 31 | import os 32 | from datetime import datetime, timezone 33 | from typing import Any 34 | 35 | from pystac import Item 36 | from stactask.exceptions import InvalidInput 37 | from stactask.task import Task 38 | 39 | 40 | class MyTask(Task): # type: ignore 41 | name = "my-task" 42 | description = "Create STAC Items for payload" 43 | version = "v2024.02.01" 44 | 45 | # override from Task 46 | # (validate is an instance method as of stac-task 0.6.0) 47 | def validate(self) -> bool: 48 | if "item_id" not in self._payload: 49 | raise InvalidInput("Missing field 'item_id' in payload") 50 | return True 51 | 52 | # override from Task 53 | def process(self, **kwargs: Any) -> list[dict[str, Any]]: 54 | item = Item( 55 | id=self._payload["item_id"], 56 | geometry={ 57 | "type": "Polygon", 58 | "coordinates": [ 59 | [ 60 | [100.0, 0.0], 61 | [101.0, 0.0], 62 | [101.0, 1.0], 63 | [100.0, 1.0], 64 | [100.0, 0.0], 65 | ] 66 | ], 67 | }, 68 | bbox=None, 69 | datetime=datetime.now(timezone.utc), 70 | properties={}, 71 | ) 72 | 73 | return [item.to_dict()] 74 | 75 | # Support for running as a Lambda Function 76 | def handler(event: dict[str, Any], context: Any) -> dict[str, Any]: 77 | return MyTask.handler(event) # type: ignore 78 | 79 | # Support for running as a CLI application 80 | if __name__ == "__main__": 81 | MyTask.cli() 82 | 83 | The expected input looks like the following: 84 | 85 | .. code-block:: json 86 | 87 | { 88 | "id": "my-task/workflow-my-task/5427299e5b635537f33c07e0ad32fb87", 89 | "item_id": "G23923", 90 | "process": { 91 | "upload_options": { 92 | "collections": { 93 | "my-collection": "$[?(@.id =~ '.*')]" 94 | } 95 | } 96 | } 97 | } 98 | 99 | In Task, the `/process/upload_options/collections` mapping uses JSONPath to map attributes of the 100 | output Item to the collection that should be assigned to it. In this case, we only have one defined 101 | that matches on any `id` value, and sets the collection to `my-collection`. 102 | 
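The collection expression can be exercised directly with the `jsonpath-ng` library that stac-task depends on; a rough sketch (the item dict is abbreviated):

.. code-block:: python

    from jsonpath_ng.ext import parse

    # Each collection expression is evaluated against a list of Item dicts;
    # the first expression that matches assigns its collection ID.
    items = [{"id": "G23923", "properties": {}}]
    matches = parse("$[?(@.id =~ '.*')]").find(items)
    print(bool(matches))  # True, so the Item is assigned to "my-collection"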
103 | Running this with `python my-task.py run --local in.json` results in the following 104 | output JSON, which has modified the payload to add a new 105 | `features` array. 106 | 107 | .. code-block:: json 108 | 109 | { 110 | "id": "my-task/workflow-my-task/5427299e5b635537f33c07e0ad32fb87", 111 | "item_id": "G23923", 112 | "process": { 113 | "upload_options": { 114 | "collections": { 115 | "my-collection": "$[?(@.id =~ '.*')]" 116 | } 117 | } 118 | }, 119 | "features": [ 120 | { 121 | "type": "Feature", 122 | "stac_version": "1.0.0", 123 | "id": "G23923", 124 | "properties": { 125 | "datetime": "2024-04-04T13:55:05.598886Z", 126 | "processing:software": { 127 | "my-task": "v2024.02.01" 128 | } 129 | }, 130 | "geometry": { 131 | "type": "Polygon", 132 | "coordinates": [ 133 | [ 134 | [ 100.0, 0.0 ], 135 | [ 101.0, 0.0 ], 136 | [ 101.0, 1.0 ], 137 | [ 100.0, 1.0 ], 138 | [ 100.0, 0.0 ] 139 | ] 140 | ] 141 | }, 142 | "links": [], 143 | "assets": {}, 144 | "stac_extensions": [ 145 | "https://stac-extensions.github.io/processing/v1.1.0/schema.json" 146 | ], 147 | "collection": "my-collection" 148 | } 149 | ] 150 | } 151 | 152 | CLI Usage 153 | --------- 154 | 155 | To run a Task as a CLI application, add a main definition to the class inheriting Task: 156 | 157 | .. code-block:: python 158 | 159 | if __name__ == "__main__": 160 | MyTask.cli() 161 | 162 | This provides a CLI that supports several useful flags for using stac-task. Invoking it 163 | without any arguments will print usage. 164 | 165 | A common way of invoking the task is: 166 | 167 | .. code-block:: console 168 | 169 | src/mytask/mytask.py run --local --logging DEBUG 170 | 171 | 172 | An example of running it might look like: 173 | 174 | .. code-block:: console 175 | 176 | src/mytask/mytask.py run --logging DEBUG --local my-input-file.json 177 | 178 | The payload can be read from stdin: 179 | 180 | .. code-block:: console 181 | 182 | cat input.json | src/mytask/mytask.py run --local | tee output.json 183 | 184 | The first argument is the command, of which the only option currently is `run`. 185 | 186 | - `--logging <level>` - configure the logging level of the task, one of DEBUG, INFO, WARN, ERROR, or CRITICAL 187 | - `--local` - sets several other flags to reasonable values for local testing, including `save-workdir`, 188 | `skip-upload`, `skip-validation`, sets the `workdir` to the directory `local-output`, and 189 | sets the `output` file to `local-output/output-payload.json`. 190 | - `input` - the location of the input payload file 191 | 192 | All of the parameters set by `--local` can also be configured independently: 193 | 194 | - `--workdir <dir>` - the directory that task operations should use for storage 195 | - `--save-workdir` - retain the workdir after the task exits 196 | - `--output <file>` - the file path to write the task output to 197 | - `--skip-upload` - don't upload the payload to S3 198 | - `--skip-validation` - don't perform JSON validation on the payload 199 | 200 | API Usage 201 | --------- 202 | 203 | The Task constructor accepts a `payload` argument of type `dict[str, Any]` that represents 204 | a JSON object, usually passed 205 | through the `handler` static method. This can either be the payload itself or a reference to the actual payload. 206 | If the Task payload dictionary contains a field named either `href` or `url`, the `handler` method will set 207 | the Task's payload to the contents of that URI. Any supported and configured fsspec storage can be used, 208 | such as a local file, a remote HTTP URL, or an S3 URI. 209 | 210 | Typically, this payload contains configuration needed for the Task to execute. The payload can be 211 | accessed via `self._payload`. The Task can directly modify the payload, though most commonly, 212 | the payload is only added to by returning a list of STAC Items from the overridden `process` method. 213 | 214 | When the `handler` static method is invoked, the following sequence of events happens: 215 | 216 | - the payload is populated with either the direct value or the contents of `href` or `url` 217 | - the `validate` method is called on the payload 218 | - the `process` method is executed to generate a list of STAC Items 219 | - the list of STAC Items (represented as a list of dictionaries) output from 220 | `process` is assigned 221 | to the payload's `features` attribute 222 | - the payload's `/process/upload_options/collections` mapping uses 223 | JSONPath to map attributes of the 224 | output Item to the collection that should be assigned to it 225 | - the contents of `_workdir` are deleted, unless `save-workdir` is set
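The same flow can be driven from Python by calling `handler` directly; a sketch using the example payload from above:

.. code-block:: python

    payload = {
        "id": "my-task/workflow-my-task/5427299e5b635537f33c07e0ad32fb87",
        "item_id": "G23923",
        "process": {
            "upload_options": {
                "collections": {"my-collection": "$[?(@.id =~ '.*')]"}
            }
        },
    }

    # runs validate() and process(), returning the payload with
    # its "features" array populated
    result = MyTask.handler(payload)
    print([f["id"] for f in result["features"]])  # ["G23923"]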
226 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "stactask" 3 | version = "0.6.1" 4 | authors = [{ name = "Matthew Hanson", email = "matt.a.hanson@gmail.com" }] 5 | maintainers = [{ name = "Ian Cooke", email = "ircwaves@gmail.com" }] 6 | description = "Class interface for running custom algorithms and workflows on STAC Items" 7 | readme = "README.md" 8 | requires-python = ">=3.9" 9 | keywords = ["pystac", "imagery", "raster", "catalog", "STAC"] 10 | license = { text = "Apache-2.0" } 11 | classifiers = [ 12 | "Development Status :: 2 - Pre-Alpha", 13 | "Intended Audience :: Developers", 14 | "License :: OSI Approved :: Apache Software License", 15 | "Natural Language :: English", 16 | "Programming Language :: Python :: 3", 17 | "Programming Language :: Python :: 3.9", 18 | "Programming Language :: Python :: 3.10", 19 | "Programming Language :: Python :: 3.11", 20 | "Programming Language :: Python :: 3.12", 21 | ] 22 | dependencies = [ 23 | "pystac>=1.6", 24 | "python-dateutil>=2.7.0", 25 | "boto3-utils>=0.3.2", 26 | "fsspec>=2022.8.2", 27 | "stac-asset>=0.3.0", 28 | "jsonpath_ng>=1.5.3", 29 | "requests>=2.28.1", 30 | "s3fs>=2022.8.2", 31 | ] 32 | 33 | [project.optional-dependencies] 34 | dev = [ 35 | "black~=24.0", 36 | "codespell~=2.3", 37 | "mypy~=1.9", 38 | "pre-commit~=3.7", 39 | "ruff~=0.6.5", 40 | "types-setuptools~=75.1", 41 | "boto3-stubs", 42 | ] 43 | test = ["pytest~=8.0", "pytest-cov~=5.0", "pytest-env~=1.1", "moto~=5.0.5"] 44 | 45 | [project.urls] 46 | Issues = "https://github.com/stac-utils/stac-task/issues" 47 | Github = "https://github.com/stac-utils/stac-task" 48 | Changelog = "https://github.com/stac-utils/stac-task/blob/main/CHANGELOG.md" 49 | 50 | [tool.mypy] 51 | strict = true 52 | 53 | [[tool.mypy.overrides]] 54 | module = ["boto3utils", "jsonpath_ng.ext", "fsspec"] 55 | ignore_missing_imports = true 56 | 57 | [tool.ruff.lint] 58 | select = ["F", "E", "W", "I", "ERA", "RUF"] 59 | 60 | [tool.pytest.ini_options] 61 | addopts = "-rx -q -s -vvv" 62 | 
log_cli_level = "INFO" 63 | log_cli = true 64 | markers = ["system", "unit"] 65 | env = [ 66 | "AWS_DEFAULT_REGION=us-west-2", 67 | "AWS_ACCESS_KEY_ID=foo", 68 | "AWS_SECRET_ACCESS_KEY=bar", 69 | "AWS_SESSION_TOKEN=baz", 70 | ] 71 | filterwarnings = ["ignore::UserWarning:stactask.*:"] 72 | -------------------------------------------------------------------------------- /stactask/__init__.py: -------------------------------------------------------------------------------- 1 | from importlib.metadata import PackageNotFoundError, version 2 | 3 | try: 4 | __version__ = version("stactask") 5 | except PackageNotFoundError: 6 | # package is not installed 7 | pass 8 | 9 | from .config import DownloadConfig 10 | from .task import Task 11 | 12 | __all__ = ["Task", "DownloadConfig"] 13 | -------------------------------------------------------------------------------- /stactask/asset_io.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | from os import path as op 4 | from typing import Any, Iterable, Optional, Union 5 | 6 | import stac_asset 7 | from boto3utils import s3 8 | from pystac import Item 9 | from pystac.layout import LayoutTemplate 10 | 11 | from .config import DownloadConfig 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | # global s3 client, used when no client is passed in explicitly 16 | global_s3_client = s3() 17 | 18 | 19 | async def download_item_assets( 20 | item: Item, 21 | path_template: str = "${collection}/${id}", 22 | config: Optional[DownloadConfig] = None, 23 | keep_non_downloaded: bool = True, 24 | file_name: Optional[str] = "item.json", 25 | ) -> Item: 26 | return await stac_asset.download_item( 27 | item=item.clone(), 28 | directory=LayoutTemplate(path_template).substitute(item), 29 | file_name=file_name, 30 | config=config, 31 | keep_non_downloaded=keep_non_downloaded, 32 | ) 33 | 34 | 35 | async def download_items_assets( 36 | items: Iterable[Item], 37 | path_template: str = "${collection}/${id}", 38 | config: Optional[DownloadConfig] = None, 39 | keep_non_downloaded: bool = True, 40 | file_name: Optional[str] = "item.json", 41 | ) -> list[Item]: 42 | return await asyncio.gather( 43 | *[ 44 | asyncio.create_task( 45 | download_item_assets( 46 | item=item, 47 | path_template=path_template, 48 | config=config, 49 | keep_non_downloaded=keep_non_downloaded, 50 | file_name=file_name, 51 | ) 52 | ) 53 | for item in items 54 | ] 55 | ) 56 | 57 | 58 | def upload_item_assets_to_s3( 59 | item: Item, 60 | assets: Optional[list[str]] = None, 61 | public_assets: Union[None, list[str], str] = None, 62 | path_template: str = "${collection}/${id}", 63 | s3_urls: bool = False, 64 | headers: Optional[dict[str, Any]] = None, 65 | s3_client: Optional[s3] = None, 66 | **kwargs: Any,  # unused, but retain to permit unused attributes from upload_options 67 | ) -> Item: 68 | """Upload Item assets to an S3 bucket 69 | Args: 70 | item (Item): STAC Item 71 | assets (list[str], optional): List of asset keys to upload. Defaults to None. 72 | public_assets (list[str] or str, optional): List of asset keys that should be 73 | public, or the string "ALL". Defaults to None. 74 | path_template (str, optional): Path string template. Defaults to 75 | '${collection}/${id}'. 76 | s3_urls (bool, optional): Return s3 URLs instead of http URLs. Defaults 77 | to False. 78 | headers (dict, optional): Dictionary of headers to set on uploaded 79 | assets. Defaults to {}. 80 | s3_client (boto3utils.s3, optional): Use this s3 object instead of the default 81 | global one. 
Defaults to None. 82 | Returns: 83 | Item: A new STAC Item with uploaded assets pointing to newly uploaded file URLs 84 | """ 85 | 86 | if s3_client is None: 87 | s3_client = global_s3_client 88 | 89 | if headers is None: 90 | headers = {} 91 | 92 | # deepcopy of item 93 | _item = item.to_dict(transform_hrefs=False) 94 | 95 | if public_assets is None: 96 | public_assets = [] 97 | # determine which assets should be public 98 | elif isinstance(public_assets, str): 99 | if public_assets == "ALL": 100 | public_assets = list(_item["assets"].keys()) 101 | else: 102 | raise ValueError(f"unexpected value for `public_assets`: {public_assets}") 103 | 104 | # if assets not provided, upload all assets 105 | _assets = assets if assets is not None else _item["assets"].keys() 106 | 107 | for key in [a for a in _assets if a in _item["assets"].keys()]: 108 | asset = _item["assets"][key] 109 | filename = asset["href"] 110 | if not op.exists(filename): 111 | logger.warning(f"Cannot upload {filename}: does not exist") 112 | continue 113 | public = True if key in public_assets else False 114 | _headers = {} 115 | if "type" in asset: 116 | _headers["ContentType"] = asset["type"] 117 | _headers.update(headers) 118 | # output URL 119 | layout = LayoutTemplate(op.join(path_template, op.basename(filename))) 120 | url = layout.substitute(item) 121 | 122 | # upload 123 | logger.debug(f"Uploading {filename} to {url}") 124 | url_out = s3_client.upload( 125 | filename, url, public=public, extra=_headers, http_url=not s3_urls 126 | ) 127 | _item["assets"][key]["href"] = url_out 128 | 129 | return Item.from_dict(_item) 130 | -------------------------------------------------------------------------------- /stactask/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from stac_asset import Config 4 | 5 | 6 | @dataclass 7 | class DownloadConfig(Config): # type: ignore 8 | pass 9 | -------------------------------------------------------------------------------- /stactask/exceptions.py: -------------------------------------------------------------------------------- 1 | class InvalidInput(Exception): 2 | """Exception class for when processing fails due to invalid input 3 | 4 | Args: 5 | Exception (Exception): Base class 6 | """ 7 | 8 | pass 9 | 10 | 11 | class FailedValidation(Exception): 12 | """Exception class thrown when input payload does not validate""" 13 | 14 | pass 15 | -------------------------------------------------------------------------------- /stactask/logging.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import logging 4 | from typing import TYPE_CHECKING, Any, Optional 5 | 6 | if TYPE_CHECKING: 7 | _LoggerAdapter = logging.LoggerAdapter[logging.Logger] # pragma: no cover 8 | else: 9 | _LoggerAdapter = logging.LoggerAdapter 10 | 11 | 12 | class TaskLoggerAdapter(_LoggerAdapter): 13 | def __init__(self, logger: logging.Logger, prefix: Optional[str]) -> None: 14 | super().__init__(logger, {}) 15 | self.prefix = prefix 16 | 17 | def process(self, msg: str, kwargs: Any) -> tuple[str, Any]: 18 | if self.prefix is not None: 19 | return f"[{self.prefix}] {msg}", kwargs 20 | else: 21 | return msg, kwargs 22 | -------------------------------------------------------------------------------- /stactask/py.typed: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stac-utils/stac-task/f544899e49c6f8487deaaca9e4272ed71b6071fd/stactask/py.typed
--------------------------------------------------------------------------------
/stactask/task.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import asyncio
3 | import json
4 | import logging
5 | import os
6 | import sys
7 | import warnings
8 | from abc import ABC, abstractmethod
9 | from copy import deepcopy
10 | from os import makedirs
11 | from pathlib import Path
12 | from shutil import rmtree
13 | from tempfile import mkdtemp
14 | from typing import Any, Callable, Iterable, Optional, Union
15 | 
16 | import fsspec
17 | from boto3utils import s3
18 | from pystac import Asset, Item, ItemCollection
19 | 
20 | from .asset_io import (
21 |     download_item_assets,
22 |     download_items_assets,
23 |     upload_item_assets_to_s3,
24 | )
25 | from .config import DownloadConfig
26 | from .exceptions import FailedValidation
27 | from .logging import TaskLoggerAdapter
28 | from .utils import find_collection as utils_find_collection
29 | 
30 | # types
31 | PathLike = Union[str, Path]
32 | 
33 | 
34 | class DeprecatedStoreTrueAction(argparse._StoreTrueAction):
35 |     def __call__(self, parser, namespace, values, option_string=None) -> None:  # type: ignore
36 |         warnings.warn("Argument %s is deprecated." % self.option_strings)
37 |         super().__call__(parser, namespace, values, option_string)
38 | 
39 | 
40 | class Task(ABC):
41 |     """
42 |     Tasks can use parameters provided in a `process` dictionary that is supplied in
43 |     the ItemCollection JSON under the "process" field. An example process
44 |     definition:
45 | 
46 |     ```
47 |     {
48 |         "description": "My process configuration",
49 |         "upload_options": {
50 |             "path_template": "s3://my-bucket/${collection}/${year}/${month}/${day}/${id}",
51 |             "collections": {
52 |                 "landsat-c2l2": ""
53 |             }
54 |         },
55 |         "tasks": {
56 |             "task-name": {
57 |                 "param": "value"
58 |             }
59 |         }
60 |     }
61 |     ```
62 |     """
63 | 
64 |     name = "task"
65 |     description = "A task for doing things"
66 |     version = "0.1.0"
67 | 
68 |     def __init__(
69 |         self: "Task",
70 |         payload: dict[str, Any],
71 |         workdir: Optional[PathLike] = None,
72 |         save_workdir: Optional[bool] = None,
73 |         skip_upload: bool = False,  # deprecated
74 |         skip_validation: bool = False,  # deprecated
75 |         upload: bool = True,
76 |         validate: bool = True,
77 |     ):
78 |         self._payload = payload
79 | 
80 |         if not skip_validation and validate:
81 |             if not self.validate():
82 |                 raise FailedValidation()
83 | 
84 |         # set instance variables
85 |         if skip_upload:
86 |             self._upload = False
87 |         else:
88 |             self._upload = upload
89 | 
90 |         self._skip_upload = not upload  # deprecated
91 | 
92 |         # create temporary work directory if workdir is None
93 |         if workdir is None:
94 |             self._workdir = Path(mkdtemp())
95 |             # if we are using a temp workdir we want to rm by default
96 |             self._save_workdir = save_workdir if save_workdir is not None else False
97 |         else:
98 |             self._workdir = Path(workdir).absolute()
99 |             makedirs(self._workdir, exist_ok=True)
100 |             # if a workdir was specified we don't want to rm by default
101 |             self._save_workdir = save_workdir if save_workdir is not None else True
102 | 
103 |         self.logger = TaskLoggerAdapter(
104 |             logging.getLogger(self.name),
105 |             self._payload.get("id"),
106 |         )
107 | 
108 |     @property
109 |     def process_definition(self) -> dict[str, Any]:
110 |         process = self._payload.get("process", [])
111 |         if isinstance(process, dict):
112 |             warnings.warn(
113 |                 (
114 |
"`process` as a bare dictionary will be unsupported in a future " 115 | "version; wrap it in a list to remove this warning" 116 | ), 117 | DeprecationWarning, 118 | stacklevel=2, 119 | ) 120 | return process 121 | 122 | if not isinstance(process, list): 123 | raise TypeError("unable to parse `process`: must be type list") 124 | 125 | if not process: 126 | return {} 127 | 128 | if not isinstance(process[0], dict): 129 | raise TypeError( 130 | ( 131 | "unable to parse `process`: the first element of the list must be " 132 | "a dictionary" 133 | ) 134 | ) 135 | 136 | return process[0] 137 | 138 | @property 139 | def workflow_options(self) -> dict[str, Any]: 140 | workflow_options_ = self.process_definition.get("workflow_options", {}) 141 | if not isinstance(workflow_options_, dict): 142 | raise TypeError("unable to parse `workflow_options`: must be type dict") 143 | return workflow_options_ 144 | 145 | @property 146 | def task_options(self) -> dict[str, Any]: 147 | task_options_ = self.process_definition.get("tasks", {}) 148 | if not isinstance(task_options_, (dict, list)): 149 | raise TypeError( 150 | "unable to parse `tasks`: must be type dict or type list (deprecated)" 151 | ) 152 | 153 | if isinstance(task_options_, list): 154 | warnings.warn( 155 | ( 156 | "`tasks` as a list of TaskConfig objects will be unsupported in a " 157 | "future version; use a dictionary of task options to remove this " 158 | "warning" 159 | ), 160 | DeprecationWarning, 161 | stacklevel=2, 162 | ) 163 | task_config_list = [ 164 | cfg for cfg in task_options_ if cfg["name"] == self.name 165 | ] 166 | if len(task_config_list) == 0: 167 | return {} 168 | else: 169 | task_config: dict[str, Any] = task_config_list[0] 170 | parameters = task_config.get("parameters", {}) 171 | if isinstance(parameters, dict): 172 | return parameters 173 | else: 174 | raise TypeError("unable to parse `parameters`: must be type dict") 175 | 176 | if isinstance(task_options_, dict): 177 | options = task_options_.get(self.name, {}) 178 | if isinstance(options, dict): 179 | return options 180 | else: 181 | raise TypeError( 182 | f"unable to parse options for task '{self.name}': must be type dict" 183 | ) 184 | 185 | @property 186 | def parameters(self) -> dict[str, Any]: 187 | return {**self.workflow_options, **self.task_options} 188 | 189 | @property 190 | def upload_options(self) -> dict[str, Any]: 191 | upload_options = self.process_definition.get("upload_options", {}) 192 | if isinstance(upload_options, dict): 193 | return upload_options 194 | else: 195 | raise ValueError(f"upload_options is not a dict: {type(upload_options)}") 196 | 197 | @property 198 | def collection_mapping(self) -> dict[str, str]: 199 | collection_mapping = self.upload_options.get("collections", {}) 200 | if isinstance(collection_mapping, dict): 201 | return collection_mapping 202 | else: 203 | raise ValueError(f"collections is not a dict: {type(collection_mapping)}") 204 | 205 | @property 206 | def items_as_dicts(self) -> list[dict[str, Any]]: 207 | features = self._payload.get("features", []) 208 | if isinstance(features, list): 209 | return features 210 | else: 211 | raise ValueError(f"features is not a list: {type(features)}") 212 | 213 | @property 214 | def items(self) -> ItemCollection: 215 | items_dict = {"type": "FeatureCollection", "features": self.items_as_dicts} 216 | return ItemCollection.from_dict(items_dict, preserve_dict=True) 217 | 218 | @classmethod 219 | def add_software_version(cls, items: list[dict[str, Any]]) -> list[dict[str, Any]]: 220 | 
warnings.warn(
221 |             "add_software_version is deprecated, "
222 |             "use add_software_version_to_item instead",
223 |             DeprecationWarning,
224 |         )
225 |         modified_items = list()
226 |         for item in items:
227 |             modified_items.append(cls.add_software_version_to_item(item))
228 |         return modified_items
229 | 
230 |     @classmethod
231 |     def add_software_version_to_item(cls, item: dict[str, Any]) -> dict[str, Any]:
232 |         """Adds software version information to a single item.
233 | 
234 |         Uses the processing extension.
235 | 
236 |         Args:
237 |             item: A single STAC item
238 | 
239 |         Returns:
240 |             dict[str, Any]: The same item with processing information applied.
241 |         """
242 |         processing_ext = (
243 |             "https://stac-extensions.github.io/processing/v1.1.0/schema.json"
244 |         )
245 |         if "stac_extensions" not in item:
246 |             item["stac_extensions"] = []
247 |         item["stac_extensions"].append(processing_ext)
248 |         item["stac_extensions"] = list(set(item["stac_extensions"]))
249 |         if "properties" not in item:
250 |             item["properties"] = {}
251 |         item["properties"]["processing:software"] = {cls.name: cls.version}
252 |         return item
253 | 
254 |     def validate(self) -> bool:
255 |         """Validates `self._payload` and returns True if valid. If invalid, raises
256 |         ``stactask.exceptions.FailedValidation`` or returns False."""
257 |         # put validation logic on input Items and process definition here
258 |         return True
259 | 
260 |     def cleanup_workdir(self) -> None:
261 |         """Remove work directory if configured not to save it"""
262 |         try:
263 |             if (
264 |                 not self._save_workdir
265 |                 and self._workdir
266 |                 and os.path.exists(self._workdir)
267 |             ):
268 |                 self.logger.debug("Removing work directory %s", self._workdir)
269 |                 rmtree(self._workdir)
270 |         except Exception as e:
271 |             self.logger.warning(
272 |                 "Failed removing work directory %s: %s", self._workdir, e
273 |             )
274 | 
275 |     def assign_collections(self) -> None:
276 |         """Assigns new collection names based on upload_options collections attribute
277 |         according to the first matching expression in the order they are defined."""
278 |         for item in self._payload["features"]:
279 |             if coll := utils_find_collection(self.collection_mapping, item):
280 |                 item["collection"] = coll
281 | 
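    # For illustration only (hypothetical fragment, not part of this module):
    # the `collections` mapping consumed by `assign_collections` pairs each
    # collection name with a JSONPath expression, e.g.
    #
    #     "collections": {
    #         "sentinel-2-l2a": "$[?(@.id =~ 'S2[AB].*')]"
    #     }
    #
    # An Item whose JSON matches an expression (here, an `id` starting with
    # "S2A" or "S2B") is assigned the first collection whose expression matches.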
303 | """ 304 | return asyncio.get_event_loop().run_until_complete( 305 | download_item_assets( 306 | item, 307 | path_template=str(self._workdir / path_template), 308 | config=config, 309 | keep_non_downloaded=keep_non_downloaded, 310 | file_name=file_name, 311 | ) 312 | ) 313 | 314 | def download_items_assets( 315 | self, 316 | items: Iterable[Item], 317 | path_template: str = "${collection}/${id}", 318 | config: Optional[DownloadConfig] = None, 319 | keep_non_downloaded: bool = True, 320 | file_name: Optional[str] = "item.json", 321 | ) -> list[Item]: 322 | """Download provided asset keys for the given items. Assets are 323 | saved in workdir in a directory (as specified by path_template), and 324 | the items are updated with the new asset hrefs. 325 | 326 | Args: 327 | items (list[pystac.Item]): List of STAC Items for which assets need 328 | be downloaded. 329 | path_template (Optional[str]): String to be interpolated to specify 330 | where to store downloaded files. 331 | config (Optional[DownloadConfig]): Configuration for downloading items 332 | and their assets. 333 | keep_original_filenames (Optional[bool]): Controls whether original 334 | file names should be used, or asset key + extension. 335 | file_name (Optional[str]): The name of the item file to save. 336 | """ 337 | return list( 338 | asyncio.get_event_loop().run_until_complete( 339 | download_items_assets( 340 | items, 341 | path_template=str(self._workdir / path_template), 342 | config=config, 343 | keep_non_downloaded=keep_non_downloaded, 344 | file_name=file_name, 345 | ) 346 | ) 347 | ) 348 | 349 | def upload_item_assets_to_s3( 350 | self, 351 | item: Item, 352 | assets: Optional[list[str]] = None, 353 | s3_client: Optional[s3] = None, 354 | ) -> Item: 355 | if self._upload: 356 | item = upload_item_assets_to_s3( 357 | item=item, assets=assets, s3_client=s3_client, **self.upload_options 358 | ) 359 | else: 360 | self.logger.warning("Skipping upload of new and modified assets") 361 | 362 | return item 363 | 364 | def _is_local_asset(self, asset: Asset) -> bool: 365 | return bool(asset.href.startswith(str(self._workdir))) 366 | 367 | def _get_local_asset_keys(self, item: Item) -> list[str]: 368 | return [ 369 | key for key, asset in item.assets.items() if self._is_local_asset(asset) 370 | ] 371 | 372 | def upload_local_item_assets_to_s3( 373 | self, 374 | item: Item, 375 | s3_client: Optional[s3] = None, 376 | ) -> Item: 377 | return self.upload_item_assets_to_s3( 378 | item=item, 379 | assets=self._get_local_asset_keys(item), 380 | s3_client=s3_client, 381 | ) 382 | 383 | # this should be in PySTAC 384 | @staticmethod 385 | def create_item_from_item(item: dict[str, Any]) -> dict[str, Any]: 386 | new_item = deepcopy(item) 387 | # create a derived output item 388 | links = [ 389 | link["href"] for link in item.get("links", []) if link["rel"] == "self" 390 | ] 391 | if len(links) == 1: 392 | # add derived from link 393 | new_item["links"].append( 394 | { 395 | "title": "Source STAC Item", 396 | "rel": "derived_from", 397 | "href": links[0], 398 | "type": "application/json", 399 | } 400 | ) 401 | return new_item 402 | 403 | @abstractmethod 404 | def process(self, **kwargs: Any) -> list[dict[str, Any]]: 405 | """Main task logic - virtual 406 | 407 | Returns: 408 | [type]: [description] 409 | """ 410 | # download assets of interest, this will update self.items 411 | # do some stuff 412 | pass 413 | 414 | def post_process_item(self, item: dict[str, Any]) -> dict[str, Any]: 415 | """Perform post-processing operations on an 
item. 416 | 417 | E.g. add software version information. 418 | 419 | Most tasks should prefer to not override this method, as logic should be 420 | kept in :py:meth:`Task.process`. If you do override this method, make 421 | sure to call ``super().post_process_item()`` AFTER doing any custom 422 | post-processing, so any regular behavior can take your changes into account. 423 | 424 | Args: 425 | item: An item produced by :py:meth:`Task.process` 426 | 427 | Returns: 428 | dict[str, Any]: The item with any additional attributes applied. 429 | """ 430 | assert "stac_extensions" in item 431 | assert isinstance(item["stac_extensions"], list) 432 | item["stac_extensions"].sort() 433 | return item 434 | 435 | @classmethod 436 | def handler(cls, payload: dict[str, Any], **kwargs: Any) -> dict[str, Any]: 437 | task = None 438 | try: 439 | if "href" in payload or "url" in payload: 440 | # read input 441 | with fsspec.open(payload.get("href", payload.get("url"))) as f: 442 | payload = json.loads(f.read()) 443 | 444 | task = cls(payload, **kwargs) 445 | try: 446 | items = list() 447 | for item in task.process(**task.parameters): 448 | items.append(task.post_process_item(item)) 449 | 450 | task._payload["features"] = items 451 | task.assign_collections() 452 | 453 | return task._payload 454 | except Exception as err: 455 | task.logger.error(err, exc_info=True) 456 | raise err 457 | finally: 458 | if task: 459 | task.cleanup_workdir() 460 | 461 | @classmethod 462 | def parse_args(cls, args: list[str]) -> dict[str, Any]: 463 | dhf = argparse.ArgumentDefaultsHelpFormatter 464 | parser0 = argparse.ArgumentParser(description=cls.description) 465 | parser0.add_argument( 466 | "--version", 467 | help="Print version and exit", 468 | action="version", 469 | version=cls.version, 470 | ) 471 | 472 | pparser = argparse.ArgumentParser(add_help=False) 473 | pparser.add_argument( 474 | "--logging", default="INFO", help="DEBUG, INFO, WARN, ERROR, CRITICAL" 475 | ) 476 | 477 | subparsers = parser0.add_subparsers(dest="command") 478 | 479 | # run 480 | parser = subparsers.add_parser( 481 | "run", 482 | parents=[pparser], 483 | formatter_class=dhf, 484 | help="Process STAC Item Collection", 485 | ) 486 | parser.add_argument( 487 | "input", 488 | nargs="?", 489 | help="Full path of item collection to process (s3 or local)", 490 | ) 491 | 492 | parser.add_argument( 493 | "--output", 494 | default=None, 495 | help="Write output payload to this URL", 496 | ) 497 | 498 | # additional options 499 | parser.add_argument( 500 | "--workdir", 501 | default=None, 502 | type=Path, 503 | help="Use this as work directory. 
Will be created.", 504 | ) 505 | 506 | parser.add_argument( 507 | "--save-workdir", 508 | dest="save_workdir", 509 | action="store_true", 510 | default=False, 511 | help="Save workdir after completion", 512 | ) 513 | 514 | # skips are deprecated in favor of boolean optionals 515 | parser.add_argument( 516 | "--skip-upload", 517 | dest="skip_upload", 518 | action=DeprecatedStoreTrueAction, 519 | default=False, 520 | help="DEPRECATED: Skip uploading of generated assets and STAC Items", 521 | ) 522 | parser.add_argument( 523 | "--skip-validation", 524 | dest="skip_validation", 525 | action=DeprecatedStoreTrueAction, 526 | default=False, 527 | help="DEPRECATED: Skip validation of input payload", 528 | ) 529 | 530 | parser.add_argument( 531 | "--upload", 532 | dest="upload", 533 | action="store_true", 534 | default=True, 535 | help="Upload generated assets and resulting STAC Items", 536 | ) 537 | parser.add_argument( 538 | "--no-upload", 539 | dest="upload", 540 | action="store_false", 541 | help="Don't upload generated assets and resulting STAC Items", 542 | ) 543 | parser.add_argument( 544 | "--validate", 545 | dest="validate", 546 | action="store_true", 547 | default=True, 548 | help="Validate input payload", 549 | ) 550 | parser.add_argument( 551 | "--no-validate", 552 | dest="validate", 553 | action="store_false", 554 | help="Don't validate input payload", 555 | ) 556 | 557 | parser.add_argument( 558 | "--local", 559 | action="store_true", 560 | default=False, 561 | help=""" Run local mode 562 | (save-workdir = True, upload = False, 563 | workdir = 'local-output', output = 'local-output/output-payload.json') """, 564 | ) 565 | 566 | # turn Namespace into dictionary 567 | pargs = vars(parser0.parse_args(args)) 568 | # only keep keys that are not None 569 | pargs = {k: v for k, v in pargs.items() if v is not None} 570 | 571 | if pargs.pop("skip_validation", False): 572 | pargs["validate"] = False 573 | if pargs.pop("skip_upload", False): 574 | pargs["upload"] = False 575 | 576 | if pargs.pop("local", False): 577 | pargs["save_workdir"] = True 578 | pargs["upload"] = False 579 | if pargs.get("workdir") is None: 580 | pargs["workdir"] = "local-output" 581 | if pargs.get("output") is None: 582 | pargs["output"] = Path(pargs["workdir"]) / "output-payload.json" 583 | 584 | if pargs.get("command", None) is None: 585 | parser.print_help() 586 | sys.exit(0) 587 | 588 | return pargs 589 | 590 | @classmethod 591 | def cli(cls) -> None: 592 | args = cls.parse_args(sys.argv[1:]) 593 | cmd = args.pop("command") 594 | 595 | # logging 596 | loglevel = args.pop("logging") 597 | logging.basicConfig(level=loglevel) 598 | 599 | # quiet these loud loggers 600 | for ql in [ 601 | "botocore", 602 | "s3transfer", 603 | "urllib3", 604 | "fsspec", 605 | "asyncio", 606 | "aiobotocore", 607 | ]: 608 | logging.getLogger(ql).propagate = False 609 | 610 | if cmd == "run": 611 | href = args.pop("input", None) 612 | href_out = args.pop("output", None) 613 | 614 | # read input 615 | if href is None: 616 | payload = json.load(sys.stdin) 617 | else: 618 | with fsspec.open(href) as f: 619 | payload = json.loads(f.read()) 620 | 621 | # run task handler 622 | payload_out = cls.handler(payload, **args) 623 | 624 | # write output 625 | if href_out is None: 626 | json.dump(payload_out, sys.stdout) 627 | else: 628 | with fsspec.open(href_out, "w") as f: 629 | f.write(json.dumps(payload_out)) 630 | 631 | 632 | # from https://pythonalgos.com/runtimeerror-event-loop-is-closed-asyncio-fix/ 633 | """fix yelling at me error""" 634 | from 
asyncio.proactor_events import _ProactorBasePipeTransport # noqa 635 | from functools import wraps # noqa 636 | 637 | 638 | def silence_event_loop_closed(func: Callable[[Any], Any]) -> Callable[[Any], Any]: 639 | @wraps(func) 640 | def wrapper(self, *args: Any, **kwargs: Any) -> Any: # type: ignore 641 | try: 642 | return func(self, *args, **kwargs) 643 | except RuntimeError as e: 644 | if str(e) != "Event loop is closed": 645 | raise 646 | 647 | return wrapper 648 | 649 | 650 | setattr( 651 | _ProactorBasePipeTransport, 652 | "__del__", 653 | silence_event_loop_closed(_ProactorBasePipeTransport.__del__), 654 | ) 655 | """fix yelling at me error end""" 656 | -------------------------------------------------------------------------------- /stactask/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Optional 2 | 3 | from jsonpath_ng.ext import parser 4 | 5 | 6 | def stac_jsonpath_match(item: dict[str, Any], expr: str) -> bool: 7 | """Match jsonpath expression against STAC JSON. 8 | Use https://jsonpath.com to experiment with JSONpath 9 | and https://regex101.com to experiment with regex 10 | 11 | Args: 12 | item (dict): A STAC Item represented as a dict 13 | expr (str): A valid JSONPath expression 14 | 15 | Raises: 16 | err: Invalid inputs 17 | 18 | Returns: 19 | Boolean: Returns True if the jsonpath expression matches the STAC Item JSON 20 | """ 21 | return len([x.value for x in parser.parse(expr).find([item])]) == 1 22 | 23 | 24 | def find_collection( 25 | collection_mapping: dict[str, str], item: dict[str, Any] 26 | ) -> Optional[str]: 27 | """Find the collection for a given STAC Item represented as a dictionary from a 28 | dictionary of collection names to JSONPath expressions. 29 | 30 | Args: 31 | collection_mapping (dict): A dictionary of collection names to JSONPath 32 | expressions. 
33 | item (dict): A STAC Item 34 | 35 | Returns: 36 | Optional[str]: Returns None if no JSONPath expression matches, returns a 37 | collection name if one does 38 | """ 39 | return next( 40 | ( 41 | c 42 | for c, expr in collection_mapping.items() 43 | if stac_jsonpath_match(item, expr) 44 | ), 45 | None, 46 | ) 47 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stac-utils/stac-task/f544899e49c6f8487deaaca9e4272ed71b6071fd/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | def pytest_addoption(parser: pytest.Parser) -> None: 5 | parser.addoption( 6 | "--runslow", action="store_true", default=False, help="run slow tests" 7 | ) 8 | parser.addoption( 9 | "--s3-requester-pays", 10 | action="store_true", 11 | default=False, 12 | help="run tests that require fetching data via s3 requester pays", 13 | ) 14 | 15 | 16 | def pytest_configure(config: pytest.Config) -> None: 17 | config.addinivalue_line("markers", "slow: mark test as slow to run") 18 | config.addinivalue_line( 19 | "markers", "s3_requester_pays: mark test as requiring s3 requester pays to run" 20 | ) 21 | 22 | 23 | def pytest_collection_modifyitems( 24 | config: pytest.Config, items: list[pytest.Item] 25 | ) -> None: 26 | if not config.getoption("--runslow"): 27 | skip_slow = pytest.mark.skip(reason="need --runslow option to run") 28 | for item in items: 29 | if "slow" in item.keywords: 30 | item.add_marker(skip_slow) 31 | if not config.getoption("--s3-requester-pays"): 32 | skip_s3_requestor_pays = pytest.mark.skip( 33 | reason="need --s3-requester-pays option to run" 34 | ) 35 | for item in items: 36 | if "s3_requester_pays" in item.keywords: 37 | item.add_marker(skip_s3_requestor_pays) 38 | -------------------------------------------------------------------------------- /tests/fixtures/sentinel2-l2a-j2k-payload.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "id": "sentinel-s2-l2a/workflow-test/S2B_17HQD_20201103_0_L2A", 4 | "process": [ 5 | { 6 | "input_collections": [ 7 | "sentinel-2-l2a" 8 | ], 9 | "workflow": "cog-archive", 10 | "upload_options": { 11 | "path_template": "s3://sentinel-cogs/${collection}/${mgrs:utm_zone}/${mgrs:latitude_band}/${mgrs:grid_square}/${year}/${month}/${id}", 12 | "public_assets": "ALL", 13 | "collections": { 14 | "sentinel-2-l2a": "$[?(@.id =~ 'S2[AB].*')]" 15 | }, 16 | "headers": { 17 | "CacheControl": "public, max-age=31536000, immutable" 18 | } 19 | }, 20 | "tasks": { 21 | "nothing-task": { 22 | "do_nothing": true 23 | }, 24 | "derived-item-task": { 25 | "parameter": "value" 26 | } 27 | } 28 | } 29 | ], 30 | "features": [ 31 | { 32 | "type": "Feature", 33 | "stac_version": "1.0.0", 34 | "id": "S2A_52HGH_20221007_0_L2A", 35 | "properties": { 36 | "platform": "sentinel-2a", 37 | "constellation": "sentinel-2", 38 | "instruments": [ 39 | "msi" 40 | ], 41 | "eo:cloud_cover": 78.295034, 42 | "proj:epsg": 32752, 43 | "mgrs:utm_zone": 52, 44 | "mgrs:latitude_band": "H", 45 | "mgrs:grid_square": "GH", 46 | "grid:code": "MGRS-52HGH", 47 | "view:sun_azimuth": 46.088322562412, 48 | "view:sun_elevation": 52.681252151154, 49 | "sentinel2:degraded_msi_data_percentage": 0, 50 | 
"sentinel2:nodata_pixel_percentage": 97.707945, 51 | "sentinel2:saturated_defective_pixel_percentage": 0, 52 | "sentinel2:dark_features_percentage": 0, 53 | "sentinel2:cloud_shadow_percentage": 0, 54 | "sentinel2:vegetation_percentage": 0, 55 | "sentinel2:not_vegetated_percentage": 0, 56 | "sentinel2:water_percentage": 21.704969, 57 | "sentinel2:unclassified_percentage": 0, 58 | "sentinel2:medium_proba_clouds_percentage": 22.258073, 59 | "sentinel2:high_proba_clouds_percentage": 33.747041, 60 | "sentinel2:thin_cirrus_percentage": 22.289918, 61 | "sentinel2:snow_ice_percentage": 0, 62 | "sentinel2:product_type": "S2MSI2A", 63 | "sentinel2:processing_baseline": "04.00", 64 | "datetime": "2022-10-07T01:16:42.073000Z", 65 | "sentinel2:id": "S2A_OPER_MSI_L2A_TL_ATOS_20221007T034556_A038080_T52HGH", 66 | "created": "2022-10-07T05:12:48.954Z", 67 | "updated": "2022-10-07T05:12:48.954Z" 68 | }, 69 | "geometry": { 70 | "type": "Polygon", 71 | "coordinates": [ 72 | [ 73 | [ 74 | 131.84473074941502, 75 | -33.40641110020154 76 | ], 77 | [ 78 | 132.33006605863437, 79 | -33.39432388162573 80 | ], 81 | [ 82 | 132.33408301028325, 83 | -33.49926273351823 84 | ], 85 | [ 86 | 132.16504589306683, 87 | -33.46584562137108 88 | ], 89 | [ 90 | 132.16101633266382, 91 | -33.466800128679814 92 | ], 93 | [ 94 | 132.12639717607075, 95 | -33.46102328676277 96 | ], 97 | [ 98 | 131.90551437336606, 99 | -33.42039938612281 100 | ], 101 | [ 102 | 131.89618393486802, 103 | -33.42061775783755 104 | ], 105 | [ 106 | 131.89307839625462, 107 | -33.4155028259863 108 | ], 109 | [ 110 | 131.85351663182533, 111 | -33.40817577994235 112 | ], 113 | [ 114 | 131.8447922035708, 115 | -33.40829494986468 116 | ], 117 | [ 118 | 131.84473074941502, 119 | -33.40641110020154 120 | ] 121 | ] 122 | ] 123 | }, 124 | "links": [ 125 | { 126 | "rel": "self", 127 | "href": "https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a/items/S2A_52HGH_20221007_0_L2A" 128 | }, 129 | { 130 | "rel": "canonical", 131 | "href": "s3://cirrus-es-prod-data/sentinel-2-l2a/52/H/GH/2022/10/S2A_52HGH_20221007_0_L2A/S2A_52HGH_20221007_0_L2A.json", 132 | "type": "application/json" 133 | }, 134 | { 135 | "rel": "license", 136 | "href": "https://sentinel.esa.int/documents/247904/690755/Sentinel_Data_Legal_Notice" 137 | }, 138 | { 139 | "rel": "parent", 140 | "href": "https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a" 141 | }, 142 | { 143 | "rel": "collection", 144 | "href": "https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a" 145 | }, 146 | { 147 | "rel": "root", 148 | "href": "https://earth-search.aws.element84.com/v1/" 149 | } 150 | ], 151 | "assets": { 152 | "aot": { 153 | "href": "s3://sentinel-s2-l2a/tiles/52/H/GH/2022/10/7/0/R20m/AOT.jp2", 154 | "type": "image/jp2", 155 | "title": "Aerosol optical thickness (AOT)", 156 | "proj:shape": [ 157 | 5490, 158 | 5490 159 | ], 160 | "proj:transform": [ 161 | 20, 162 | 0, 163 | 699960, 164 | 0, 165 | -20, 166 | 6300040 167 | ], 168 | "raster:bands": [ 169 | { 170 | "nodata": 0, 171 | "data_type": "uint16", 172 | "bits_per_sample": 15, 173 | "spatial_resolution": 20, 174 | "unit": "none", 175 | "scale": 0.001, 176 | "offset": 0 177 | } 178 | ], 179 | "roles": [ 180 | "data" 181 | ] 182 | }, 183 | "blue": { 184 | "href": "s3://sentinel-s2-l2a/tiles/52/H/GH/2022/10/7/0/R10m/B02.jp2", 185 | "type": "image/jp2", 186 | "title": "Blue (band 2) - 10m", 187 | "eo:bands": [ 188 | { 189 | "name": "blue", 190 | "common_name": "blue", 191 | "description": "Blue (band 2)", 192 | 
"center_wavelength": 0.49, 193 | "full_width_half_max": 0.098 194 | } 195 | ], 196 | "gsd": 10, 197 | "proj:shape": [ 198 | 10980, 199 | 10980 200 | ], 201 | "proj:transform": [ 202 | 10, 203 | 0, 204 | 699960, 205 | 0, 206 | -10, 207 | 6300040 208 | ], 209 | "raster:bands": [ 210 | { 211 | "nodata": 0, 212 | "data_type": "uint16", 213 | "bits_per_sample": 15, 214 | "spatial_resolution": 10, 215 | "unit": "none", 216 | "scale": 0.0001, 217 | "offset": -0.1 218 | } 219 | ], 220 | "roles": [ 221 | "data" 222 | ] 223 | }, 224 | "coastal": { 225 | "href": "s3://sentinel-s2-l2a/tiles/52/H/GH/2022/10/7/0/R60m/B01.jp2", 226 | "type": "image/jp2", 227 | "title": "Coastal aerosol (band 1) - 60m", 228 | "eo:bands": [ 229 | { 230 | "name": "coastal", 231 | "common_name": "coastal", 232 | "description": "Coastal aerosol (band 1)", 233 | "center_wavelength": 0.443, 234 | "full_width_half_max": 0.027 235 | } 236 | ], 237 | "gsd": 60, 238 | "proj:shape": [ 239 | 1830, 240 | 1830 241 | ], 242 | "proj:transform": [ 243 | 60, 244 | 0, 245 | 699960, 246 | 0, 247 | -60, 248 | 6300040 249 | ], 250 | "raster:bands": [ 251 | { 252 | "nodata": 0, 253 | "data_type": "uint16", 254 | "bits_per_sample": 15, 255 | "spatial_resolution": 60, 256 | "unit": "none", 257 | "scale": 0.0001, 258 | "offset": -0.1 259 | } 260 | ], 261 | "roles": [ 262 | "data" 263 | ] 264 | }, 265 | "granule_metadata": { 266 | "href": "https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/52/H/GH/2022/10/7/0/metadata.xml", 267 | "type": "application/xml", 268 | "roles": [ 269 | "metadata" 270 | ] 271 | }, 272 | "green": { 273 | "href": "s3://sentinel-s2-l2a/tiles/52/H/GH/2022/10/7/0/R10m/B03.jp2", 274 | "type": "image/jp2", 275 | "title": "Green (band 3) - 10m", 276 | "eo:bands": [ 277 | { 278 | "name": "green", 279 | "common_name": "green", 280 | "description": "Green (band 3)", 281 | "center_wavelength": 0.56, 282 | "full_width_half_max": 0.045 283 | } 284 | ], 285 | "gsd": 10, 286 | "proj:shape": [ 287 | 10980, 288 | 10980 289 | ], 290 | "proj:transform": [ 291 | 10, 292 | 0, 293 | 699960, 294 | 0, 295 | -10, 296 | 6300040 297 | ], 298 | "raster:bands": [ 299 | { 300 | "nodata": 0, 301 | "data_type": "uint16", 302 | "bits_per_sample": 15, 303 | "spatial_resolution": 10, 304 | "unit": "none", 305 | "scale": 0.0001, 306 | "offset": -0.1 307 | } 308 | ], 309 | "roles": [ 310 | "data" 311 | ] 312 | }, 313 | "nir": { 314 | "href": "s3://sentinel-s2-l2a/tiles/52/H/GH/2022/10/7/0/R10m/B08.jp2", 315 | "type": "image/jp2", 316 | "title": "NIR 1 (band 8) - 10m", 317 | "eo:bands": [ 318 | { 319 | "name": "nir", 320 | "common_name": "nir", 321 | "description": "NIR 1 (band 8)", 322 | "center_wavelength": 0.842, 323 | "full_width_half_max": 0.145 324 | } 325 | ], 326 | "gsd": 10, 327 | "proj:shape": [ 328 | 10980, 329 | 10980 330 | ], 331 | "proj:transform": [ 332 | 10, 333 | 0, 334 | 699960, 335 | 0, 336 | -10, 337 | 6300040 338 | ], 339 | "raster:bands": [ 340 | { 341 | "nodata": 0, 342 | "data_type": "uint16", 343 | "bits_per_sample": 15, 344 | "spatial_resolution": 10, 345 | "unit": "none", 346 | "scale": 0.0001, 347 | "offset": -0.1 348 | } 349 | ], 350 | "roles": [ 351 | "data" 352 | ] 353 | }, 354 | "nir08": { 355 | "href": "s3://sentinel-s2-l2a/tiles/52/H/GH/2022/10/7/0/R20m/B8A.jp2", 356 | "type": "image/jp2", 357 | "title": "NIR 2 (band 8A) - 20m", 358 | "eo:bands": [ 359 | { 360 | "name": "nir08", 361 | "common_name": "nir08", 362 | "description": "NIR 2 (band 8A)", 363 | "center_wavelength": 0.865, 364 | "full_width_half_max": 0.033 365 | } 366 
| ], 367 | "gsd": 20, 368 | "proj:shape": [ 369 | 5490, 370 | 5490 371 | ], 372 | "proj:transform": [ 373 | 20, 374 | 0, 375 | 699960, 376 | 0, 377 | -20, 378 | 6300040 379 | ], 380 | "raster:bands": [ 381 | { 382 | "nodata": 0, 383 | "data_type": "uint16", 384 | "bits_per_sample": 15, 385 | "spatial_resolution": 20, 386 | "unit": "none", 387 | "scale": 0.0001, 388 | "offset": -0.1 389 | } 390 | ], 391 | "roles": [ 392 | "data" 393 | ] 394 | }, 395 | "nir09": { 396 | "href": "s3://sentinel-s2-l2a/tiles/52/H/GH/2022/10/7/0/R60m/B09.jp2", 397 | "type": "image/jp2", 398 | "title": "NIR 3 (band 9) - 60m", 399 | "eo:bands": [ 400 | { 401 | "name": "nir09", 402 | "common_name": "nir09", 403 | "description": "NIR 3 (band 9)", 404 | "center_wavelength": 0.945, 405 | "full_width_half_max": 0.026 406 | } 407 | ], 408 | "gsd": 60, 409 | "proj:shape": [ 410 | 1830, 411 | 1830 412 | ], 413 | "proj:transform": [ 414 | 60, 415 | 0, 416 | 699960, 417 | 0, 418 | -60, 419 | 6300040 420 | ], 421 | "raster:bands": [ 422 | { 423 | "nodata": 0, 424 | "data_type": "uint16", 425 | "bits_per_sample": 15, 426 | "spatial_resolution": 60, 427 | "unit": "none", 428 | "scale": 0.0001, 429 | "offset": -0.1 430 | } 431 | ], 432 | "roles": [ 433 | "data" 434 | ] 435 | }, 436 | "red": { 437 | "href": "s3://sentinel-s2-l2a/tiles/52/H/GH/2022/10/7/0/R10m/B04.jp2", 438 | "type": "image/jp2", 439 | "title": "Red (band 4) - 10m", 440 | "eo:bands": [ 441 | { 442 | "name": "red", 443 | "common_name": "red", 444 | "description": "Red (band 4)", 445 | "center_wavelength": 0.665, 446 | "full_width_half_max": 0.038 447 | } 448 | ], 449 | "gsd": 10, 450 | "proj:shape": [ 451 | 10980, 452 | 10980 453 | ], 454 | "proj:transform": [ 455 | 10, 456 | 0, 457 | 699960, 458 | 0, 459 | -10, 460 | 6300040 461 | ], 462 | "raster:bands": [ 463 | { 464 | "nodata": 0, 465 | "data_type": "uint16", 466 | "bits_per_sample": 15, 467 | "spatial_resolution": 10, 468 | "unit": "none", 469 | "scale": 0.0001, 470 | "offset": -0.1 471 | } 472 | ], 473 | "roles": [ 474 | "data" 475 | ] 476 | }, 477 | "rededge1": { 478 | "href": "s3://sentinel-s2-l2a/tiles/52/H/GH/2022/10/7/0/R20m/B05.jp2", 479 | "type": "image/jp2", 480 | "title": "Red edge 1 (band 5) - 20m", 481 | "eo:bands": [ 482 | { 483 | "name": "rededge1", 484 | "common_name": "rededge", 485 | "description": "Red edge 1 (band 5)", 486 | "center_wavelength": 0.704, 487 | "full_width_half_max": 0.019 488 | } 489 | ], 490 | "gsd": 20, 491 | "proj:shape": [ 492 | 5490, 493 | 5490 494 | ], 495 | "proj:transform": [ 496 | 20, 497 | 0, 498 | 699960, 499 | 0, 500 | -20, 501 | 6300040 502 | ], 503 | "raster:bands": [ 504 | { 505 | "nodata": 0, 506 | "data_type": "uint16", 507 | "bits_per_sample": 15, 508 | "spatial_resolution": 20, 509 | "unit": "none", 510 | "scale": 0.0001, 511 | "offset": -0.1 512 | } 513 | ], 514 | "roles": [ 515 | "data" 516 | ] 517 | }, 518 | "rededge2": { 519 | "href": "s3://sentinel-s2-l2a/tiles/52/H/GH/2022/10/7/0/R20m/B06.jp2", 520 | "type": "image/jp2", 521 | "title": "Red edge 2 (band 6) - 20m", 522 | "eo:bands": [ 523 | { 524 | "name": "rededge2", 525 | "common_name": "rededge", 526 | "description": "Red edge 2 (band 6)", 527 | "center_wavelength": 0.74, 528 | "full_width_half_max": 0.018 529 | } 530 | ], 531 | "gsd": 20, 532 | "proj:shape": [ 533 | 5490, 534 | 5490 535 | ], 536 | "proj:transform": [ 537 | 20, 538 | 0, 539 | 699960, 540 | 0, 541 | -20, 542 | 6300040 543 | ], 544 | "raster:bands": [ 545 | { 546 | "nodata": 0, 547 | "data_type": "uint16", 548 | "bits_per_sample": 15, 
549 | "spatial_resolution": 20, 550 | "unit": "none", 551 | "scale": 0.0001, 552 | "offset": -0.1 553 | } 554 | ], 555 | "roles": [ 556 | "data" 557 | ] 558 | }, 559 | "rededge3": { 560 | "href": "s3://sentinel-s2-l2a/tiles/52/H/GH/2022/10/7/0/R20m/B07.jp2", 561 | "type": "image/jp2", 562 | "title": "Red edge 3 (band 7) - 20m", 563 | "eo:bands": [ 564 | { 565 | "name": "rededge3", 566 | "common_name": "rededge", 567 | "description": "Red edge 3 (band 7)", 568 | "center_wavelength": 0.783, 569 | "full_width_half_max": 0.028 570 | } 571 | ], 572 | "gsd": 20, 573 | "proj:shape": [ 574 | 5490, 575 | 5490 576 | ], 577 | "proj:transform": [ 578 | 20, 579 | 0, 580 | 699960, 581 | 0, 582 | -20, 583 | 6300040 584 | ], 585 | "raster:bands": [ 586 | { 587 | "nodata": 0, 588 | "data_type": "uint16", 589 | "bits_per_sample": 15, 590 | "spatial_resolution": 20, 591 | "unit": "none", 592 | "scale": 0.0001, 593 | "offset": -0.1 594 | } 595 | ], 596 | "roles": [ 597 | "data" 598 | ] 599 | }, 600 | "scl": { 601 | "href": "s3://sentinel-s2-l2a/tiles/52/H/GH/2022/10/7/0/R20m/SCL.jp2", 602 | "type": "image/jp2", 603 | "title": "Scene classification map (SCL)", 604 | "proj:shape": [ 605 | 5490, 606 | 5490 607 | ], 608 | "proj:transform": [ 609 | 20, 610 | 0, 611 | 699960, 612 | 0, 613 | -20, 614 | 6300040 615 | ], 616 | "raster:bands": [ 617 | { 618 | "nodata": 0, 619 | "data_type": "uint8", 620 | "spatial_resolution": 20, 621 | "unit": "none" 622 | } 623 | ], 624 | "roles": [ 625 | "data" 626 | ] 627 | }, 628 | "swir16": { 629 | "href": "s3://sentinel-s2-l2a/tiles/52/H/GH/2022/10/7/0/R20m/B11.jp2", 630 | "type": "image/jp2", 631 | "title": "SWIR 1 (band 11) - 20m", 632 | "eo:bands": [ 633 | { 634 | "name": "swir16", 635 | "common_name": "swir16", 636 | "description": "SWIR 1 (band 11)", 637 | "center_wavelength": 1.61, 638 | "full_width_half_max": 0.143 639 | } 640 | ], 641 | "gsd": 20, 642 | "proj:shape": [ 643 | 5490, 644 | 5490 645 | ], 646 | "proj:transform": [ 647 | 20, 648 | 0, 649 | 699960, 650 | 0, 651 | -20, 652 | 6300040 653 | ], 654 | "raster:bands": [ 655 | { 656 | "nodata": 0, 657 | "data_type": "uint16", 658 | "bits_per_sample": 15, 659 | "spatial_resolution": 20, 660 | "unit": "none", 661 | "scale": 0.0001, 662 | "offset": -0.1 663 | } 664 | ], 665 | "roles": [ 666 | "data" 667 | ] 668 | }, 669 | "swir22": { 670 | "href": "s3://sentinel-s2-l2a/tiles/52/H/GH/2022/10/7/0/R20m/B12.jp2", 671 | "type": "image/jp2", 672 | "title": "SWIR 2 (band 12) - 20m", 673 | "eo:bands": [ 674 | { 675 | "name": "swir22", 676 | "common_name": "swir22", 677 | "description": "SWIR 2 (band 12)", 678 | "center_wavelength": 2.19, 679 | "full_width_half_max": 0.242 680 | } 681 | ], 682 | "gsd": 20, 683 | "proj:shape": [ 684 | 5490, 685 | 5490 686 | ], 687 | "proj:transform": [ 688 | 20, 689 | 0, 690 | 699960, 691 | 0, 692 | -20, 693 | 6300040 694 | ], 695 | "raster:bands": [ 696 | { 697 | "nodata": 0, 698 | "data_type": "uint16", 699 | "bits_per_sample": 15, 700 | "spatial_resolution": 20, 701 | "unit": "none", 702 | "scale": 0.0001, 703 | "offset": -0.1 704 | } 705 | ], 706 | "roles": [ 707 | "data" 708 | ] 709 | }, 710 | "thumbnail": { 711 | "href": "s3://sentinel-s2-l2a/tiles/52/H/GH/2022/10/7/0/preview.jpg", 712 | "type": "image/jpeg", 713 | "title": "Thumbnail image", 714 | "roles": [ 715 | "thumbnail" 716 | ] 717 | }, 718 | "tileinfo_metadata": { 719 | "href": "https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/52/H/GH/2022/10/7/0/tileInfo.json", 720 | "type": "application/json", 721 | "roles": [ 722 | "metadata" 
723 | ] 724 | }, 725 | "visual": { 726 | "href": "s3://sentinel-s2-l2a/tiles/52/H/GH/2022/10/7/0/R10m/TCI.jp2", 727 | "type": "image/jp2", 728 | "title": "True color image", 729 | "eo:bands": [ 730 | { 731 | "name": "red", 732 | "common_name": "red", 733 | "description": "Red (band 4)", 734 | "center_wavelength": 0.665, 735 | "full_width_half_max": 0.038 736 | }, 737 | { 738 | "name": "green", 739 | "common_name": "green", 740 | "description": "Green (band 3)", 741 | "center_wavelength": 0.56, 742 | "full_width_half_max": 0.045 743 | }, 744 | { 745 | "name": "blue", 746 | "common_name": "blue", 747 | "description": "Blue (band 2)", 748 | "center_wavelength": 0.49, 749 | "full_width_half_max": 0.098 750 | } 751 | ], 752 | "proj:shape": [ 753 | 10980, 754 | 10980 755 | ], 756 | "proj:transform": [ 757 | 10, 758 | 0, 759 | 699960, 760 | 0, 761 | -10, 762 | 6300040 763 | ], 764 | "roles": [ 765 | "visual" 766 | ] 767 | }, 768 | "wvp": { 769 | "href": "s3://sentinel-s2-l2a/tiles/52/H/GH/2022/10/7/0/R20m/WVP.jp2", 770 | "type": "image/jp2", 771 | "title": "Water vapour (WVP)", 772 | "proj:shape": [ 773 | 5490, 774 | 5490 775 | ], 776 | "proj:transform": [ 777 | 20, 778 | 0, 779 | 699960, 780 | 0, 781 | -20, 782 | 6300040 783 | ], 784 | "raster:bands": [ 785 | { 786 | "nodata": 0, 787 | "data_type": "uint16", 788 | "bits_per_sample": 15, 789 | "spatial_resolution": 20, 790 | "unit": "cm", 791 | "scale": 0.001, 792 | "offset": 0 793 | } 794 | ], 795 | "roles": [ 796 | "data" 797 | ] 798 | } 799 | }, 800 | "bbox": [ 801 | 131.84473074941502, 802 | -33.49926273351823, 803 | 132.33408301028325, 804 | -33.39432388162573 805 | ], 806 | "stac_extensions": [ 807 | "https://stac-extensions.github.io/mgrs/v1.0.0/schema.json", 808 | "https://stac-extensions.github.io/processing/v1.1.0/schema.json", 809 | "https://stac-extensions.github.io/eo/v1.0.0/schema.json", 810 | "https://stac-extensions.github.io/projection/v1.0.0/schema.json", 811 | "https://stac-extensions.github.io/grid/v1.0.0/schema.json", 812 | "https://stac-extensions.github.io/view/v1.0.0/schema.json" 813 | ], 814 | "collection": "sentinel-2-l2a" 815 | }, 816 | { 817 | "type": "Feature", 818 | "stac_version": "1.0.0", 819 | "id": "S2B_5CNL_20221009_0_L2A", 820 | "properties": { 821 | "platform": "sentinel-2b", 822 | "constellation": "sentinel-2", 823 | "instruments": [ 824 | "msi" 825 | ], 826 | "eo:cloud_cover": 99.999821, 827 | "proj:epsg": 32705, 828 | "mgrs:utm_zone": 5, 829 | "mgrs:latitude_band": "C", 830 | "mgrs:grid_square": "NL", 831 | "grid:code": "MGRS-5CNL", 832 | "view:sun_azimuth": 81.1274543823066, 833 | "view:sun_elevation": 7.973203342632701, 834 | "sentinel2:degraded_msi_data_percentage": 0.0005, 835 | "sentinel2:nodata_pixel_percentage": 83.257097, 836 | "sentinel2:saturated_defective_pixel_percentage": 0, 837 | "sentinel2:dark_features_percentage": 0, 838 | "sentinel2:cloud_shadow_percentage": 0.000119, 839 | "sentinel2:vegetation_percentage": 0, 840 | "sentinel2:not_vegetated_percentage": 0.00004, 841 | "sentinel2:water_percentage": 0, 842 | "sentinel2:unclassified_percentage": 0, 843 | "sentinel2:medium_proba_clouds_percentage": 99.999821, 844 | "sentinel2:high_proba_clouds_percentage": 0, 845 | "sentinel2:thin_cirrus_percentage": 0, 846 | "sentinel2:snow_ice_percentage": 0.00002, 847 | "sentinel2:product_type": "S2MSI2A", 848 | "sentinel2:processing_baseline": "04.00", 849 | "datetime": "2022-10-09T16:27:07.934000Z", 850 | "sentinel2:id": "S2B_OPER_MSI_L2A_TL_2BPS_20221009T203656_A029209_T05CNL", 851 | "created": 
"2022-10-09T22:36:20.098Z", 852 | "updated": "2022-10-09T22:36:20.098Z" 853 | }, 854 | "geometry": { 855 | "type": "Polygon", 856 | "coordinates": [ 857 | [ 858 | [ 859 | -149.54493909269485, 860 | -80.14724598182129 861 | ], 862 | [ 863 | -147.2621821830198, 864 | -80.11624825958002 865 | ], 866 | [ 867 | -146.7256610337989, 868 | -80.96598744120935 869 | ], 870 | [ 871 | -147.04231723855636, 872 | -80.87294940282473 873 | ], 874 | [ 875 | -147.36646324007836, 876 | -80.76728877872992 877 | ], 878 | [ 879 | -147.56897947194392, 880 | -80.70882000936726 881 | ], 882 | [ 883 | -149.1655414351392, 884 | -80.24349905832875 885 | ], 886 | [ 887 | -149.32599115526003, 888 | -80.19742231629866 889 | ], 890 | [ 891 | -149.47223704681943, 892 | -80.16752952399128 893 | ], 894 | [ 895 | -149.54493909269485, 896 | -80.14724598182129 897 | ] 898 | ] 899 | ] 900 | }, 901 | "links": [ 902 | { 903 | "rel": "self", 904 | "href": "https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a/items/S2B_5CNL_20221009_0_L2A" 905 | }, 906 | { 907 | "rel": "canonical", 908 | "href": "s3://cirrus-es-prod-data/sentinel-2-l2a/5/C/NL/2022/10/S2B_5CNL_20221009_0_L2A/S2B_5CNL_20221009_0_L2A.json", 909 | "type": "application/json" 910 | }, 911 | { 912 | "rel": "license", 913 | "href": "https://sentinel.esa.int/documents/247904/690755/Sentinel_Data_Legal_Notice" 914 | }, 915 | { 916 | "rel": "parent", 917 | "href": "https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a" 918 | }, 919 | { 920 | "rel": "collection", 921 | "href": "https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a" 922 | }, 923 | { 924 | "rel": "root", 925 | "href": "https://earth-search.aws.element84.com/v1/" 926 | } 927 | ], 928 | "assets": { 929 | "aot": { 930 | "href": "s3://sentinel-s2-l2a/tiles/5/C/NL/2022/10/9/0/R20m/AOT.jp2", 931 | "type": "image/jp2", 932 | "title": "Aerosol optical thickness (AOT)", 933 | "proj:shape": [ 934 | 5490, 935 | 5490 936 | ], 937 | "proj:transform": [ 938 | 20, 939 | 0, 940 | 499980, 941 | 0, 942 | -20, 943 | 1100020 944 | ], 945 | "raster:bands": [ 946 | { 947 | "nodata": 0, 948 | "data_type": "uint16", 949 | "bits_per_sample": 15, 950 | "spatial_resolution": 20, 951 | "unit": "none", 952 | "scale": 0.001, 953 | "offset": 0 954 | } 955 | ], 956 | "roles": [ 957 | "data" 958 | ] 959 | }, 960 | "blue": { 961 | "href": "s3://sentinel-s2-l2a/tiles/5/C/NL/2022/10/9/0/R10m/B02.jp2", 962 | "type": "image/jp2", 963 | "title": "Blue (band 2) - 10m", 964 | "eo:bands": [ 965 | { 966 | "name": "blue", 967 | "common_name": "blue", 968 | "description": "Blue (band 2)", 969 | "center_wavelength": 0.49, 970 | "full_width_half_max": 0.098 971 | } 972 | ], 973 | "gsd": 10, 974 | "proj:shape": [ 975 | 10980, 976 | 10980 977 | ], 978 | "proj:transform": [ 979 | 10, 980 | 0, 981 | 499980, 982 | 0, 983 | -10, 984 | 1100020 985 | ], 986 | "raster:bands": [ 987 | { 988 | "nodata": 0, 989 | "data_type": "uint16", 990 | "bits_per_sample": 15, 991 | "spatial_resolution": 10, 992 | "unit": "none", 993 | "scale": 0.0001, 994 | "offset": -0.1 995 | } 996 | ], 997 | "roles": [ 998 | "data" 999 | ] 1000 | }, 1001 | "coastal": { 1002 | "href": "s3://sentinel-s2-l2a/tiles/5/C/NL/2022/10/9/0/R60m/B01.jp2", 1003 | "type": "image/jp2", 1004 | "title": "Coastal aerosol (band 1) - 60m", 1005 | "eo:bands": [ 1006 | { 1007 | "name": "coastal", 1008 | "common_name": "coastal", 1009 | "description": "Coastal aerosol (band 1)", 1010 | "center_wavelength": 0.443, 1011 | "full_width_half_max": 0.027 1012 | } 1013 | ], 1014 
| "gsd": 60, 1015 | "proj:shape": [ 1016 | 1830, 1017 | 1830 1018 | ], 1019 | "proj:transform": [ 1020 | 60, 1021 | 0, 1022 | 499980, 1023 | 0, 1024 | -60, 1025 | 1100020 1026 | ], 1027 | "raster:bands": [ 1028 | { 1029 | "nodata": 0, 1030 | "data_type": "uint16", 1031 | "bits_per_sample": 15, 1032 | "spatial_resolution": 60, 1033 | "unit": "none", 1034 | "scale": 0.0001, 1035 | "offset": -0.1 1036 | } 1037 | ], 1038 | "roles": [ 1039 | "data" 1040 | ] 1041 | }, 1042 | "granule_metadata": { 1043 | "href": "https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/5/C/NL/2022/10/9/0/metadata.xml", 1044 | "type": "application/xml", 1045 | "roles": [ 1046 | "metadata" 1047 | ] 1048 | }, 1049 | "green": { 1050 | "href": "s3://sentinel-s2-l2a/tiles/5/C/NL/2022/10/9/0/R10m/B03.jp2", 1051 | "type": "image/jp2", 1052 | "title": "Green (band 3) - 10m", 1053 | "eo:bands": [ 1054 | { 1055 | "name": "green", 1056 | "common_name": "green", 1057 | "description": "Green (band 3)", 1058 | "center_wavelength": 0.56, 1059 | "full_width_half_max": 0.045 1060 | } 1061 | ], 1062 | "gsd": 10, 1063 | "proj:shape": [ 1064 | 10980, 1065 | 10980 1066 | ], 1067 | "proj:transform": [ 1068 | 10, 1069 | 0, 1070 | 499980, 1071 | 0, 1072 | -10, 1073 | 1100020 1074 | ], 1075 | "raster:bands": [ 1076 | { 1077 | "nodata": 0, 1078 | "data_type": "uint16", 1079 | "bits_per_sample": 15, 1080 | "spatial_resolution": 10, 1081 | "unit": "none", 1082 | "scale": 0.0001, 1083 | "offset": -0.1 1084 | } 1085 | ], 1086 | "roles": [ 1087 | "data" 1088 | ] 1089 | }, 1090 | "nir": { 1091 | "href": "s3://sentinel-s2-l2a/tiles/5/C/NL/2022/10/9/0/R10m/B08.jp2", 1092 | "type": "image/jp2", 1093 | "title": "NIR 1 (band 8) - 10m", 1094 | "eo:bands": [ 1095 | { 1096 | "name": "nir", 1097 | "common_name": "nir", 1098 | "description": "NIR 1 (band 8)", 1099 | "center_wavelength": 0.842, 1100 | "full_width_half_max": 0.145 1101 | } 1102 | ], 1103 | "gsd": 10, 1104 | "proj:shape": [ 1105 | 10980, 1106 | 10980 1107 | ], 1108 | "proj:transform": [ 1109 | 10, 1110 | 0, 1111 | 499980, 1112 | 0, 1113 | -10, 1114 | 1100020 1115 | ], 1116 | "raster:bands": [ 1117 | { 1118 | "nodata": 0, 1119 | "data_type": "uint16", 1120 | "bits_per_sample": 15, 1121 | "spatial_resolution": 10, 1122 | "unit": "none", 1123 | "scale": 0.0001, 1124 | "offset": -0.1 1125 | } 1126 | ], 1127 | "roles": [ 1128 | "data" 1129 | ] 1130 | }, 1131 | "nir08": { 1132 | "href": "s3://sentinel-s2-l2a/tiles/5/C/NL/2022/10/9/0/R20m/B8A.jp2", 1133 | "type": "image/jp2", 1134 | "title": "NIR 2 (band 8A) - 20m", 1135 | "eo:bands": [ 1136 | { 1137 | "name": "nir08", 1138 | "common_name": "nir08", 1139 | "description": "NIR 2 (band 8A)", 1140 | "center_wavelength": 0.865, 1141 | "full_width_half_max": 0.033 1142 | } 1143 | ], 1144 | "gsd": 20, 1145 | "proj:shape": [ 1146 | 5490, 1147 | 5490 1148 | ], 1149 | "proj:transform": [ 1150 | 20, 1151 | 0, 1152 | 499980, 1153 | 0, 1154 | -20, 1155 | 1100020 1156 | ], 1157 | "raster:bands": [ 1158 | { 1159 | "nodata": 0, 1160 | "data_type": "uint16", 1161 | "bits_per_sample": 15, 1162 | "spatial_resolution": 20, 1163 | "unit": "none", 1164 | "scale": 0.0001, 1165 | "offset": -0.1 1166 | } 1167 | ], 1168 | "roles": [ 1169 | "data" 1170 | ] 1171 | }, 1172 | "nir09": { 1173 | "href": "s3://sentinel-s2-l2a/tiles/5/C/NL/2022/10/9/0/R60m/B09.jp2", 1174 | "type": "image/jp2", 1175 | "title": "NIR 3 (band 9) - 60m", 1176 | "eo:bands": [ 1177 | { 1178 | "name": "nir09", 1179 | "common_name": "nir09", 1180 | "description": "NIR 3 (band 9)", 1181 | "center_wavelength": 
0.945,
            "full_width_half_max": 0.026
          }
        ],
        "gsd": 60,
        "proj:shape": [1830, 1830],
        "proj:transform": [60, 0, 499980, 0, -60, 1100020],
        "raster:bands": [
          {"nodata": 0, "data_type": "uint16", "bits_per_sample": 15, "spatial_resolution": 60, "unit": "none", "scale": 0.0001, "offset": -0.1}
        ],
        "roles": ["data"]
      },
      "red": {
        "href": "s3://sentinel-s2-l2a/tiles/5/C/NL/2022/10/9/0/R10m/B04.jp2",
        "type": "image/jp2",
        "title": "Red (band 4) - 10m",
        "eo:bands": [
          {"name": "red", "common_name": "red", "description": "Red (band 4)", "center_wavelength": 0.665, "full_width_half_max": 0.038}
        ],
        "gsd": 10,
        "proj:shape": [10980, 10980],
        "proj:transform": [10, 0, 499980, 0, -10, 1100020],
        "raster:bands": [
          {"nodata": 0, "data_type": "uint16", "bits_per_sample": 15, "spatial_resolution": 10, "unit": "none", "scale": 0.0001, "offset": -0.1}
        ],
        "roles": ["data"]
      },
      "rededge1": {
        "href": "s3://sentinel-s2-l2a/tiles/5/C/NL/2022/10/9/0/R20m/B05.jp2",
        "type": "image/jp2",
        "title": "Red edge 1 (band 5) - 20m",
        "eo:bands": [
          {"name": "rededge1", "common_name": "rededge", "description": "Red edge 1 (band 5)", "center_wavelength": 0.704, "full_width_half_max": 0.019}
        ],
        "gsd": 20,
        "proj:shape": [5490, 5490],
        "proj:transform": [20, 0, 499980, 0, -20, 1100020],
        "raster:bands": [
          {"nodata": 0, "data_type": "uint16", "bits_per_sample": 15, "spatial_resolution": 20, "unit": "none", "scale": 0.0001, "offset": -0.1}
        ],
        "roles": ["data"]
      },
      "rededge2": {
        "href": "s3://sentinel-s2-l2a/tiles/5/C/NL/2022/10/9/0/R20m/B06.jp2",
        "type": "image/jp2",
        "title": "Red edge 2 (band 6) - 20m",
        "eo:bands": [
          {"name": "rededge2", "common_name": "rededge", "description": "Red edge 2 (band 6)", "center_wavelength": 0.74, "full_width_half_max": 0.018}
        ],
        "gsd": 20,
        "proj:shape": [5490, 5490],
        "proj:transform": [20, 0, 499980, 0, -20, 1100020],
        "raster:bands": [
          {"nodata": 0, "data_type": "uint16", "bits_per_sample": 15, "spatial_resolution": 20, "unit": "none", "scale": 0.0001, "offset": -0.1}
        ],
        "roles": ["data"]
      },
      "rededge3": {
        "href": "s3://sentinel-s2-l2a/tiles/5/C/NL/2022/10/9/0/R20m/B07.jp2",
        "type": "image/jp2",
        "title": "Red edge 3 (band 7) - 20m",
        "eo:bands": [
          {"name": "rededge3", "common_name": "rededge", "description": "Red edge 3 (band 7)", "center_wavelength": 0.783, "full_width_half_max": 0.028}
        ],
        "gsd": 20,
        "proj:shape": [5490, 5490],
        "proj:transform": [20, 0, 499980, 0, -20, 1100020],
        "raster:bands": [
          {"nodata": 0, "data_type": "uint16", "bits_per_sample": 15, "spatial_resolution": 20, "unit": "none", "scale": 0.0001, "offset": -0.1}
        ],
        "roles": ["data"]
      },
      "scl": {
        "href": "s3://sentinel-s2-l2a/tiles/5/C/NL/2022/10/9/0/R20m/SCL.jp2",
        "type": "image/jp2",
        "title": "Scene classification map (SCL)",
        "proj:shape": [5490, 5490],
        "proj:transform": [20, 0, 499980, 0, -20, 1100020],
        "raster:bands": [
          {"nodata": 0, "data_type": "uint8", "spatial_resolution": 20, "unit": "none"}
        ],
        "roles": ["data"]
      },
      "swir16": {
        "href": "s3://sentinel-s2-l2a/tiles/5/C/NL/2022/10/9/0/R20m/B11.jp2",
        "type": "image/jp2",
        "title": "SWIR 1 (band 11) - 20m",
        "eo:bands": [
          {"name": "swir16", "common_name": "swir16", "description": "SWIR 1 (band 11)", "center_wavelength": 1.61, "full_width_half_max": 0.143}
        ],
        "gsd": 20,
        "proj:shape": [5490, 5490],
        "proj:transform": [20, 0, 499980, 0, -20, 1100020],
        "raster:bands": [
          {"nodata": 0, "data_type": "uint16", "bits_per_sample": 15, "spatial_resolution": 20, "unit": "none", "scale": 0.0001, "offset": -0.1}
        ],
        "roles": ["data"]
      },
      "swir22": {
        "href": "s3://sentinel-s2-l2a/tiles/5/C/NL/2022/10/9/0/R20m/B12.jp2",
        "type": "image/jp2",
        "title": "SWIR 2 (band 12) - 20m",
        "eo:bands": [
          {"name": "swir22", "common_name": "swir22", "description": "SWIR 2 (band 12)", "center_wavelength": 2.19, "full_width_half_max": 0.242}
        ],
        "gsd": 20,
        "proj:shape": [5490, 5490],
        "proj:transform": [20, 0, 499980, 0, -20, 1100020],
        "raster:bands": [
          {"nodata": 0, "data_type": "uint16", "bits_per_sample": 15, "spatial_resolution": 20, "unit": "none", "scale": 0.0001, "offset": -0.1}
        ],
        "roles": ["data"]
      },
      "thumbnail": {
        "href": "s3://sentinel-s2-l2a/tiles/5/C/NL/2022/10/9/0/preview.jpg",
        "type": "image/jpeg",
        "title": "Thumbnail image",
        "roles": ["thumbnail"]
      },
      "tileinfo_metadata": {
        "href": "https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/5/C/NL/2022/10/9/0/tileInfo.json",
        "type": "application/json",
        "roles": ["metadata"]
      },
      "visual": {
        "href": "s3://sentinel-s2-l2a/tiles/5/C/NL/2022/10/9/0/R10m/TCI.jp2",
        "type": "image/jp2",
        "title": "True color image",
        "eo:bands": [
          {"name": "red", "common_name": "red", "description": "Red (band 4)", "center_wavelength": 0.665, "full_width_half_max": 0.038},
          {"name": "green", "common_name": "green", "description": "Green (band 3)", "center_wavelength": 0.56, "full_width_half_max": 0.045},
          {"name": "blue", "common_name": "blue", "description": "Blue (band 2)", "center_wavelength": 0.49, "full_width_half_max": 0.098}
        ],
        "proj:shape": [10980, 10980],
        "proj:transform": [10, 0, 499980, 0, -10, 1100020],
        "roles": ["visual"]
      },
      "wvp": {
        "href": "s3://sentinel-s2-l2a/tiles/5/C/NL/2022/10/9/0/R20m/WVP.jp2",
        "type": "image/jp2",
        "title": "Water vapour (WVP)",
        "proj:shape": [5490, 5490],
        "proj:transform": [20, 0, 499980, 0, -20, 1100020],
        "raster:bands": [
          {"nodata": 0, "data_type": "uint16", "bits_per_sample": 15, "spatial_resolution": 20, "unit": "cm", "scale": 0.001, "offset": 0}
        ],
        "roles": ["data"]
      }
    },
    "bbox": [-149.54493909269485, -80.96598744120935, -146.7256610337989, -80.11624825958002],
    "stac_extensions": [
      "https://stac-extensions.github.io/eo/v1.0.0/schema.json",
      "https://stac-extensions.github.io/projection/v1.0.0/schema.json",
      "https://stac-extensions.github.io/mgrs/v1.0.0/schema.json",
      "https://stac-extensions.github.io/view/v1.0.0/schema.json",
      "https://stac-extensions.github.io/grid/v1.0.0/schema.json",
      "https://stac-extensions.github.io/processing/v1.1.0/schema.json"
    ],
    "collection": "sentinel-2-l2a"
  }
]
}
--------------------------------------------------------------------------------
/tests/tasks.py:
--------------------------------------------------------------------------------
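# Minimal Task subclasses used as fixtures throughout the test suite:
# NothingTask passes its input items through unchanged, FailValidateTask
# always raises FailedValidation, and DerivedItemTask emits an item derived
# from its first input item.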
"description": "Green (band 3)", 1518 | "center_wavelength": 0.56, 1519 | "full_width_half_max": 0.045 1520 | }, 1521 | { 1522 | "name": "blue", 1523 | "common_name": "blue", 1524 | "description": "Blue (band 2)", 1525 | "center_wavelength": 0.49, 1526 | "full_width_half_max": 0.098 1527 | } 1528 | ], 1529 | "proj:shape": [ 1530 | 10980, 1531 | 10980 1532 | ], 1533 | "proj:transform": [ 1534 | 10, 1535 | 0, 1536 | 499980, 1537 | 0, 1538 | -10, 1539 | 1100020 1540 | ], 1541 | "roles": [ 1542 | "visual" 1543 | ] 1544 | }, 1545 | "wvp": { 1546 | "href": "s3://sentinel-s2-l2a/tiles/5/C/NL/2022/10/9/0/R20m/WVP.jp2", 1547 | "type": "image/jp2", 1548 | "title": "Water vapour (WVP)", 1549 | "proj:shape": [ 1550 | 5490, 1551 | 5490 1552 | ], 1553 | "proj:transform": [ 1554 | 20, 1555 | 0, 1556 | 499980, 1557 | 0, 1558 | -20, 1559 | 1100020 1560 | ], 1561 | "raster:bands": [ 1562 | { 1563 | "nodata": 0, 1564 | "data_type": "uint16", 1565 | "bits_per_sample": 15, 1566 | "spatial_resolution": 20, 1567 | "unit": "cm", 1568 | "scale": 0.001, 1569 | "offset": 0 1570 | } 1571 | ], 1572 | "roles": [ 1573 | "data" 1574 | ] 1575 | } 1576 | }, 1577 | "bbox": [ 1578 | -149.54493909269485, 1579 | -80.96598744120935, 1580 | -146.7256610337989, 1581 | -80.11624825958002 1582 | ], 1583 | "stac_extensions": [ 1584 | "https://stac-extensions.github.io/eo/v1.0.0/schema.json", 1585 | "https://stac-extensions.github.io/projection/v1.0.0/schema.json", 1586 | "https://stac-extensions.github.io/mgrs/v1.0.0/schema.json", 1587 | "https://stac-extensions.github.io/view/v1.0.0/schema.json", 1588 | "https://stac-extensions.github.io/grid/v1.0.0/schema.json", 1589 | "https://stac-extensions.github.io/processing/v1.1.0/schema.json" 1590 | ], 1591 | "collection": "sentinel-2-l2a" 1592 | } 1593 | ] 1594 | } 1595 | -------------------------------------------------------------------------------- /tests/tasks.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from stactask import Task 4 | from stactask.exceptions import FailedValidation 5 | 6 | 7 | class NothingTask(Task): 8 | name = "nothing-task" 9 | description = "this task does nothing" 10 | 11 | def process(self, **kwargs: Any) -> list[dict[str, Any]]: 12 | return self.items_as_dicts 13 | 14 | 15 | class FailValidateTask(Task): 16 | name = "failvalidation-task" 17 | description = "this task always fails validation" 18 | 19 | def validate(self) -> bool: 20 | if self._payload: 21 | raise FailedValidation("Extra context about what went wrong") 22 | return True 23 | 24 | def process(self, **kwargs: Any) -> list[dict[str, Any]]: 25 | return self.items_as_dicts 26 | 27 | 28 | class DerivedItemTask(Task): 29 | name = "derived-item-task" 30 | description = "this task creates a derived item" 31 | 32 | def process(self, **kwargs: Any) -> list[dict[str, Any]]: 33 | assert kwargs["parameter"] == "value" 34 | return [self.create_item_from_item(self.items_as_dicts[0])] 35 | -------------------------------------------------------------------------------- /tests/test_task.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import json 3 | from pathlib import Path 4 | from typing import Any, Optional 5 | 6 | import boto3 7 | import pytest 8 | from moto import mock_aws 9 | from pystac import Asset 10 | 11 | from stactask.exceptions import FailedValidation 12 | from stactask.task import Task 13 | 14 | from .tasks import DerivedItemTask, FailValidateTask, NothingTask 
@pytest.fixture
def payload() -> dict[str, Any]:
    filename = testpath / "fixtures" / "sentinel2-l2a-j2k-payload.json"
    with open(filename) as f:
        payload = json.loads(f.read())
    assert isinstance(payload, dict)
    return payload


@pytest.fixture
def nothing_task(payload: dict[str, Any]) -> Task:
    return NothingTask(payload)


@pytest.fixture
def derived_item_task(payload: dict[str, Any]) -> Task:
    return DerivedItemTask(payload)


def test_task_init(nothing_task: Task) -> None:
    assert len(nothing_task._payload["features"]) == 2
    assert len(nothing_task.items) == 2
    assert nothing_task.logger.name == nothing_task.name
    assert nothing_task._save_workdir is False


def test_failed_validation(payload: dict[str, Any]) -> None:
    with pytest.raises(FailedValidation, match="Extra context"):
        FailValidateTask(payload)


def test_deprecated_payload_dict(nothing_task: Task) -> None:
    nothing_task._payload["process"] = nothing_task._payload["process"][0]
    with pytest.warns(DeprecationWarning):
        nothing_task.process_definition
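

# The next three tests exercise parameter resolution. Roughly (a sketch; the
# real process definition lives in the fixture):
#
#   payload["process"][0] = {
#       "tasks": {"nothing-task": {"do_nothing": True}},
#       "workflow_options": {"workflow_option": "..."},
#   }
#
# workflow_options are merged into the task's own options, and the task-level
# options win on conflicts.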
def test_workflow_options_append_task_options(nothing_task: Task) -> None:
    nothing_task._payload["process"][0]["workflow_options"] = {
        "workflow_option": "workflow_option_value"
    }
    parameters = nothing_task.parameters
    assert parameters == {
        "do_nothing": True,
        "workflow_option": "workflow_option_value",
    }


def test_workflow_options_populate_when_no_task_options(nothing_task: Task) -> None:
    nothing_task._payload["process"][0]["tasks"].pop("nothing-task")
    nothing_task._payload["process"][0]["workflow_options"] = {
        "workflow_option": "workflow_option_value"
    }
    parameters = nothing_task.parameters
    assert parameters == {
        "workflow_option": "workflow_option_value",
    }


def test_task_options_supersede_workflow_options(nothing_task: Task) -> None:
    nothing_task._payload["process"][0]["workflow_options"] = {
        "do_nothing": False,
        "workflow_option": "workflow_option_value",
    }
    parameters = nothing_task.parameters
    assert parameters == {
        "do_nothing": True,
        "workflow_option": "workflow_option_value",
    }


def test_edit_items(nothing_task: Task) -> None:
    nothing_task.process_definition["workflow"] = "test-task-workflow"
    assert nothing_task._payload["process"][0]["workflow"] == "test-task-workflow"


def test_edit_items2(nothing_task: Task) -> None:
    assert nothing_task._payload["features"][0]["type"] == "Feature"


@pytest.mark.parametrize("save_workdir", [False, True, None])
def test_tmp_workdir(payload: dict[str, Any], save_workdir: Optional[bool]) -> None:
    t = NothingTask(payload, save_workdir=save_workdir)
    expected = save_workdir if save_workdir is not None else False
    assert t._save_workdir is expected
    workdir = t._workdir
    assert workdir.parts[-1].startswith("tmp")
    assert workdir.is_absolute() is True
    assert workdir.is_dir() is True
    t.cleanup_workdir()
    assert workdir.exists() is expected


@pytest.mark.parametrize("save_workdir", [False, True, None])
def test_workdir(
    payload: dict[str, Any],
    tmp_path: Path,
    save_workdir: Optional[bool],
) -> None:
    t = NothingTask(payload, workdir=tmp_path / "test_task", save_workdir=save_workdir)
    expected = save_workdir if save_workdir is not None else True
    assert t._save_workdir is expected
    workdir = t._workdir
    assert workdir.parts[-1] == "test_task"
    assert workdir.is_absolute() is True
    assert workdir.is_dir() is True
    t.cleanup_workdir()
    assert workdir.exists() is expected


def test_parameters(payload: dict[str, Any]) -> None:
    nothing_task = NothingTask(payload)
    assert nothing_task.process_definition["workflow"] == "cog-archive"
    assert (
        nothing_task.upload_options["path_template"]
        == payload["process"][0]["upload_options"]["path_template"]
    )


def test_process(nothing_task: Task) -> None:
    processed_items = nothing_task.process()
    assert processed_items[0]["type"] == "Feature"


def test_post_process(payload: dict[str, Any]) -> None:
    class PostProcessTask(NothingTask):
        name = "post-processing-test"
        version = "42"

        def post_process_item(self, item: dict[str, Any]) -> dict[str, Any]:
            item["properties"]["foo"] = "bar"
            item["stac_extensions"].insert(0, "zzz")
            return super().post_process_item(item)

    payload_out = PostProcessTask.handler(payload)
    for item in payload_out["features"]:
        assert item["properties"]["foo"] == "bar"
        stac_extensions = item["stac_extensions"]
        assert item["stac_extensions"] == sorted(stac_extensions)


def test_derived_item(derived_item_task: Task) -> None:
    items = derived_item_task.process(**derived_item_task.parameters)
    links = [lk for lk in items[0]["links"] if lk["rel"] == "derived_from"]
    assert len(links) == 1
    self_link = next(lk for lk in items[0]["links"] if lk["rel"] == "self")
    assert links[0]["href"] == self_link["href"]


def test_task_handler(payload: dict[str, Any]) -> None:
    self_link = next(
        lk for lk in payload["features"][0]["links"] if lk["rel"] == "self"
    )
    output_items = DerivedItemTask.handler(payload)
    derived_link = next(
        lk for lk in output_items["features"][0]["links"] if lk["rel"] == "derived_from"
    )
    assert derived_link["href"] == self_link["href"]


def test_parse_no_args() -> None:
    with pytest.raises(SystemExit):
        NothingTask.parse_args([])


def test_parse_args() -> None:
    args = NothingTask.parse_args("run input --save-workdir".split())
    assert args["command"] == "run"
    assert args["logging"] == "INFO"
    assert args["input"] == "input"
    assert args["save_workdir"] is True
    assert args["upload"] is True
    assert args["validate"] is True


def test_parse_args_deprecated_skip() -> None:
    args = NothingTask.parse_args("run input --skip-upload --skip-validation".split())
    assert args["upload"] is False
    assert args["validate"] is False


def test_parse_args_no_upload_and_no_validation() -> None:
    args = NothingTask.parse_args("run input --no-upload --no-validate".split())
    assert args["upload"] is False
    assert args["validate"] is False


def test_parse_args_no_upload_and_validation() -> None:
    args = NothingTask.parse_args("run input --no-upload --validate".split())
    assert args["upload"] is False
    assert args["validate"] is True


def test_parse_args_upload_and_no_validation() -> None:
    args = NothingTask.parse_args("run input --upload --no-validate".split())
    assert args["upload"] is True
    assert args["validate"] is False


def test_parse_args_upload_and_validation() -> None:
    args = NothingTask.parse_args("run input --upload --validate".split())
    assert args["upload"] is True
    assert args["validate"] is True


def test_collection_mapping(nothing_task: Task) -> None:
    assert nothing_task.collection_mapping == {
        "sentinel-2-l2a": "$[?(@.id =~ 'S2[AB].*')]"
    }
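

# moto's mock_aws decorator intercepts boto3 calls, so the upload test below
# runs against an in-memory S3 bucket rather than real AWS.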
@mock_aws  # type: ignore
def test_s3_upload(nothing_task: Task) -> None:
    # start S3 mocks
    s3_client = boto3.client("s3")
    s3_client.create_bucket(
        Bucket="sentinel-cogs",
        CreateBucketConfiguration={
            "LocationConstraint": "us-west-2",
        },
    )
    # end S3 mocks

    item = nothing_task.items.items[0]
    key1_path = nothing_task._workdir / "foo.txt"
    key1_path.write_text("some text")
    asset = Asset(href=str(key1_path))
    item.add_asset("key1", asset)
    item_after_upload = nothing_task.upload_local_item_assets_to_s3(item)

    assert (
        item_after_upload.assets["key1"].href
        == "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-2-l2a/52/H/GH/2022/10/S2A_52HGH_20221007_0_L2A/foo.txt"
    )


if __name__ == "__main__":
    output = NothingTask.cli()
--------------------------------------------------------------------------------
/tests/test_task_download.py:
--------------------------------------------------------------------------------
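# NOTE: most of these tests download the (small) tileInfo.json asset over
# HTTPS, so they need network access; the large-asset test is opt-in via the
# s3_requester_pays marker because it reads from a requester-pays S3 bucket.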
import json
import os
from pathlib import Path
from typing import Any

import pytest
import stac_asset

from stactask.config import DownloadConfig

from .tasks import NothingTask


@pytest.fixture
def item_collection() -> dict[str, Any]:
    name = "sentinel2-l2a-j2k-payload"
    filename = Path(__file__).parent / "fixtures" / f"{name}.json"
    with open(filename) as f:
        items = json.loads(f.read())
    assert isinstance(items, dict)
    return items


def test_download_nosuch_asset(tmp_path: Path, item_collection: dict[str, Any]) -> None:
    t = NothingTask(
        item_collection,
        workdir=tmp_path / "test-task-download-nosuch-asset",
        save_workdir=True,
    )
    item = t.download_item_assets(
        t.items[0], config=DownloadConfig(include=["nosuch_asset"])
    )

    # new item has same assets hrefs as old item
    assert [x.href for x in item.assets.values()] == [
        x.href for x in t.items[0].assets.values()
    ]


def test_download_asset_dont_keep_existing(
    tmp_path: Path, item_collection: dict[str, Any]
) -> None:
    t = NothingTask(
        item_collection,
        workdir=tmp_path / "test-task-download-nosuch-asset",
        save_workdir=True,
    )
    item = t.download_item_assets(
        t.items[0],
        config=DownloadConfig(include=["nosuch_asset"]),
        keep_non_downloaded=False,
    )

    # new item has no assets
    assert item.assets == {}


# @vcr.use_cassette(str(cassettepath / 'download_assets'))
def test_download_item_asset(tmp_path: Path, item_collection: dict[str, Any]) -> None:
    t = NothingTask(item_collection, workdir=tmp_path / "test-task-download-item-asset")
    item = t.download_item_assets(
        t.items[0], config=DownloadConfig(include=["tileinfo_metadata"])
    )
    assert Path(item.assets["tileinfo_metadata"].get_absolute_href()).is_file()


def test_download_keep_original_filenames(
    tmp_path: Path, item_collection: dict[str, Any]
) -> None:
    t = NothingTask(
        item_collection,
        workdir=tmp_path / "test-task-download-item-asset",
    )
    item = t.download_item_assets(
        t.items[0],
        config=DownloadConfig(
            include=["tileinfo_metadata"],
            file_name_strategy=stac_asset.FileNameStrategy.FILE_NAME,
        ),
    ).to_dict()
    fname = item["assets"]["tileinfo_metadata"]["href"]
    filename = Path(fname)
    assert filename.name == "tileInfo.json"


def test_download_item_asset_local(
    tmp_path: Path, item_collection: dict[str, Any]
) -> None:
    t = NothingTask(item_collection, workdir=tmp_path / "test-task-download-item-asset")
    item = t.download_item_assets(
        t.items[0], config=DownloadConfig(include=["tileinfo_metadata"])
    )

    assert (
        Path(os.path.dirname(item.self_href)) / item.assets["tileinfo_metadata"].href
    ).is_file()

    # Downloaded to local, as in prev test.
    # With the asset hrefs updated by the prev download, we "download" again to subdir
    item = t.download_item_assets(
        item=item,
        config=DownloadConfig(include=["tileinfo_metadata"]),
        path_template="again/${collection}/${id}",
    )
    assert "again" in item.self_href
    href = item.assets["tileinfo_metadata"].get_absolute_href()
    assert "again" in href
    assert Path(href).is_file()


# @vcr.use_cassette(str(cassettepath / 'download_assets'))
def test_download_item_assets(tmp_path: Path, item_collection: dict[str, Any]) -> None:
    t = NothingTask(
        item_collection,
        workdir=tmp_path / "test-task-download-item-assets",
        save_workdir=True,
    )
    item = t.download_item_assets(
        t.items[0],
        config=DownloadConfig(include=["tileinfo_metadata", "granule_metadata"]),
    )

    assert Path(item.assets["tileinfo_metadata"].get_absolute_href()).is_file()
    assert Path(item.assets["granule_metadata"].get_absolute_href()).is_file()


def test_download_items_assets(tmp_path: Path, item_collection: dict[str, Any]) -> None:
    asset_key = "tileinfo_metadata"
    t = NothingTask(
        item_collection,
        workdir=tmp_path / "test-task-download-items-assets",
        save_workdir=True,
    )
    items = t.download_items_assets(t.items, config=DownloadConfig(include=[asset_key]))

    assert len(items) == 2
    for item in items:
        assert Path(item.assets[asset_key].get_absolute_href()).is_file()


# @vcr.use_cassette(str(cassettepath / 'download_assets'))
@pytest.mark.s3_requester_pays
def test_download_large_asset(tmp_path: Path, item_collection: dict[str, Any]) -> None:
    t = NothingTask(
        item_collection,
        workdir=tmp_path / "test-task-download-assets",
        save_workdir=True,
    )
    item = t.download_item_assets(
        t.items[0], config=DownloadConfig(s3_requester_pays=True, include=["red"])
    )

    assert Path(item.assets["red"].get_absolute_href()).is_file()
--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
from stactask.utils import find_collection, stac_jsonpath_match
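
# stac_jsonpath_match reports whether a single JSONPath filter expression
# matches an item dict; find_collection takes a {collection_name: expression}
# mapping and returns the first key whose expression matches the item, e.g.
# find_collection({"a": "$[?(@.id == '1')]"}, {"id": "1"}) == "a".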


def test_stac_jsonpath_match() -> None:
    assert stac_jsonpath_match({"id": "1"}, "$[?(@.id =~ '.*')]")
    assert stac_jsonpath_match({"id": "1"}, "$[?(@.id == '1')]")
    assert not stac_jsonpath_match(
        {"properties": {"s2:processing_baseline": "04.00"}},
        "$[?(@.properties.['s2:processing_baseline'] >= '05.00')]",
    )
    assert stac_jsonpath_match(
        {"properties": {"s2:processing_baseline": "05.00"}},
        "$[?(@.properties.['s2:processing_baseline'] >= '05.00')]",
    )
    assert stac_jsonpath_match(
        {"properties": {"s2:processing_baseline": "04.00"}},
        "$[?(@.properties.['s2:processing_baseline'] =~ '^04')]",
    )
    assert not stac_jsonpath_match(
        {"properties": {"s2:processing_baseline": "05.00"}},
        "$[?(@.properties.['s2:processing_baseline'] =~ '^04')]",
    )


def test_find_collection() -> None:
    assert find_collection({"a": "$[?(@.id =~ '.*')]"}, {"id": "1"}) == "a"
    assert (
        find_collection(
            {"a": "$[?(@.id == '1')]", "b": "$[?(@.id == '2')]"}, {"id": "2"}
        )
        == "b"
    )
    assert (
        find_collection(
            {
                "sentinel-2-c1-l2a": "$[?(@.properties.['s2:processing_baseline'] >= '05.00')]",  # noqa: E501
                "sentinel-2-l2a-baseline-04": "$[?(@.properties.['s2:processing_baseline'] =~ '^04')]",  # noqa: E501
            },
            {"properties": {"s2:processing_baseline": "04.00"}},
        )
        == "sentinel-2-l2a-baseline-04"
    )
    assert (
        find_collection(
            {
                "sentinel-2-c1-l2a": "$[?(@.properties.['s2:processing_baseline'] >= '05.00')]",  # noqa: E501
                "sentinel-2-l2a-baseline-04": "$[?(@.properties.['s2:processing_baseline'] =~ '^04')]",  # noqa: E501
            },
            {"properties": {"s2:processing_baseline": "05.00"}},
        )
        == "sentinel-2-c1-l2a"
    )
--------------------------------------------------------------------------------