├── .editorconfig ├── .github ├── .stale.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── config.yml │ ├── feature_request.md │ └── question.md ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yml ├── release-drafter.yml ├── remark.yaml └── workflows │ ├── citation.yaml │ ├── publish.yaml │ ├── release-drafter.yml │ ├── stac-model.yml │ └── test.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .remarkignore ├── .safety-policy.yml ├── CHANGELOG.md ├── CITATION.cff ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── README_DLM_LEGACY.md ├── README_STAC_MODEL.md ├── best-practices.md ├── docs └── static │ ├── crim.png │ ├── nrcan.png │ ├── sigspatial_2024_mlm.pdf │ ├── stac_mlm.png │ ├── terradue.png │ └── wherobots.png ├── examples ├── collection.json ├── item_bands_expression.json ├── item_basic.json ├── item_eo_and_raster_bands.json ├── item_eo_bands.json ├── item_eo_bands_summarized.json ├── item_multi_io.json └── item_raster_bands.json ├── json-schema └── schema.json ├── package.json ├── pyproject.toml ├── stac-model.bump.toml ├── stac_model ├── __init__.py ├── __main__.py ├── base.py ├── examples.py ├── input.py ├── output.py ├── runtime.py └── schema.py ├── tests ├── conftest.py ├── test_schema.py └── test_stac_model.py └── uv.lock /.editorconfig: -------------------------------------------------------------------------------- 1 | # Check http://editorconfig.org for more information 2 | # This is the main config file for this project: 3 | root = true 4 | 5 | [*] 6 | charset = utf-8 7 | end_of_line = lf 8 | insert_final_newline = true 9 | indent_style = space 10 | indent_size = 2 11 | trim_trailing_whitespace = true 12 | 13 | [*.{py, pyi}] 14 | indent_style = space 15 | indent_size = 4 16 | 17 | [Makefile] 18 | indent_style = tab 19 | 20 | [*.md] 21 | trim_trailing_whitespace = false 22 | 23 | [*.{diff,patch}] 24 | trim_trailing_whitespace = false 25 | -------------------------------------------------------------------------------- /.github/.stale.yml: -------------------------------------------------------------------------------- 1 | # Number of days of inactivity before an issue becomes stale 2 | daysUntilStale: 120 3 | # Number of days of inactivity before a stale issue is closed 4 | daysUntilClose: 30 5 | # Issues with these labels will never be considered stale 6 | exemptLabels: 7 | - pinned 8 | - security 9 | # Label to use when marking an issue as stale 10 | staleLabel: stale 11 | # Comment to post when marking an issue as stale. Set to `false` to disable 12 | markComment: > 13 | This issue has been automatically marked as stale because it has not had 14 | recent activity. It will be closed if no further activity occurs in 30 days. Thank you 15 | for your contributions. 16 | # Comment to post when closing a stale issue. Set to `false` to disable 17 | closeComment: false 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🐛 Bug report 3 | about: If something isn't working 🔧 4 | title: '' 5 | labels: bug, needs-triage 6 | assignees: 7 | --- 8 | 9 | ## :bug: Bug Report 10 | 11 | 12 | 13 | ## :microscope: How To Reproduce 14 | 15 | Steps to reproduce the behavior: 16 | 17 | 1. ... 18 | 19 | ### Code sample 20 | 21 | 22 | 23 | ### Environment 24 | 25 | * OS: (e.g. Linux / Windows / macOS) 26 | * Python version 27 | * stac-model version 28 | 29 | 30 | ## :chart_with_upwards_trend: Expected behavior 31 | 32 | 33 | 34 | ## :paperclip: Additional context 35 | 36 | 37 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | # Configuration: https://help.github.com/en/github/building-a-strong-community/configuring-issue-templates-for-your-repository 2 | 3 | blank_issues_enabled: false 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🚀 Feature request 3 | about: Suggest an idea for this project 🏖 4 | title: '' 5 | labels: enhancement, needs-triage 6 | assignees: 7 | --- 8 | 9 | ## :rocket: Feature Request 10 | 11 | 12 | 13 | ## :sound: Motivation 14 | 15 | 19 | 20 | ## :satellite: Alternatives 21 | 22 | 23 | 24 | ## :paperclip: Additional context 25 | 26 | 27 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: ❓Question 3 | about: Ask a question about this project 🎓 4 | title: '' 5 | labels: question, needs-triage 6 | assignees: 7 | --- 8 | 9 | ## Checklist 10 | 11 | 12 | 13 | - [ ] I've searched the project's [`issues`](..), looking for the following terms: 14 | - [...] 15 | 16 | ## :question: Question 17 | 18 | 19 | 20 | How can I [...]? 21 | 22 | Is it possible to [...]? 23 | 24 | ## :paperclip: Additional context 25 | 26 | 27 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Description 2 | 3 | 4 | 5 | ## Related Issue 6 | 7 | 8 | 9 | ## Type of Change 10 | 11 | 12 | 13 | - [ ] :books: Examples, docs, tutorials or dependencies update; 14 | - [ ] :wrench: Bug fix (non-breaking change which fixes an issue); 15 | - [ ] :clinking_glasses: Improvement (non-breaking change which improves an existing feature); 16 | - [ ] :rocket: New feature (non-breaking change which adds functionality); 17 | - [ ] :boom: Breaking change (fix or feature that would cause existing functionality to change); 18 | - [ ] :closed_lock_with_key: Security fix. 19 | 20 | ## Checklist 21 | 22 | 23 | 24 | - [ ] I've read the [`CONTRIBUTING.md`](https://github.com/stac-extensions/mlm/blob/main/CONTRIBUTING.md) guide; 25 | - [ ] I've updated the code style using `make check`; 26 | - [ ] I've written tests for all new methods and classes that I created; 27 | - [ ] I've written the docstring in `Google` format for all the methods and classes that I used. 28 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # Configuration: https://dependabot.com/docs/config-file/ 2 | # Docs: https://docs.github.com/en/github/administering-a-repository/keeping-your-dependencies-updated-automatically 3 | 4 | version: 2 5 | 6 | updates: 7 | - package-ecosystem: "pip" 8 | directory: "/" 9 | schedule: 10 | interval: "monthly" 11 | allow: 12 | - dependency-type: "all" 13 | commit-message: 14 | prefix: ":arrow_up:" 15 | open-pull-requests-limit: 5 16 | 17 | - package-ecosystem: "github-actions" 18 | directory: "/" 19 | schedule: 20 | interval: "monthly" 21 | allow: 22 | - dependency-type: "all" 23 | commit-message: 24 | prefix: ":arrow_up:" 25 | open-pull-requests-limit: 5 26 | 27 | - package-ecosystem: "docker" 28 | directory: "/docker" 29 | schedule: 30 | interval: "monthly" 31 | allow: 32 | - dependency-type: "all" 33 | commit-message: 34 | prefix: ":arrow_up:" 35 | open-pull-requests-limit: 5 36 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | # Release drafter configuration https://github.com/release-drafter/release-drafter#configuration 2 | # Emojis were chosen to match the https://gitmoji.dev/ 3 | 4 | name-template: "v$NEXT_PATCH_VERSION" 5 | tag-template: "v$NEXT_PATCH_VERSION" 6 | 7 | categories: 8 | - title: ":rocket: Features" 9 | labels: [enhancement, feature] 10 | - title: ":wrench: Fixes & Refactoring" 11 | labels: [bug, refactoring, bugfix, fix] 12 | - title: ":package: Build System & CI/CD" 13 | labels: [build, ci, testing] 14 | - title: ":boom: Breaking Changes" 15 | labels: [breaking] 16 | - title: ":memo: Documentation" 17 | labels: [documentation] 18 | - title: ":arrow_up: Dependencies updates" 19 | labels: [dependencies] 20 | 21 | template: | 22 | ## What's Changed 23 | 24 | $CHANGES 25 | 26 | ## :busts_in_silhouette: List of contributors 27 | 28 | $CONTRIBUTORS 29 | -------------------------------------------------------------------------------- /.github/remark.yaml: -------------------------------------------------------------------------------- 1 | settings: 2 | listItemIndent: '1' 3 | emphasis: '*' 4 | spacedTable: false 5 | paddedTable: true 6 | stringify: 7 | entities: false 8 | escape: false 9 | plugins: 10 | # Check links 11 | - validate-links 12 | # Apply some recommended defaults for consistency 13 | - remark-preset-lint-consistent 14 | - remark-preset-lint-recommended 15 | - - lint-no-html 16 | - false 17 | # General formatting 18 | - - remark-lint-emphasis-marker 19 | - '*' 20 | - remark-lint-hard-break-spaces 21 | - remark-lint-blockquote-indentation 22 | - remark-lint-no-consecutive-blank-lines 23 | - - remark-lint-maximum-line-length 24 | - 120 25 | - remark-lint-no-literal-urls 26 | # GFM - autolink literals, footnotes, strikethrough, tables, tasklist 27 | - remark-gfm 28 | # Math Expression 29 | - remark-math 30 | # Code 31 | - remark-lint-fenced-code-flag 32 | - remark-lint-fenced-code-marker 33 | - remark-lint-no-shell-dollars 34 | - - remark-lint-code-block-style 35 | - 'fenced' 36 | # Headings 37 | - remark-lint-heading-increment 38 | - remark-lint-no-multiple-toplevel-headings 39 | - remark-lint-no-heading-punctuation 40 | - - remark-lint-maximum-heading-length 41 | - 70 42 | - - remark-lint-heading-style 43 | - atx 44 | - - remark-lint-no-shortcut-reference-link 45 | - false 46 | # Lists 47 | - - remark-lint-list-item-bullet-indent 48 | - 'one' 49 | - remark-lint-ordered-list-marker-style 50 | - remark-lint-ordered-list-marker-value 51 | - remark-lint-checkbox-character-style 52 | - - remark-lint-unordered-list-marker-style 53 | - '-' 54 | - - remark-lint-list-item-content-indent 55 | - 1 56 | - - remark-lint-list-item-indent 57 | - 'space' 58 | # Tables 59 | - remark-lint-table-pipes 60 | - remark-lint-table-cell-padding 61 | -------------------------------------------------------------------------------- /.github/workflows/citation.yaml: -------------------------------------------------------------------------------- 1 | name: Check Citation Format 2 | on: 3 | push: 4 | paths: 5 | - CITATION.cff 6 | pull_request: 7 | paths: 8 | - CITATION.cff 9 | jobs: 10 | check-citation: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - name: Check whether the citation metadata from CITATION.cff is valid 15 | uses: citation-file-format/cffconvert-github-action@2.0.0 16 | with: 17 | args: "--validate" 18 | -------------------------------------------------------------------------------- /.github/workflows/publish.yaml: -------------------------------------------------------------------------------- 1 | name: Publish JSON Schema or stac-model package via Github Release 2 | on: 3 | release: 4 | types: [published] 5 | jobs: 6 | deploy-schema: 7 | if: startsWith(github.ref, 'refs/tags/v') 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Inject env variables 11 | uses: rlespinasse/github-slug-action@v3.x 12 | - uses: actions/checkout@v4 13 | - name: deploy JSON Schema for version ${{ env.GITHUB_REF_SLUG }} 14 | uses: peaceiris/actions-gh-pages@v4 15 | with: 16 | github_token: ${{ secrets.GITHUB_TOKEN }} 17 | publish_dir: json-schema 18 | destination_dir: ${{ env.GITHUB_REF_SLUG }} 19 | publish-pypi: 20 | if: startsWith(github.ref, 'refs/tags/stac-model-v') 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v4 24 | - name: Set up Python 25 | uses: actions/setup-python@v5.6.0 26 | with: 27 | python-version: "3.10" 28 | - name: Install uv 29 | run: make setup 30 | - name: Publish stac-model to PyPI 31 | run: | 32 | uv build 33 | uv publish --username __token__ --password ${{ secrets.PYPI_SECRET }} 34 | -------------------------------------------------------------------------------- /.github/workflows/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name: Release Drafter 2 | 3 | on: 4 | push: 5 | # branches to consider in the event; optional, defaults to all 6 | branches: 7 | - main 8 | 9 | jobs: 10 | update_release_draft: 11 | runs-on: ubuntu-latest 12 | steps: 13 | # Drafts your next Release notes as Pull Requests are merged into "master" 14 | - uses: release-drafter/release-drafter@v6.1.0 15 | env: 16 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 17 | -------------------------------------------------------------------------------- /.github/workflows/stac-model.yml: -------------------------------------------------------------------------------- 1 | name: Check Python Linting and Tests 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | stac-model: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: ["3.10", "3.11", "3.12"] 11 | 12 | steps: 13 | - uses: actions/checkout@v4 14 | - name: Set up Python ${{ matrix.python-version }} 15 | uses: actions/setup-python@v5.6.0 16 | with: 17 | python-version: ${{ matrix.python-version }} 18 | 19 | - name: Install uv 20 | run: make setup 21 | 22 | - name: Set up cache 23 | uses: actions/cache@v4.2.3 24 | with: 25 | path: .venv 26 | key: venv-${{ matrix.python-version }}-${{ hashFiles('pyproject.toml') }}-${{ hashFiles('uv.lock') }} 27 | - name: Install dependencies 28 | run: make install-dev 29 | 30 | - name: Run checks 31 | run: | 32 | make lint-all 33 | 34 | - name: Run tests 35 | run: | 36 | make test 37 | -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: Check Markdown and Examples 2 | on: [push, pull_request] 3 | jobs: 4 | deploy: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: actions/setup-node@v4 8 | with: 9 | node-version: 'lts/*' 10 | #cache: npm 11 | - uses: actions/checkout@v4 12 | - run: | 13 | npm install 14 | npm list 15 | npm test 16 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3 3 | 4 | default_stages: [commit, push] 5 | repos: 6 | - repo: https://github.com/pre-commit/pre-commit-hooks 7 | rev: v4.5.0 8 | hooks: 9 | - id: check-yaml 10 | - id: end-of-file-fixer 11 | - repo: https://github.com/astral-sh/ruff-pre-commit 12 | rev: 'v0.1.12' # Use the latest version of ruff-pre-commit 13 | hooks: 14 | - id: ruff 15 | pass_filenames: false 16 | args: 17 | - --config=pyproject.toml 18 | -------------------------------------------------------------------------------- /.remarkignore: -------------------------------------------------------------------------------- 1 | # To save time scanning 2 | .idea/ 3 | .vscode/ 4 | .tox/ 5 | .git/ 6 | .github/**/*.yaml 7 | .github/**/*.yml 8 | *.egg-info/ 9 | build/ 10 | dist/ 11 | downloads/ 12 | env/ 13 | 14 | # actual items to ignore 15 | .pytest_cache/ 16 | node_modules/ 17 | docs/_build/ 18 | docs/build/ 19 | -------------------------------------------------------------------------------- /.safety-policy.yml: -------------------------------------------------------------------------------- 1 | # Safety Security and License Configuration file 2 | # https://docs.safetycli.com/safety-docs/administration/safety-policy-files 3 | 4 | security: # configuration for the `safety check` command 5 | ignore-cvss-severity-below: 0 6 | ignore-cvss-unknown-severity: False 7 | ignore-vulnerabilities: 8 | 67599: 9 | reason: disputed pip feature not used by this project 10 | continue-on-vulnerability-error: False 11 | alert: # configuration for the `safety alert` command 12 | security: 13 | # Configuration specific to Safety's GitHub Issue alerting 14 | github-issue: 15 | # Same as for security - these allow controlling if this alert will fire based 16 | # on severity information. 17 | # default: not set 18 | # ignore-cvss-severity-below: 6 19 | # ignore-cvss-unknown-severity: False 20 | 21 | # Add a label to pull requests with the cvss severity, if available 22 | # label-severity: true 23 | 24 | # Add a label to pull requests, default is 'security' 25 | # requires private repo permissions, even on public repos 26 | # default: security 27 | labels: 28 | - security 29 | 30 | # Assign users to pull requests, default is not set 31 | # requires private repo permissions, even on public repos 32 | # default: empty 33 | # assignees: 34 | # - example-user 35 | 36 | # Prefix to give issues when creating them. Note that changing 37 | # this might cause duplicate issues to be created. 38 | # default: "[PyUp] " 39 | # issue-prefix: "[PyUp] " 40 | 41 | # Configuration specific to Safety's GitHub PR alerting 42 | github-pr: 43 | # Same as for security - these allow controlling if this alert will fire based 44 | # on severity information. 45 | # default: not set 46 | # ignore-cvss-severity-below: 6 47 | # ignore-cvss-unknown-severity: False 48 | 49 | # Set the default branch (ie, main, master) 50 | # default: empty, the default branch on GitHub 51 | branch: '' 52 | 53 | # Add a label to pull requests with the cvss severity, if available 54 | # default: true 55 | # label-severity: True 56 | 57 | # Add a label to pull requests, default is 'security' 58 | # requires private repo permissions, even on public repos 59 | # default: security 60 | labels: 61 | - security 62 | 63 | # Assign users to pull requests, default is not set 64 | # requires private repo permissions, even on public repos 65 | # default: empty 66 | # assignees: 67 | # - example-user 68 | 69 | # Configure the branch prefix for PRs created by this alert. 70 | # NB: Changing this will likely cause duplicate PRs. 71 | # default: pyup/ 72 | branch-prefix: pyup/ 73 | 74 | # Set a global prefix for PRs 75 | # default: "[PyUp] " 76 | pr-prefix: "[PyUp] " 77 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [Unreleased](https://github.com/stac-extensions/mlm/tree/main) 9 | 10 | ### Added 11 | 12 | - Add `embedding` as suggested dimension name 13 | (relates to [#77](https://github.com/stac-extensions/mlm/discussions/77)). 14 | - Add [`huggingface/safetensors`](https://github.com/huggingface/safetensors) 15 | recommendations for `mlm:artifact_type` and corresponding `mlm:framework` values 16 | (fixes [#68](https://github.com/stac-extensions/mlm/issues/68)). 17 | - Add [`Flax`](https://github.com/google/flax) to the list of `mlm:framework` and 18 | the corresponding `mlm:artifact_type` SafeTensors backend in the JSON schema examples. 19 | - Add [`Paddle`](https://github.com/PaddlePaddle/Paddle) to the list of `mlm:framework` 20 | (fixes [#69](https://github.com/stac-extensions/mlm/issues/69)). 21 | 22 | ### Changed 23 | 24 | - Update `stac-model==0.3.0` to provide `ValueScalingObject` from installed package. 25 | 26 | ### Deprecated 27 | 28 | - n/a 29 | 30 | ### Removed 31 | 32 | - n/a 33 | 34 | ### Fixed 35 | 36 | - n/a 37 | 38 | ## [v1.4.0](https://github.com/stac-extensions/mlm/tree/v1.4.0) 39 | 40 | ### Added 41 | 42 | - Add better descriptions about required and recommended *MLM Asset Roles* and 43 | their implications (fixes 44 | [#54](https://github.com/stac-extensions/mlm/issues/54)). 45 | - Add explicit check of `value_scaling` sub-fields `minimum`, `maximum`, `mean`, `stddev`, etc. for 46 | corresponding `type` values `min-max` and `z-score` that depend on it. 47 | - Allow different `value_scaling` operations per band/channel/dimension as needed by the model. 48 | - Allow a `processing:expression` for a band/channel/dimension-specific `value_scaling` operation, 49 | granting more flexibility in the definition of input preparation in contrast to having it applied 50 | for the entire input (but still possible). 51 | - Add optional `mlm:compile_method` field at the Asset level with options `aot` 52 | for Ahead of Time Compilation, `jit` for Just-In Time Compilation. 53 | 54 | ### Changed 55 | 56 | - Explicitly disallow `mlm:name`, `mlm:input`, `mlm:output` and `mlm:hyperparameters` at the Asset level. 57 | These fields describe the model as a whole and should therefore be defined in Item properties. 58 | - Moved `norm_type` to `value_scaling` object to better reflect the expected operation, which could be another 59 | operation than what is typically known as "normalization" or "standardization" techniques in machine learning. 60 | - Moved `statistics` to `value_scaling` object to better reflect their mutual `type` and additional 61 | properties dependencies. 62 | - moved `mlm:artifact_type` field value descriptions that are framework specific to best-practices section. 63 | - expanded suggested `mlm:artifact_type` values to include Tensorflow/Keras. 64 | 65 | ### Deprecated 66 | 67 | - n/a 68 | 69 | ### Removed 70 | 71 | - Removed `norm_type` enum values that were ambiguous regarding their expected result. 72 | Instead, a `processing:expression` should be employed to explicitly define the calculation they represent. 73 | - Removed `norm_clip` property. It is now represented under `value_scaling` objects with a 74 | corresponding `type` definition. 75 | - Removed `norm_by_channel` from `mlm:input` objects. If rescaling (previously normalization in the documentation) 76 | is a single value, broadcasting to the relevant bands should be performed implicitly. 77 | Otherwise, the amount of `value_scaling` objects should match the number of bands or channels involved in the input. 78 | 79 | ### Fixed 80 | 81 | - Fix missing `mlm:artifact_type` property check for a Model Asset definition 82 | (fixes ). 83 | The `mlm:artifact_type` is now mutually and exclusively required by the corresponding Asset with `mlm:model` role. 84 | - Fix check of disallowed unknown/undefined `mlm:`-prefixed fields 85 | (fixes [#41](https://github.com/stac-extensions/mlm/issues/41)). 86 | 87 | ## [v1.3.0](https://github.com/stac-extensions/mlm/tree/v1.3.0) 88 | 89 | ### Added 90 | 91 | - Add `raster:bands` required property `name` for describing `mlm:input` bands 92 | (see [README - Bands and Statistics](README.md#bands-and-statistics) for details). 93 | - Add README warnings about new extension `eo` and `raster` versions. 94 | 95 | ### Changed 96 | 97 | - Split `ModelBands` and `AnyBandsRef` definitions in the JSON schema to allow them to be referenced individually. 98 | - Move `AnyBandsRef` definition explicitly to STAC Item JSON schema, rather than implicitly inferred via `mlm:input`. 99 | - Modified the JSON schema to use a `if` check of the `type` (STAC Item or Collection) prior to validating further 100 | properties. This allows some validators (e.g. `pystac`) to better report the *real* error that causes the schema 101 | to fail, rather than reporting the first mismatching `type` case with a poor error description to debug the issue. 102 | 103 | ### Deprecated 104 | 105 | - n/a 106 | 107 | ### Removed 108 | 109 | - Removed `$comment` entries from the JSON schema that are considered as invalid by some parsers. 110 | - When `mlm:input` objects do **NOT** define band references (i.e.: `bands: []` is used), the JSON schema will not 111 | fail if an Asset with the `mlm:model` role contains a band definition. This is to allow MLM model definitions to 112 | simultaneously use some inputs with `bands` reference names while others do not. 113 | 114 | ### Fixed 115 | 116 | - Band checks against [`eo`](https://github.com/stac-extensions/eo), [`raster`](https://github.com/stac-extensions/eo) 117 | or STAC Core 1.1 [`bands`](https://github.com/radiantearth/stac-spec/blob/master/commons/common-metadata.md#bands) 118 | when a `mlm:input` references names in `bands` are now properly validated. 119 | - Fix the examples using `raster:bands` incorrectly defined in STAC Item properties. 120 | The correct use is for them to be defined under the STAC Asset using the `mlm:model` role. 121 | - Fix the [EuroSAT ResNet pydantic example](./stac_model/examples.py) that incorrectly referenced some `bands` 122 | in its `mlm:input` definition without providing any definition of those bands. The `eo:bands` properties have 123 | been added to the corresponding `model` Asset using 124 | the [`pystac.extensions.eo`](https://github.com/stac-utils/pystac/blob/main/pystac/extensions/eo.py) utilities. 125 | - Fix various STAC Asset definitions erroneously employing `mlm:model` role instead of the intended `mlm:source_code`. 126 | 127 | ## [v1.2.0](https://github.com/stac-extensions/mlm/tree/v1.2.0) 128 | 129 | ### Added 130 | 131 | - Add the missing JSON schema `item_assets` definition under a Collection to ensure compatibility with 132 | the [Item Assets](https://github.com/stac-extensions/item-assets) extension, as mentioned this specification. 133 | - Add `ModelBand` representation using `name`, `format` and `expression` properties to allow derived band references 134 | (fixes [crim-ca/mlm-extension#7](https://github.com/stac-extensions/mlm/discussions/7)). 135 | 136 | ### Changed 137 | 138 | - Adds a job to `.github/workflows/publish.yaml` to publish the `stac-model` package to PyPI. 139 | 140 | ### Deprecated 141 | 142 | - n/a 143 | 144 | ### Removed 145 | 146 | - Field `mlm:name` requirement to be unique. There is no way to guarantee this from a single Item's definition 147 | and their JSON schema validation. For uniqueness requirement, users should instead rely on the `id` property 148 | of the Item, which is ensured to be unique under the corresponding Collection, since it would not be retrievable 149 | otherwise (i.e.: `collections/{collectionID}/items/{itemID}`). 150 | 151 | ### Fixed 152 | 153 | - Fix the validation strategy of the `mlm:model` role required by at least one Asset under a STAC Item. 154 | Although the role requirement was validated, the definition did not allow for other Assets without it to exist. 155 | - Correct `stac-model` version in code and publish matching release on PyPI. 156 | 157 | ## [v1.1.0](https://github.com/stac-extensions/mlm/tree/v1.1.0) 158 | 159 | ### Added 160 | 161 | - Add pattern for `mlm:framework`, needing at least one alphanumeric character, 162 | without leading or trailing non-alphanumeric characters. 163 | - Add [`examples/item_eo_and_raster_bands.json`](examples/item_eo_and_raster_bands.json) demonstrating the original 164 | use case represented by the previous [`examples/item_eo_bands.json`](examples/item_eo_bands.json) contents. 165 | - Add a `description` field for `mlm:input` and `mlm:output` definitions. 166 | 167 | ### Changed 168 | 169 | - Adjust `scikit-learn` and `Hugging Face` framework names to match the format employed by the official documentation. 170 | 171 | ### Deprecated 172 | 173 | - n/a 174 | 175 | ### Removed 176 | 177 | - Removed combination of `mlm:input` with `bands: null` that could never occur due to pre-requirement of `type: array`. 178 | 179 | ### Fixed 180 | 181 | - Fix `AnyBands` definition and use in the JSON schema to better consider possible use cases with `eo` extension. 182 | - Fix [`examples/item_eo_bands.json`](examples/item_eo_bands.json) that was incorrectly also using `raster` extension. 183 | This is not fundamentally wrong, but it did not allow to validate the `eo` extension use case properly, since 184 | the `raster:bands` reference caused a bypass for the `mlm:input[*].bands` to succeed validation. 185 | 186 | ## [v1.0.0](https://github.com/stac-extensions/mlm/tree/v1.0.0) 187 | 188 | ### Added 189 | 190 | - more [Task Enum](README.md#task-enum) tasks 191 | - [Model Output Object](README.md#model-output-object) 192 | - `batch_size` and hardware summary 193 | - [`mlm:accelerator`, `mlm:accelerator_constrained`, `mlm:accelerator_summary`](./README.md#accelerator-type-enum) 194 | to specify hardware requirements for the model 195 | - Use common metadata 196 | [Asset Object](https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md#asset-object) 197 | to refer to model asset and source code. 198 | - use `classification:classes` in Model Output 199 | - add `scene-classification` to the Enum Tasks to allow disambiguation between 200 | pixel-wise and patch-based classification 201 | 202 | ### Changed 203 | 204 | - `disk_size` replaced by `file:size` (see [Best Practices - File Extension](best-practices.md#file-extension)) 205 | - `memory_size` under `dlm:architecture` moved directly under Item properties as `mlm:memory_size` 206 | - replaced all hardware/accelerator/runtime definitions into distinct `mlm` fields directly under the 207 | STAC Item properties (top-level, not nested) to allow better search support by STAC API. 208 | - reorganized `dlm:architecture` nested fields to exist at the top level of properties as `mlm:name`, `mlm:summary` 209 | and so on to provide STAC API search capabilities. 210 | - replaced `normalization:mean`, etc. with [statistics](./README.md#bands-and-statistics) from STAC 1.1 common metadata 211 | - added `pydantic` models for internal schema objects in `stac_model` package and published to PYPI 212 | - specified [`rel_type`](README.md#relation-types) to be `derived_from` and 213 | specify how model item or collection json should be named 214 | - replaced all Enum Tasks names to use hyphens instead of spaces 215 | - replaced `dlm:task` by `mlm:tasks` using an array of value instead of a single one, allowing models to represent 216 | multiple tasks they support simultaneously or interchangeably depending on context 217 | - replace `pre_processing_function` and `post_processing_function` to use similar definitions 218 | to the [Processing Extension - Expression Object](https://github.com/stac-extensions/processing#expression-object) 219 | such that more extended definitions of custom processors can be defined. 220 | - updated JSON schema to reflect changes of MLM fields 221 | 222 | ### Deprecated 223 | 224 | - any `dlm`-prefixed field or property 225 | 226 | ### Removed 227 | 228 | - Data Object, replaced with [Model Input Object](./README.md#model-input-object) that uses the `name` field from 229 | the [common metadata band object][stac-bands] which also records `data_type` and `nodata` type 230 | 231 | ### Fixed 232 | 233 | - n/a 234 | 235 | [stac-bands]: https://github.com/radiantearth/stac-spec/blob/f9b3c59ba810541c9da70c5f8d39635f8cba7bcd/item-spec/common-metadata.md#bands 236 | 237 | ## [v1.0.0-beta3](https://github.com/crim-ca/dlm-extension/tree/v1.0.0-beta3) 238 | 239 | ### Added 240 | 241 | - Added example model architecture summary text. 242 | 243 | ### Changed 244 | 245 | - Modified `$id` if the extension schema to refer to the expected location when eventually released 246 | (`https://schemas.stacspec.org/v1.0.0-beta.3/extensions/dl-model/json-schema/schema.json`). 247 | - Replaced `dtype` field by `data_type` to better align with the corresponding field of 248 | [`raster:bands`][raster-band-object]. 249 | - Replaced `nodata_value` field by `nodata` to better align with the corresponding field of 250 | [`raster:bands`][raster-band-object]. 251 | - Refactored schema to use distinct definitions and references instead of embedding all objects 252 | within `dl-model` properties. 253 | - Allow schema to contain other `dlm:`-prefixed elements using `patternProperties` and explicitly 254 | deny other `additionalProperties`. 255 | - Allow `class_name_mapping` to be directly provided as a mapping of index-based properties and class-name values. 256 | 257 | [raster-band-object]: https://github.com/stac-extensions/raster/#raster-band-object 258 | 259 | ### Deprecated 260 | 261 | - Specifying `class_name_mapping` by array is deprecated. 262 | Direct mapping as an object of index to class name should be used. 263 | For backward compatibility, mapping as array and using nested objects with `index` and `class_name` properties 264 | is still permitted, although overly verbose compared to the direct mapping. 265 | 266 | ### Removed 267 | 268 | - Field `nodata_value`. 269 | - Field `dtype`. 270 | 271 | ### Fixed 272 | 273 | - Fixed references to other STAC extensions to use the official schema links on `https://stac-extensions.github.io/`. 274 | - Fixed examples to refer to local files. 275 | - Fixed formatting of tables and descriptions in README. 276 | 277 | ## [v1.0.0-beta2](https://github.com/crim-ca/dlm-extension/tree/v1.0.0-beta2) 278 | 279 | ### Added 280 | 281 | - Initial release of the extension description and schema. 282 | 283 | ### Changed 284 | 285 | - n/a 286 | 287 | ### Deprecated 288 | 289 | - n/a 290 | 291 | ### Removed 292 | 293 | - n/a 294 | 295 | ### Fixed 296 | 297 | - n/a 298 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: If you use this standard or software, please cite it using the metadata from this file. 3 | title: Machine Learning Model Extension Specification for SpatioTemporal Asset Catalog 4 | type: software 5 | keywords: 6 | - mlm 7 | - Machine Learning 8 | - Model 9 | - STAC 10 | url: "https://github.com/stac-extensions/mlm/blob/main/README.md" 11 | repository-code: "https://github.com/stac-extensions/mlm" 12 | license: Apache-2.0 13 | license-url: https://github.com/stac-extensions/mlm/blob/main/LICENSE 14 | identifiers: 15 | - type: doi 16 | value: "10.1145/3681769.3698586" 17 | description: "Conference paper presenting the standard." 18 | - type: url 19 | value: "https://stac-extensions.github.io/mlm/" 20 | description: "Generic URL of the MLM extension schema versions for 'stac_extensions' references." 21 | contact: 22 | - given-names: Francis 23 | family-names: Charette-Migneault 24 | email: francis.charette-migneault@crim.ca 25 | affiliation: Computer Research Institute of Montréal (CRIM) 26 | orcid: "https://orcid.org/0000-0003-4862-3349" 27 | - given-names: Ryan 28 | family-names: Avery 29 | alias: rbavery 30 | email: ryan@wherobots.com 31 | affiliation: "Wherobots, Inc." 32 | orcid: "https://orcid.org/0000-0001-7392-1474" 33 | authors: &authors 34 | - given-names: Francis 35 | family-names: Charette-Migneault 36 | alias: fmigneault 37 | email: francis.charette-migneault@crim.ca 38 | affiliation: Computer Research Institute of Montréal (CRIM) 39 | orcid: "https://orcid.org/0000-0003-4862-3349" 40 | - given-names: Ryan 41 | family-names: Avery 42 | alias: rbavery 43 | email: ryan@wherobots.com 44 | affiliation: "Wherobots, Inc." 45 | orcid: "https://orcid.org/0000-0001-7392-1474" 46 | - &crim 47 | name: Computer Research Institute of Montréal 48 | city: Montréal 49 | region: Québec 50 | alias: CRIM 51 | website: "https://www.crim.ca/" 52 | email: info@crim.ca 53 | tel: 1 (514) 840-1234 54 | country: CA 55 | post-code: H3N 1M3 56 | address: "101 – 405, avenue Ogilvy" 57 | - name: "Wherobots, Inc." 58 | address: 350 California St 59 | city: San Francisco 60 | country: US 61 | post-code: "94104" 62 | region: California 63 | website: "https://www.wherobots.ai/" 64 | location: Floor 1 - Lincoln Towne Center 65 | 66 | references: 67 | - type: software-code 68 | title: "A PydanticV2 and PySTAC validation and serialization library for the STAC ML Model Extension" 69 | keywords: 70 | - stac_model 71 | repository-code: "https://github.com/stac-extensions/mlm/tree/main/stac_model" 72 | repository-artifact: "https://pypi.org/project/stac-model/" 73 | url: "https://github.com/stac-extensions/mlm/blob/main/README_STAC_MODEL.md" 74 | authors: 75 | - given-names: Ryan 76 | family-names: Avery 77 | alias: rbavery 78 | email: ryan@wherobots.com 79 | affiliation: "Wherobots, Inc." 80 | orcid: "https://orcid.org/0000-0001-7392-1474" 81 | - given-names: Francis 82 | family-names: Charette-Migneault 83 | alias: fmigneault 84 | email: francis.charette-migneault@crim.ca 85 | affiliation: Computer Research Institute of Montréal (CRIM) 86 | orcid: "https://orcid.org/0000-0003-4862-3349" 87 | 88 | - type: standard 89 | title: STAC MLM specification 90 | authors: *authors 91 | identifiers: 92 | - type: url 93 | value: "https://stac-extensions.github.io/mlm/v1.4.0/schema.json" 94 | description: "Latest extension URL used in 'stac_extensions' references." 95 | - type: url 96 | value: "https://stac-extensions.github.io/mlm/" 97 | description: "Generic URL of the MLM extension schema versions for 'stac_extensions' references." 98 | 99 | - type: software-code 100 | title: "Archive repository of the STAC MLM specification." 101 | repository-code: "https://github.com/crim-ca/mlm-extension" 102 | authors: *authors 103 | identifiers: 104 | - type: url 105 | value: "https://crim-ca.github.io/mlm-extension/v1.3.0/schema.json" 106 | description: "Archive extension URL used in 'stac_extensions' references." 107 | - type: url 108 | value: "https://crim-ca.github.io/mlm-extension/" 109 | description: "Generic URL of the archived MLM extension schema versions for 'stac_extensions' references." 110 | 111 | - type: report 112 | title: Project CCCOT03 – Technical Report 113 | abstract: "Project CCCOT03: Proposal for a STAC Extension for Deep Learning Models" 114 | keywords: 115 | - dlm 116 | - Deep Learning 117 | - Model 118 | - STAC 119 | repository: "https://raw.githubusercontent.com/crim-ca/CCCOT03/main/CCCOT03_Rapport%20Final_FINAL_EN.pdf" 120 | repository-code: "https://github.com/crim-ca/dlm-extension" 121 | license: Apache-2.0 122 | license-url: https://github.com/crim-ca/dlm-extension/blob/main/LICENSE 123 | date-released: "2020-12-14" 124 | languages: 125 | - en 126 | doi: "10.13140/RG.2.2.27858.68804" 127 | url: "https://www.researchgate.net/publication/349003427" 128 | institution: *crim 129 | authors: 130 | - given-names: Francis 131 | family-names: Charette-Migneault 132 | alias: fmigneault 133 | email: francis.charette-migneault@crim.ca 134 | affiliation: Computer Research Institute of Montréal (CRIM) 135 | orcid: "https://orcid.org/0000-0003-4862-3349" 136 | - given-names: Samuel 137 | family-names: Foucher 138 | alias: sfoucher 139 | orcid: "https://orcid.org/0000-0001-9557-6907" 140 | - given-names: David 141 | family-names: Landry 142 | orcid: "https://orcid.org/0000-0001-5343-2235" 143 | - given-names: Yves 144 | family-names: Moisan 145 | alias: ymoisan 146 | - name: Computer Research Institute of Montréal 147 | city: Montréal 148 | region: Québec 149 | alias: CRIM 150 | website: "https://www.crim.ca/" 151 | email: info@crim.ca 152 | tel: 1 (514) 840-1234 153 | country: CA 154 | post-code: H3N 1M3 155 | address: "101 – 405, avenue Ogilvy" 156 | - name: "Natural Resources Canada" 157 | country: CA 158 | website: "https://natural-resources.canada.ca/" 159 | - name: "Canada Centre for Mapping and Earth Observation" 160 | alias: CCMEO 161 | country: CA 162 | website: "https://natural-resources.canada.ca/research-centres-and-labs/canada-centre-for-mapping-and-earth-observation/25735" 163 | 164 | - type: conference 165 | notes: Conference reference where the demo paper presenting MLM is published. 166 | title: "GeoSearch’24: Proceedings of the 3rd ACM SIGSPATIAL International Workshop on Searching and Mining Large Collections of Geospatial Data" 167 | conference: 168 | name: "SIGSPATIAL’24: The 32nd ACM International Conference on Advances in Geographic Information Systems" 169 | date-start: "2024-10-29" 170 | date-end: "2024-11-01" 171 | city: Atlanta 172 | region: Georgia 173 | country: US 174 | url: https://dl.acm.org/doi/proceedings/10.1145/3681769 175 | isbn: "979-8-4007-1148-0" 176 | date-published: "2024-10-29" 177 | publisher: 178 | name: "Association for Computing Machinery" 179 | authors: 180 | - given-names: Hao 181 | family-names: Li 182 | - given-names: Abhishek 183 | family-names: Potnis 184 | - given-names: Wenwen 185 | family-names: Li 186 | - given-names: Dalton 187 | family-names: Lunga 188 | - given-names: Martin 189 | family-names: Werner 190 | - given-names: Andreas 191 | family-names: Züfle 192 | 193 | preferred-citation: 194 | type: conference-paper 195 | doi: "10.1145/3681769.3698586" 196 | title: Machine Learning Model Specification for Cataloging Spatio-Temporal Models 197 | conference: 198 | name: 3rd ACM SIGSPATIAL International Workshop on Searching and Mining Large Collections of Geospatial Data 199 | alias: GeoSearch’24 200 | date-published: "2024-10-29" 201 | year: 2024 202 | month: 10 203 | pages: 4 204 | loc-start: 36 205 | loc-end: 39 206 | location: 207 | name: Georgia Tech Hotel and Conference Center 208 | city: Atlanta 209 | region: Georgia 210 | country: US 211 | languages: 212 | - en 213 | abstract: >- 214 | The Machine Learning Model (MLM) extension is a 215 | specification that extends the SpatioTemporal Asset 216 | Catalogs (STAC) framework to catalog machine learning 217 | models. This demo paper introduces the goals of the MLM, 218 | highlighting its role in improving 219 | searchability and reproducibility of geospatial models. 220 | The MLM is contextualized within the STAC ecosystem, 221 | demonstrating its compatibility and the advantages it 222 | brings to discovering relevant geospatial models and 223 | describing their inference requirements. 224 | 225 | A detailed overview of the MLM's structure and fields 226 | describes the tasks, hardware requirements, frameworks, 227 | and inputs/outputs associated with machine learning 228 | models. Three use cases are presented, showcasing the 229 | application of the MLM in describing models for land cover 230 | classification and image segmentation. These examples 231 | illustrate how the MLM facilitates easier search and better 232 | understanding of how to deploy models in inference pipelines. 233 | 234 | The discussion addresses future challenges in extending 235 | the MLM to account for the diversity in machine learning 236 | models, including foundational and fine-tuned models, 237 | multi-modal models, and the importance of describing the 238 | data pipeline and infrastructure models depend on. 239 | Finally, the paper demonstrates the potential of the MLM 240 | to be a unifying standard to enable benchmarking and 241 | comparing geospatial machine learning models. 242 | keywords: 243 | - STAC 244 | - Catalog 245 | - Machine Learning 246 | - Spatio-Temporal Models 247 | - Search 248 | contact: 249 | - given-names: Francis 250 | family-names: Charette-Migneault 251 | email: francis.charette-migneault@crim.ca 252 | affiliation: Computer Research Institute of Montréal (CRIM) 253 | orcid: "https://orcid.org/0000-0003-4862-3349" 254 | authors: 255 | - given-names: Francis 256 | family-names: Charette-Migneault 257 | email: francis.charette-migneault@crim.ca 258 | affiliation: Computer Research Institute of Montréal (CRIM) 259 | orcid: "https://orcid.org/0000-0003-4862-3349" 260 | - given-names: Ryan 261 | family-names: Avery 262 | email: ryan@wherobots.com 263 | affiliation: "Wherobots, Inc." 264 | orcid: "https://orcid.org/0000-0001-7392-1474" 265 | - given-names: Brian 266 | family-names: Pondi 267 | email: brian.pondi@uni-muenster.de 268 | affiliation: "Institute for Geoinformatics, University of Münster" 269 | orcid: "https://orcid.org/0009-0008-0367-1690" 270 | - given-names: Joses 271 | family-names: Omojola 272 | affiliation: University of Arizona 273 | email: jomojo1@arizona.edu 274 | orcid: "https://orcid.org/0000-0001-5807-2953" 275 | - given-names: Simone 276 | family-names: Vaccari 277 | email: simone.vaccari@terradue.com 278 | affiliation: Terradue 279 | orcid: "https://orcid.org/0000-0002-2757-4165" 280 | - given-names: Parham 281 | family-names: Membari 282 | email: parham.membari@terradue.com 283 | affiliation: Terradue 284 | orcid: "https://orcid.org/0009-0004-7594-4011" 285 | - given-names: Devis 286 | family-names: Peressutti 287 | email: devis.peressutti@planet.com 288 | affiliation: "Sinergise Solutions, a Planet Labs company" 289 | orcid: "https://orcid.org/0000-0002-4660-0576" 290 | - given-names: Jia 291 | family-names: Yu 292 | email: jiayu@wherobots.com 293 | affiliation: "Wherobots, Inc." 294 | orcid: "https://orcid.org/0000-0003-1340-6475" 295 | - given-names: Jed 296 | family-names: Sundwall 297 | email: jed@radiant.earth 298 | affiliation: Radiant Earth 299 | orcid: "https://orcid.org/0000-0001-9681-230X" 300 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to contribute to MLM specification or `stac-model` 2 | 3 | ## Project setup 4 | 5 | 1. If you don't have `uv` installed run: 6 | 7 | ```bash 8 | make setup 9 | ``` 10 | 11 | This installs `uv` as a [standalone application][uv-install].
12 | For more details, see also the [`uv` documentation][uv-docs]. 13 | 14 | 2. Initialize project dependencies with `uv` and install `pre-commit` hooks: 15 | 16 | ```bash 17 | make install-dev 18 | make pre-commit-install 19 | ``` 20 | 21 | This will install project dependencies into the currently active environment. If you would like to 22 | use `uv`'s default behavior of managing a project-scoped environment, use `uv` commands directly to 23 | install dependencies. `uv sync` will install dependencies and dev dependencies in `.venv` and update the `uv.lock`. 24 | 25 | ## PR submission 26 | 27 | Before submitting your code please do the following steps: 28 | 29 | 1. Add any changes you want 30 | 31 | 2. Add tests for the new changes 32 | 33 | 3. Edit documentation if you have changed something significant 34 | 35 | You're then ready to run and test your contributions. 36 | 37 | 4. Run linting checks: 38 | 39 | ```bash 40 | make lint-all 41 | ``` 42 | 43 | 5. Run `tests` (including your new ones) with 44 | 45 | ```bash 46 | make test 47 | ``` 48 | 49 | 6. Upload your changes to your fork, then make a PR from there to the main repo: 50 | 51 | ```bash 52 | git checkout -b your-branch 53 | git add . 54 | git commit -m ":tada: Initial commit" 55 | git remote add origin https://github.com/your-fork/mlm-extension.git 56 | git push -u origin your-branch 57 | ``` 58 | 59 | ## Building and releasing 60 | 61 | 62 | 63 | > [!WARNING] 64 | > There are multiple types of releases for this repository:
65 | > 66 | > 1. Release for MLM specification (usually, this should include one for `stac-model` as well to support it) 67 | > 2. Release for `stac-model` only 68 | 69 | 70 | 71 | ### Building a new version of MLM specification 72 | 73 | - Checkout to the `main` branch by making sure the CI passed all previous tests. 74 | - Bump the version with `bump-my-version bump --verbose `. 75 | - Consider using `--dry-run` beforehand to inspect the changes. 76 | - The `` should be one of `major`, `minor`, or `patch`.
77 | Alternatively, the version can be set explicitly with `--new-version patch`.
78 | For more details, refer to the [Semantic Versions][semver] standard; 79 | - Make a commit to `GitHub` and push the corresponding auto-generated `v{MAJOR}.{MINOR}.{PATCH}` tag. 80 | - Validate that the CI validated everything once again. 81 | - Create a `GitHub release` with the created tag. 82 | 83 | 84 | 85 | > [!WARNING] 86 | > 87 | > - Ensure the "Set as the latest release" option is selected :heavy_check_mark:. 88 | > - Ensure the diff ranges from the previous MLM version, and not an intermediate `stac-model` release. 89 | 90 | 91 | 92 | ### Building a new version of `stac-model` 93 | 94 | - Apply any relevant changes and `CHANGELOG.md` entries in a PR that modifies `stac-model`. 95 | - Bump the version with `bump-my-version bump --verbose --config-file stac-model.bump.toml`. 96 | - You can pass the new version explicitly, or a rule such as `major`, `minor`, or `patch`.
97 | For more details, refer to the [Semantic Versions][semver] standard; 98 | - Once CI validation succeeded, merge the corresponding PR branch. 99 | - Checkout to `main` branch that contains the freshly created merge commit. 100 | - Push the tag `stac-model-v{MAJOR}.{MINOR}.{PATCH}`. The CI should auto-publish it to PyPI. 101 | - Create a `GitHub release` (if not automatically drafted by the CI). 102 | 103 | 104 | 105 | > [!WARNING] 106 | > 107 | > - Ensure the "Set as the latest release" option is deselected :x:. 108 | > - Ensure the diff ranges from the previous release of `stac-model`, not an intermediate MLM release. 109 | 110 | 111 | 112 | ## Other help 113 | 114 | You can contribute by spreading a word about this library. 115 | It would also be a huge contribution to write 116 | a short article on how you are using this project. 117 | You can also share how the ML Model extension does or does 118 | not serve your needs with us in the GitHub Discussions or raise 119 | Issues for bugs. 120 | 121 | [uv-install]: https://docs.astral.sh/uv/getting-started/installation/ 122 | 123 | [uv-docs]: https://docs.astral.sh/uv/ 124 | 125 | [semver]: https://semver.org/ 126 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | #* Variables 2 | SHELL ?= /usr/bin/env bash 3 | 4 | # use the directory rather than the python binary to allow auto-discovery, which is more cross-platform compatible 5 | PYTHON_PATH := $(shell which python) 6 | PYTHON_ROOT := $(shell dirname $(dir $(PYTHON_PATH))) 7 | UV_PYTHON_ROOT ?= $(PYTHON_ROOT) 8 | 9 | # to actually reuse an existing virtual/conda environment, the 'UV_PROJECT_ENVIRONMENT' variable must be set to it 10 | # use this command: 11 | # UV_PROJECT_ENVIRONMENT=/path/to/env make [target] 12 | # consider exporting this variable in '/path/to/env/etc/conda/activate.d/env.sh' to enable it by default when 13 | # activating a conda environment, and reset it in '/path/to/env/etc/conda/deactivate.d/env.sh' 14 | UV_PROJECT_ENVIRONMENT ?= 15 | # make sure every uv command employs the specified environment path 16 | ifeq (${UV_PROJECT_ENVIRONMENT},) 17 | UV_COMMAND := uv 18 | else 19 | UV_COMMAND := UV_PROJECT_ENVIRONMENT="${UV_PROJECT_ENVIRONMENT}" uv 20 | endif 21 | 22 | #* UV 23 | .PHONY: setup 24 | setup: 25 | which uv >/dev/null || (curl -LsSf https://astral.sh/uv/install.sh | sh) 26 | 27 | .PHONY: publish 28 | publish: 29 | $(UV_COMMAND) publish --build 30 | 31 | #* Installation 32 | .PHONY: install 33 | install: setup 34 | $(UV_COMMAND) export --format requirements-txt -o requirements.txt --no-dev 35 | $(UV_COMMAND) pip install --python "$(UV_PYTHON_ROOT)" -r requirements.txt 36 | 37 | .PHONY: install-dev 38 | install-dev: setup 39 | $(UV_COMMAND) export --format requirements-txt -o requirements-dev.txt 40 | $(UV_COMMAND) pip install --python "$(UV_PYTHON_ROOT)" -r requirements-dev.txt 41 | 42 | .PHONY: pre-commit-install 43 | pre-commit-install: setup 44 | $(UV_COMMAND) run --python "$(UV_PYTHON_ROOT)" pre-commit install 45 | 46 | #* Formatters 47 | .PHONY: codestyle 48 | codestyle: setup 49 | $(UV_COMMAND) run --python "$(UV_PYTHON_ROOT)" ruff format --config=pyproject.toml stac_model tests 50 | 51 | .PHONY: format 52 | format: codestyle 53 | 54 | #* Linting 55 | .PHONY: test 56 | test: setup 57 | $(UV_COMMAND) run --python "$(UV_PYTHON_ROOT)" pytest -c pyproject.toml --cov-report=html --cov=stac_model tests/ 58 | 59 | .PHONY: check 60 | check: check-examples check-markdown check-lint check-mypy check-safety check-citation 61 | 62 | .PHONY: check-all 63 | check-all: check 64 | 65 | .PHONY: mypy 66 | mypy: setup 67 | $(UV_COMMAND) run --python "$(UV_PYTHON_ROOT)" mypy --config-file pyproject.toml ./ 68 | 69 | .PHONY: check-mypy 70 | check-mypy: mypy 71 | 72 | .PHONY: check-safety 73 | check-safety: setup 74 | $(UV_COMMAND) run --python "$(UV_PYTHON_ROOT)" safety check --full-report 75 | $(UV_COMMAND) run --python "$(UV_PYTHON_ROOT)" bandit -ll --recursive stac_model tests 76 | 77 | .PHONY: lint 78 | lint: setup 79 | $(UV_COMMAND) run --python "$(UV_PYTHON_ROOT)" ruff check --fix --config=pyproject.toml ./ 80 | 81 | .PHONY: check-lint 82 | check-lint: lint 83 | $(UV_COMMAND) run --python "$(UV_PYTHON_ROOT)" ruff check --config=pyproject.toml ./ 84 | 85 | .PHONY: format-lint 86 | format-lint: lint 87 | ruff format --config=pyproject.toml ./ 88 | 89 | .PHONY: install-npm 90 | install-npm: 91 | npm install 92 | 93 | .PHONY: check-markdown 94 | check-markdown: install-npm 95 | npm run check-markdown 96 | 97 | .PHONY: format-markdown 98 | format-markdown: install-npm 99 | npm run format-markdown 100 | 101 | .PHONY: check-examples 102 | check-examples: install-npm 103 | npm run check-examples 104 | 105 | .PHONY: format-examples 106 | format-examples: install-npm 107 | npm run format-examples 108 | 109 | FORMATTERS := lint markdown examples 110 | $(addprefix fix-, $(FORMATTERS)): fix-%: format-% 111 | 112 | .PHONY: lint-all 113 | lint-all: lint mypy check-safety check-markdown 114 | 115 | .PHONY: update-dev-deps 116 | update-dev-deps: setup 117 | $(UV_COMMAND) export --only-dev --format requirements-txt -o requirements-only-dev.txt 118 | $(UV_COMMAND) pip install --python "$(UV_PYTHON_ROOT)" -r requirements-only-dev.txt 119 | 120 | #* Cleaning 121 | .PHONY: pycache-remove 122 | pycache-remove: 123 | find . | grep -E "(__pycache__|\.pyc|\.pyo$$)" | xargs rm -rf 124 | 125 | .PHONY: dsstore-remove 126 | dsstore-remove: 127 | find . | grep -E ".DS_Store" | xargs rm -rf 128 | 129 | .PHONY: mypycache-remove 130 | mypycache-remove: 131 | find . | grep -E ".mypy_cache" | xargs rm -rf 132 | 133 | .PHONY: ipynbcheckpoints-remove 134 | ipynbcheckpoints-remove: 135 | find . | grep -E ".ipynb_checkpoints" | xargs rm -rf 136 | 137 | .PHONY: pytestcache-remove 138 | pytestcache-remove: 139 | find . | grep -E ".pytest_cache" | xargs rm -rf 140 | 141 | .PHONY: build-remove 142 | build-remove: 143 | rm -rf build/ 144 | 145 | .PHONY: cleanup 146 | cleanup: pycache-remove dsstore-remove mypycache-remove ipynbcheckpoints-remove pytestcache-remove 147 | -------------------------------------------------------------------------------- /README_DLM_LEGACY.md: -------------------------------------------------------------------------------- 1 | # Deep Learning Model (DLM) Extension 2 | 3 | 4 | 5 | > [!NOTE] 6 | > This is legacy documentation references of [Deep Learning Model extension](https://github.com/crim-ca/dlm-extension) 7 | > preceding the current Machine Learning Model (MLM) extension. 8 | 9 | 10 | 11 | Check the original [Technical Report](https://github.com/crim-ca/CCCOT03/raw/main/CCCOT03_Rapport%20Final_FINAL_EN.pdf). 12 | 13 | ![Image Description](https://i.imgur.com/cVAg5sA.png) 14 | -------------------------------------------------------------------------------- /README_STAC_MODEL.md: -------------------------------------------------------------------------------- 1 | # stac-model 2 | 3 | 4 | 5 |
6 | 7 | [![Python support][bp1]][bp2] 8 | [![PyPI Release][bp3]][bp2] 9 | [![Repository][bscm1]][bp4] 10 | [![Releases][bscm2]][bp5] 11 | 12 | [![Contributions Welcome][bp8]][bp9] 13 | 14 | [![uv][bp11]][bp12] 15 | [![Pre-commit][bp15]][bp16] 16 | [![Semantic versions][blic3]][bp5] 17 | [![Pipelines][bscm6]][bscm7] 18 | 19 | *A PydanticV2 and PySTAC validation and serialization library for the STAC ML Model Extension* 20 | 21 |
22 | 23 | > ⚠️
24 | > FIXME: update description with ML framework connectors (pytorch, scikit-learn, etc.) 25 | 26 | ## Installation 27 | 28 | ```shell 29 | pip install -U stac-model 30 | ``` 31 | 32 | or install with uv: 33 | 34 | ```shell 35 | uv add stac-model 36 | ``` 37 | 38 | Then you can run 39 | 40 | ```shell 41 | stac-model --help 42 | ``` 43 | 44 | ## Creating example metadata JSON for a STAC Item 45 | 46 | ```shell 47 | stac-model 48 | ``` 49 | 50 | This will make [this example item](./examples/item_basic.json) for an example model. 51 | 52 | ## 📈 Releases 53 | 54 | You can see the list of available releases on the [GitHub Releases][github-releases] page. 55 | 56 | ## 📄 License 57 | 58 | [![License][blic1]][blic2] 59 | 60 | This project is licenced under the terms of the `Apache Software License 2.0` licence. 61 | See [LICENSE][blic2] for more details. 62 | 63 | ## 💗 Credits 64 | 65 | [![Python project templated from galactipy.][bp6]][bp7] 66 | 67 | 68 | 69 | [bp1]: https://img.shields.io/pypi/pyversions/stac-model?style=for-the-badge 70 | 71 | [bp2]: https://pypi.org/project/stac-model/ 72 | 73 | [bp3]: https://img.shields.io/pypi/v/stac-model?style=for-the-badge&logo=pypi&color=3775a9 74 | 75 | [bp4]: https://github.com/stac-extensions/mlm 76 | 77 | [bp5]: https://github.com/stac-extensions/mlm/releases 78 | 79 | [bp6]: https://img.shields.io/badge/made%20with-galactipy%20%F0%9F%8C%8C-179287?style=for-the-badge&labelColor=193A3E 80 | 81 | [bp7]: https://kutt.it/7fYqQl 82 | 83 | [bp8]: https://img.shields.io/static/v1.svg?label=Contributions&message=Welcome&color=0059b3&style=for-the-badge 84 | 85 | [bp9]: https://github.com/stac-extensions/mlm/blob/main/CONTRIBUTING.md 86 | 87 | [bp11]: https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json&style=for-the-badge 88 | 89 | [bp12]: https://docs.astral.sh/uv/ 90 | 91 | [bp15]: https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white&style=for-the-badge 92 | 93 | [bp16]: https://github.com/stac-extensions/mlm/blob/main/.pre-commit-config.yaml 94 | 95 | [blic1]: https://img.shields.io/github/license/stac-extensions/mlm?style=for-the-badge 96 | 97 | [blic2]: https://github.com/stac-extensions/mlm/blob/main/LICENSE 98 | 99 | [blic3]: https://img.shields.io/badge/%F0%9F%93%A6-semantic%20versions-4053D6?style=for-the-badge 100 | 101 | [github-releases]: https://github.com/stac-extensions/mlm/releases 102 | 103 | [bscm1]: https://img.shields.io/badge/GitHub-100000?style=for-the-badge&logo=github&logoColor=white 104 | 105 | [bscm2]: https://img.shields.io/github/v/release/stac-extensions/mlm?filter=stac-model-v*&style=for-the-badge&logo=semantic-release&color=347d39 106 | 107 | [bscm6]: https://img.shields.io/github/actions/workflow/status/stac-extensions/mlm/publish.yaml?style=for-the-badge&logo=github 108 | 109 | [bscm7]: https://github.com/stac-extensions/mlm/blob/main/.github/workflows/publish.yaml 110 | 111 | [hub1]: https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuring-dependabot-version-updates#enabling-dependabot-version-updates 112 | 113 | [hub2]: https://github.com/marketplace/actions/close-stale-issues 114 | 115 | [hub6]: https://docs.github.com/en/code-security/dependabot 116 | 117 | [hub8]: https://github.com/stac-extensions/mlm/blob/main/.github/release-drafter.yml 118 | 119 | [hub9]: https://github.com/stac-extensions/mlm/blob/main/.github/.stale.yml 120 | -------------------------------------------------------------------------------- /docs/static/crim.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stac-extensions/mlm/2e814859429ce1367102824056ef7a3233390f4d/docs/static/crim.png -------------------------------------------------------------------------------- /docs/static/nrcan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stac-extensions/mlm/2e814859429ce1367102824056ef7a3233390f4d/docs/static/nrcan.png -------------------------------------------------------------------------------- /docs/static/sigspatial_2024_mlm.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stac-extensions/mlm/2e814859429ce1367102824056ef7a3233390f4d/docs/static/sigspatial_2024_mlm.pdf -------------------------------------------------------------------------------- /docs/static/stac_mlm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stac-extensions/mlm/2e814859429ce1367102824056ef7a3233390f4d/docs/static/stac_mlm.png -------------------------------------------------------------------------------- /docs/static/terradue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stac-extensions/mlm/2e814859429ce1367102824056ef7a3233390f4d/docs/static/terradue.png -------------------------------------------------------------------------------- /docs/static/wherobots.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stac-extensions/mlm/2e814859429ce1367102824056ef7a3233390f4d/docs/static/wherobots.png -------------------------------------------------------------------------------- /examples/collection.json: -------------------------------------------------------------------------------- 1 | { 2 | "stac_version": "1.0.0", 3 | "stac_extensions": [ 4 | "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json" 5 | ], 6 | "type": "Collection", 7 | "id": "ml-model-examples", 8 | "title": "Machine Learning Model examples", 9 | "description": "Collection of items contained in the Machine Learning Model examples.", 10 | "license": "Apache-2.0", 11 | "extent": { 12 | "spatial": { 13 | "bbox": [ 14 | [ 15 | -7.882190080512502, 16 | 37.13739173208318, 17 | 27.911651652899923, 18 | 58.21798141355221 19 | ] 20 | ] 21 | }, 22 | "temporal": { 23 | "interval": [ 24 | [ 25 | "1900-01-01T00:00:00Z", 26 | "9999-12-31T23:59:59Z" 27 | ] 28 | ] 29 | } 30 | }, 31 | "item_assets": { 32 | "weights": { 33 | "title": "model weights", 34 | "roles": [ 35 | "mlm:model", 36 | "mlm:weights" 37 | ] 38 | } 39 | }, 40 | "summaries": { 41 | "datetime": { 42 | "minimum": "1900-01-01T00:00:00Z", 43 | "maximum": "9999-12-31T23:59:59Z" 44 | } 45 | }, 46 | "links": [ 47 | { 48 | "href": "collection.json", 49 | "rel": "self" 50 | }, 51 | { 52 | "href": "item_basic.json", 53 | "rel": "item" 54 | }, 55 | { 56 | "href": "item_bands_expression.json", 57 | "rel": "item" 58 | }, 59 | { 60 | "href": "item_eo_bands.json", 61 | "rel": "item" 62 | }, 63 | { 64 | "href": "item_eo_and_raster_bands.json", 65 | "rel": "item" 66 | }, 67 | { 68 | "href": "item_eo_bands_summarized.json", 69 | "rel": "item" 70 | }, 71 | { 72 | "href": "item_raster_bands.json", 73 | "rel": "item" 74 | }, 75 | { 76 | "href": "item_multi_io.json", 77 | "rel": "item" 78 | } 79 | ] 80 | } 81 | -------------------------------------------------------------------------------- /examples/item_bands_expression.json: -------------------------------------------------------------------------------- 1 | { 2 | "$comment": "Demonstrate the use of MLM and EO for bands description, with EO bands directly in the Model Asset.", 3 | "stac_version": "1.0.0", 4 | "stac_extensions": [ 5 | "https://stac-extensions.github.io/mlm/v1.4.0/schema.json", 6 | "https://stac-extensions.github.io/eo/v1.1.0/schema.json", 7 | "https://stac-extensions.github.io/raster/v1.1.0/schema.json", 8 | "https://stac-extensions.github.io/file/v1.0.0/schema.json", 9 | "https://stac-extensions.github.io/ml-aoi/v0.2.0/schema.json" 10 | ], 11 | "type": "Feature", 12 | "id": "resnet-18_sentinel-2_all_moco_classification", 13 | "collection": "ml-model-examples", 14 | "geometry": { 15 | "type": "Polygon", 16 | "coordinates": [ 17 | [ 18 | [ 19 | -7.882190080512502, 20 | 37.13739173208318 21 | ], 22 | [ 23 | -7.882190080512502, 24 | 58.21798141355221 25 | ], 26 | [ 27 | 27.911651652899923, 28 | 58.21798141355221 29 | ], 30 | [ 31 | 27.911651652899923, 32 | 37.13739173208318 33 | ], 34 | [ 35 | -7.882190080512502, 36 | 37.13739173208318 37 | ] 38 | ] 39 | ] 40 | }, 41 | "bbox": [ 42 | -7.882190080512502, 43 | 37.13739173208318, 44 | 27.911651652899923, 45 | 58.21798141355221 46 | ], 47 | "properties": { 48 | "description": "Sourced from torchgeo python library, identifier is ResNet18_Weights.SENTINEL2_ALL_MOCO", 49 | "datetime": null, 50 | "start_datetime": "1900-01-01T00:00:00Z", 51 | "end_datetime": "9999-12-31T23:59:59Z", 52 | "mlm:name": "Resnet-18 Sentinel-2 ALL MOCO", 53 | "mlm:tasks": [ 54 | "classification" 55 | ], 56 | "mlm:architecture": "ResNet", 57 | "mlm:framework": "pytorch", 58 | "mlm:framework_version": "2.1.2+cu121", 59 | "file:size": 43000000, 60 | "mlm:memory_size": 1, 61 | "mlm:total_parameters": 11700000, 62 | "mlm:pretrained_source": "EuroSat Sentinel-2", 63 | "mlm:accelerator": "cuda", 64 | "mlm:accelerator_constrained": false, 65 | "mlm:accelerator_summary": "Unknown", 66 | "mlm:batch_size_suggestion": 256, 67 | "mlm:input": [ 68 | { 69 | "name": "RBG+NDVI Bands Sentinel-2 Batch", 70 | "bands": [ 71 | { 72 | "name": "B04" 73 | }, 74 | { 75 | "name": "B03" 76 | }, 77 | { 78 | "name": "B02" 79 | }, 80 | { 81 | "name": "NDVI", 82 | "format": "rio-calc", 83 | "expression": "(B08 - B04) / (B08 + B04)" 84 | } 85 | ], 86 | "input": { 87 | "shape": [ 88 | -1, 89 | 4, 90 | 64, 91 | 64 92 | ], 93 | "dim_order": [ 94 | "batch", 95 | "channel", 96 | "height", 97 | "width" 98 | ], 99 | "data_type": "float32" 100 | } 101 | } 102 | ], 103 | "mlm:output": [ 104 | { 105 | "name": "classification", 106 | "tasks": [ 107 | "segmentation", 108 | "semantic-segmentation" 109 | ], 110 | "result": { 111 | "shape": [ 112 | -1, 113 | 2 114 | ], 115 | "dim_order": [ 116 | "batch", 117 | "class" 118 | ], 119 | "data_type": "float32" 120 | }, 121 | "classification_classes": [ 122 | { 123 | "value": 1, 124 | "name": "vegetation", 125 | "title": "Vegetation", 126 | "description": "Pixels were vegetation is detected.", 127 | "color_hint": "00FF00", 128 | "nodata": false 129 | }, 130 | { 131 | "value": 0, 132 | "name": "background", 133 | "title": "Non-Vegetation", 134 | "description": "Anything that is not classified as vegetation.", 135 | "color_hint": "000000", 136 | "nodata": false 137 | } 138 | ], 139 | "post_processing_function": null 140 | } 141 | ] 142 | }, 143 | "assets": { 144 | "weights": { 145 | "href": "https://example.com/model-rgb-ndvi.pth", 146 | "title": "Pytorch weights checkpoint", 147 | "description": "A vegetation classification model trained on Sentinel-2 imagery and NDVI.", 148 | "type": "application/octet-stream; application=pytorch", 149 | "roles": [ 150 | "mlm:model", 151 | "mlm:weights" 152 | ], 153 | "mlm:artifact_type": "torch.save", 154 | "$comment": "Following 'eo:bands' is required to fulfil schema validation of 'eo' extension.", 155 | "eo:bands": [ 156 | { 157 | "name": "B02", 158 | "common_name": "blue", 159 | "description": "Blue (band 2)", 160 | "center_wavelength": 0.49, 161 | "full_width_half_max": 0.098 162 | }, 163 | { 164 | "name": "B03", 165 | "common_name": "green", 166 | "description": "Green (band 3)", 167 | "center_wavelength": 0.56, 168 | "full_width_half_max": 0.045 169 | }, 170 | { 171 | "name": "B04", 172 | "common_name": "red", 173 | "description": "Red (band 4)", 174 | "center_wavelength": 0.665, 175 | "full_width_half_max": 0.038 176 | }, 177 | { 178 | "name": "B08", 179 | "common_name": "nir", 180 | "description": "NIR 1 (band 8)", 181 | "center_wavelength": 0.842, 182 | "full_width_half_max": 0.145 183 | } 184 | ] 185 | } 186 | }, 187 | "links": [ 188 | { 189 | "rel": "collection", 190 | "href": "./collection.json", 191 | "type": "application/json" 192 | }, 193 | { 194 | "rel": "self", 195 | "href": "./item_bands_expression.json", 196 | "type": "application/geo+json" 197 | }, 198 | { 199 | "rel": "derived_from", 200 | "href": "https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a", 201 | "type": "application/json", 202 | "ml-aoi:split": "train" 203 | } 204 | ] 205 | } 206 | -------------------------------------------------------------------------------- /examples/item_basic.json: -------------------------------------------------------------------------------- 1 | { 2 | "stac_version": "1.0.0", 3 | "stac_extensions": [ 4 | "https://stac-extensions.github.io/mlm/v1.4.0/schema.json" 5 | ], 6 | "type": "Feature", 7 | "id": "example-model", 8 | "collection": "ml-model-examples", 9 | "geometry": { 10 | "type": "Polygon", 11 | "coordinates": [ 12 | [ 13 | [ 14 | -7.882190080512502, 15 | 37.13739173208318 16 | ], 17 | [ 18 | -7.882190080512502, 19 | 58.21798141355221 20 | ], 21 | [ 22 | 27.911651652899923, 23 | 58.21798141355221 24 | ], 25 | [ 26 | 27.911651652899923, 27 | 37.13739173208318 28 | ], 29 | [ 30 | -7.882190080512502, 31 | 37.13739173208318 32 | ] 33 | ] 34 | ] 35 | }, 36 | "bbox": [ 37 | -7.882190080512502, 38 | 37.13739173208318, 39 | 27.911651652899923, 40 | 58.21798141355221 41 | ], 42 | "properties": { 43 | "description": "Basic STAC Item with only the MLM extension and no other extension cross-references.", 44 | "datetime": null, 45 | "start_datetime": "1900-01-01T00:00:00Z", 46 | "end_datetime": "9999-12-31T23:59:59Z", 47 | "mlm:name": "example-model", 48 | "mlm:tasks": [ 49 | "classification" 50 | ], 51 | "mlm:architecture": "ResNet", 52 | "mlm:input": [ 53 | { 54 | "name": "Model with RGB input that does not refer to any band.", 55 | "bands": [], 56 | "input": { 57 | "shape": [ 58 | -1, 59 | 3, 60 | 64, 61 | 64 62 | ], 63 | "dim_order": [ 64 | "batch", 65 | "channel", 66 | "height", 67 | "width" 68 | ], 69 | "data_type": "float32" 70 | } 71 | } 72 | ], 73 | "mlm:output": [ 74 | { 75 | "name": "classification", 76 | "tasks": [ 77 | "classification" 78 | ], 79 | "result": { 80 | "shape": [ 81 | -1, 82 | 1 83 | ], 84 | "dim_order": [ 85 | "batch", 86 | "class" 87 | ], 88 | "data_type": "uint8" 89 | }, 90 | "classification_classes": [ 91 | { 92 | "value": 0, 93 | "name": "BACKGROUND", 94 | "description": "Background non-city.", 95 | "color_hint": [ 96 | 0, 97 | 0, 98 | 0 99 | ] 100 | }, 101 | { 102 | "value": 1, 103 | "name": "CITY", 104 | "description": "A city is detected.", 105 | "color_hint": [ 106 | 0, 107 | 0, 108 | 255 109 | ] 110 | } 111 | ] 112 | } 113 | ] 114 | }, 115 | "assets": { 116 | "model": { 117 | "href": "https://huggingface.co/example/model-card", 118 | "title": "Pytorch weights checkpoint", 119 | "description": "Example model.", 120 | "type": "text/html", 121 | "roles": [ 122 | "mlm:model" 123 | ], 124 | "mlm:artifact_type": "torch.save" 125 | } 126 | }, 127 | "links": [ 128 | { 129 | "rel": "collection", 130 | "href": "./collection.json", 131 | "type": "application/json" 132 | }, 133 | { 134 | "rel": "self", 135 | "href": "./item_basic.json", 136 | "type": "application/geo+json" 137 | } 138 | ] 139 | } 140 | -------------------------------------------------------------------------------- /examples/item_eo_and_raster_bands.json: -------------------------------------------------------------------------------- 1 | { 2 | "stac_version": "1.0.0", 3 | "stac_extensions": [ 4 | "https://stac-extensions.github.io/mlm/v1.4.0/schema.json", 5 | "https://stac-extensions.github.io/eo/v1.1.0/schema.json", 6 | "https://stac-extensions.github.io/raster/v1.1.0/schema.json", 7 | "https://stac-extensions.github.io/file/v1.0.0/schema.json", 8 | "https://stac-extensions.github.io/ml-aoi/v0.2.0/schema.json" 9 | ], 10 | "type": "Feature", 11 | "id": "resnet-18_sentinel-2_all_moco_classification", 12 | "collection": "ml-model-examples", 13 | "geometry": { 14 | "type": "Polygon", 15 | "coordinates": [ 16 | [ 17 | [ 18 | -7.882190080512502, 19 | 37.13739173208318 20 | ], 21 | [ 22 | -7.882190080512502, 23 | 58.21798141355221 24 | ], 25 | [ 26 | 27.911651652899923, 27 | 58.21798141355221 28 | ], 29 | [ 30 | 27.911651652899923, 31 | 37.13739173208318 32 | ], 33 | [ 34 | -7.882190080512502, 35 | 37.13739173208318 36 | ] 37 | ] 38 | ] 39 | }, 40 | "bbox": [ 41 | -7.882190080512502, 42 | 37.13739173208318, 43 | 27.911651652899923, 44 | 58.21798141355221 45 | ], 46 | "properties": { 47 | "description": "Sourced from torchgeo python library, identifier is ResNet18_Weights.SENTINEL2_ALL_MOCO", 48 | "datetime": null, 49 | "start_datetime": "1900-01-01T00:00:00Z", 50 | "end_datetime": "9999-12-31T23:59:59Z", 51 | "mlm:name": "Resnet-18 Sentinel-2 ALL MOCO", 52 | "mlm:tasks": [ 53 | "classification" 54 | ], 55 | "mlm:architecture": "ResNet", 56 | "mlm:framework": "pytorch", 57 | "mlm:framework_version": "2.1.2+cu121", 58 | "file:size": 43000000, 59 | "mlm:memory_size": 1, 60 | "mlm:total_parameters": 11700000, 61 | "mlm:pretrained_source": "EuroSat Sentinel-2", 62 | "mlm:accelerator": "cuda", 63 | "mlm:accelerator_constrained": false, 64 | "mlm:accelerator_summary": "Unknown", 65 | "mlm:batch_size_suggestion": 256, 66 | "mlm:input": [ 67 | { 68 | "name": "13 Band Sentinel-2 Batch", 69 | "bands": [ 70 | "B01", 71 | "B02", 72 | "B03", 73 | "B04", 74 | "B05", 75 | "B06", 76 | "B07", 77 | "B08", 78 | "B8A", 79 | "B09", 80 | "B10", 81 | "B11", 82 | "B12" 83 | ], 84 | "input": { 85 | "shape": [ 86 | -1, 87 | 13, 88 | 64, 89 | 64 90 | ], 91 | "dim_order": [ 92 | "batch", 93 | "channel", 94 | "height", 95 | "width" 96 | ], 97 | "data_type": "float32" 98 | }, 99 | "resize_type": null, 100 | "value_scaling": [ 101 | { 102 | "type": "z-score", 103 | "mean": 1354.40546513, 104 | "stddev": 245.71762908 105 | }, 106 | { 107 | "type": "z-score", 108 | "mean": 1118.24399958, 109 | "stddev": 333.00778264 110 | }, 111 | { 112 | "type": "z-score", 113 | "mean": 1042.92983953, 114 | "stddev": 395.09249139 115 | }, 116 | { 117 | "type": "z-score", 118 | "mean": 947.62620298, 119 | "stddev": 593.75055589 120 | }, 121 | { 122 | "type": "z-score", 123 | "mean": 1199.47283961, 124 | "stddev": 566.4170017 125 | }, 126 | { 127 | "type": "z-score", 128 | "mean": 1999.79090914, 129 | "stddev": 861.18399006 130 | }, 131 | { 132 | "type": "z-score", 133 | "mean": 2369.22292565, 134 | "stddev": 1086.63139075 135 | }, 136 | { 137 | "type": "z-score", 138 | "mean": 2296.82608323, 139 | "stddev": 1117.98170791 140 | }, 141 | { 142 | "type": "z-score", 143 | "mean": 732.08340178, 144 | "stddev": 404.91978886 145 | }, 146 | { 147 | "type": "z-score", 148 | "mean": 12.11327804, 149 | "stddev": 4.77584468 150 | }, 151 | { 152 | "type": "z-score", 153 | "mean": 1819.01027855, 154 | "stddev": 1002.58768311 155 | }, 156 | { 157 | "type": "z-score", 158 | "mean": 1118.92391149, 159 | "stddev": 761.30323499 160 | }, 161 | { 162 | "type": "z-score", 163 | "mean": 2594.14080798, 164 | "stddev": 1231.58581042 165 | } 166 | ], 167 | "pre_processing_function": { 168 | "format": "python", 169 | "expression": "torchgeo.datamodules.eurosat.EuroSATDataModule.collate_fn" 170 | } 171 | } 172 | ], 173 | "mlm:output": [ 174 | { 175 | "name": "classification", 176 | "tasks": [ 177 | "classification" 178 | ], 179 | "result": { 180 | "shape": [ 181 | -1, 182 | 10 183 | ], 184 | "dim_order": [ 185 | "batch", 186 | "class" 187 | ], 188 | "data_type": "float32" 189 | }, 190 | "classification_classes": [ 191 | { 192 | "value": 0, 193 | "name": "Annual Crop", 194 | "description": null, 195 | "title": null, 196 | "color_hint": null, 197 | "nodata": false 198 | }, 199 | { 200 | "value": 1, 201 | "name": "Forest", 202 | "description": null, 203 | "title": null, 204 | "color_hint": null, 205 | "nodata": false 206 | }, 207 | { 208 | "value": 2, 209 | "name": "Herbaceous Vegetation", 210 | "description": null, 211 | "title": null, 212 | "color_hint": null, 213 | "nodata": false 214 | }, 215 | { 216 | "value": 3, 217 | "name": "Highway", 218 | "description": null, 219 | "title": null, 220 | "color_hint": null, 221 | "nodata": false 222 | }, 223 | { 224 | "value": 4, 225 | "name": "Industrial Buildings", 226 | "description": null, 227 | "title": null, 228 | "color_hint": null, 229 | "nodata": false 230 | }, 231 | { 232 | "value": 5, 233 | "name": "Pasture", 234 | "description": null, 235 | "title": null, 236 | "color_hint": null, 237 | "nodata": false 238 | }, 239 | { 240 | "value": 6, 241 | "name": "Permanent Crop", 242 | "description": null, 243 | "title": null, 244 | "color_hint": null, 245 | "nodata": false 246 | }, 247 | { 248 | "value": 7, 249 | "name": "Residential Buildings", 250 | "description": null, 251 | "title": null, 252 | "color_hint": null, 253 | "nodata": false 254 | }, 255 | { 256 | "value": 8, 257 | "name": "River", 258 | "description": null, 259 | "title": null, 260 | "color_hint": null, 261 | "nodata": false 262 | }, 263 | { 264 | "value": 9, 265 | "name": "SeaLake", 266 | "description": null, 267 | "title": null, 268 | "color_hint": null, 269 | "nodata": false 270 | } 271 | ], 272 | "post_processing_function": null 273 | } 274 | ], 275 | "eo:bands": [ 276 | { 277 | "name": "B01", 278 | "common_name": "coastal", 279 | "description": "Coastal aerosol (band 1)", 280 | "center_wavelength": 0.443, 281 | "full_width_half_max": 0.027 282 | }, 283 | { 284 | "name": "B02", 285 | "common_name": "blue", 286 | "description": "Blue (band 2)", 287 | "center_wavelength": 0.49, 288 | "full_width_half_max": 0.098 289 | }, 290 | { 291 | "name": "B03", 292 | "common_name": "green", 293 | "description": "Green (band 3)", 294 | "center_wavelength": 0.56, 295 | "full_width_half_max": 0.045 296 | }, 297 | { 298 | "name": "B04", 299 | "common_name": "red", 300 | "description": "Red (band 4)", 301 | "center_wavelength": 0.665, 302 | "full_width_half_max": 0.038 303 | }, 304 | { 305 | "name": "B05", 306 | "common_name": "rededge", 307 | "description": "Red edge 1 (band 5)", 308 | "center_wavelength": 0.704, 309 | "full_width_half_max": 0.019 310 | }, 311 | { 312 | "name": "B06", 313 | "common_name": "rededge", 314 | "description": "Red edge 2 (band 6)", 315 | "center_wavelength": 0.74, 316 | "full_width_half_max": 0.018 317 | }, 318 | { 319 | "name": "B07", 320 | "common_name": "rededge", 321 | "description": "Red edge 3 (band 7)", 322 | "center_wavelength": 0.783, 323 | "full_width_half_max": 0.028 324 | }, 325 | { 326 | "name": "B08", 327 | "common_name": "nir", 328 | "description": "NIR 1 (band 8)", 329 | "center_wavelength": 0.842, 330 | "full_width_half_max": 0.145 331 | }, 332 | { 333 | "name": "B8A", 334 | "common_name": "nir08", 335 | "description": "NIR 2 (band 8A)", 336 | "center_wavelength": 0.865, 337 | "full_width_half_max": 0.033 338 | }, 339 | { 340 | "name": "B09", 341 | "common_name": "nir09", 342 | "description": "NIR 3 (band 9)", 343 | "center_wavelength": 0.945, 344 | "full_width_half_max": 0.026 345 | }, 346 | { 347 | "name": "B10", 348 | "common_name": "cirrus", 349 | "description": "SWIR - Cirrus (band 10)", 350 | "center_wavelength": 1.375, 351 | "full_width_half_max": 0.026 352 | }, 353 | { 354 | "name": "B11", 355 | "common_name": "swir16", 356 | "description": "SWIR 1 (band 11)", 357 | "center_wavelength": 1.61, 358 | "full_width_half_max": 0.143 359 | }, 360 | { 361 | "name": "B12", 362 | "common_name": "swir22", 363 | "description": "SWIR 2 (band 12)", 364 | "center_wavelength": 2.19, 365 | "full_width_half_max": 0.242 366 | } 367 | ], 368 | "raster:bands": [ 369 | { 370 | "name": "B01", 371 | "nodata": 0, 372 | "data_type": "uint16", 373 | "bits_per_sample": 15, 374 | "spatial_resolution": 60, 375 | "scale": 0.0001, 376 | "offset": 0, 377 | "unit": "m" 378 | }, 379 | { 380 | "name": "B02", 381 | "nodata": 0, 382 | "data_type": "uint16", 383 | "bits_per_sample": 15, 384 | "spatial_resolution": 10, 385 | "scale": 0.0001, 386 | "offset": 0, 387 | "unit": "m" 388 | }, 389 | { 390 | "name": "B03", 391 | "nodata": 0, 392 | "data_type": "uint16", 393 | "bits_per_sample": 15, 394 | "spatial_resolution": 10, 395 | "scale": 0.0001, 396 | "offset": 0, 397 | "unit": "m" 398 | }, 399 | { 400 | "name": "B04", 401 | "nodata": 0, 402 | "data_type": "uint16", 403 | "bits_per_sample": 15, 404 | "spatial_resolution": 10, 405 | "scale": 0.0001, 406 | "offset": 0, 407 | "unit": "m" 408 | }, 409 | { 410 | "name": "B05", 411 | "nodata": 0, 412 | "data_type": "uint16", 413 | "bits_per_sample": 15, 414 | "spatial_resolution": 20, 415 | "scale": 0.0001, 416 | "offset": 0, 417 | "unit": "m" 418 | }, 419 | { 420 | "name": "B06", 421 | "nodata": 0, 422 | "data_type": "uint16", 423 | "bits_per_sample": 15, 424 | "spatial_resolution": 20, 425 | "scale": 0.0001, 426 | "offset": 0, 427 | "unit": "m" 428 | }, 429 | { 430 | "name": "B07", 431 | "nodata": 0, 432 | "data_type": "uint16", 433 | "bits_per_sample": 15, 434 | "spatial_resolution": 20, 435 | "scale": 0.0001, 436 | "offset": 0, 437 | "unit": "m" 438 | }, 439 | { 440 | "name": "B08", 441 | "nodata": 0, 442 | "data_type": "uint16", 443 | "bits_per_sample": 15, 444 | "spatial_resolution": 10, 445 | "scale": 0.0001, 446 | "offset": 0, 447 | "unit": "m" 448 | }, 449 | { 450 | "name": "B8A", 451 | "nodata": 0, 452 | "data_type": "uint16", 453 | "bits_per_sample": 15, 454 | "spatial_resolution": 20, 455 | "scale": 0.0001, 456 | "offset": 0, 457 | "unit": "m" 458 | }, 459 | { 460 | "name": "B09", 461 | "nodata": 0, 462 | "data_type": "uint16", 463 | "bits_per_sample": 15, 464 | "spatial_resolution": 60, 465 | "scale": 0.0001, 466 | "offset": 0, 467 | "unit": "m" 468 | }, 469 | { 470 | "name": "B10", 471 | "nodata": 0, 472 | "data_type": "uint16", 473 | "bits_per_sample": 15, 474 | "spatial_resolution": 60, 475 | "scale": 0.0001, 476 | "offset": 0, 477 | "unit": "m" 478 | }, 479 | { 480 | "name": "B11", 481 | "nodata": 0, 482 | "data_type": "uint16", 483 | "bits_per_sample": 15, 484 | "spatial_resolution": 20, 485 | "scale": 0.0001, 486 | "offset": 0, 487 | "unit": "m" 488 | }, 489 | { 490 | "name": "B12", 491 | "nodata": 0, 492 | "data_type": "uint16", 493 | "bits_per_sample": 15, 494 | "spatial_resolution": 20, 495 | "scale": 0.0001, 496 | "offset": 0, 497 | "unit": "m" 498 | } 499 | ] 500 | }, 501 | "assets": { 502 | "weights": { 503 | "href": "https://huggingface.co/torchgeo/resnet18_sentinel2_all_moco/resolve/main/resnet18_sentinel2_all_moco-59bfdff9.pth", 504 | "title": "Pytorch weights checkpoint", 505 | "description": "A Resnet-18 classification model trained on normalized Sentinel-2 imagery with Eurosat landcover labels with torchgeo", 506 | "type": "application/octet-stream; application=pytorch", 507 | "roles": [ 508 | "mlm:model", 509 | "mlm:weights" 510 | ], 511 | "mlm:artifact_type": "torch.save", 512 | "$comment": "Following 'eo:bands' is required to fulfil schema validation of 'eo' extension.", 513 | "eo:bands": [ 514 | { 515 | "name": "coastal" 516 | }, 517 | { 518 | "name": "blue" 519 | }, 520 | { 521 | "name": "green" 522 | }, 523 | { 524 | "name": "red" 525 | }, 526 | { 527 | "name": "rededge1" 528 | }, 529 | { 530 | "name": "rededge2" 531 | }, 532 | { 533 | "name": "rededge3" 534 | }, 535 | { 536 | "name": "nir" 537 | }, 538 | { 539 | "name": "nir08" 540 | }, 541 | { 542 | "name": "nir09" 543 | }, 544 | { 545 | "name": "cirrus" 546 | }, 547 | { 548 | "name": "swir16" 549 | }, 550 | { 551 | "name": "swir22" 552 | } 553 | ] 554 | }, 555 | "source_code": { 556 | "href": "https://github.com/microsoft/torchgeo/blob/61efd2e2c4df7ebe3bd03002ebbaeaa3cfe9885a/torchgeo/models/resnet.py#L207", 557 | "title": "Model implementation.", 558 | "description": "Source code to run the model.", 559 | "type": "text/x-python", 560 | "roles": [ 561 | "code", 562 | "metadata" 563 | ] 564 | } 565 | }, 566 | "links": [ 567 | { 568 | "rel": "collection", 569 | "href": "./collection.json", 570 | "type": "application/json" 571 | }, 572 | { 573 | "rel": "self", 574 | "href": "./item_eo_bands.json", 575 | "type": "application/geo+json" 576 | }, 577 | { 578 | "rel": "derived_from", 579 | "href": "https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a", 580 | "type": "application/json", 581 | "ml-aoi:split": "train" 582 | } 583 | ] 584 | } 585 | -------------------------------------------------------------------------------- /examples/item_eo_bands.json: -------------------------------------------------------------------------------- 1 | { 2 | "$comment": "Demonstrate the use of MLM and EO for bands description, with EO bands directly in the Model Asset.", 3 | "stac_version": "1.0.0", 4 | "stac_extensions": [ 5 | "https://stac-extensions.github.io/mlm/v1.4.0/schema.json", 6 | "https://stac-extensions.github.io/eo/v1.1.0/schema.json", 7 | "https://stac-extensions.github.io/raster/v1.1.0/schema.json", 8 | "https://stac-extensions.github.io/file/v1.0.0/schema.json", 9 | "https://stac-extensions.github.io/ml-aoi/v0.2.0/schema.json" 10 | ], 11 | "type": "Feature", 12 | "id": "resnet-18_sentinel-2_all_moco_classification", 13 | "collection": "ml-model-examples", 14 | "geometry": { 15 | "type": "Polygon", 16 | "coordinates": [ 17 | [ 18 | [ 19 | -7.882190080512502, 20 | 37.13739173208318 21 | ], 22 | [ 23 | -7.882190080512502, 24 | 58.21798141355221 25 | ], 26 | [ 27 | 27.911651652899923, 28 | 58.21798141355221 29 | ], 30 | [ 31 | 27.911651652899923, 32 | 37.13739173208318 33 | ], 34 | [ 35 | -7.882190080512502, 36 | 37.13739173208318 37 | ] 38 | ] 39 | ] 40 | }, 41 | "bbox": [ 42 | -7.882190080512502, 43 | 37.13739173208318, 44 | 27.911651652899923, 45 | 58.21798141355221 46 | ], 47 | "properties": { 48 | "description": "Sourced from torchgeo python library, identifier is ResNet18_Weights.SENTINEL2_ALL_MOCO", 49 | "datetime": null, 50 | "start_datetime": "1900-01-01T00:00:00Z", 51 | "end_datetime": "9999-12-31T23:59:59Z", 52 | "mlm:name": "Resnet-18 Sentinel-2 ALL MOCO", 53 | "mlm:tasks": [ 54 | "classification" 55 | ], 56 | "mlm:architecture": "ResNet", 57 | "mlm:framework": "pytorch", 58 | "mlm:framework_version": "2.1.2+cu121", 59 | "file:size": 43000000, 60 | "mlm:memory_size": 1, 61 | "mlm:total_parameters": 11700000, 62 | "mlm:pretrained_source": "EuroSat Sentinel-2", 63 | "mlm:accelerator": "cuda", 64 | "mlm:accelerator_constrained": false, 65 | "mlm:accelerator_summary": "Unknown", 66 | "mlm:batch_size_suggestion": 256, 67 | "mlm:input": [ 68 | { 69 | "name": "13 Band Sentinel-2 Batch", 70 | "bands": [ 71 | "B01", 72 | "B02", 73 | "B03", 74 | "B04", 75 | "B05", 76 | "B06", 77 | "B07", 78 | "B08", 79 | "B8A", 80 | "B09", 81 | "B10", 82 | "B11", 83 | "B12" 84 | ], 85 | "input": { 86 | "shape": [ 87 | -1, 88 | 13, 89 | 64, 90 | 64 91 | ], 92 | "dim_order": [ 93 | "batch", 94 | "channel", 95 | "height", 96 | "width" 97 | ], 98 | "data_type": "float32" 99 | }, 100 | "norm_by_channel": true, 101 | "resize_type": null, 102 | "value_scaling": [ 103 | { 104 | "type": "z-score", 105 | "mean": 1354.40546513, 106 | "stddev": 245.71762908 107 | }, 108 | { 109 | "type": "z-score", 110 | "mean": 1118.24399958, 111 | "stddev": 333.00778264 112 | }, 113 | { 114 | "type": "z-score", 115 | "mean": 1042.92983953, 116 | "stddev": 395.09249139 117 | }, 118 | { 119 | "type": "z-score", 120 | "mean": 947.62620298, 121 | "stddev": 593.75055589 122 | }, 123 | { 124 | "type": "z-score", 125 | "mean": 1199.47283961, 126 | "stddev": 566.4170017 127 | }, 128 | { 129 | "type": "z-score", 130 | "mean": 1999.79090914, 131 | "stddev": 861.18399006 132 | }, 133 | { 134 | "type": "z-score", 135 | "mean": 2369.22292565, 136 | "stddev": 1086.63139075 137 | }, 138 | { 139 | "type": "z-score", 140 | "mean": 2296.82608323, 141 | "stddev": 1117.98170791 142 | }, 143 | { 144 | "type": "z-score", 145 | "mean": 732.08340178, 146 | "stddev": 404.91978886 147 | }, 148 | { 149 | "type": "z-score", 150 | "mean": 12.11327804, 151 | "stddev": 4.77584468 152 | }, 153 | { 154 | "type": "z-score", 155 | "mean": 1819.01027855, 156 | "stddev": 1002.58768311 157 | }, 158 | { 159 | "type": "z-score", 160 | "mean": 1118.92391149, 161 | "stddev": 761.30323499 162 | }, 163 | { 164 | "type": "z-score", 165 | "mean": 2594.14080798, 166 | "stddev": 1231.58581042 167 | } 168 | ], 169 | "pre_processing_function": { 170 | "format": "python", 171 | "expression": "torchgeo.datamodules.eurosat.EuroSATDataModule.collate_fn" 172 | } 173 | } 174 | ], 175 | "mlm:output": [ 176 | { 177 | "name": "classification", 178 | "tasks": [ 179 | "classification" 180 | ], 181 | "result": { 182 | "shape": [ 183 | -1, 184 | 10 185 | ], 186 | "dim_order": [ 187 | "batch", 188 | "class" 189 | ], 190 | "data_type": "float32" 191 | }, 192 | "classification_classes": [ 193 | { 194 | "value": 0, 195 | "name": "Annual Crop", 196 | "description": null, 197 | "title": null, 198 | "color_hint": null, 199 | "nodata": false 200 | }, 201 | { 202 | "value": 1, 203 | "name": "Forest", 204 | "description": null, 205 | "title": null, 206 | "color_hint": null, 207 | "nodata": false 208 | }, 209 | { 210 | "value": 2, 211 | "name": "Herbaceous Vegetation", 212 | "description": null, 213 | "title": null, 214 | "color_hint": null, 215 | "nodata": false 216 | }, 217 | { 218 | "value": 3, 219 | "name": "Highway", 220 | "description": null, 221 | "title": null, 222 | "color_hint": null, 223 | "nodata": false 224 | }, 225 | { 226 | "value": 4, 227 | "name": "Industrial Buildings", 228 | "description": null, 229 | "title": null, 230 | "color_hint": null, 231 | "nodata": false 232 | }, 233 | { 234 | "value": 5, 235 | "name": "Pasture", 236 | "description": null, 237 | "title": null, 238 | "color_hint": null, 239 | "nodata": false 240 | }, 241 | { 242 | "value": 6, 243 | "name": "Permanent Crop", 244 | "description": null, 245 | "title": null, 246 | "color_hint": null, 247 | "nodata": false 248 | }, 249 | { 250 | "value": 7, 251 | "name": "Residential Buildings", 252 | "description": null, 253 | "title": null, 254 | "color_hint": null, 255 | "nodata": false 256 | }, 257 | { 258 | "value": 8, 259 | "name": "River", 260 | "description": null, 261 | "title": null, 262 | "color_hint": null, 263 | "nodata": false 264 | }, 265 | { 266 | "value": 9, 267 | "name": "SeaLake", 268 | "description": null, 269 | "title": null, 270 | "color_hint": null, 271 | "nodata": false 272 | } 273 | ], 274 | "post_processing_function": null 275 | } 276 | ] 277 | }, 278 | "assets": { 279 | "weights": { 280 | "href": "https://huggingface.co/torchgeo/resnet18_sentinel2_all_moco/resolve/main/resnet18_sentinel2_all_moco-59bfdff9.pth", 281 | "title": "Pytorch weights checkpoint", 282 | "description": "A Resnet-18 classification model trained on normalized Sentinel-2 imagery with Eurosat landcover labels with torchgeo", 283 | "type": "application/octet-stream; application=pytorch", 284 | "roles": [ 285 | "mlm:model", 286 | "mlm:weights" 287 | ], 288 | "mlm:artifact_type": "torch.save", 289 | "$comment": "Following 'eo:bands' is required to fulfil schema validation of 'eo' extension.", 290 | "eo:bands": [ 291 | { 292 | "name": "B01", 293 | "common_name": "coastal", 294 | "description": "Coastal aerosol (band 1)", 295 | "center_wavelength": 0.443, 296 | "full_width_half_max": 0.027 297 | }, 298 | { 299 | "name": "B02", 300 | "common_name": "blue", 301 | "description": "Blue (band 2)", 302 | "center_wavelength": 0.49, 303 | "full_width_half_max": 0.098 304 | }, 305 | { 306 | "name": "B03", 307 | "common_name": "green", 308 | "description": "Green (band 3)", 309 | "center_wavelength": 0.56, 310 | "full_width_half_max": 0.045 311 | }, 312 | { 313 | "name": "B04", 314 | "common_name": "red", 315 | "description": "Red (band 4)", 316 | "center_wavelength": 0.665, 317 | "full_width_half_max": 0.038 318 | }, 319 | { 320 | "name": "B05", 321 | "common_name": "rededge", 322 | "description": "Red edge 1 (band 5)", 323 | "center_wavelength": 0.704, 324 | "full_width_half_max": 0.019 325 | }, 326 | { 327 | "name": "B06", 328 | "common_name": "rededge", 329 | "description": "Red edge 2 (band 6)", 330 | "center_wavelength": 0.74, 331 | "full_width_half_max": 0.018 332 | }, 333 | { 334 | "name": "B07", 335 | "common_name": "rededge", 336 | "description": "Red edge 3 (band 7)", 337 | "center_wavelength": 0.783, 338 | "full_width_half_max": 0.028 339 | }, 340 | { 341 | "name": "B08", 342 | "common_name": "nir", 343 | "description": "NIR 1 (band 8)", 344 | "center_wavelength": 0.842, 345 | "full_width_half_max": 0.145 346 | }, 347 | { 348 | "name": "B8A", 349 | "common_name": "nir08", 350 | "description": "NIR 2 (band 8A)", 351 | "center_wavelength": 0.865, 352 | "full_width_half_max": 0.033 353 | }, 354 | { 355 | "name": "B09", 356 | "common_name": "nir09", 357 | "description": "NIR 3 (band 9)", 358 | "center_wavelength": 0.945, 359 | "full_width_half_max": 0.026 360 | }, 361 | { 362 | "name": "B10", 363 | "common_name": "cirrus", 364 | "description": "SWIR - Cirrus (band 10)", 365 | "center_wavelength": 1.375, 366 | "full_width_half_max": 0.026 367 | }, 368 | { 369 | "name": "B11", 370 | "common_name": "swir16", 371 | "description": "SWIR 1 (band 11)", 372 | "center_wavelength": 1.61, 373 | "full_width_half_max": 0.143 374 | }, 375 | { 376 | "name": "B12", 377 | "common_name": "swir22", 378 | "description": "SWIR 2 (band 12)", 379 | "center_wavelength": 2.19, 380 | "full_width_half_max": 0.242 381 | } 382 | ] 383 | }, 384 | "source_code": { 385 | "href": "https://github.com/microsoft/torchgeo/blob/61efd2e2c4df7ebe3bd03002ebbaeaa3cfe9885a/torchgeo/models/resnet.py#L207", 386 | "title": "Model implementation.", 387 | "description": "Source code to run the model.", 388 | "type": "text/x-python", 389 | "roles": [ 390 | "mlm:source_code", 391 | "code", 392 | "metadata" 393 | ] 394 | } 395 | }, 396 | "links": [ 397 | { 398 | "rel": "collection", 399 | "href": "./collection.json", 400 | "type": "application/json" 401 | }, 402 | { 403 | "rel": "self", 404 | "href": "./item_eo_bands.json", 405 | "type": "application/geo+json" 406 | }, 407 | { 408 | "rel": "derived_from", 409 | "href": "https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a", 410 | "type": "application/json", 411 | "ml-aoi:split": "train" 412 | } 413 | ] 414 | } 415 | -------------------------------------------------------------------------------- /examples/item_eo_bands_summarized.json: -------------------------------------------------------------------------------- 1 | { 2 | "$comment": "Demonstrate the use of MLM and EO for bands description, with EO bands summarized in the Item properties and referenced by name in the Model Asset.", 3 | "stac_version": "1.0.0", 4 | "stac_extensions": [ 5 | "https://stac-extensions.github.io/mlm/v1.4.0/schema.json", 6 | "https://stac-extensions.github.io/eo/v1.1.0/schema.json", 7 | "https://stac-extensions.github.io/raster/v1.1.0/schema.json", 8 | "https://stac-extensions.github.io/file/v1.0.0/schema.json", 9 | "https://stac-extensions.github.io/ml-aoi/v0.2.0/schema.json" 10 | ], 11 | "type": "Feature", 12 | "id": "resnet-18_sentinel-2_all_moco_classification", 13 | "collection": "ml-model-examples", 14 | "geometry": { 15 | "type": "Polygon", 16 | "coordinates": [ 17 | [ 18 | [ 19 | -7.882190080512502, 20 | 37.13739173208318 21 | ], 22 | [ 23 | -7.882190080512502, 24 | 58.21798141355221 25 | ], 26 | [ 27 | 27.911651652899923, 28 | 58.21798141355221 29 | ], 30 | [ 31 | 27.911651652899923, 32 | 37.13739173208318 33 | ], 34 | [ 35 | -7.882190080512502, 36 | 37.13739173208318 37 | ] 38 | ] 39 | ] 40 | }, 41 | "bbox": [ 42 | -7.882190080512502, 43 | 37.13739173208318, 44 | 27.911651652899923, 45 | 58.21798141355221 46 | ], 47 | "properties": { 48 | "description": "Sourced from torchgeo python library, identifier is ResNet18_Weights.SENTINEL2_ALL_MOCO", 49 | "datetime": null, 50 | "start_datetime": "1900-01-01T00:00:00Z", 51 | "end_datetime": "9999-12-31T23:59:59Z", 52 | "mlm:name": "Resnet-18 Sentinel-2 ALL MOCO", 53 | "mlm:tasks": [ 54 | "classification" 55 | ], 56 | "mlm:architecture": "ResNet", 57 | "mlm:framework": "pytorch", 58 | "mlm:framework_version": "2.1.2+cu121", 59 | "file:size": 43000000, 60 | "mlm:memory_size": 1, 61 | "mlm:total_parameters": 11700000, 62 | "mlm:pretrained_source": "EuroSat Sentinel-2", 63 | "mlm:accelerator": "cuda", 64 | "mlm:accelerator_constrained": false, 65 | "mlm:accelerator_summary": "Unknown", 66 | "mlm:batch_size_suggestion": 256, 67 | "mlm:input": [ 68 | { 69 | "name": "13 Band Sentinel-2 Batch", 70 | "bands": [ 71 | "B01", 72 | "B02", 73 | "B03", 74 | "B04", 75 | "B05", 76 | "B06", 77 | "B07", 78 | "B08", 79 | "B8A", 80 | "B09", 81 | "B10", 82 | "B11", 83 | "B12" 84 | ], 85 | "input": { 86 | "shape": [ 87 | -1, 88 | 13, 89 | 64, 90 | 64 91 | ], 92 | "dim_order": [ 93 | "batch", 94 | "channel", 95 | "height", 96 | "width" 97 | ], 98 | "data_type": "float32" 99 | }, 100 | "resize_type": null, 101 | "value_scaling": [ 102 | { 103 | "type": "z-score", 104 | "mean": 1354.40546513, 105 | "stddev": 245.71762908 106 | }, 107 | { 108 | "type": "z-score", 109 | "mean": 1118.24399958, 110 | "stddev": 333.00778264 111 | }, 112 | { 113 | "type": "z-score", 114 | "mean": 1042.92983953, 115 | "stddev": 395.09249139 116 | }, 117 | { 118 | "type": "z-score", 119 | "mean": 947.62620298, 120 | "stddev": 593.75055589 121 | }, 122 | { 123 | "type": "z-score", 124 | "mean": 1199.47283961, 125 | "stddev": 566.4170017 126 | }, 127 | { 128 | "type": "z-score", 129 | "mean": 1999.79090914, 130 | "stddev": 861.18399006 131 | }, 132 | { 133 | "type": "z-score", 134 | "mean": 2369.22292565, 135 | "stddev": 1086.63139075 136 | }, 137 | { 138 | "type": "z-score", 139 | "mean": 2296.82608323, 140 | "stddev": 1117.98170791 141 | }, 142 | { 143 | "type": "z-score", 144 | "mean": 732.08340178, 145 | "stddev": 404.91978886 146 | }, 147 | { 148 | "type": "z-score", 149 | "mean": 12.11327804, 150 | "stddev": 4.77584468 151 | }, 152 | { 153 | "type": "z-score", 154 | "mean": 1819.01027855, 155 | "stddev": 1002.58768311 156 | }, 157 | { 158 | "type": "z-score", 159 | "mean": 1118.92391149, 160 | "stddev": 761.30323499 161 | }, 162 | { 163 | "type": "z-score", 164 | "mean": 2594.14080798, 165 | "stddev": 1231.58581042 166 | } 167 | ], 168 | "pre_processing_function": { 169 | "format": "python", 170 | "expression": "torchgeo.datamodules.eurosat.EuroSATDataModule.collate_fn" 171 | } 172 | } 173 | ], 174 | "mlm:output": [ 175 | { 176 | "name": "classification", 177 | "tasks": [ 178 | "classification" 179 | ], 180 | "result": { 181 | "shape": [ 182 | -1, 183 | 10 184 | ], 185 | "dim_order": [ 186 | "batch", 187 | "class" 188 | ], 189 | "data_type": "float32" 190 | }, 191 | "classification_classes": [ 192 | { 193 | "value": 0, 194 | "name": "Annual Crop", 195 | "description": null, 196 | "title": null, 197 | "color_hint": null, 198 | "nodata": false 199 | }, 200 | { 201 | "value": 1, 202 | "name": "Forest", 203 | "description": null, 204 | "title": null, 205 | "color_hint": null, 206 | "nodata": false 207 | }, 208 | { 209 | "value": 2, 210 | "name": "Herbaceous Vegetation", 211 | "description": null, 212 | "title": null, 213 | "color_hint": null, 214 | "nodata": false 215 | }, 216 | { 217 | "value": 3, 218 | "name": "Highway", 219 | "description": null, 220 | "title": null, 221 | "color_hint": null, 222 | "nodata": false 223 | }, 224 | { 225 | "value": 4, 226 | "name": "Industrial Buildings", 227 | "description": null, 228 | "title": null, 229 | "color_hint": null, 230 | "nodata": false 231 | }, 232 | { 233 | "value": 5, 234 | "name": "Pasture", 235 | "description": null, 236 | "title": null, 237 | "color_hint": null, 238 | "nodata": false 239 | }, 240 | { 241 | "value": 6, 242 | "name": "Permanent Crop", 243 | "description": null, 244 | "title": null, 245 | "color_hint": null, 246 | "nodata": false 247 | }, 248 | { 249 | "value": 7, 250 | "name": "Residential Buildings", 251 | "description": null, 252 | "title": null, 253 | "color_hint": null, 254 | "nodata": false 255 | }, 256 | { 257 | "value": 8, 258 | "name": "River", 259 | "description": null, 260 | "title": null, 261 | "color_hint": null, 262 | "nodata": false 263 | }, 264 | { 265 | "value": 9, 266 | "name": "SeaLake", 267 | "description": null, 268 | "title": null, 269 | "color_hint": null, 270 | "nodata": false 271 | } 272 | ], 273 | "post_processing_function": null 274 | } 275 | ], 276 | "eo:bands": [ 277 | { 278 | "name": "B01", 279 | "common_name": "coastal", 280 | "description": "Coastal aerosol (band 1)", 281 | "center_wavelength": 0.443, 282 | "full_width_half_max": 0.027 283 | }, 284 | { 285 | "name": "B02", 286 | "common_name": "blue", 287 | "description": "Blue (band 2)", 288 | "center_wavelength": 0.49, 289 | "full_width_half_max": 0.098 290 | }, 291 | { 292 | "name": "B03", 293 | "common_name": "green", 294 | "description": "Green (band 3)", 295 | "center_wavelength": 0.56, 296 | "full_width_half_max": 0.045 297 | }, 298 | { 299 | "name": "B04", 300 | "common_name": "red", 301 | "description": "Red (band 4)", 302 | "center_wavelength": 0.665, 303 | "full_width_half_max": 0.038 304 | }, 305 | { 306 | "name": "B05", 307 | "common_name": "rededge", 308 | "description": "Red edge 1 (band 5)", 309 | "center_wavelength": 0.704, 310 | "full_width_half_max": 0.019 311 | }, 312 | { 313 | "name": "B06", 314 | "common_name": "rededge", 315 | "description": "Red edge 2 (band 6)", 316 | "center_wavelength": 0.74, 317 | "full_width_half_max": 0.018 318 | }, 319 | { 320 | "name": "B07", 321 | "common_name": "rededge", 322 | "description": "Red edge 3 (band 7)", 323 | "center_wavelength": 0.783, 324 | "full_width_half_max": 0.028 325 | }, 326 | { 327 | "name": "B08", 328 | "common_name": "nir", 329 | "description": "NIR 1 (band 8)", 330 | "center_wavelength": 0.842, 331 | "full_width_half_max": 0.145 332 | }, 333 | { 334 | "name": "B8A", 335 | "common_name": "nir08", 336 | "description": "NIR 2 (band 8A)", 337 | "center_wavelength": 0.865, 338 | "full_width_half_max": 0.033 339 | }, 340 | { 341 | "name": "B09", 342 | "common_name": "nir09", 343 | "description": "NIR 3 (band 9)", 344 | "center_wavelength": 0.945, 345 | "full_width_half_max": 0.026 346 | }, 347 | { 348 | "name": "B10", 349 | "common_name": "cirrus", 350 | "description": "SWIR - Cirrus (band 10)", 351 | "center_wavelength": 1.375, 352 | "full_width_half_max": 0.026 353 | }, 354 | { 355 | "name": "B11", 356 | "common_name": "swir16", 357 | "description": "SWIR 1 (band 11)", 358 | "center_wavelength": 1.61, 359 | "full_width_half_max": 0.143 360 | }, 361 | { 362 | "name": "B12", 363 | "common_name": "swir22", 364 | "description": "SWIR 2 (band 12)", 365 | "center_wavelength": 2.19, 366 | "full_width_half_max": 0.242 367 | } 368 | ] 369 | }, 370 | "assets": { 371 | "weights": { 372 | "href": "https://huggingface.co/torchgeo/resnet18_sentinel2_all_moco/resolve/main/resnet18_sentinel2_all_moco-59bfdff9.pth", 373 | "title": "Pytorch weights checkpoint", 374 | "description": "A Resnet-18 classification model trained on normalized Sentinel-2 imagery with Eurosat landcover labels with torchgeo", 375 | "type": "application/octet-stream; application=pytorch", 376 | "roles": [ 377 | "mlm:model", 378 | "mlm:weights" 379 | ], 380 | "mlm:artifact_type": "torch.save", 381 | "$comment": "Following 'eo:bands' is required to fulfil schema validation of 'eo' extension.", 382 | "eo:bands": [ 383 | { 384 | "name": "coastal" 385 | }, 386 | { 387 | "name": "blue" 388 | }, 389 | { 390 | "name": "green" 391 | }, 392 | { 393 | "name": "red" 394 | }, 395 | { 396 | "name": "rededge1" 397 | }, 398 | { 399 | "name": "rededge2" 400 | }, 401 | { 402 | "name": "rededge3" 403 | }, 404 | { 405 | "name": "nir" 406 | }, 407 | { 408 | "name": "nir08" 409 | }, 410 | { 411 | "name": "nir09" 412 | }, 413 | { 414 | "name": "cirrus" 415 | }, 416 | { 417 | "name": "swir16" 418 | }, 419 | { 420 | "name": "swir22" 421 | } 422 | ] 423 | }, 424 | "source_code": { 425 | "href": "https://github.com/microsoft/torchgeo/blob/61efd2e2c4df7ebe3bd03002ebbaeaa3cfe9885a/torchgeo/models/resnet.py#L207", 426 | "title": "Model implementation.", 427 | "description": "Source code to run the model.", 428 | "type": "text/x-python", 429 | "roles": [ 430 | "code", 431 | "metadata" 432 | ] 433 | } 434 | }, 435 | "links": [ 436 | { 437 | "rel": "collection", 438 | "href": "./collection.json", 439 | "type": "application/json" 440 | }, 441 | { 442 | "rel": "self", 443 | "href": "./item_eo_bands.json", 444 | "type": "application/geo+json" 445 | }, 446 | { 447 | "rel": "derived_from", 448 | "href": "https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a", 449 | "type": "application/json", 450 | "ml-aoi:split": "train" 451 | } 452 | ] 453 | } 454 | -------------------------------------------------------------------------------- /examples/item_multi_io.json: -------------------------------------------------------------------------------- 1 | { 2 | "stac_version": "1.0.0", 3 | "stac_extensions": [ 4 | "https://stac-extensions.github.io/mlm/v1.4.0/schema.json", 5 | "https://stac-extensions.github.io/raster/v1.1.0/schema.json", 6 | "https://stac-extensions.github.io/file/v1.0.0/schema.json", 7 | "https://stac-extensions.github.io/ml-aoi/v0.2.0/schema.json" 8 | ], 9 | "type": "Feature", 10 | "id": "model-multi-input", 11 | "collection": "ml-model-examples", 12 | "geometry": { 13 | "type": "Polygon", 14 | "coordinates": [ 15 | [ 16 | [ 17 | -7.882190080512502, 18 | 37.13739173208318 19 | ], 20 | [ 21 | -7.882190080512502, 22 | 58.21798141355221 23 | ], 24 | [ 25 | 27.911651652899923, 26 | 58.21798141355221 27 | ], 28 | [ 29 | 27.911651652899923, 30 | 37.13739173208318 31 | ], 32 | [ 33 | -7.882190080512502, 34 | 37.13739173208318 35 | ] 36 | ] 37 | ] 38 | }, 39 | "bbox": [ 40 | -7.882190080512502, 41 | 37.13739173208318, 42 | 27.911651652899923, 43 | 58.21798141355221 44 | ], 45 | "properties": { 46 | "description": "Generic model that employs multiple input sources with different combination of bands, and some inputs without any band at all.", 47 | "datetime": null, 48 | "start_datetime": "1900-01-01T00:00:00Z", 49 | "end_datetime": "9999-12-31T23:59:59Z", 50 | "mlm:name": "Resnet-18 Sentinel-2 ALL MOCO", 51 | "mlm:tasks": [ 52 | "classification" 53 | ], 54 | "mlm:architecture": "ResNet", 55 | "mlm:framework": "pytorch", 56 | "mlm:framework_version": "2.1.2+cu121", 57 | "file:size": 43000000, 58 | "mlm:memory_size": 1, 59 | "mlm:total_parameters": 11700000, 60 | "mlm:pretrained_source": "EuroSat Sentinel-2", 61 | "mlm:accelerator": "cuda", 62 | "mlm:accelerator_constrained": false, 63 | "mlm:accelerator_summary": "Unknown", 64 | "mlm:batch_size_suggestion": 256, 65 | "mlm:input": [ 66 | { 67 | "name": "RGB", 68 | "bands": [ 69 | "B04", 70 | "B03", 71 | "B02" 72 | ], 73 | "input": { 74 | "shape": [ 75 | -1, 76 | 3, 77 | 64, 78 | 64 79 | ], 80 | "dim_order": [ 81 | "batch", 82 | "channel", 83 | "height", 84 | "width" 85 | ], 86 | "data_type": "uint16" 87 | }, 88 | "value_scaling": null, 89 | "resize_type": null 90 | }, 91 | { 92 | "name": "NDVI", 93 | "bands": [ 94 | "B04", 95 | "B08" 96 | ], 97 | "pre_processing_function": { 98 | "format": "gdal-calc", 99 | "expression": "(A - B) / (A + B)" 100 | }, 101 | "input": { 102 | "shape": [ 103 | -1, 104 | 1, 105 | 64, 106 | 64 107 | ], 108 | "dim_order": [ 109 | "batch", 110 | "ndvi", 111 | "height", 112 | "width" 113 | ], 114 | "data_type": "uint16" 115 | } 116 | }, 117 | { 118 | "name": "DEM", 119 | "description": "Digital elevation model. Comes from another source than the Sentinel bands. Therefore, no 'bands' associated to it.", 120 | "bands": [], 121 | "input": { 122 | "shape": [ 123 | -1, 124 | 1, 125 | 64, 126 | 64 127 | ], 128 | "dim_order": [ 129 | "batch", 130 | "ndvi", 131 | "height", 132 | "width" 133 | ], 134 | "data_type": "float32" 135 | } 136 | } 137 | ], 138 | "mlm:output": [ 139 | { 140 | "name": "vegetation-segmentation", 141 | "tasks": [ 142 | "semantic-segmentation" 143 | ], 144 | "result": { 145 | "shape": [ 146 | -1, 147 | 1 148 | ], 149 | "dim_order": [ 150 | "batch", 151 | "class" 152 | ], 153 | "data_type": "uint8" 154 | }, 155 | "classification_classes": [ 156 | { 157 | "value": 0, 158 | "name": "NON_VEGETATION", 159 | "description": "background pixels", 160 | "color_hint": null 161 | }, 162 | { 163 | "value": 1, 164 | "name": "VEGETATION", 165 | "description": "pixels where vegetation was detected", 166 | "color_hint": [ 167 | 0, 168 | 255, 169 | 0 170 | ] 171 | } 172 | ], 173 | "post_processing_function": null 174 | }, 175 | { 176 | "name": "inverse-mask", 177 | "tasks": [ 178 | "semantic-segmentation" 179 | ], 180 | "result": { 181 | "shape": [ 182 | -1, 183 | 1 184 | ], 185 | "dim_order": [ 186 | "batch", 187 | "class" 188 | ], 189 | "data_type": "uint8" 190 | }, 191 | "classification_classes": [ 192 | { 193 | "value": 0, 194 | "name": "NON_VEGETATION", 195 | "description": "background pixels", 196 | "color_hint": [ 197 | 255, 198 | 255, 199 | 255 200 | ] 201 | }, 202 | { 203 | "value": 1, 204 | "name": "VEGETATION", 205 | "description": "pixels where vegetation was detected", 206 | "color_hint": [ 207 | 0, 208 | 0, 209 | 0 210 | ] 211 | } 212 | ], 213 | "post_processing_function": { 214 | "format": "gdal-calc", 215 | "expression": "logical_not(A)" 216 | } 217 | } 218 | ] 219 | }, 220 | "assets": { 221 | "weights": { 222 | "href": "https://huggingface.co/torchgeo/resnet50_sentinel2_rgb_moco/blob/main/resnet50_sentinel2_rgb_moco.pth", 223 | "title": "Pytorch weights checkpoint", 224 | "description": "A Resnet-50 classification model trained on Sentinel-2 RGB imagery with torchgeo.", 225 | "type": "application/octet-stream; application=pytorch", 226 | "roles": [ 227 | "mlm:model", 228 | "mlm:weights" 229 | ], 230 | "mlm:artifact_type": "torch.save", 231 | "raster:bands": [ 232 | { 233 | "name": "B02 - blue", 234 | "nodata": 0, 235 | "data_type": "uint16", 236 | "bits_per_sample": 15, 237 | "spatial_resolution": 10, 238 | "scale": 0.0001, 239 | "offset": 0, 240 | "unit": "m" 241 | }, 242 | { 243 | "name": "B03 - green", 244 | "nodata": 0, 245 | "data_type": "uint16", 246 | "bits_per_sample": 15, 247 | "spatial_resolution": 10, 248 | "scale": 0.0001, 249 | "offset": 0, 250 | "unit": "m" 251 | }, 252 | { 253 | "name": "B04 - red", 254 | "nodata": 0, 255 | "data_type": "uint16", 256 | "bits_per_sample": 15, 257 | "spatial_resolution": 10, 258 | "scale": 0.0001, 259 | "offset": 0, 260 | "unit": "m" 261 | }, 262 | { 263 | "name": "B08 - nir", 264 | "nodata": 0, 265 | "data_type": "uint16", 266 | "bits_per_sample": 15, 267 | "spatial_resolution": 10, 268 | "scale": 0.0001, 269 | "offset": 0, 270 | "unit": "m" 271 | } 272 | ] 273 | } 274 | }, 275 | "links": [ 276 | { 277 | "rel": "collection", 278 | "href": "./collection.json", 279 | "type": "application/json" 280 | }, 281 | { 282 | "rel": "self", 283 | "href": "./item_multi_io.json", 284 | "type": "application/geo+json" 285 | }, 286 | { 287 | "rel": "derived_from", 288 | "href": "https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a", 289 | "type": "application/json", 290 | "ml-aoi:split": "train" 291 | } 292 | ] 293 | } 294 | -------------------------------------------------------------------------------- /examples/item_raster_bands.json: -------------------------------------------------------------------------------- 1 | { 2 | "stac_version": "1.0.0", 3 | "stac_extensions": [ 4 | "https://stac-extensions.github.io/mlm/v1.4.0/schema.json", 5 | "https://stac-extensions.github.io/raster/v1.1.0/schema.json", 6 | "https://stac-extensions.github.io/file/v1.0.0/schema.json", 7 | "https://stac-extensions.github.io/ml-aoi/v0.2.0/schema.json" 8 | ], 9 | "type": "Feature", 10 | "id": "resnet-18_sentinel-2_all_moco_classification", 11 | "collection": "ml-model-examples", 12 | "geometry": { 13 | "type": "Polygon", 14 | "coordinates": [ 15 | [ 16 | [ 17 | -7.882190080512502, 18 | 37.13739173208318 19 | ], 20 | [ 21 | -7.882190080512502, 22 | 58.21798141355221 23 | ], 24 | [ 25 | 27.911651652899923, 26 | 58.21798141355221 27 | ], 28 | [ 29 | 27.911651652899923, 30 | 37.13739173208318 31 | ], 32 | [ 33 | -7.882190080512502, 34 | 37.13739173208318 35 | ] 36 | ] 37 | ] 38 | }, 39 | "bbox": [ 40 | -7.882190080512502, 41 | 37.13739173208318, 42 | 27.911651652899923, 43 | 58.21798141355221 44 | ], 45 | "properties": { 46 | "description": "Sourced from torchgeo python library, identifier is ResNet18_Weights.SENTINEL2_ALL_MOCO", 47 | "datetime": null, 48 | "start_datetime": "1900-01-01T00:00:00Z", 49 | "end_datetime": "9999-12-31T23:59:59Z", 50 | "mlm:name": "Resnet-18 Sentinel-2 ALL MOCO", 51 | "mlm:tasks": [ 52 | "classification" 53 | ], 54 | "mlm:architecture": "ResNet", 55 | "mlm:framework": "pytorch", 56 | "mlm:framework_version": "2.1.2+cu121", 57 | "file:size": 43000000, 58 | "mlm:memory_size": 1, 59 | "mlm:total_parameters": 11700000, 60 | "mlm:pretrained_source": "EuroSat Sentinel-2", 61 | "mlm:accelerator": "cuda", 62 | "mlm:accelerator_constrained": false, 63 | "mlm:accelerator_summary": "Unknown", 64 | "mlm:batch_size_suggestion": 256, 65 | "mlm:input": [ 66 | { 67 | "name": "13 Band Sentinel-2 Batch", 68 | "bands": [ 69 | "B01", 70 | "B02", 71 | "B03", 72 | "B04", 73 | "B05", 74 | "B06", 75 | "B07", 76 | "B08", 77 | "B8A", 78 | "B09", 79 | "B10", 80 | "B11", 81 | "B12" 82 | ], 83 | "input": { 84 | "shape": [ 85 | -1, 86 | 13, 87 | 64, 88 | 64 89 | ], 90 | "dim_order": [ 91 | "batch", 92 | "channel", 93 | "height", 94 | "width" 95 | ], 96 | "data_type": "float32" 97 | }, 98 | "value_scaling": null, 99 | "resize_type": null, 100 | "pre_processing_function": { 101 | "format": "python", 102 | "expression": "torchgeo.datamodules.eurosat.EuroSATDataModule.collate_fn" 103 | } 104 | } 105 | ], 106 | "mlm:output": [ 107 | { 108 | "name": "classification", 109 | "tasks": [ 110 | "classification" 111 | ], 112 | "result": { 113 | "shape": [ 114 | -1, 115 | 10 116 | ], 117 | "dim_order": [ 118 | "batch", 119 | "class" 120 | ], 121 | "data_type": "float32" 122 | }, 123 | "classification_classes": [ 124 | { 125 | "value": 0, 126 | "name": "Annual Crop", 127 | "description": null, 128 | "title": null, 129 | "color_hint": null, 130 | "nodata": false 131 | }, 132 | { 133 | "value": 1, 134 | "name": "Forest", 135 | "description": null, 136 | "title": null, 137 | "color_hint": null, 138 | "nodata": false 139 | }, 140 | { 141 | "value": 2, 142 | "name": "Herbaceous Vegetation", 143 | "description": null, 144 | "title": null, 145 | "color_hint": null, 146 | "nodata": false 147 | }, 148 | { 149 | "value": 3, 150 | "name": "Highway", 151 | "description": null, 152 | "title": null, 153 | "color_hint": null, 154 | "nodata": false 155 | }, 156 | { 157 | "value": 4, 158 | "name": "Industrial Buildings", 159 | "description": null, 160 | "title": null, 161 | "color_hint": null, 162 | "nodata": false 163 | }, 164 | { 165 | "value": 5, 166 | "name": "Pasture", 167 | "description": null, 168 | "title": null, 169 | "color_hint": null, 170 | "nodata": false 171 | }, 172 | { 173 | "value": 6, 174 | "name": "Permanent Crop", 175 | "description": null, 176 | "title": null, 177 | "color_hint": null, 178 | "nodata": false 179 | }, 180 | { 181 | "value": 7, 182 | "name": "Residential Buildings", 183 | "description": null, 184 | "title": null, 185 | "color_hint": null, 186 | "nodata": false 187 | }, 188 | { 189 | "value": 8, 190 | "name": "River", 191 | "description": null, 192 | "title": null, 193 | "color_hint": null, 194 | "nodata": false 195 | }, 196 | { 197 | "value": 9, 198 | "name": "SeaLake", 199 | "description": null, 200 | "title": null, 201 | "color_hint": null, 202 | "nodata": false 203 | } 204 | ], 205 | "post_processing_function": null 206 | } 207 | ] 208 | }, 209 | "assets": { 210 | "weights": { 211 | "href": "https://huggingface.co/torchgeo/resnet18_sentinel2_all_moco/resolve/main/resnet18_sentinel2_all_moco-59bfdff9.pth", 212 | "title": "Pytorch weights checkpoint", 213 | "description": "A Resnet-18 classification model trained on normalized Sentinel-2 imagery with Eurosat landcover labels with torchgeo", 214 | "type": "application/octet-stream; application=pytorch", 215 | "roles": [ 216 | "mlm:model", 217 | "mlm:weights" 218 | ], 219 | "mlm:artifact_type": "torch.save", 220 | "raster:bands": [ 221 | { 222 | "name": "B01", 223 | "nodata": 0, 224 | "data_type": "uint16", 225 | "bits_per_sample": 15, 226 | "spatial_resolution": 60, 227 | "scale": 0.0001, 228 | "offset": 0, 229 | "unit": "m" 230 | }, 231 | { 232 | "name": "B02", 233 | "nodata": 0, 234 | "data_type": "uint16", 235 | "bits_per_sample": 15, 236 | "spatial_resolution": 10, 237 | "scale": 0.0001, 238 | "offset": 0, 239 | "unit": "m" 240 | }, 241 | { 242 | "name": "B03", 243 | "nodata": 0, 244 | "data_type": "uint16", 245 | "bits_per_sample": 15, 246 | "spatial_resolution": 10, 247 | "scale": 0.0001, 248 | "offset": 0, 249 | "unit": "m" 250 | }, 251 | { 252 | "name": "B04", 253 | "nodata": 0, 254 | "data_type": "uint16", 255 | "bits_per_sample": 15, 256 | "spatial_resolution": 10, 257 | "scale": 0.0001, 258 | "offset": 0, 259 | "unit": "m" 260 | }, 261 | { 262 | "name": "B05", 263 | "nodata": 0, 264 | "data_type": "uint16", 265 | "bits_per_sample": 15, 266 | "spatial_resolution": 20, 267 | "scale": 0.0001, 268 | "offset": 0, 269 | "unit": "m" 270 | }, 271 | { 272 | "name": "B06", 273 | "nodata": 0, 274 | "data_type": "uint16", 275 | "bits_per_sample": 15, 276 | "spatial_resolution": 20, 277 | "scale": 0.0001, 278 | "offset": 0, 279 | "unit": "m" 280 | }, 281 | { 282 | "name": "B07", 283 | "nodata": 0, 284 | "data_type": "uint16", 285 | "bits_per_sample": 15, 286 | "spatial_resolution": 20, 287 | "scale": 0.0001, 288 | "offset": 0, 289 | "unit": "m" 290 | }, 291 | { 292 | "name": "B08", 293 | "nodata": 0, 294 | "data_type": "uint16", 295 | "bits_per_sample": 15, 296 | "spatial_resolution": 10, 297 | "scale": 0.0001, 298 | "offset": 0, 299 | "unit": "m" 300 | }, 301 | { 302 | "name": "B8A", 303 | "nodata": 0, 304 | "data_type": "uint16", 305 | "bits_per_sample": 15, 306 | "spatial_resolution": 20, 307 | "scale": 0.0001, 308 | "offset": 0, 309 | "unit": "m" 310 | }, 311 | { 312 | "name": "B09", 313 | "nodata": 0, 314 | "data_type": "uint16", 315 | "bits_per_sample": 15, 316 | "spatial_resolution": 60, 317 | "scale": 0.0001, 318 | "offset": 0, 319 | "unit": "m" 320 | }, 321 | { 322 | "name": "B10", 323 | "nodata": 0, 324 | "data_type": "uint16", 325 | "bits_per_sample": 15, 326 | "spatial_resolution": 60, 327 | "scale": 0.0001, 328 | "offset": 0, 329 | "unit": "m" 330 | }, 331 | { 332 | "name": "B11", 333 | "nodata": 0, 334 | "data_type": "uint16", 335 | "bits_per_sample": 15, 336 | "spatial_resolution": 20, 337 | "scale": 0.0001, 338 | "offset": 0, 339 | "unit": "m" 340 | }, 341 | { 342 | "name": "B12", 343 | "nodata": 0, 344 | "data_type": "uint16", 345 | "bits_per_sample": 15, 346 | "spatial_resolution": 20, 347 | "scale": 0.0001, 348 | "offset": 0, 349 | "unit": "m" 350 | } 351 | ] 352 | }, 353 | "source_code": { 354 | "href": "https://github.com/microsoft/torchgeo/blob/61efd2e2c4df7ebe3bd03002ebbaeaa3cfe9885a/torchgeo/models/resnet.py#L207", 355 | "title": "Model implementation.", 356 | "description": "Source code to run the model.", 357 | "type": "text/x-python", 358 | "roles": [ 359 | "mlm:source_code", 360 | "code", 361 | "metadata" 362 | ] 363 | } 364 | }, 365 | "links": [ 366 | { 367 | "rel": "collection", 368 | "href": "./collection.json", 369 | "type": "application/json" 370 | }, 371 | { 372 | "rel": "self", 373 | "href": "./item_raster_bands.json", 374 | "type": "application/geo+json" 375 | }, 376 | { 377 | "rel": "derived_from", 378 | "href": "https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a", 379 | "type": "application/json", 380 | "ml-aoi:split": "train" 381 | } 382 | ] 383 | } 384 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "stac-mlm", 3 | "version": "1.4.0", 4 | "scripts": { 5 | "test": "npm run check-markdown && npm run check-examples", 6 | "check-markdown": "remark . -f -r .github/remark.yaml -i .remarkignore", 7 | "format-markdown": "remark . -f -r .github/remark.yaml -i .remarkignore -o", 8 | "check-examples": "stac-node-validator . --lint --verbose --schemaMap https://stac-extensions.github.io/mlm/v1.4.0/schema.json=./json-schema/schema.json", 9 | "format-examples": "stac-node-validator . --format --schemaMap https://stac-extensions.github.io/mlm/v1.4.0/schema.json=./json-schema/schema.json" 10 | }, 11 | "dependencies": { 12 | "remark-cli": "^8.0.0", 13 | "remark-gfm": "^4.0.0", 14 | "remark-lint": "^7.0.0", 15 | "remark-lint-no-html": "^2.0.0", 16 | "remark-math": "^6.0.0", 17 | "remark-preset-lint-consistent": "^3.0.0", 18 | "remark-preset-lint-markdown-style-guide": "^3.0.0", 19 | "remark-preset-lint-recommended": "^4.0.0", 20 | "remark-validate-links": "^10.0.0", 21 | "stac-node-validator": "^1.0.0" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["pdm-backend"] 3 | build-backend = "pdm.backend" 4 | 5 | [tool.pdm.build] 6 | includes = [ 7 | "stac_model", 8 | ] 9 | 10 | [project] 11 | authors = [ 12 | {name = "Ryan Avery", email = "ryan@wherobots.com"}, 13 | {name = "Francis Charette-Migneault", email = "francis.charette-migneault@crim.ca"}, 14 | ] 15 | license = {text = "Apache Software License 2.0"} 16 | requires-python = "<4.0,>=3.10" 17 | dependencies = [ 18 | "typer<1.0.0,>=0.9.0", 19 | "rich>=13.7.0,<15.0.0", 20 | "pydantic<3.0.0,>=2.6.3", 21 | "pydantic-core<3,>=2", 22 | "pystac<2.0.0,>=1.9.0", 23 | "shapely<3,>=2", 24 | "jsonschema<5.0.0,>=4.21.1", 25 | "pip>=25.0.0", 26 | ] 27 | # important: leave the name and version together for bump resolution 28 | name = "stac-model" 29 | version = "0.3.0" 30 | description = "A PydanticV2 validation and serialization libary for the STAC ML Model Extension" 31 | readme = "README_STAC_MODEL.md" 32 | keywords = [] 33 | classifiers = [ 34 | "Development Status :: 4 - Beta", 35 | "Operating System :: OS Independent", 36 | "Topic :: Software Development :: Libraries :: Python Modules", 37 | "License :: OSI Approved :: Apache Software License", 38 | "Programming Language :: Python :: 3", 39 | "Programming Language :: Python :: 3.10", 40 | "Programming Language :: Python :: 3.11", 41 | "Programming Language :: Python :: 3.12", 42 | "Programming Language :: Python :: 3 :: Only", 43 | "Framework :: Pydantic", 44 | "Framework :: Pydantic :: 2", 45 | "Intended Audience :: Developers", 46 | "Intended Audience :: Information Technology", 47 | "Intended Audience :: Science/Research", 48 | "Topic :: File Formats :: JSON :: JSON Schema", 49 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 50 | "Topic :: Scientific/Engineering :: GIS", 51 | "Topic :: Scientific/Engineering :: Image Processing", 52 | "Topic :: Scientific/Engineering :: Image Recognition", 53 | ] 54 | 55 | [tool.uv] 56 | dev-dependencies = [ 57 | "mypy<2.0.0,>=1.0.0", 58 | "mypy-extensions<1.0.0,>=0.4.3", 59 | "pre-commit<3.0.0,>=2.21.0", 60 | "bandit<2.0.0,>=1.7.5", 61 | "safety<3.0.0,>=2.3.4", 62 | "pystac<2.0.0,>=1.10.0", 63 | "pydocstyle[toml]<7.0.0,>=6.2.0", 64 | "pydoclint<0.6,>=0.3", 65 | "pytest<8.0.0,>=7.2.1", 66 | "pytest-cov<5.0.0,>=4.1.0", 67 | "pytest-mock<4.0.0,>=3.10.0", 68 | "pytest-timeout<3.0.0,>=2.2.0", 69 | "pytest-benchmark<5.0.0,>=4.0.0", 70 | "pytest-sugar<1.0.0,>=0.9.7", 71 | "pytest-click<2.0.0,>=1.1.0", 72 | "pytest-pikachu<2.0.0,>=1.0.0", 73 | "coverage<8.0.0,>=7.3.0", 74 | "ruff<1.0.0,>=0.2.2", 75 | "bump-my-version<0.27,>=0.21", 76 | "types-python-dateutil>=2.9.0.20241003", 77 | ] 78 | 79 | [project.urls] 80 | homepage = "https://github.com/stac-extensions/mlm/blob/main/README_STAC_MODEL.md" 81 | repository = "https://github.com/crim-ca/mlm-extension" 82 | 83 | [project.scripts] 84 | stac-model = "stac_model.__main__:app" 85 | 86 | [tool.bumpversion] 87 | # NOTE: 88 | # Although these definitions are provided in this 'stac-model' project file, 89 | # they are actually intented for versioning the MLM specification itself. 90 | # To version 'stac-model', use the 'bump-my-version bump' operation using the 'stac-model.bump.toml' file. 91 | # See also https://github.com/stac-extensions/mlm/blob/main/CONTRIBUTING.md#building-and-releasing 92 | current_version = "1.4.0" 93 | parse = "(?P\\d+)\\.(?P\\d+)\\.(?P\\d+)" 94 | serialize = ["{major}.{minor}.{patch}"] 95 | search = "{current_version}" 96 | replace = "{new_version}" 97 | regex = false 98 | ignore_missing_version = true 99 | ignore_missing_files = false 100 | tag = true 101 | sign_tags = false 102 | tag_name = "v{new_version}" 103 | tag_message = "Bump version: {current_version} → {new_version}" 104 | allow_dirty = false 105 | commit = true 106 | commit_args = "--no-verify" 107 | message = "Bump version: {current_version} → {new_version}" 108 | 109 | [[tool.bumpversion.files]] 110 | glob = "**/*.json" 111 | glob_exclude = [ 112 | ".git/**", 113 | "**/__pycache__/**", 114 | ".mypy_cache/**", 115 | ".tox/**", 116 | ".venv/**", 117 | "_build/**", 118 | "build/**", 119 | "dist/**", 120 | "node_modules/**", 121 | ] 122 | search = "https://stac-extensions.github.io/mlm/v{current_version}/schema.json" 123 | replace = "https://stac-extensions.github.io/mlm/v{new_version}/schema.json" 124 | 125 | [[tool.bumpversion.files]] 126 | glob = "**/*.md" 127 | glob_exclude = [ 128 | ".git/**", 129 | "**/__pycache__/**", 130 | ".mypy_cache/**", 131 | ".tox/**", 132 | ".venv/**", 133 | "_build/**", 134 | "build/**", 135 | "dist/**", 136 | "node_modules/**", 137 | ] 138 | search = "https://stac-extensions.github.io/mlm/v{current_version}/schema.json" 139 | replace = "https://stac-extensions.github.io/mlm/v{new_version}/schema.json" 140 | 141 | [[tool.bumpversion.files]] 142 | glob = "**/*.py" 143 | glob_exclude = [ 144 | ".git/**", 145 | "**/__pycache__/**", 146 | ".mypy_cache/**", 147 | ".tox/**", 148 | ".venv/**", 149 | "_build/**", 150 | "build/**", 151 | "dist/**", 152 | "node_modules/**", 153 | ] 154 | search = "https://stac-extensions.github.io/mlm/v{current_version}/schema.json" 155 | replace = "https://stac-extensions.github.io/mlm/v{new_version}/schema.json" 156 | 157 | [[tool.bumpversion.files]] 158 | filename = "CHANGELOG.md" 159 | search = """ 160 | ## [Unreleased](https://github.com/stac-extensions/mlm/tree/main) 161 | """ 162 | replace = """ 163 | ## [Unreleased](https://github.com/stac-extensions/mlm/tree/main) 164 | 165 | ### Added 166 | 167 | - n/a 168 | 169 | ### Changed 170 | 171 | - n/a 172 | 173 | ### Deprecated 174 | 175 | - n/a 176 | 177 | ### Removed 178 | 179 | - n/a 180 | 181 | ### Fixed 182 | 183 | - n/a 184 | 185 | ## [v{new_version}](https://github.com/stac-extensions/mlm/tree/v{new_version}) 186 | """ 187 | 188 | [[tool.bumpversion.files]] 189 | filename = "CITATION.cff" 190 | search = "https://stac-extensions.github.io/mlm/v{current_version}/schema.json" 191 | replace = "https://stac-extensions.github.io/mlm/v{new_version}/schema.json" 192 | 193 | [[tool.bumpversion.files]] 194 | filename = "package.json" 195 | search = "\"version\": \"{current_version}\"" 196 | replace = "\"version\": \"{new_version}\"" 197 | 198 | [tool.ruff] 199 | ignore = ["UP007", "E501"] 200 | exclude = [ 201 | ".git", 202 | "__pycache__", 203 | ".mypy_cache", 204 | ".tox", 205 | ".venv", 206 | "_build", 207 | "buck-out", 208 | "build", 209 | "dist", 210 | "env", 211 | "venv", 212 | "node_modules", 213 | ] 214 | respect-gitignore = true 215 | line-length = 120 216 | show-fixes = true 217 | 218 | [tool.ruff.lint] 219 | select = [ 220 | # pycodestyle 221 | "E", 222 | # Pyflakes 223 | "F", 224 | # pyupgrade 225 | "UP", 226 | # flake8-bugbear 227 | "B", 228 | # flake8-simplify 229 | "SIM", 230 | # isort 231 | "I", 232 | ] 233 | 234 | [tool.ruff.lint.isort] 235 | known-local-folder = ["tests", "conftest"] 236 | known-first-party = ["stac_model"] 237 | extra-standard-library = ["typing_extensions"] 238 | 239 | [tool.mypy] 240 | # https://github.com/python/mypy 241 | # https://mypy.readthedocs.io/en/latest/config_file.html#using-a-pyproject-toml-file 242 | python_version = "3.10" 243 | pretty = true 244 | show_traceback = true 245 | color_output = true 246 | 247 | allow_redefinition = false 248 | check_untyped_defs = true 249 | disallow_any_generics = true 250 | disallow_incomplete_defs = true 251 | ignore_missing_imports = true 252 | implicit_reexport = false 253 | no_implicit_optional = true 254 | show_column_numbers = true 255 | show_error_codes = true 256 | show_error_context = true 257 | strict_equality = true 258 | strict_optional = true 259 | warn_no_return = true 260 | warn_redundant_casts = true 261 | warn_return_any = true 262 | warn_unreachable = true 263 | warn_unused_configs = true 264 | warn_unused_ignores = true 265 | 266 | plugins = [ 267 | "pydantic.mypy" 268 | ] 269 | 270 | [tool.pydantic-mypy] 271 | init_forbid_extra = true 272 | init_typed = true 273 | warn_required_dynamic_aliases = true 274 | 275 | [tool.pydocstyle] 276 | # https://github.com/PyCQA/pydocstyle 277 | # http://www.pydocstyle.org/en/stable/usage.html#available-options 278 | convention = "google" 279 | match_dir = "^(stac_model|tests)" 280 | # ignore missing documentation, just validate provided ones 281 | add_ignore = "D100,D101,D102,D103,D104,D105,D107,D200,D202,D204,D212,D401" 282 | 283 | [tool.pydoclint] 284 | # https://github.com/jsh9/pydoclint 285 | # https://jsh9.github.io/pydoclint/how_to_config.html 286 | style = "google" 287 | exclude = '\.git|\.hg|\.mypy_cache|\.tox|.?v?env|__pycache__|_build|buck-out|dist|node_modules' 288 | # don't require type hints, since we have them in the signature instead (don't duplicate) 289 | arg-type-hints-in-docstring = false 290 | arg-type-hints-in-signature = true 291 | check-return-types = false 292 | 293 | [tool.pytest.ini_options] 294 | # https://github.com/pytest-dev/pytest 295 | # https://docs.pytest.org/en/6.2.x/customize.html#pyproject-toml 296 | # Directories that are not visited by pytest collector: 297 | norecursedirs =[ 298 | "hooks", 299 | "*.egg", 300 | ".eggs", 301 | "dist", 302 | "build", 303 | "docs", 304 | ".tox", 305 | ".git", 306 | "__pycache__", 307 | "node_modules", 308 | ] 309 | doctest_optionflags = ["NUMBER", "NORMALIZE_WHITESPACE", "IGNORE_EXCEPTION_DETAIL"] 310 | timeout = 1000 311 | 312 | # Extra options: 313 | addopts = [ 314 | "--strict-markers", 315 | "--tb=short", 316 | "--doctest-modules", 317 | "--doctest-continue-on-failure", 318 | "--pikachu" 319 | ] 320 | 321 | [tool.coverage.run] 322 | source = ["tests"] 323 | branch = true 324 | 325 | [tool.coverage.report] 326 | exclude_also = [ 327 | "def main", 328 | "if __name__ == .__main__.:" 329 | ] 330 | fail_under = 50 331 | show_missing = true 332 | 333 | [tool.coverage.paths] 334 | source = ["stac_model"] 335 | -------------------------------------------------------------------------------- /stac-model.bump.toml: -------------------------------------------------------------------------------- 1 | 2 | [tool.bumpversion] 3 | # NOTE: 4 | # This is the bump definition for the 'stac-model' package. 5 | # For the MLM specification, refer to the main 'pyproject.toml'. 6 | # they are actually intented for versioning the MLM specification itself. 7 | # To version 'stac-model', use the 'bump-my-version bump' operation with this file. 8 | # See also https://github.com/stac-extensions/mlm/blob/main/CONTRIBUTING.md#building-and-releasing 9 | current_version = "0.3.0" 10 | parse = "(?P\\d+)\\.(?P\\d+)\\.(?P\\d+)" 11 | serialize = ["{major}.{minor}.{patch}"] 12 | search = "{current_version}" 13 | replace = "{new_version}" 14 | regex = false 15 | ignore_missing_version = true 16 | ignore_missing_files = false 17 | tag = true 18 | sign_tags = false 19 | tag_name = "stac-model-v{new_version}" 20 | tag_message = "Bump version: stac-model {current_version} → {new_version}" 21 | allow_dirty = false 22 | commit = true 23 | commit_args = "--no-verify" 24 | message = "Bump version: stac-model {current_version} → {new_version}" 25 | 26 | [[tool.bumpversion.files]] 27 | filename = "uv.lock" 28 | search = """ 29 | name = "stac-model" 30 | version = "{current_version}" 31 | """ 32 | replace = """ 33 | name = "stac-model" 34 | version = "{new_version}" 35 | """ 36 | 37 | [[tool.bumpversion.files]] 38 | filename = "pyproject.toml" 39 | search = """ 40 | name = "stac-model" 41 | version = "{current_version}" 42 | """ 43 | replace = """ 44 | name = "stac-model" 45 | version = "{new_version}" 46 | """ 47 | 48 | -------------------------------------------------------------------------------- /stac_model/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | A PydanticV2/PySTAC validation and serialization library for the STAC Machine Learning Model Extension. 3 | """ 4 | 5 | from importlib import metadata 6 | 7 | try: 8 | __version__ = metadata.version("stac-model") 9 | except metadata.PackageNotFoundError: 10 | __version__ = "unknown" 11 | -------------------------------------------------------------------------------- /stac_model/__main__.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import typer 4 | from rich.console import Console 5 | 6 | from stac_model import __version__ 7 | from stac_model.examples import eurosat_resnet 8 | from stac_model.schema import ItemMLModelExtension 9 | 10 | app = typer.Typer( 11 | name="stac-model", 12 | help="A PydanticV2 validation and serialization library for the STAC Machine Learning Model Extension", 13 | add_completion=False, 14 | ) 15 | console = Console() 16 | 17 | 18 | def version_callback(print_version: bool) -> None: 19 | """Print the version of the package.""" 20 | if print_version: 21 | console.print(f"[yellow]stac-model[/] version: [bold blue]{__version__}[/]") 22 | raise typer.Exit() 23 | 24 | 25 | @app.command(name="") 26 | def main( 27 | print_version: bool = typer.Option( 28 | None, 29 | "-v", 30 | "--version", 31 | callback=version_callback, 32 | is_eager=True, 33 | help="Prints the version of the stac-model package.", 34 | ), 35 | ) -> ItemMLModelExtension: 36 | """Generate example spec.""" 37 | ml_model_meta = eurosat_resnet() 38 | with open("example.json", "w") as json_file: 39 | json.dump(ml_model_meta.item.to_dict(), json_file, indent=4) 40 | print("Example model metadata written to ./example.json.") 41 | return ml_model_meta 42 | 43 | 44 | if __name__ == "__main__": 45 | app() 46 | -------------------------------------------------------------------------------- /stac_model/base.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from enum import Enum 3 | from typing import Any, Literal, TypeAlias, Union 4 | 5 | from pydantic import BaseModel, ConfigDict, model_serializer 6 | 7 | Number: TypeAlias = int | float 8 | JSON: TypeAlias = dict[str, "JSON"] | list["JSON"] | Number | bool | str | None 9 | 10 | 11 | @dataclass 12 | class _OmitIfNone: 13 | pass 14 | 15 | 16 | OmitIfNone = _OmitIfNone() 17 | 18 | 19 | class MLMBaseModel(BaseModel): 20 | """ 21 | Allows wrapping any field with an annotation to drop it entirely if unset. 22 | 23 | ```python 24 | field: Annotated[Optional[], OmitIfNone] = None 25 | # or 26 | field: Annotated[Optional[], OmitIfNone] = Field(default=None) 27 | ``` 28 | 29 | Since `OmitIfNone` implies that the value could be `None` (even though it would be dropped), 30 | the `Optional` annotation must be specified to corresponding typings to avoid `mypy` lint issues. 31 | 32 | It is important to use `MLMBaseModel`, otherwise the serializer will not be called and applied. 33 | 34 | Reference: https://github.com/pydantic/pydantic/discussions/5461#discussioncomment-7503283 35 | """ 36 | 37 | @model_serializer 38 | def model_serialize(self): 39 | omit_if_none_fields = { 40 | key: field 41 | for key, field in self.model_fields.items() 42 | if any(isinstance(m, _OmitIfNone) for m in field.metadata) 43 | } 44 | values = { 45 | self.__fields__[key].alias or key: val # use the alias if specified 46 | for key, val in self 47 | if key not in omit_if_none_fields or val is not None 48 | } 49 | return values 50 | 51 | model_config = ConfigDict( 52 | populate_by_name=True, 53 | ) 54 | 55 | 56 | DataType: TypeAlias = Literal[ 57 | "uint8", 58 | "uint16", 59 | "uint32", 60 | "uint64", 61 | "int8", 62 | "int16", 63 | "int32", 64 | "int64", 65 | "float16", 66 | "float32", 67 | "float64", 68 | "cint16", 69 | "cint32", 70 | "cfloat32", 71 | "cfloat64", 72 | "other", 73 | ] 74 | 75 | 76 | class TaskEnum(str, Enum): 77 | REGRESSION = "regression" 78 | CLASSIFICATION = "classification" 79 | SCENE_CLASSIFICATION = "scene-classification" 80 | DETECTION = "detection" 81 | OBJECT_DETECTION = "object-detection" 82 | SEGMENTATION = "segmentation" 83 | SEMANTIC_SEGMENTATION = "semantic-segmentation" 84 | INSTANCE_SEGMENTATION = "instance-segmentation" 85 | PANOPTIC_SEGMENTATION = "panoptic-segmentation" 86 | SIMILARITY_SEARCH = "similarity-search" 87 | GENERATIVE = "generative" 88 | IMAGE_CAPTIONING = "image-captioning" 89 | SUPER_RESOLUTION = "super-resolution" 90 | 91 | 92 | ModelTaskNames: TypeAlias = Literal[ 93 | "regression", 94 | "classification", 95 | "scene-classification", 96 | "detection", 97 | "object-detection", 98 | "segmentation", 99 | "semantic-segmentation", 100 | "instance-segmentation", 101 | "panoptic-segmentation", 102 | "similarity-search", 103 | "generative", 104 | "image-captioning", 105 | "super-resolution", 106 | ] 107 | 108 | 109 | ModelTask = Union[ModelTaskNames, TaskEnum] 110 | 111 | 112 | class ProcessingExpression(MLMBaseModel): 113 | # FIXME: should use 'pystac' reference, but 'processing' extension is not implemented yet! 114 | format: str 115 | expression: Any 116 | -------------------------------------------------------------------------------- /stac_model/examples.py: -------------------------------------------------------------------------------- 1 | from typing import cast 2 | 3 | import pystac 4 | import shapely 5 | from dateutil.parser import parse as parse_dt 6 | from pystac.extensions.eo import Band, EOExtension 7 | from pystac.extensions.file import FileExtension 8 | 9 | from stac_model.base import ProcessingExpression 10 | from stac_model.input import InputStructure, ModelInput, ValueScalingObject 11 | from stac_model.output import MLMClassification, ModelOutput, ModelResult 12 | from stac_model.schema import ItemMLModelExtension, MLModelExtension, MLModelProperties 13 | 14 | 15 | def eurosat_resnet() -> ItemMLModelExtension: 16 | input_struct = InputStructure( 17 | shape=[-1, 13, 64, 64], 18 | dim_order=["batch", "channel", "height", "width"], 19 | data_type="float32", 20 | ) 21 | band_names = [ 22 | "B01", 23 | "B02", 24 | "B03", 25 | "B04", 26 | "B05", 27 | "B06", 28 | "B07", 29 | "B08", 30 | "B8A", 31 | "B09", 32 | "B10", 33 | "B11", 34 | "B12", 35 | ] 36 | stats_mean = [ 37 | 1354.40546513, 38 | 1118.24399958, 39 | 1042.92983953, 40 | 947.62620298, 41 | 1199.47283961, 42 | 1999.79090914, 43 | 2369.22292565, 44 | 2296.82608323, 45 | 732.08340178, 46 | 12.11327804, 47 | 1819.01027855, 48 | 1118.92391149, 49 | 2594.14080798, 50 | ] 51 | stats_stddev = [ 52 | 245.71762908, 53 | 333.00778264, 54 | 395.09249139, 55 | 593.75055589, 56 | 566.4170017, 57 | 861.18399006, 58 | 1086.63139075, 59 | 1117.98170791, 60 | 404.91978886, 61 | 4.77584468, 62 | 1002.58768311, 63 | 761.30323499, 64 | 1231.58581042, 65 | ] 66 | value_scaling = [ 67 | cast( 68 | ValueScalingObject, 69 | dict( 70 | type="z-score", 71 | mean=mean, 72 | stddev=stddev, 73 | ), 74 | ) 75 | for mean, stddev in zip(stats_mean, stats_stddev, strict=False) 76 | ] 77 | model_input = ModelInput( 78 | name="13 Band Sentinel-2 Batch", 79 | bands=band_names, 80 | input=input_struct, 81 | resize_type=None, 82 | value_scaling=value_scaling, 83 | pre_processing_function=ProcessingExpression( 84 | format="python", 85 | expression="torchgeo.datamodules.eurosat.EuroSATDataModule.collate_fn", 86 | ), # noqa: E501 87 | ) 88 | result_struct = ModelResult( 89 | shape=[-1, 10], 90 | dim_order=["batch", "class"], 91 | data_type="float32", 92 | ) 93 | class_map = { 94 | "Annual Crop": 0, 95 | "Forest": 1, 96 | "Herbaceous Vegetation": 2, 97 | "Highway": 3, 98 | "Industrial Buildings": 4, 99 | "Pasture": 5, 100 | "Permanent Crop": 6, 101 | "Residential Buildings": 7, 102 | "River": 8, 103 | "SeaLake": 9, 104 | } 105 | class_objects = [ 106 | MLMClassification( 107 | value=class_value, 108 | name=class_name, 109 | ) 110 | for class_name, class_value in class_map.items() 111 | ] 112 | model_output = ModelOutput( 113 | name="classification", 114 | tasks={"classification"}, 115 | classes=class_objects, 116 | result=result_struct, 117 | post_processing_function=None, 118 | ) 119 | assets = { 120 | "model": pystac.Asset( 121 | title="Pytorch weights checkpoint", 122 | description=( 123 | "A Resnet-18 classification model trained on normalized Sentinel-2 " 124 | "imagery with Eurosat landcover labels with torchgeo." 125 | ), 126 | href="https://huggingface.co/torchgeo/resnet18_sentinel2_all_moco/resolve/main/resnet18_sentinel2_all_moco-59bfdff9.pth", 127 | media_type="application/octet-stream; application=pytorch", 128 | roles=[ 129 | "mlm:model", 130 | "mlm:weights", 131 | "data", 132 | ], 133 | extra_fields={"mlm:artifact_type": "torch.save"} 134 | ), 135 | "source_code": pystac.Asset( 136 | title="Model implementation.", 137 | description="Source code to run the model.", 138 | href="https://github.com/microsoft/torchgeo/blob/61efd2e2c4df7ebe3bd03002ebbaeaa3cfe9885a/torchgeo/models/resnet.py#L207", 139 | media_type="text/x-python", 140 | roles=[ 141 | "mlm:source_code", 142 | "code", 143 | ], 144 | ), 145 | } 146 | 147 | ml_model_size = 43000000 148 | ml_model_meta = MLModelProperties( 149 | name="Resnet-18 Sentinel-2 ALL MOCO", 150 | architecture="ResNet-18", 151 | tasks={"classification"}, 152 | framework="pytorch", 153 | framework_version="2.1.2+cu121", 154 | accelerator="cuda", 155 | accelerator_constrained=False, 156 | accelerator_summary="Unknown", 157 | file_size=ml_model_size, 158 | memory_size=1, 159 | pretrained=True, 160 | pretrained_source="EuroSat Sentinel-2", 161 | total_parameters=11_700_000, 162 | input=[model_input], 163 | output=[model_output], 164 | ) 165 | # TODO, this can't be serialized but pystac.item calls for a datetime 166 | # in docs. start_datetime=datetime.strptime("1900-01-01", "%Y-%m-%d") 167 | # Is this a problem that we don't do date validation if we supply as str? 168 | start_datetime_str = "1900-01-01" 169 | end_datetime_str = "9999-01-01" # cannot be None, invalid against STAC Core! 170 | start_datetime = parse_dt(start_datetime_str).isoformat() + "Z" 171 | end_datetime = parse_dt(end_datetime_str).isoformat() + "Z" 172 | bbox = [ 173 | -7.882190080512502, 174 | 37.13739173208318, 175 | 27.911651652899923, 176 | 58.21798141355221, 177 | ] 178 | geometry = shapely.geometry.Polygon.from_bounds(*bbox).__geo_interface__ 179 | item_name = "item_basic" 180 | col_name = "ml-model-examples" 181 | item = pystac.Item( 182 | id=item_name, 183 | collection=col_name, 184 | geometry=geometry, 185 | bbox=bbox, 186 | datetime=None, 187 | properties={ 188 | "start_datetime": start_datetime, 189 | "end_datetime": end_datetime, 190 | "description": "Sourced from torchgeo python library, identifier is ResNet18_Weights.SENTINEL2_ALL_MOCO", 191 | }, 192 | assets=assets, 193 | ) 194 | 195 | # note: cannot use 'item.add_derived_from' since it expects a 'Item' object, but we refer to a 'Collection' here 196 | # item.add_derived_from("https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a") 197 | item.add_link( 198 | pystac.Link( 199 | target="https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a", 200 | rel=pystac.RelType.DERIVED_FROM, 201 | media_type=pystac.MediaType.JSON, 202 | ) 203 | ) 204 | 205 | # define more link references 206 | col = pystac.Collection( 207 | id=col_name, 208 | title="Machine Learning Model examples", 209 | description="Collection of items contained in the Machine Learning Model examples.", 210 | extent=pystac.Extent( 211 | temporal=pystac.TemporalExtent([[parse_dt(start_datetime), parse_dt(end_datetime)]]), 212 | spatial=pystac.SpatialExtent([bbox]), 213 | ), 214 | ) 215 | col.set_self_href("./examples/collection.json") 216 | col.add_item(item) 217 | item.set_self_href(f"./examples/{item_name}.json") 218 | 219 | model_asset = cast( 220 | FileExtension[pystac.Asset], 221 | FileExtension.ext(assets["model"], add_if_missing=True), 222 | ) 223 | model_asset.apply(size=ml_model_size) 224 | 225 | eo_model_asset = cast( 226 | EOExtension[pystac.Asset], 227 | EOExtension.ext(assets["model"], add_if_missing=True), 228 | ) 229 | # NOTE: 230 | # typically, it is recommended to add as much details as possible for the band description 231 | # minimally, the names (which are well-known for sentinel-2) are sufficient 232 | eo_bands = [] 233 | for name in band_names: 234 | band = Band({}) 235 | band.apply(name=name) 236 | eo_bands.append(band) 237 | eo_model_asset.apply(bands=eo_bands) 238 | 239 | item_mlm = MLModelExtension.ext(item, add_if_missing=True) 240 | item_mlm.apply(ml_model_meta.model_dump(by_alias=True, exclude_unset=True, exclude_defaults=True)) 241 | return item_mlm 242 | -------------------------------------------------------------------------------- /stac_model/input.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | from typing import Annotated, Any, Literal, Optional, TypeAlias, Union 3 | from typing_extensions import Self 4 | 5 | from pydantic import Field, model_validator 6 | 7 | from stac_model.base import DataType, MLMBaseModel, Number, OmitIfNone, ProcessingExpression 8 | 9 | 10 | class InputStructure(MLMBaseModel): 11 | shape: list[Union[int, float]] = Field(min_length=1) 12 | dim_order: list[str] = Field(min_length=1) 13 | data_type: DataType 14 | 15 | @model_validator(mode="after") 16 | def validate_dimensions(self) -> Self: 17 | if len(self.shape) != len(self.dim_order): 18 | raise ValueError("Dimension order and shape must be of equal length for corresponding indices.") 19 | return self 20 | 21 | 22 | class ValueScalingClipMin(MLMBaseModel): 23 | type: Literal["clip-min"] = "clip-min" 24 | minimum: Number 25 | 26 | 27 | class ValueScalingClipMax(MLMBaseModel): 28 | type: Literal["clip-max"] = "clip-max" 29 | maximum: Number 30 | 31 | 32 | class ValueScalingClip(MLMBaseModel): 33 | type: Literal["clip"] = "clip" 34 | minimum: Number 35 | maximum: Number 36 | 37 | 38 | class ValueScalingMinMax(MLMBaseModel): 39 | type: Literal["min-max"] = "min-max" 40 | minimum: Number 41 | maximum: Number 42 | 43 | 44 | class ValueScalingZScore(MLMBaseModel): 45 | type: Literal["z-score"] = "z-score" 46 | mean: Number 47 | stddev: Number 48 | 49 | 50 | class ValueScalingOffset(MLMBaseModel): 51 | type: Literal["offset"] = "offset" 52 | value: Number 53 | 54 | 55 | class ValueScalingScale(MLMBaseModel): 56 | type: Literal["scale"] = "scale" 57 | value: Number 58 | 59 | 60 | class ValueScalingProcessingExpression(ProcessingExpression): 61 | type: Literal["processing"] = "processing" 62 | 63 | 64 | ValueScalingObject: TypeAlias = Optional[ 65 | Union[ 66 | ValueScalingMinMax, 67 | ValueScalingZScore, 68 | ValueScalingClip, 69 | ValueScalingClipMin, 70 | ValueScalingClipMax, 71 | ValueScalingOffset, 72 | ValueScalingScale, 73 | ValueScalingProcessingExpression, 74 | ] 75 | ] 76 | 77 | ResizeType: TypeAlias = Optional[ 78 | Literal[ 79 | "crop", 80 | "pad", 81 | "interpolation-nearest", 82 | "interpolation-linear", 83 | "interpolation-cubic", 84 | "interpolation-area", 85 | "interpolation-lanczos4", 86 | "interpolation-max", 87 | "wrap-fill-outliers", 88 | "wrap-inverse-map", 89 | ] 90 | ] 91 | 92 | 93 | class ModelBand(MLMBaseModel): 94 | name: str = Field( 95 | description=( 96 | "Name of the band to use for the input, " 97 | "referring to the name of an entry in a 'bands' definition from another STAC extension." 98 | ) 99 | ) 100 | # similar to 'ProcessingExpression', but they can be omitted here 101 | format: Annotated[Optional[str], OmitIfNone] = Field( 102 | default=None, 103 | description="", 104 | ) 105 | expression: Annotated[Optional[Any], OmitIfNone] = Field( 106 | default=None, 107 | description="", 108 | ) 109 | 110 | @model_validator(mode="after") 111 | def validate_expression(self) -> Self: 112 | if ( # mutually dependant 113 | (self.format is not None or self.expression is not None) 114 | and (self.format is None or self.expression is None) 115 | ): 116 | raise ValueError("Model band 'format' and 'expression' are mutually dependant.") 117 | return self 118 | 119 | 120 | class ModelInput(MLMBaseModel): 121 | name: str 122 | # order is critical here (same index as dim shape), allow duplicate if the model needs it somehow 123 | bands: Sequence[str | ModelBand] = Field( 124 | description=( 125 | "List of bands that compose the input. " 126 | "If a string is used, it is implied to correspond to a named-band. " 127 | "If no band is needed for the input, use an empty array." 128 | ), 129 | examples=[ 130 | [ 131 | "B01", 132 | {"name": "B02"}, 133 | { 134 | "name": "NDVI", 135 | "format": "rio-calc", 136 | "expression": "(B08 - B04) / (B08 + B04)", 137 | }, 138 | ], 139 | ], 140 | ) 141 | input: InputStructure 142 | value_scaling: Annotated[Optional[list[ValueScalingObject]], OmitIfNone] = None 143 | resize_type: Annotated[Optional[ResizeType], OmitIfNone] = None 144 | pre_processing_function: Optional[ProcessingExpression] = None 145 | -------------------------------------------------------------------------------- /stac_model/output.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated, Any, cast 2 | 3 | from pydantic import AliasChoices, ConfigDict, Field, model_serializer 4 | from pystac.extensions.classification import Classification 5 | 6 | from stac_model.base import DataType, MLMBaseModel, ModelTask, OmitIfNone, ProcessingExpression 7 | 8 | 9 | class ModelResult(MLMBaseModel): 10 | shape: list[int | float] = Field(..., min_length=1) 11 | dim_order: list[str] = Field(..., min_length=1) 12 | data_type: DataType 13 | 14 | 15 | # MLMClassification: TypeAlias = Annotated[ 16 | # Classification, 17 | # PlainSerializer( 18 | # lambda x: x.to_dict(), 19 | # when_used="json", 20 | # return_type=TypedDict( 21 | # "Classification", 22 | # { 23 | # "value": int, 24 | # "name": str, 25 | # "description": NotRequired[str], 26 | # "color_hint": NotRequired[str], 27 | # } 28 | # ) 29 | # ) 30 | # ] 31 | 32 | 33 | class MLMClassification(MLMBaseModel, Classification): 34 | @model_serializer() 35 | def model_dump(self, *_: Any, **__: Any) -> dict[str, Any]: 36 | return self.to_dict() # type: ignore[call-arg] 37 | 38 | def __init__( 39 | self, 40 | value: int, 41 | description: str | None = None, 42 | name: str | None = None, 43 | color_hint: str | None = None, 44 | ) -> None: 45 | Classification.__init__(self, {}) 46 | if not name and not description: 47 | raise ValueError("Class name or description is required!") 48 | self.apply( 49 | value=value, 50 | name=name or description, 51 | description=cast(str, description or name), 52 | color_hint=color_hint, 53 | ) 54 | 55 | def __hash__(self) -> int: 56 | return sum(map(hash, self.to_dict().items())) 57 | 58 | def __setattr__(self, key: str, value: Any) -> None: 59 | if key == "properties": 60 | Classification.__setattr__(self, key, value) 61 | else: 62 | MLMBaseModel.__setattr__(self, key, value) 63 | 64 | model_config = ConfigDict( 65 | populate_by_name=True, 66 | arbitrary_types_allowed=True, 67 | ) 68 | 69 | 70 | # class ClassObject(BaseModel): 71 | # value: int 72 | # name: str 73 | # description: Optional[str] = None 74 | # title: Optional[str] = None 75 | # color_hint: Optional[str] = None 76 | # nodata: Optional[bool] = False 77 | 78 | 79 | class ModelOutput(MLMBaseModel): 80 | name: str 81 | tasks: set[ModelTask] 82 | result: ModelResult 83 | 84 | # NOTE: 85 | # Although it is preferable to have 'Set' to avoid duplicate, 86 | # it is more important to keep the order in this case, 87 | # which we would lose with 'Set'. 88 | # We also get some unhashable errors with 'Set', although 'MLMClassification' implements '__hash__'. 89 | classes: Annotated[list[MLMClassification], OmitIfNone] = Field( 90 | alias="classification:classes", 91 | validation_alias=AliasChoices("classification:classes", "classification_classes", "classes"), 92 | ) 93 | post_processing_function: ProcessingExpression | None = None 94 | 95 | model_config = ConfigDict( 96 | populate_by_name=True, 97 | ) 98 | -------------------------------------------------------------------------------- /stac_model/runtime.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import Annotated, Literal, Union 3 | 4 | from pydantic import AliasChoices, Field 5 | 6 | from stac_model.base import MLMBaseModel, OmitIfNone 7 | 8 | 9 | class AcceleratorEnum(str, Enum): 10 | amd64 = "amd64" 11 | cuda = "cuda" 12 | xla = "xla" 13 | amd_rocm = "amd-rocm" 14 | intel_ipex_cpu = "intel-ipex-cpu" 15 | intel_ipex_gpu = "intel-ipex-gpu" 16 | macos_arm = "macos-arm" 17 | 18 | def __str__(self): 19 | return self.value 20 | 21 | 22 | AcceleratorName = Literal[ 23 | "amd64", 24 | "cuda", 25 | "xla", 26 | "amd-rocm", 27 | "intel-ipex-cpu", 28 | "intel-ipex-gpu", 29 | "macos-arm", 30 | ] 31 | 32 | AcceleratorType = Union[AcceleratorName, AcceleratorEnum] 33 | 34 | 35 | class Runtime(MLMBaseModel): 36 | framework: Annotated[str | None, OmitIfNone] = Field(default=None) 37 | framework_version: Annotated[str | None, OmitIfNone] = Field(default=None) 38 | file_size: Annotated[int | None, OmitIfNone] = Field( 39 | alias="file:size", 40 | validation_alias=AliasChoices("file_size", "file:size"), 41 | default=None, 42 | ) 43 | memory_size: Annotated[int | None, OmitIfNone] = Field(default=None) 44 | batch_size_suggestion: Annotated[int | None, OmitIfNone] = Field(default=None) 45 | 46 | accelerator: AcceleratorType | None = Field(default=None) 47 | accelerator_constrained: bool = Field(default=False) 48 | accelerator_summary: Annotated[str | None, OmitIfNone] = Field(default=None) 49 | accelerator_count: Annotated[int | None, OmitIfNone] = Field(default=None, ge=1) 50 | -------------------------------------------------------------------------------- /stac_model/schema.py: -------------------------------------------------------------------------------- 1 | import json 2 | from collections.abc import Iterable 3 | from typing import ( 4 | Annotated, 5 | Any, 6 | Generic, 7 | Literal, 8 | TypeVar, 9 | Union, 10 | cast, 11 | get_args, 12 | overload, 13 | ) 14 | 15 | import pystac 16 | from pydantic import ConfigDict, Field 17 | from pydantic.fields import FieldInfo 18 | from pystac.extensions.base import ( 19 | ExtensionManagementMixin, 20 | PropertiesExtension, 21 | SummariesExtension, 22 | ) 23 | 24 | from stac_model.base import ModelTask, OmitIfNone 25 | from stac_model.input import ModelInput 26 | from stac_model.output import ModelOutput 27 | from stac_model.runtime import Runtime 28 | 29 | T = TypeVar( 30 | "T", 31 | pystac.Collection, 32 | pystac.Item, 33 | pystac.Asset, # item_assets.AssetDefinition, 34 | ) 35 | 36 | SchemaName = Literal["mlm"] 37 | SCHEMA_URI: str = "https://stac-extensions.github.io/mlm/v1.4.0/schema.json" 38 | PREFIX = f"{get_args(SchemaName)[0]}:" 39 | 40 | 41 | def mlm_prefix_adder(field_name: str) -> str: 42 | return "mlm:" + field_name 43 | 44 | 45 | class MLModelProperties(Runtime): 46 | name: str = Field(min_length=1) 47 | architecture: str = Field(min_length=1) 48 | tasks: set[ModelTask] 49 | input: list[ModelInput] 50 | output: list[ModelOutput] 51 | 52 | total_parameters: int 53 | pretrained: Annotated[bool | None, OmitIfNone] = Field(default=True) 54 | pretrained_source: Annotated[str | None, OmitIfNone] = None 55 | 56 | model_config = ConfigDict(alias_generator=mlm_prefix_adder, populate_by_name=True, extra="ignore") 57 | 58 | 59 | class MLModelExtension( 60 | Generic[T], 61 | PropertiesExtension, 62 | # FIXME: resolve typing incompatibility? 63 | # 'pystac.Asset' does not derive from STACObject 64 | # therefore, it technically cannot be used in 'ExtensionManagementMixin[T]' 65 | # however, this makes our extension definition much easier and avoids lots of code duplication 66 | ExtensionManagementMixin[ # type: ignore[type-var] 67 | Union[ 68 | pystac.Collection, 69 | pystac.Item, 70 | pystac.Asset, 71 | ] 72 | ], 73 | ): 74 | @property 75 | def name(self) -> SchemaName: 76 | return cast(SchemaName, get_args(SchemaName)[0]) 77 | 78 | def apply( 79 | self, 80 | properties: MLModelProperties | dict[str, Any], 81 | ) -> None: 82 | """ 83 | Applies Machine Learning Model Extension properties to the extended :mod:`~pystac` object. 84 | """ 85 | if isinstance(properties, dict): 86 | properties = MLModelProperties(**properties) 87 | data_json = json.loads(properties.model_dump_json(by_alias=True)) 88 | for prop, val in data_json.items(): 89 | self._set_property(prop, val) 90 | 91 | @classmethod 92 | def get_schema_uri(cls) -> str: 93 | return SCHEMA_URI 94 | 95 | @overload 96 | @classmethod 97 | def ext(cls, obj: pystac.Asset, add_if_missing: bool = False) -> "AssetMLModelExtension": ... 98 | 99 | @overload 100 | @classmethod 101 | def ext(cls, obj: pystac.Item, add_if_missing: bool = False) -> "ItemMLModelExtension": ... 102 | 103 | @overload 104 | @classmethod 105 | def ext(cls, obj: pystac.Collection, add_if_missing: bool = False) -> "CollectionMLModelExtension": ... 106 | 107 | # @overload 108 | # @classmethod 109 | # def ext(cls, obj: item_assets.AssetDefinition, add_if_missing: bool = False) -> "ItemAssetsMLModelExtension": 110 | # ... 111 | 112 | @classmethod 113 | def ext( 114 | cls, 115 | obj: pystac.Collection | pystac.Item | pystac.Asset, # item_assets.AssetDefinition 116 | add_if_missing: bool = False, 117 | ) -> Union[ 118 | "CollectionMLModelExtension", 119 | "ItemMLModelExtension", 120 | "AssetMLModelExtension", 121 | ]: 122 | """ 123 | Extends the given STAC Object with properties from the :stac-ext:`Machine Learning Model Extension `. 124 | 125 | This extension can be applied to instances of :class:`~pystac.Item` or :class:`~pystac.Asset`. 126 | 127 | Args: 128 | obj: STAC Object to extend with the MLM extension fields. 129 | add_if_missing: Add the MLM extension schema URI to the object if not already in `stac_extensions`. 130 | 131 | Returns: 132 | Extended object. 133 | 134 | Raises: 135 | pystac.ExtensionTypeError: If an invalid object type is passed. 136 | """ 137 | if isinstance(obj, pystac.Collection): 138 | cls.ensure_has_extension(obj, add_if_missing) 139 | return CollectionMLModelExtension(obj) 140 | elif isinstance(obj, pystac.Item): 141 | cls.ensure_has_extension(obj, add_if_missing) 142 | return ItemMLModelExtension(obj) 143 | elif isinstance(obj, pystac.Asset): 144 | cls.ensure_owner_has_extension(obj, add_if_missing) 145 | return AssetMLModelExtension(obj) 146 | # elif isinstance(obj, item_assets.AssetDefinition): 147 | # cls.ensure_owner_has_extension(obj, add_if_missing) 148 | # return ItemAssetsMLModelExtension(obj) 149 | else: 150 | raise pystac.ExtensionTypeError(cls._ext_error_message(obj)) 151 | 152 | @classmethod 153 | def summaries(cls, obj: pystac.Collection, add_if_missing: bool = False) -> "SummariesMLModelExtension": 154 | """Returns the extended summaries object for the given collection.""" 155 | cls.ensure_has_extension(obj, add_if_missing) 156 | return SummariesMLModelExtension(obj) 157 | 158 | 159 | class SummariesMLModelExtension(SummariesExtension): 160 | """ 161 | Summaries annotated with the Machine Learning Model Extension. 162 | 163 | A concrete implementation of :class:`~SummariesExtension` that extends 164 | the ``summaries`` field of a :class:`~pystac.Collection` to include properties 165 | defined in the :stac-ext:`Machine Learning Model `. 166 | """ 167 | 168 | def _check_mlm_property(self, prop: str) -> FieldInfo: 169 | try: 170 | return MLModelProperties.model_fields[prop] 171 | except KeyError as err: 172 | raise AttributeError(f"Name '{prop}' is not a valid MLM property.") from err 173 | 174 | def _validate_mlm_property(self, prop: str, summaries: list[Any]) -> None: 175 | # ignore mypy issue when combined with Annotated 176 | # - https://github.com/pydantic/pydantic/issues/6713 177 | # - https://github.com/pydantic/pydantic/issues/5190 178 | model = MLModelProperties.model_construct() # type: ignore[call-arg] 179 | validator = MLModelProperties.__pydantic_validator__ 180 | for value in summaries: 181 | validator.validate_assignment(model, prop, value) 182 | 183 | def get_mlm_property(self, prop: str) -> list[Any] | None: 184 | self._check_mlm_property(prop) 185 | return self.summaries.get_list(prop) 186 | 187 | def set_mlm_property(self, prop: str, summaries: list[Any]) -> None: 188 | self._check_mlm_property(prop) 189 | self._validate_mlm_property(prop, summaries) 190 | self._set_summary(prop, summaries) 191 | 192 | def __getattr__(self, prop): 193 | return self.get_mlm_property(prop) 194 | 195 | def __setattr__(self, prop, value): 196 | self.set_mlm_property(prop, value) 197 | 198 | 199 | class ItemMLModelExtension(MLModelExtension[pystac.Item]): 200 | """ 201 | Item annotated with the Machine Learning Model Extension. 202 | 203 | A concrete implementation of :class:`MLModelExtension` on an 204 | :class:`~pystac.Item` that extends the properties of the Item to 205 | include properties defined in the :stac-ext:`Machine Learning Model 206 | Extension `. 207 | 208 | This class should generally not be instantiated directly. Instead, call 209 | :meth:`MLModelExtension.ext` on an :class:`~pystac.Item` to extend it. 210 | """ 211 | 212 | def __init__(self, item: pystac.Item): 213 | self.item = item 214 | self.properties = item.properties 215 | 216 | def __repr__(self) -> str: 217 | return f"" 218 | 219 | 220 | # class ItemAssetsMLModelExtension(MLModelExtension[item_assets.AssetDefinition]): 221 | # properties: dict[str, Any] 222 | # asset_defn: item_assets.AssetDefinition 223 | # 224 | # def __init__(self, item_asset: item_assets.AssetDefinition): 225 | # self.asset_defn = item_asset 226 | # self.properties = item_asset.properties 227 | 228 | 229 | class AssetMLModelExtension(MLModelExtension[pystac.Asset]): 230 | """ 231 | Asset annotated with the Machine Learning Model Extension. 232 | 233 | A concrete implementation of :class:`MLModelExtension` on an 234 | :class:`~pystac.Asset` that extends the Asset fields to include 235 | properties defined in the :stac-ext:`Machine Learning Model 236 | Extension `. 237 | 238 | This class should generally not be instantiated directly. Instead, call 239 | :meth:`MLModelExtension.ext` on an :class:`~pystac.Asset` to extend it. 240 | """ 241 | 242 | asset_href: str 243 | """The ``href`` value of the :class:`~pystac.Asset` being extended.""" 244 | 245 | properties: dict[str, Any] 246 | """The :class:`~pystac.Asset` fields, including extension properties.""" 247 | 248 | additional_read_properties: Iterable[dict[str, Any]] | None = None 249 | """If present, this will be a list containing 1 dictionary representing the 250 | properties of the owning :class:`~pystac.Item`.""" 251 | 252 | def __init__(self, asset: pystac.Asset): 253 | self.asset_href = asset.href 254 | self.properties = asset.extra_fields 255 | if asset.owner and isinstance(asset.owner, pystac.Item): 256 | self.additional_read_properties = [asset.owner.properties] 257 | 258 | def __repr__(self) -> str: 259 | return f"" 260 | 261 | 262 | class CollectionMLModelExtension(MLModelExtension[pystac.Collection]): 263 | def __init__(self, collection: pystac.Collection): 264 | self.collection = collection 265 | 266 | 267 | # __all__ = [ 268 | # "MLModelExtension", 269 | # "ModelInput", 270 | # "InputArray", 271 | # "Band", 272 | # "Statistics", 273 | # "ModelOutput", 274 | # "Asset", 275 | # "Runtime", 276 | # "Container", 277 | # "Asset", 278 | # ] 279 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import json 3 | import os 4 | from typing import TYPE_CHECKING, Any, cast 5 | 6 | import pystac 7 | import pytest 8 | 9 | from stac_model.base import JSON 10 | from stac_model.examples import eurosat_resnet as make_eurosat_resnet 11 | from stac_model.schema import SCHEMA_URI 12 | 13 | if TYPE_CHECKING: 14 | from _pytest.fixtures import SubRequest 15 | 16 | TEST_DIR = os.path.dirname(__file__) 17 | EXAMPLES_DIR = os.path.abspath(os.path.join(TEST_DIR, "../examples")) 18 | JSON_SCHEMA_DIR = os.path.abspath(os.path.join(TEST_DIR, "../json-schema")) 19 | 20 | 21 | def get_all_stac_item_examples() -> list[str]: 22 | all_json = glob.glob("**/*.json", root_dir=EXAMPLES_DIR, recursive=True) 23 | all_geojson = glob.glob("**/*.geojson", root_dir=EXAMPLES_DIR, recursive=True) 24 | all_stac_items = [ 25 | path 26 | for path in all_json + all_geojson 27 | if os.path.splitext(os.path.basename(path))[0] not in ["collection", "catalog"] 28 | ] 29 | return all_stac_items 30 | 31 | 32 | @pytest.fixture(scope="session") 33 | def mlm_schema() -> JSON: 34 | with open(os.path.join(JSON_SCHEMA_DIR, "schema.json")) as schema_file: 35 | data = json.load(schema_file) 36 | return cast(JSON, data) 37 | 38 | 39 | @pytest.fixture(scope="session") 40 | def mlm_validator( 41 | request: "SubRequest", 42 | mlm_schema: dict[str, Any], 43 | ) -> pystac.validation.stac_validator.JsonSchemaSTACValidator: 44 | """ 45 | Update the :class:`pystac.validation.RegisteredValidator` with the local MLM JSON schema definition. 46 | 47 | Because the schema is *not yet* uploaded to the expected STAC schema URI, 48 | any call to :func:`pystac.validation.validate` or :meth:`pystac.stac_object.STACObject.validate` results 49 | in ``GetSchemaError`` when the schema retrieval is attempted by the validator.By adding the schema to the 50 | mapping beforehand, remote resolution can be bypassed temporarily. When evaluating modifications to the 51 | current schema, this also ensures that local changes are used instead of the remote reference. 52 | """ 53 | validator = pystac.validation.RegisteredValidator.get_validator() 54 | validator = cast(pystac.validation.stac_validator.JsonSchemaSTACValidator, validator) 55 | validator.schema_cache[SCHEMA_URI] = mlm_schema 56 | pystac.validation.RegisteredValidator.set_validator(validator) # apply globally to allow 'STACObject.validate()' 57 | return validator 58 | 59 | 60 | @pytest.fixture 61 | def mlm_example(request: "SubRequest") -> dict[str, JSON]: 62 | with open(os.path.join(EXAMPLES_DIR, request.param)) as example_file: 63 | data = json.load(example_file) 64 | return cast(dict[str, JSON], data) 65 | 66 | 67 | @pytest.fixture(name="eurosat_resnet") 68 | def eurosat_resnet(): 69 | return make_eurosat_resnet() 70 | -------------------------------------------------------------------------------- /tests/test_schema.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | from typing import Any, cast 4 | 5 | import pystac 6 | import pytest 7 | from jsonschema.exceptions import ValidationError 8 | from pystac.validation.stac_validator import STACValidator 9 | 10 | from stac_model.base import JSON 11 | from stac_model.schema import SCHEMA_URI 12 | 13 | from conftest import get_all_stac_item_examples 14 | 15 | # ignore typing errors introduced by generic JSON manipulation errors 16 | # mypy: disable_error_code="arg-type,call-overload,index,union-attr" 17 | 18 | 19 | @pytest.mark.parametrize( 20 | "mlm_example", # value passed to 'mlm_example' fixture 21 | get_all_stac_item_examples(), 22 | indirect=True, 23 | ) 24 | def test_mlm_schema( 25 | mlm_validator: STACValidator, 26 | mlm_example: dict[str, JSON], 27 | ) -> None: 28 | mlm_item = pystac.Item.from_dict(cast(dict[str, Any], mlm_example)) 29 | validated = pystac.validation.validate(mlm_item, validator=mlm_validator) 30 | assert len(validated) >= len(mlm_item.stac_extensions) # extra STAC core schemas 31 | assert SCHEMA_URI in validated 32 | 33 | 34 | @pytest.mark.parametrize( 35 | "mlm_example", 36 | ["item_raster_bands.json"], 37 | indirect=True, 38 | ) 39 | def test_mlm_no_undefined_prefixed_field_item_properties( 40 | mlm_validator: STACValidator, 41 | mlm_example: dict[str, JSON], 42 | ) -> None: 43 | mlm_data = copy.deepcopy(mlm_example) 44 | mlm_item = pystac.Item.from_dict(mlm_data) 45 | pystac.validation.validate(mlm_item, validator=mlm_validator) # ensure original is valid 46 | 47 | # undefined property anywhere in the schema 48 | mlm_data = copy.deepcopy(mlm_example) 49 | mlm_data["properties"]["mlm:unknown"] = "random" 50 | with pytest.raises(pystac.errors.STACValidationError) as exc: 51 | mlm_item = pystac.Item.from_dict(mlm_data) 52 | pystac.validation.validate(mlm_item, validator=mlm_validator) 53 | assert all( 54 | info in str(exc.value.source) 55 | for info in ["mlm:unknown", "^(?!mlm:)"] 56 | ) 57 | 58 | # defined property only allowed at the Asset level 59 | mlm_data = copy.deepcopy(mlm_example) 60 | mlm_data["properties"]["mlm:artifact_type"] = "torch.save" 61 | with pytest.raises(pystac.errors.STACValidationError) as exc: 62 | mlm_item = pystac.Item.from_dict(mlm_data) 63 | pystac.validation.validate(mlm_item, validator=mlm_validator) 64 | errors = cast(list[ValidationError], exc.value.source) 65 | assert "mlm:artifact_type" in str(errors[0].validator_value) 66 | assert errors[0].schema["description"] == "Fields that are disallowed under the Item properties." 67 | 68 | 69 | @pytest.mark.parametrize( 70 | "mlm_example", 71 | ["item_raster_bands.json"], 72 | indirect=True, 73 | ) 74 | @pytest.mark.parametrize( 75 | ["test_field", "test_value"], 76 | [ 77 | ("mlm:unknown", "random"), 78 | ("mlm:name", "test-model"), 79 | ("mlm:input", []), 80 | ("mlm:output", []), 81 | ("mlm:hyperparameters", {"test": {}}), 82 | ] 83 | ) 84 | def test_mlm_no_undefined_prefixed_field_asset_properties( 85 | mlm_validator: STACValidator, 86 | mlm_example: dict[str, JSON], 87 | test_field: str, 88 | test_value: Any, 89 | ) -> None: 90 | mlm_data = copy.deepcopy(mlm_example) 91 | mlm_item = pystac.Item.from_dict(mlm_data) 92 | pystac.validation.validate(mlm_item, validator=mlm_validator) # ensure original is valid 93 | assert mlm_data["assets"]["weights"] 94 | 95 | mlm_data = copy.deepcopy(mlm_example) 96 | mlm_data["assets"]["weights"][test_field] = test_value 97 | with pytest.raises(pystac.errors.STACValidationError) as exc: 98 | mlm_item = pystac.Item.from_dict(mlm_data) 99 | pystac.validation.validate(mlm_item, validator=mlm_validator) 100 | assert len(exc.value.source) == 1 101 | errors = cast(list[ValidationError], exc.value.source) 102 | assert test_field in errors[0].instance 103 | assert errors[0].schema["description"] in [ 104 | "All possible MLM fields regardless of the level they apply (Collection, Item, Asset, Link).", 105 | "Fields that are disallowed under the Asset properties." 106 | ] 107 | 108 | 109 | @pytest.mark.parametrize( 110 | "mlm_example", 111 | ["item_raster_bands.json"], 112 | indirect=True, 113 | ) 114 | def test_mlm_allowed_field_asset_properties_override( 115 | mlm_validator: STACValidator, 116 | mlm_example: dict[str, JSON], 117 | ) -> None: 118 | # defined property allowed both at the Item at the Asset level 119 | mlm_data = copy.deepcopy(mlm_example) 120 | mlm_data["assets"]["weights"]["mlm:accelerator"] = "cuda" 121 | mlm_item = pystac.Item.from_dict(mlm_data) 122 | pystac.validation.validate(mlm_item, validator=mlm_validator) 123 | 124 | 125 | @pytest.mark.parametrize( 126 | "mlm_example", 127 | ["item_raster_bands.json"], 128 | indirect=True, 129 | ) 130 | def test_mlm_missing_bands_invalid_if_mlm_input_lists_bands( 131 | mlm_validator: STACValidator, 132 | mlm_example: dict[str, JSON], 133 | ) -> None: 134 | mlm_item = pystac.Item.from_dict(mlm_example) 135 | pystac.validation.validate(mlm_item, validator=mlm_validator) # ensure original is valid 136 | 137 | mlm_bands_bad_data = copy.deepcopy(mlm_example) 138 | mlm_bands_bad_data["assets"]["weights"].pop("raster:bands") # no 'None' to raise in case missing 139 | with pytest.raises(pystac.errors.STACValidationError): 140 | mlm_bands_bad_item = pystac.Item.from_dict(mlm_bands_bad_data) 141 | pystac.validation.validate(mlm_bands_bad_item, validator=mlm_validator) 142 | 143 | 144 | @pytest.mark.parametrize( 145 | "mlm_example", 146 | ["item_eo_bands_summarized.json"], 147 | indirect=True, 148 | ) 149 | def test_mlm_eo_bands_invalid_only_in_item_properties( 150 | mlm_validator: STACValidator, 151 | mlm_example: dict[str, JSON], 152 | ) -> None: 153 | mlm_item = pystac.Item.from_dict(mlm_example) 154 | pystac.validation.validate(mlm_item, validator=mlm_validator) # ensure original is valid 155 | 156 | mlm_eo_bands_bad_data = copy.deepcopy(mlm_example) 157 | mlm_eo_bands_bad_data["assets"]["weights"].pop("eo:bands") # no 'None' to raise in case missing 158 | with pytest.raises(pystac.errors.STACValidationError): 159 | mlm_eo_bands_bad_item = pystac.Item.from_dict(mlm_eo_bands_bad_data) 160 | pystac.validation.validate(mlm_eo_bands_bad_item, validator=mlm_validator) 161 | 162 | 163 | @pytest.mark.parametrize( 164 | "mlm_example", 165 | ["item_basic.json"], 166 | indirect=True, 167 | ) 168 | def test_mlm_no_input_allowed_but_explicit_empty_array_required( 169 | mlm_validator: STACValidator, 170 | mlm_example: dict[str, JSON], 171 | ) -> None: 172 | mlm_data = copy.deepcopy(mlm_example) 173 | mlm_data["properties"]["mlm:input"] = [] 174 | mlm_item = pystac.Item.from_dict(mlm_data) 175 | pystac.validation.validate(mlm_item, validator=mlm_validator) 176 | 177 | with pytest.raises(pystac.errors.STACValidationError): 178 | mlm_data["properties"].pop("mlm:input") # no 'None' to raise in case missing 179 | mlm_item = pystac.Item.from_dict(mlm_data) 180 | pystac.validation.validate(mlm_item, validator=mlm_validator) 181 | 182 | 183 | @pytest.mark.parametrize( 184 | "mlm_example", 185 | ["item_basic.json"], 186 | indirect=True, 187 | ) 188 | @pytest.mark.parametrize( 189 | ["test_scaling", "is_valid"], 190 | [ 191 | ([{"type": "unknown", "mean": 1, "stddev": 2}], False), 192 | ([{"type": "min-max", "mean": 1, "stddev": 2}], False), 193 | ([{"type": "z-score", "minimum": 1, "maximum": 2}], False), 194 | ([{"type": "min-max", "mean": 1, "stddev": 2}, {"type": "min-max", "minimum": 1, "maximum": 2}], False), 195 | ([{"type": "z-score", "mean": 1, "stddev": 2}, {"type": "z-score", "minimum": 1, "maximum": 2}], False), 196 | ([{"type": "min-max", "minimum": 1, "maximum": 2}], True), 197 | ([{"type": "z-score", "mean": 1, "stddev": 2, "minimum": 1, "maximum": 2}], True), # extra must be ignored 198 | ([{"type": "processing"}], False), 199 | ([{"type": "processing", "format": "test", "expression": "test"}], True), 200 | ( 201 | [ 202 | {"type": "processing", "format": "test", "expression": "test"}, 203 | {"type": "min-max", "minimum": 1, "maximum": 2}, 204 | ], 205 | True, 206 | ), 207 | ], 208 | ) 209 | def test_mlm_input_scaling_combination( 210 | mlm_validator: STACValidator, 211 | mlm_example: dict[str, JSON], 212 | test_scaling: list[dict[str, Any]], 213 | is_valid: bool, 214 | ) -> None: 215 | mlm_data = copy.deepcopy(mlm_example) 216 | mlm_item = pystac.Item.from_dict(mlm_data) 217 | pystac.validation.validate(mlm_item, validator=mlm_validator) # ensure original is valid 218 | 219 | mlm_data["properties"]["mlm:input"][0]["value_scaling"] = test_scaling # type: ignore 220 | mlm_item = pystac.Item.from_dict(mlm_data) 221 | if is_valid: 222 | pystac.validation.validate(mlm_item, validator=mlm_validator) 223 | else: 224 | with pytest.raises(pystac.errors.STACValidationError): 225 | pystac.validation.validate(mlm_item, validator=mlm_validator) 226 | 227 | 228 | @pytest.mark.parametrize( 229 | "mlm_example", 230 | ["item_basic.json"], 231 | indirect=True, 232 | ) 233 | def test_mlm_other_non_mlm_assets_allowed( 234 | mlm_validator: STACValidator, 235 | mlm_example: dict[str, JSON], 236 | ) -> None: 237 | mlm_data = copy.deepcopy(mlm_example) 238 | mlm_item = pystac.Item.from_dict(mlm_data) 239 | pystac.validation.validate(mlm_item, validator=mlm_validator) # self-check valid beforehand 240 | 241 | mlm_data["assets"]["sample"] = { 242 | "type": "image/jpeg", 243 | "href": "https://example.com/sample/output.jpg", 244 | "roles": ["preview"], 245 | "title": "Model Output Predictions Sample", 246 | } 247 | mlm_data["assets"]["model-cart"] = { 248 | "type": "text/markdown", 249 | "href": "https://example.com/sample/model.md", 250 | "roles": ["metadata"], 251 | "title": "Model Cart", 252 | } 253 | mlm_item = pystac.Item.from_dict(mlm_data) 254 | pystac.validation.validate(mlm_item, validator=mlm_validator) # still valid 255 | 256 | 257 | @pytest.mark.parametrize( 258 | "mlm_example", 259 | ["item_basic.json"], 260 | indirect=True, 261 | ) 262 | @pytest.mark.parametrize( 263 | ["model_asset_extras", "is_valid"], 264 | [ 265 | ({"roles": ["checkpoint"]}, False), 266 | ({"roles": ["checkpoint", "mlm:model"]}, False), 267 | ({"roles": ["checkpoint"], "mlm:artifact_type": "test"}, False), 268 | ({"roles": ["checkpoint", "mlm:model"], "mlm:artifact_type": "test"}, True), 269 | ] 270 | ) 271 | def test_mlm_at_least_one_asset_model( 272 | mlm_validator: STACValidator, 273 | mlm_example: dict[str, JSON], 274 | model_asset_extras: dict[str, Any], 275 | is_valid: bool, 276 | ) -> None: 277 | mlm_data = copy.deepcopy(mlm_example) 278 | mlm_item = pystac.Item.from_dict(mlm_data) 279 | pystac.validation.validate(mlm_item, validator=mlm_validator) # self-check valid beforehand 280 | 281 | mlm_model = { 282 | "type": "application/octet-stream; application=pytorch", 283 | "href": "https://example.com/sample/checkpoint.pt", 284 | "title": "Model Weights Checkpoint", 285 | } 286 | mlm_model.update(model_asset_extras) 287 | mlm_data["assets"] = { 288 | "model": mlm_model # type: ignore 289 | } 290 | mlm_item = pystac.Item.from_dict(mlm_data) 291 | if is_valid: 292 | pystac.validation.validate(mlm_item, validator=mlm_validator) 293 | else: 294 | with pytest.raises(pystac.errors.STACValidationError) as exc: 295 | pystac.validation.validate(mlm_item, validator=mlm_validator) 296 | errors = cast(list[ValidationError], exc.value.source) 297 | assert errors[0].schema["$comment"] in [ 298 | "At least one Asset must provide the model definition indicated by the 'mlm:model' role.", 299 | "Used to check the artifact type property that is required by a Model Asset annotated by 'mlm:model' role." 300 | ] 301 | 302 | 303 | @pytest.mark.parametrize( 304 | "mlm_example", 305 | ["item_basic.json"], 306 | indirect=True, 307 | ) 308 | def test_mlm_asset_artifact_type_checked( 309 | mlm_validator: STACValidator, 310 | mlm_example: dict[str, JSON], 311 | ) -> None: 312 | mlm_data = copy.deepcopy(mlm_example) 313 | mlm_item = pystac.Item.from_dict(mlm_data) 314 | pystac.validation.validate(mlm_item, validator=mlm_validator) # self-check valid beforehand 315 | 316 | mlm_data["assets"]["model"]["mlm:artifact_type"] = 1234 # type: ignore 317 | mlm_item = pystac.Item.from_dict(mlm_data) 318 | with pytest.raises(pystac.errors.STACValidationError) as exc: 319 | pystac.validation.validate(mlm_item, validator=mlm_validator) 320 | assert "1234 is not of type 'string'" in str(exc.value.source) 321 | 322 | mlm_data["assets"]["model"]["mlm:artifact_type"] = "" # type: ignore 323 | mlm_item = pystac.Item.from_dict(mlm_data) 324 | with pytest.raises(pystac.errors.STACValidationError) as exc: 325 | pystac.validation.validate(mlm_item, validator=mlm_validator) 326 | assert "should be non-empty" in str(exc.value.source) 327 | 328 | 329 | def test_model_metadata_to_dict(eurosat_resnet): 330 | assert eurosat_resnet.item.to_dict() 331 | 332 | 333 | def test_validate_model_metadata(eurosat_resnet): 334 | assert pystac.read_dict(eurosat_resnet.item.to_dict()) 335 | 336 | 337 | def test_validate_model_against_schema(eurosat_resnet, mlm_validator): 338 | mlm_item = pystac.read_dict(eurosat_resnet.item.to_dict()) 339 | validated = pystac.validation.validate(mlm_item, validator=mlm_validator) 340 | assert SCHEMA_URI in validated 341 | 342 | 343 | @pytest.mark.parametrize( 344 | "mlm_example", 345 | ["collection.json"], 346 | indirect=True, 347 | ) 348 | def test_collection_include_all_items(mlm_example): 349 | """ 350 | This is only for self-validation, to make sure all examples are contained in the example STAC collection. 351 | """ 352 | col_links: list[dict[str, str]] = mlm_example["links"] 353 | col_items = {os.path.basename(link["href"]) for link in col_links if link["rel"] == "item"} 354 | all_items = {os.path.basename(path) for path in get_all_stac_item_examples()} 355 | assert all_items == col_items, "Missing STAC Item examples in the example STAC Collection links." 356 | -------------------------------------------------------------------------------- /tests/test_stac_model.py: -------------------------------------------------------------------------------- 1 | import pydantic 2 | import pytest 3 | 4 | from stac_model.input import InputStructure, ModelBand, ModelInput 5 | 6 | 7 | @pytest.mark.parametrize( 8 | "bands", 9 | [ 10 | ["B04", "B03", "B02"], 11 | [{"name": "B04"}, {"name": "B03"}, {"name": "B02"}], 12 | [{"name": "NDVI", "format": "rio-calc", "expression": "(B08 - B04) / (B08 + B04)"}], 13 | [ 14 | "B04", 15 | {"name": "B03"}, 16 | "B02", 17 | {"name": "NDVI", "format": "rio-calc", "expression": "(B08 - B04) / (B08 + B04)"}, 18 | ], 19 | ], 20 | ) 21 | def test_model_band(bands): 22 | mlm_input = ModelInput( 23 | name="test", 24 | bands=bands, 25 | input=InputStructure( 26 | shape=[-1, len(bands), 64, 64], 27 | dim_order=["batch", "channel", "height", "width"], 28 | data_type="float32", 29 | ), 30 | ) 31 | mlm_bands = mlm_input.dict()["bands"] 32 | assert mlm_bands == bands 33 | 34 | 35 | @pytest.mark.parametrize( 36 | "bands", 37 | [ 38 | [{"name": "test", "expression": "missing-format"}], 39 | [{"name": "test", "format": "missing-expression"}], 40 | ], 41 | ) 42 | def test_model_band_format_expression_dependency(bands: list[ModelBand]) -> None: 43 | with pytest.raises(pydantic.ValidationError): 44 | ModelInput( 45 | name="test", 46 | bands=bands, 47 | input=InputStructure( 48 | shape=[-1, len(bands), 64, 64], 49 | dim_order=["batch", "channel", "height", "width"], 50 | data_type="float32", 51 | ), 52 | ) 53 | --------------------------------------------------------------------------------