├── .gitattributes ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ └── bug_report.md ├── ci-hpc-config.yml ├── dependabot.yml ├── labeler.yml ├── pull_request_template.md └── workflows │ ├── downstream-ci-hpc.yml │ ├── pr-conventional-commit.yml │ ├── pr-label-conventional-commits.yml │ ├── pr-label-file-based.yml │ ├── pr-label-public.yml │ ├── push-to-private.yml │ ├── python-publish.yml │ ├── python-pull-request.yml │ ├── readthedocs-pr-update.yml │ └── release-please.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── .release-please-config.json ├── .release-please-manifest.json ├── .vscode └── spellright.dict ├── 03-constant-fields.rst ├── CHANGELOG.md ├── CONTRIBUTORS.md ├── LICENSE ├── README.md ├── docs ├── Makefile ├── _static │ ├── 2t_map_example.png │ ├── area-1.png │ ├── concat.png │ ├── cutout-1.png │ ├── cutout-2.png │ ├── cutout-3.png │ ├── cutout-4.png │ ├── cutout-5.png │ ├── cutout-6.png │ ├── join.png │ ├── logo.png │ ├── overlay.png │ ├── schemas │ │ ├── matrix.excalidraw │ │ ├── matrix.png │ │ ├── overview.excalidraw │ │ ├── overview.png │ │ ├── recipe.excalidraw │ │ └── recipe.png │ ├── skip-missing.png │ ├── style.css │ ├── thinning-after.png │ └── thinning-before.png ├── _templates │ ├── .gitkeep │ └── apidoc │ │ └── package.rst.jinja ├── adr │ └── adr-1.md ├── apply-fmt.sh ├── check-index.sh ├── cli │ ├── compare-lam.rst │ ├── compare.rst │ ├── copy.rst │ ├── create.rst │ ├── grib-index.rst │ ├── inspect.rst │ ├── introduction.rst │ ├── patch.rst │ └── scan.rst ├── conf.py ├── datasets │ ├── building │ │ ├── advanced-options.rst │ │ ├── filters.rst │ │ ├── filters │ │ │ ├── empty.rst │ │ │ ├── noop.rst │ │ │ ├── orog_to_z.rst │ │ │ ├── regrid.rst │ │ │ ├── rename.rst │ │ │ ├── rotate_winds.rst │ │ │ ├── select.rst │ │ │ ├── sum.rst │ │ │ ├── unrotate_winds.rst │ │ │ ├── wz_to_w.rst │ │ │ └── yaml │ │ │ │ ├── orog_to_z.yaml │ │ │ │ ├── regrid1.yaml │ │ │ │ ├── regrid2.yaml │ │ │ │ ├── rename.yaml │ │ │ │ ├── sum.yaml │ │ │ │ └── wz_to_w.yaml │ │ ├── handling-missing-dates.rst │ │ ├── handling-missing-values.rst │ │ ├── incremental.rst │ │ ├── introduction.rst │ │ ├── naming-conventions.rst │ │ ├── naming-variables.rst │ │ ├── operations.rst │ │ ├── sources.rst │ │ ├── sources │ │ │ ├── accumulations.rst │ │ │ ├── anemoi-dataset.rst │ │ │ ├── cds.rst │ │ │ ├── eccc-fstd.rst │ │ │ ├── forcings.rst │ │ │ ├── grib-index.rst │ │ │ ├── grib.rst │ │ │ ├── hindcasts.rst │ │ │ ├── mars.rst │ │ │ ├── netcdf.rst │ │ │ ├── opendap.rst │ │ │ ├── recentre.rst │ │ │ ├── repeated-dates.rst │ │ │ ├── xarray-based.rst │ │ │ ├── xarray-kerchunk.py │ │ │ ├── xarray-kerchunk.rst │ │ │ ├── xarray-zarr.rst │ │ │ ├── yaml │ │ │ │ ├── accumulations1.yaml │ │ │ │ ├── accumulations2.yaml │ │ │ │ ├── anemoi-dataset.yaml │ │ │ │ ├── eccc-fstd.yaml │ │ │ │ ├── forcings.yaml │ │ │ │ ├── grib1.yaml │ │ │ │ ├── grib2.yaml │ │ │ │ ├── grib3.yaml │ │ │ │ ├── grib4.yaml │ │ │ │ ├── hindcasts.yaml │ │ │ │ ├── mars-cds.yaml │ │ │ │ ├── mars1.yaml │ │ │ │ ├── mars2.yaml │ │ │ │ ├── netcdf.yaml │ │ │ │ ├── opendap.yaml │ │ │ │ ├── recentre.yaml │ │ │ │ ├── repeated-dates1.yaml │ │ │ │ ├── repeated-dates2.yaml │ │ │ │ ├── repeated-dates3.yaml │ │ │ │ ├── repeated-dates4.yaml │ │ │ │ ├── xarray-based.yaml │ │ │ │ ├── xarray-kerchunk.yaml │ │ │ │ ├── xarray-zarr.yaml │ │ │ │ └── zenodo.yaml │ │ │ └── zenodo.rst │ │ ├── statistics.rst │ │ └── syntax.yaml │ ├── introduction.rst │ ├── using │ │ ├── combining.rst │ │ ├── configuration.rst │ │ ├── configuration.toml │ │ ├── ensembles.rst │ │ 
├── grids.rst │ │ ├── introduction.rst │ │ ├── matching.rst │ │ ├── methods.rst │ │ ├── miscellaneous.rst │ │ ├── missing.rst │ │ ├── opening.rst │ │ ├── other.rst │ │ ├── selecting.rst │ │ ├── statistics.rst │ │ ├── subsetting.rst │ │ └── zip.rst │ └── yaml │ │ ├── Makefile │ │ ├── building1.txt │ │ ├── building1.yaml │ │ ├── building2.txt │ │ ├── building2.yaml │ │ ├── building3.txt │ │ ├── building3.yaml │ │ ├── concat.yaml │ │ ├── hindcasts.yaml │ │ ├── input.yaml │ │ ├── missing_dates.yaml │ │ ├── nan.yaml │ │ └── pipe.yaml ├── dev │ └── contributing.rst ├── howtos │ ├── create │ │ ├── 01-grib-data.rst │ │ ├── 02-cf-data.rst │ │ ├── 03-constant-fields.rst │ │ └── yaml │ │ │ ├── grib-flavour1.yaml │ │ │ ├── grib-flavour2.yaml │ │ │ ├── grib-flavour3.yaml │ │ │ ├── grib-flavour4.yaml │ │ │ ├── grib-recipe1.yaml │ │ │ ├── grib-recipe2.yaml │ │ │ ├── grib-recipe3.yaml │ │ │ ├── grib-recipe4.yaml │ │ │ ├── grib-recipe5.yaml │ │ │ ├── netcdf1.yaml │ │ │ ├── opendap1.yaml │ │ │ ├── xarray-flavour1.yaml │ │ │ ├── xarray-patch1.yaml │ │ │ ├── xarray-patch2.yaml │ │ │ └── zarr1.yaml │ ├── introduction.rst │ └── usage │ │ ├── 01-interpolate-step-dataset-combination.rst │ │ ├── 02-coutout-complement-combination.rst │ │ ├── code │ │ ├── cutout-complement1.py │ │ ├── interpolate1.py │ │ └── interpolate2.py │ │ └── yaml │ │ ├── cutout-complement1.yaml │ │ ├── interpolate1.yaml │ │ └── interpolate2.yaml ├── index.rst ├── installing.rst ├── modules │ ├── dataset.rst │ ├── filters.rst │ └── sources.rst ├── overview.rst ├── pptx │ └── images.pptx ├── scripts │ └── api_build.sh ├── usage │ ├── getting_started.rst │ └── yaml │ │ └── aifs-ea-an-oper-0001-mars-o48-2020-2021-6h-v1.yaml └── using │ └── code │ └── trimedge1_.py ├── pyproject.toml ├── src └── anemoi │ └── datasets │ ├── __init__.py │ ├── __main__.py │ ├── check.py │ ├── commands │ ├── __init__.py │ ├── check.py │ ├── cleanup.py │ ├── compare-lam.py │ ├── compare.py │ ├── copy.py │ ├── create.py │ ├── finalise-additions.py │ ├── finalise.py │ ├── grib-index.py │ ├── init-additions.py │ ├── init.py │ ├── inspect.py │ ├── load-additions.py │ ├── load.py │ ├── patch.py │ ├── publish.py │ └── scan.py │ ├── compute │ ├── __init__.py │ └── recentre.py │ ├── create │ ├── __init__.py │ ├── check.py │ ├── chunks.py │ ├── config.py │ ├── filter.py │ ├── filters │ │ ├── __init__.py │ │ ├── empty.py │ │ ├── legacy.py │ │ ├── noop.py │ │ ├── orog_to_z.py │ │ ├── pressure_level_relative_humidity_to_specific_humidity.py │ │ ├── pressure_level_specific_humidity_to_relative_humidity.py │ │ ├── rename.py │ │ ├── rotate_winds.py │ │ ├── single_level_dewpoint_to_relative_humidity.py │ │ ├── single_level_relative_humidity_to_dewpoint.py │ │ ├── single_level_relative_humidity_to_specific_humidity.py │ │ ├── single_level_specific_humidity_to_relative_humidity.py │ │ ├── speeddir_to_uv.py │ │ ├── sum.py │ │ ├── transform.py │ │ ├── unrotate_winds.py │ │ ├── uv_to_speeddir.py │ │ └── wz_to_w.py │ ├── input │ │ ├── __init__.py │ │ ├── action.py │ │ ├── concat.py │ │ ├── context.py │ │ ├── data_sources.py │ │ ├── empty.py │ │ ├── filter.py │ │ ├── function.py │ │ ├── join.py │ │ ├── misc.py │ │ ├── pipe.py │ │ ├── repeated_dates.py │ │ ├── result.py │ │ ├── step.py │ │ ├── template.py │ │ └── trace.py │ ├── patch.py │ ├── persistent.py │ ├── size.py │ ├── source.py │ ├── sources │ │ ├── __init__.py │ │ ├── accumulations.py │ │ ├── accumulations2.py │ │ ├── anemoi_dataset.py │ │ ├── constants.py │ │ ├── eccc_fstd.py │ │ ├── empty.py │ │ ├── forcings.py │ │ ├── grib.py │ │ 
├── grib_index.py │ │ ├── hindcasts.py │ │ ├── legacy.py │ │ ├── mars.py │ │ ├── netcdf.py │ │ ├── opendap.py │ │ ├── patterns.py │ │ ├── recentre.py │ │ ├── source.py │ │ ├── tendencies.py │ │ ├── xarray.py │ │ ├── xarray_kerchunk.py │ │ ├── xarray_support │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── coordinates.py │ │ │ ├── field.py │ │ │ ├── fieldlist.py │ │ │ ├── flavour.py │ │ │ ├── grid.py │ │ │ ├── metadata.py │ │ │ ├── patch.py │ │ │ ├── time.py │ │ │ └── variable.py │ │ ├── xarray_zarr.py │ │ └── zenodo.py │ ├── statistics │ │ ├── __init__.py │ │ └── summary.py │ ├── testing.py │ ├── typing.py │ ├── utils.py │ ├── writer.py │ └── zarr.py │ ├── data │ ├── __init__.py │ ├── complement.py │ ├── concat.py │ ├── dataset.py │ ├── debug.css │ ├── debug.py │ ├── ensemble.py │ ├── fill_missing.py │ ├── forwards.py │ ├── grids.py │ ├── indexing.py │ ├── interpolate.py │ ├── join.py │ ├── masked.py │ ├── merge.py │ ├── misc.py │ ├── missing.py │ ├── observations │ │ ├── __init__.py │ │ ├── legacy_obs_dataset.py │ │ └── multi.py │ ├── padded.py │ ├── records │ │ ├── __init__.py │ │ └── backends │ │ │ └── __init__.py │ ├── rescale.py │ ├── select.py │ ├── statistics.py │ ├── stores.py │ ├── subset.py │ ├── unchecked.py │ └── xy.py │ ├── dates │ ├── __init__.py │ └── groups.py │ ├── grids.py │ ├── testing.py │ └── utils │ └── __init__.py ├── tests ├── create-perturbations-full.yaml ├── create-shift.yaml ├── create │ ├── accumulation.yaml │ ├── concat.yaml │ ├── join.yaml │ ├── missing.yaml │ ├── nan.yaml │ ├── pipe.yaml │ ├── recentre.yaml │ ├── regrid.yaml │ ├── run.sh │ ├── test_create.py │ └── test_sources.py ├── test_chunks.py ├── test_data.py ├── test_data_gridded.py ├── test_dates.py ├── test_indexing.py ├── test_records.py └── xarray │ ├── test_netcdf.py │ ├── test_opendap.py │ ├── test_samples.py │ └── test_zarr.py └── tools ├── .gitignore ├── build-obs.py ├── check-obs.py ├── examples ├── Makefile └── an-oper-2023-2023-2p5-6h-v1.yaml ├── grids ├── Makefile ├── grids.ipynb ├── grids1.yaml ├── grids2.yaml ├── grids3.yaml ├── grids4.yaml ├── grids5.yaml ├── grids6.yaml ├── grids7.yaml └── grids_multilam.ipynb ├── make-sample-dataset.py └── upload-sample-dataset.py /.gitattributes: -------------------------------------------------------------------------------- 1 | CHANGELOG.md merge=union 2 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Workflows 2 | /.github/ @ecmwf/AnemoiSecurity 3 | 4 | # Project configs 5 | /pyproject.toml @ecmwf/AnemoiSecurity 6 | /.pre-commit-config.yaml @ecmwf/AnemoiSecurity 7 | /.release-please-config.json @ecmwf/AnemoiSecurity 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve. **❗ IMPORTANT:** If you have difficulties creating a dataset from any source, please provide us with a sample data file or a URL to the source data. 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | **Describe the bug** 10 | A clear and concise description of what the bug is. 11 | 12 | **Version number** 13 | I am using the following versions/branch/sha1 of the anemoi packages 14 | (alternatively the output of `pip freeze`) 15 | 16 | **To Reproduce** 17 | Steps to reproduce the behavior: 18 | 1.
Go to '...' 19 | 2. Run this '....' 20 | 3. See error 21 | 22 | **URL to sample input data** 23 | Provide a URL to sample input data, or attach a file to this report if it is small enough. 24 | 25 | **Expected behavior** 26 | A clear and concise description of what you expected to happen. 27 | 28 | **Screenshots** 29 | If applicable, add screenshots to help explain your problem. 30 | 31 | **Additional context** 32 | Add any other context about the problem here. 33 | -------------------------------------------------------------------------------- /.github/ci-hpc-config.yml: -------------------------------------------------------------------------------- 1 | build: 2 | python: '3.10' 3 | modules: 4 | - ninja 5 | python_dependencies: 6 | - ecmwf/anemoi-utils@develop 7 | parallel: 64 8 | pytest_cmd: | 9 | python -m pytest -vv -m 'not notebook and not no_cache_init' --cov=. --cov-report=xml 10 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "pip" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | # - package-ecosystem: "github-actions" 8 | # directory: "/" 9 | # schedule: 10 | # interval: "monthly" 11 | -------------------------------------------------------------------------------- /.github/labeler.yml: -------------------------------------------------------------------------------- 1 | # This is the configuration file for the labeler action. 2 | # It assigns labels to pull requests based on the files changed in the PR. 3 | # See more here: https://github.com/actions/labeler 4 | dependencies: 5 | - changed-files: 6 | - any-glob-to-any-file: 7 | - "**/requirements.txt" 8 | - "**/setup.py" 9 | - "**/pyproject.toml" 10 | - "**/Pipfile" 11 | - "**/Pipfile.lock" 12 | - "**/requirements/*.txt" 13 | - "**/requirements/*.in" 14 | 15 | documentation: 16 | - changed-files: 17 | - any-glob-to-any-file: 18 | - "**/docs/**/*" 19 | - "*.md" 20 | - "*.rst" 21 | 22 | config: 23 | - changed-files: 24 | - any-glob-to-any-file: 25 | - "**/src/**/config/**/*" 26 | - "**/src/anemoi/inference/config.py" 27 | 28 | CI/CD: 29 | - changed-files: 30 | - any-glob-to-any-file: 31 | - "**/.pre-commit-config.yaml" 32 | - ".github/**/*" 33 | - "tox.ini" 34 | - ".coveragerc" 35 | 36 | tests: 37 | - changed-files: 38 | - any-glob-to-any-file: 39 | - "**/tests/**/*" 40 | - "**/test/**/*" 41 | - "**/test_*.py" 42 | - "**/test.py" 43 | - "**/conftest.py" 44 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Description 2 | 3 | 4 | ## What problem does this change solve? 5 | 6 | 7 | ## What issue or task does this change relate to? 8 | 9 | 10 | ## Additional notes ## 11 | 12 | 13 | ***As a contributor to the Anemoi framework, please ensure that your changes include unit tests, updates to any affected dependencies and documentation, and have been tested in a parallel setting (i.e., with multiple GPUs). As a reviewer, you are also responsible for verifying these aspects and requesting changes if they are not adequately addressed.
For guidelines about those please refer to https://anemoi.readthedocs.io/en/latest/*** 14 | -------------------------------------------------------------------------------- /.github/workflows/downstream-ci-hpc.yml: -------------------------------------------------------------------------------- 1 | # This workflow triggers tests on dependent packages. 2 | # The dependency tree itself is defined in ecmwf/downstream-ci/ 3 | name: Test downstream dependent packages 4 | 5 | on: 6 | # Trigger the workflow on push to main or develop, except tag creation 7 | push: 8 | branches: 9 | - 'main' 10 | - 'develop' 11 | tags-ignore: 12 | - '**' 13 | paths-ignore: 14 | - "docs/**" 15 | - "CHANGELOG.md" 16 | - "README.md" 17 | 18 | # Trigger the workflow on pull request 19 | pull_request: 20 | paths-ignore: 21 | - "docs/**" 22 | - "CHANGELOG.md" 23 | - "README.md" 24 | 25 | # Trigger the workflow manually 26 | workflow_dispatch: ~ 27 | 28 | # Trigger after public PR approved for CI 29 | pull_request_target: 30 | types: [labeled] 31 | paths-ignore: 32 | - "docs/**" 33 | - "CHANGELOG.md" 34 | - "README.md" 35 | 36 | jobs: 37 | # Run CI including downstream packages on self-hosted runners 38 | downstream-ci: 39 | name: downstream-ci 40 | if: ${{ !github.event.pull_request.head.repo.fork && github.event.action != 'labeled' || github.event.label.name == 'approved-for-ci' }} 41 | uses: ecmwf/downstream-ci/.github/workflows/downstream-ci.yml@main 42 | with: 43 | anemoi-datasets: ecmwf/anemoi-datasets@${{ github.event.pull_request.head.sha || github.sha }} 44 | codecov_upload: true 45 | # Only run on fedora 46 | skip_matrix_jobs: | 47 | gnu@debian-11 48 | gnu@rocky-8.6 49 | clang@rocky-8.6 50 | gnu@ubuntu-22.04 51 | secrets: inherit 52 | 53 | # # Build downstream packages on HPC 54 | # downstream-ci-hpc: 55 | # name: downstream-ci-hpc 56 | # if: ${{ !github.event.pull_request.head.repo.fork && github.event.action != 'labeled' || github.event.label.name == 'approved-for-ci' }} 57 | # uses: ecmwf/downstream-ci/.github/workflows/downstream-ci-hpc.yml@main 58 | # with: 59 | # anemoi-datasets: ecmwf/anemoi-datasets@${{ github.event.pull_request.head.sha || github.sha }} 60 | # secrets: inherit 61 | -------------------------------------------------------------------------------- /.github/workflows/pr-conventional-commit.yml: -------------------------------------------------------------------------------- 1 | # This workflow ensures that the PR title follows the Conventional Commit format. 2 | name: "[PR] Ensure Conventional Commit Title" 3 | 4 | on: 5 | pull_request_target: 6 | types: 7 | - opened 8 | - edited 9 | - synchronize 10 | - reopened 11 | 12 | permissions: 13 | pull-requests: read 14 | 15 | jobs: 16 | main: 17 | name: Validate PR title 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: amannn/action-semantic-pull-request@v5 21 | env: 22 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 23 | -------------------------------------------------------------------------------- /.github/workflows/pr-label-conventional-commits.yml: -------------------------------------------------------------------------------- 1 | # This workflow assigns labels to a pull request based on the Conventional Commits format. 2 | # This is necessary for release-please to work properly. 
3 | name: "[PR] Label Conventional Commits" 4 | 5 | on: 6 | pull_request: 7 | branches: [main] 8 | types: 9 | [opened, reopened, labeled, unlabeled] 10 | 11 | permissions: 12 | pull-requests: write 13 | 14 | jobs: 15 | assign-labels: 16 | runs-on: ubuntu-latest 17 | name: Assign labels in pull request 18 | if: github.event.pull_request.merged == false 19 | steps: 20 | - uses: actions/checkout@v3 21 | - name: Assign labels from Conventional Commits 22 | id: action-assign-labels 23 | uses: mauroalderete/action-assign-labels@v1 24 | with: 25 | pull-request-number: ${{ github.event.pull_request.number }} 26 | github-token: ${{ secrets.GITHUB_TOKEN }} 27 | conventional-commits: | 28 | conventional-commits: 29 | - type: 'fix' 30 | nouns: ['FIX', 'Fix', 'fix', 'FIXED', 'Fixed', 'fixed'] 31 | labels: ['bug'] 32 | - type: 'feature' 33 | nouns: ['FEATURE', 'Feature', 'feature', 'FEAT', 'Feat', 'feat'] 34 | labels: ['enhancement'] 35 | - type: 'breaking_change' 36 | nouns: ['BREAKING CHANGE', 'BREAKING', 'MAJOR'] 37 | labels: ['breaking change'] 38 | - type: 'documentation' 39 | nouns: ['doc','docs','docu','document','documentation'] 40 | labels: ['documentation'] 41 | - type: 'build' 42 | nouns: ['build','rebuild','ci'] 43 | labels: ['CI/CD'] 44 | - type: 'config' 45 | nouns: ['config', 'conf', 'configuration'] 46 | labels: ['config'] 47 | maintain-labels-not-matched: true 48 | apply-changes: true 49 | -------------------------------------------------------------------------------- /.github/workflows/pr-label-file-based.yml: -------------------------------------------------------------------------------- 1 | # This workflow assigns labels to a pull request based on the files changed in the PR. 2 | # The labels are defined in the `.github/labels.yml` file. 3 | name: "[PR] Label File-based" 4 | on: 5 | pull_request_target: 6 | types: [opened, synchronize] 7 | 8 | permissions: 9 | contents: read 10 | pull-requests: write 11 | 12 | jobs: 13 | labeler: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Assign labels from file changes 17 | uses: actions/labeler@v5 18 | -------------------------------------------------------------------------------- /.github/workflows/pr-label-public.yml: -------------------------------------------------------------------------------- 1 | # Manage labels of pull requests that originate from forks 2 | name: "[PR] Label Forks" 3 | 4 | on: 5 | pull_request_target: 6 | types: [opened, synchronize] 7 | 8 | jobs: 9 | label: 10 | uses: ecmwf/reusable-workflows/.github/workflows/label-pr.yml@v2 11 | -------------------------------------------------------------------------------- /.github/workflows/push-to-private.yml: -------------------------------------------------------------------------------- 1 | # This workflow pushes changes from a public repository to a private repository. 
2 | name: Push to private repository 3 | 4 | on: 5 | push: 6 | branches: 7 | - main 8 | 9 | jobs: 10 | push_changes: 11 | if: ${{ !contains(github.repository, 'private') }} 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - name: Checkout source repository 16 | uses: actions/checkout@v3 17 | with: 18 | fetch-depth: 0 19 | fetch-tags: true 20 | 21 | - name: Set up Git configuration 22 | run: | 23 | git config user.name "github-actions[bot]" 24 | git config user.email "github-actions[bot]@users.noreply.github.com" 25 | 26 | - name: Setup SSH key 27 | uses: webfactory/ssh-agent@v0.5.0 28 | with: 29 | ssh-private-key: ${{ secrets.KEY_TO_PRIVATE }} 30 | 31 | - name: Push changes to private repository 32 | run: | 33 | git remote add private git@github.com:${{ github.repository }}-private.git 34 | git push --set-upstream private main 35 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package to PyPI 5 | 6 | on: 7 | release: 8 | types: [created] 9 | 10 | jobs: 11 | deploy: 12 | uses: ecmwf/reusable-workflows/.github/workflows/cd-pypi.yml@v2 13 | secrets: inherit 14 | -------------------------------------------------------------------------------- /.github/workflows/python-pull-request.yml: -------------------------------------------------------------------------------- 1 | # This workflow runs pre-commit checks and pytest tests against multiple platforms and Python versions. 2 | name: Code Quality and Testing 3 | 4 | on: 5 | pull_request: 6 | types: [opened, synchronize, reopened] 7 | push: 8 | branches: 9 | - main 10 | schedule: 11 | - cron: "9 2 * * 0" # at 9:02 on sunday 12 | 13 | jobs: 14 | quality: 15 | uses: ecmwf/reusable-workflows/.github/workflows/qa-precommit-run.yml@v2 16 | with: 17 | skip-hooks: "no-commit-to-branch" 18 | 19 | checks: 20 | strategy: 21 | matrix: 22 | python-version: ["3.9", "3.10", "3.11", "3.12"] 23 | uses: ecmwf/reusable-workflows/.github/workflows/qa-pytest-pyproject.yml@v2 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | -------------------------------------------------------------------------------- /.github/workflows/readthedocs-pr-update.yml: -------------------------------------------------------------------------------- 1 | # This workflow adds a link to the experimental documentation build to the PR. 2 | # This does NOT trigger a build of the documentation, this is handled through webhooks. 3 | name: "[PR] Read the Docs Preview" 4 | on: 5 | pull_request_target: 6 | types: 7 | - opened 8 | - synchronize 9 | - reopened 10 | # Execute this action only on PRs that touch 11 | # documentation files. 
12 | paths: 13 | - "docs/**" 14 | 15 | permissions: 16 | pull-requests: write 17 | 18 | jobs: 19 | documentation-links: 20 | runs-on: ubuntu-latest 21 | steps: 22 | - uses: readthedocs/actions/preview@v1 23 | with: 24 | project-slug: "anemoi-datasets" 25 | -------------------------------------------------------------------------------- /.github/workflows/release-please.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses an action to run Release Please to create a release PR. 2 | # It is governed by the config and manifest in the root of the repo. 3 | # For more information see: https://github.com/googleapis/release-please 4 | name: Run Release Please 5 | on: 6 | push: 7 | branches: 8 | - main 9 | - hotfix/* 10 | 11 | permissions: 12 | contents: write 13 | pull-requests: write 14 | 15 | jobs: 16 | release-please: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: googleapis/release-please-action@v4 20 | with: 21 | # this assumes that you have created a personal access token 22 | # (PAT) and configured it as a GitHub action secret named 23 | # `MY_RELEASE_PLEASE_TOKEN` (this secret name is not important). 24 | token: ${{ secrets.RELEASE_PLEASE_TOKEN }} 25 | # optional. customize path to .release-please-config.json 26 | config-file: .release-please-config.json 27 | # Currently releases are done from main 28 | target-branch: ${{ github.ref_name }} 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.so 6 | .Python 7 | __pypackages__/ 8 | 9 | # Distribution / packaging 10 | build/ 11 | develop-eggs/ 12 | dist/ 13 | downloads/ 14 | eggs/ 15 | .eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | wheels/ 22 | share/python-wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # Testing and coverage 29 | htmlcov/ 30 | .tox/ 31 | .nox/ 32 | .coverage 33 | .coverage.* 34 | .cache 35 | nosetests.xml 36 | coverage.xml 37 | *.cover 38 | *.py,cover 39 | .hypothesis/ 40 | .pytest_cache/ 41 | cover/ 42 | 43 | # Documentation 44 | docs/_build/ 45 | docs/_api/ 46 | /site 47 | *.mo 48 | *.pot 49 | 50 | # Environments 51 | .env 52 | .envrc 53 | .venv 54 | env/ 55 | venv/ 56 | ENV/ 57 | env.bak/ 58 | venv.bak/ 59 | 60 | # IDEs 61 | .idea/ 62 | .spyderproject 63 | .spyproject 64 | .ropeproject 65 | .vscode/ 66 | *.code-workspace 67 | *.sublime-project 68 | *.sublime-workspace 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # Type checking 74 | .mypy_cache/ 75 | .dmypy.json 76 | dmypy.json 77 | .pyre/ 78 | .pytype/ 79 | 80 | # Data files 81 | *.grib 82 | *.grib1 83 | *.grib2 84 | *.onnx 85 | *.ckpt 86 | *.npy 87 | *.npz 88 | *.zarr/ 89 | *.nc 90 | *.h5 91 | *.hdf5 92 | *.pkl 93 | *.parquet 94 | *.csv 95 | *.xlsx 96 | *.xls 97 | *.json 98 | *.txt 99 | *.zip 100 | *.db 101 | *.tgz 102 | 103 | # ML artifacts 104 | wandb/ 105 | mlruns/ 106 | lightning_logs/ 107 | *.ckpt 108 | *.pt 109 | *.pth 110 | runs/ 111 | checkpoints/ 112 | 113 | # Temporary and system files 114 | *.swp 115 | *.download 116 | *.out 117 | *.sync 118 | *.dot 119 | *.tmp 120 | *.log 121 | *.log.* 122 | .DS_Store 123 | ~* 124 | tmp/ 125 | temp/ 126 | logs/ 127 | _dev/ 128 | _api/ 129 | ./outputs 130 | *tmp_data/ 131 | 132 | # Project specific 133 | ? 
134 | ?.* 135 | foo 136 | bar 137 | ~$images.pptx 138 | test.py 139 | test.ipynb 140 | _version.py 141 | *.to_upload 142 | tempCodeRunnerFile.python 143 | Untitled-*.py 144 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.11" 7 | # jobs: 8 | # pre_build: 9 | # - bash docs/scripts/api_build.sh 10 | 11 | sphinx: 12 | configuration: docs/conf.py 13 | 14 | python: 15 | install: 16 | - method: pip 17 | path: . 18 | extra_requirements: 19 | - docs 20 | -------------------------------------------------------------------------------- /.release-please-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "release-type": "python", 3 | "bump-minor-pre-major": true, 4 | "bump-patch-for-minor-pre-major": true, 5 | "separate-pull-requests": true, 6 | "always-update": true, 7 | "changelog-type": "default", 8 | "include-component-in-tag": false, 9 | "include-v-in-tag": false, 10 | "draft-pull-request": true, 11 | "pull-request-title-pattern": "chore${scope}: Release${component} ${version}", 12 | "pull-request-header": ":robot: Automated Release PR\n\nThis PR was created by `release-please` to prepare the next release. Once merged:\n\n1. A new version tag will be created\n2. A GitHub release will be published\n3. The changelog will be updated\n\nChanges to be included in the next release:", 13 | "pull-request-footer": "> [!IMPORTANT]\n> Please do not change the PR title, manifest file, or any other automatically generated content in this PR unless you understand the implications. Changes here can break the release process.\n> :warning: Merging this PR will:\n> - Create a new release\n> - Trigger deployment pipelines\n> - Update package versions\n\n **Before merging:**\n - Ensure all tests pass\n - Review the changelog carefully\n - Get required approvals\n\n [Release-please documentation](https://github.com/googleapis/release-please)", 14 | "packages": { 15 | ".": { 16 | "package-name": "anemoi-datasets" 17 | } 18 | }, 19 | "plugins": [ 20 | { 21 | "type": "sentence-case" 22 | } 23 | ], 24 | "$schema": "https://raw.githubusercontent.com/googleapis/release-please/main/schemas/config.json" 25 | } 26 | -------------------------------------------------------------------------------- /.release-please-manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | ".": "0.5.24" 3 | } 4 | -------------------------------------------------------------------------------- /.vscode/spellright.dict: -------------------------------------------------------------------------------- 1 | Anemoi 2 | zarr 3 | literalinclude 4 | -------------------------------------------------------------------------------- /03-constant-fields.rst: -------------------------------------------------------------------------------- 1 | ######################## 2 | Adding constant fields 3 | ######################## 4 | -------------------------------------------------------------------------------- /CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | ## How to Contribute 2 | 3 | Please see the [read the docs](https://anemoi.readthedocs.io/en/latest/dev/contributing.html). 4 | 5 | 6 | ## Contributors 7 | 8 | Thank you to all the wonderful people who have contributed to Anemoi. 
Contributions can come in many forms, including code, documentation, bug reports, feature suggestions, design, and more. A list of code-based contributors can be found [here](https://github.com/ecmwf/anemoi-datasets/graphs/contributors). 9 | 10 | 11 | ## Contributing Organisations 12 | 13 | Significant contributions have been made by the following organisations: [DWD](https://www.dwd.de/), [FMI](https://www.ilmatieteenlaitos.fi/), [KNMI](https://www.knmi.nl), [MET Norway](https://www.met.no/), [MeteoSwiss](https://www.meteoswiss.admin.ch/), [RMI](https://www.meteo.be/) & [ECMWF](https://www.ecmwf.int/) 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # anemoi-datasets 2 | 3 | **DISCLAIMER** 4 | This project is **BETA** and will be **Experimental** for the foreseeable future. 5 | Interfaces and functionality are likely to change, and the project itself may be scrapped. 6 | **DO NOT** use this software in any project/software that is operational. 7 | 8 | 9 | 10 | ## Documentation 11 | 12 | The documentation can be found at https://anemoi-datasets.readthedocs.io/. 13 | 14 | ## Install 15 | 16 | Install via `pip` with: 17 | 18 | ``` 19 | $ pip install anemoi-datasets 20 | ``` 21 | 22 | ## License 23 | 24 | ``` 25 | Copyright 2024-2025, Anemoi Contributors. 26 | 27 | Licensed under the Apache License, Version 2.0 (the "License"); 28 | you may not use this file except in compliance with the License. 29 | You may obtain a copy of the License at 30 | 31 | http://www.apache.org/licenses/LICENSE-2.0 32 | 33 | Unless required by applicable law or agreed to in writing, software 34 | distributed under the License is distributed on an "AS IS" BASIS, 35 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 36 | See the License for the specific language governing permissions and 37 | limitations under the License. 38 | 39 | In applying this licence, ECMWF does not waive the privileges and immunities 40 | granted to it by virtue of its status as an intergovernmental organisation 41 | nor does it submit to any jurisdiction. 42 | ``` 43 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env make -f 2 | 3 | # Minimal makefile for Sphinx documentation 4 | # 5 | 6 | # You can set these variables from the command line, and also 7 | # from the environment for the first two. 8 | SPHINXOPTS ?= 9 | SPHINXBUILD ?= sphinx-build 10 | SOURCEDIR = . 11 | BUILDDIR = _build 12 | 13 | # Put it first so that "make" without argument is like "make help". 14 | help: 15 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 16 | 17 | .PHONY: help Makefile 18 | 19 | # Catch-all target: route all unknown targets to Sphinx using the new 20 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
21 | %: Makefile 22 | bash $(SOURCEDIR)/scripts/api_build.sh 23 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 24 | -------------------------------------------------------------------------------- /docs/_static/2t_map_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/2t_map_example.png -------------------------------------------------------------------------------- /docs/_static/area-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/area-1.png -------------------------------------------------------------------------------- /docs/_static/concat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/concat.png -------------------------------------------------------------------------------- /docs/_static/cutout-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/cutout-1.png -------------------------------------------------------------------------------- /docs/_static/cutout-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/cutout-2.png -------------------------------------------------------------------------------- /docs/_static/cutout-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/cutout-3.png -------------------------------------------------------------------------------- /docs/_static/cutout-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/cutout-4.png -------------------------------------------------------------------------------- /docs/_static/cutout-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/cutout-5.png -------------------------------------------------------------------------------- /docs/_static/cutout-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/cutout-6.png -------------------------------------------------------------------------------- /docs/_static/join.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/join.png -------------------------------------------------------------------------------- /docs/_static/logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/logo.png -------------------------------------------------------------------------------- /docs/_static/overlay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/overlay.png -------------------------------------------------------------------------------- /docs/_static/schemas/matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/schemas/matrix.png -------------------------------------------------------------------------------- /docs/_static/schemas/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/schemas/overview.png -------------------------------------------------------------------------------- /docs/_static/schemas/recipe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/schemas/recipe.png -------------------------------------------------------------------------------- /docs/_static/skip-missing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/skip-missing.png -------------------------------------------------------------------------------- /docs/_static/style.css: -------------------------------------------------------------------------------- 1 | .wy-side-nav-search { 2 | background-color: #f7f7f7; 3 | } 4 | 5 | /*There is a clash between xarray notebook styles and readthedoc*/ 6 | 7 | .rst-content dl.xr-attrs dt { 8 | all: revert; 9 | font-size: 95%; 10 | white-space: nowrap; 11 | } 12 | 13 | .rst-content dl.xr-attrs dd { 14 | font-size: 95%; 15 | } 16 | 17 | .xr-wrap { 18 | font-size: 85%; 19 | } 20 | 21 | .wy-table-responsive table td, .wy-table-responsive table th { 22 | white-space: inherit; 23 | } 24 | 25 | /* 26 | .wy-table-responsive table td, 27 | .wy-table-responsive table th { 28 | white-space: normal !important; 29 | vertical-align: top !important; 30 | } 31 | 32 | .wy-table-responsive { 33 | margin-bottom: 24px; 34 | max-width: 100%; 35 | overflow: visible; 36 | } */ 37 | 38 | /* Hide notebooks warnings */ 39 | .nboutput .stderr { 40 | display: none; 41 | } 42 | 43 | /* 44 | Set logo size 45 | */ 46 | .wy-side-nav-search .wy-dropdown > a img.logo, .wy-side-nav-search > a img.logo { 47 | width: 200px; 48 | } 49 | -------------------------------------------------------------------------------- /docs/_static/thinning-after.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/thinning-after.png -------------------------------------------------------------------------------- /docs/_static/thinning-before.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/thinning-before.png -------------------------------------------------------------------------------- /docs/_templates/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_templates/.gitkeep -------------------------------------------------------------------------------- /docs/_templates/apidoc/package.rst.jinja: -------------------------------------------------------------------------------- 1 | {%- macro automodule(modname, options) -%} 2 | .. automodule:: {{ modname }} 3 | {%- for option in options %} 4 | :{{ option }}: 5 | {%- endfor %} 6 | {%- endmacro %} 7 | 8 | {%- macro toctree(docnames) -%} 9 | .. toctree:: 10 | :maxdepth: {{ maxdepth }} 11 | {% for docname in docnames %} 12 | {{ docname }} 13 | {%- endfor %} 14 | {%- endmacro %} 15 | 16 | {%- if is_namespace %} 17 | {{- pkgname.split(".")[1:] | join(".") | e | heading }} 18 | {% else %} 19 | {{- pkgname.split(".")[1:] | join(" ") | e | heading }} 20 | {% endif %} 21 | 22 | {%- if is_namespace %} 23 | .. py:module:: {{ pkgname }} 24 | {% endif %} 25 | 26 | {%- if modulefirst and not is_namespace %} 27 | {{ automodule(["anemoi", pkgname] | join("."), [""]) }} 28 | {% endif %} 29 | 30 | {%- if subpackages %} 31 | Subpackages 32 | ----------- 33 | 34 | {{ toctree(subpackages) }} 35 | {% endif %} 36 | 37 | {%- if submodules %} 38 | {% if separatemodules %} 39 | {{ toctree(submodules) }} 40 | {% else %} 41 | {%- for submodule in submodules %} 42 | {% if show_headings %} 43 | {{- submodule.split(".")[2:] | join(".") | e | heading(2) }} 44 | {% endif %} 45 | {{ automodule(["anemoi", submodule] | join("."), automodule_options) }} 46 | {% endfor %} 47 | {%- endif %} 48 | {%- endif %} 49 | 50 | {%- if not modulefirst and not is_namespace %} 51 | Module contents 52 | --------------- 53 | 54 | {{ automodule(pkgname, automodule_options) }} 55 | {% endif %} 56 | -------------------------------------------------------------------------------- /docs/apply-fmt.sh: -------------------------------------------------------------------------------- 1 | : 2 | for n in $(find . -name '*.rst') 3 | do 4 | rstfmt $n 5 | done 6 | -------------------------------------------------------------------------------- /docs/check-index.sh: -------------------------------------------------------------------------------- 1 | : 2 | # See https://github.com/vscode-restructuredtext/vscode-restructuredtext/issues/280 3 | for n in $(find . -name '*.rst') 4 | do 5 | m=$(echo $n | sed 's/\.rst//' | sed 's,^\./,,') 6 | egrep ":doc:.$m" index.rst > /dev/null || echo $m 7 | done 8 | -------------------------------------------------------------------------------- /docs/cli/compare-lam.rst: -------------------------------------------------------------------------------- 1 | .. _compare_lam_command: 2 | 3 | Compare-LAM Command 4 | =================== 5 | 6 | Compare statistics of two datasets. 7 | This command compares the statistics of each variable in two datasets **only in the overlapping area** between the two. 8 | 9 | Example use cases: 10 | ------------------ 11 | - **Stretched Grid** 12 | - **Boundary LAM** 13 | 14 | In both cases, it is necessary to check the alignment between the variables of the local dataset and those of the global dataset. 
15 | Both datasets will coexist on the same grid, and statistical coherence is essential for training stability. 16 | 17 | The `compare-lam` command outputs a table comparing dataset statistics in **HTML format**. 18 | Additionally, a plot of the dataset grids can be displayed and saved if requested. 19 | 20 | Usage: 21 | ****** 22 | .. code:: console 23 | 24 | $ anemoi-datasets compare-lam dataset1 dataset2 -D num_dates -O outpath -R round_ndigits --selected-vars var1 var2 ... [--save-plots] 25 | 26 | Arguments: 27 | ---------- 28 | 29 | - **dataset1**: Path to the first dataset (the global dataset). 30 | - **dataset2**: Path to the second dataset (the LAM dataset). 31 | - **-D, --num-of-dates**: Number of time steps (datapoints) to compare. *(default: 10)* 32 | - **-O, --outpath**: Path to store the output table (and optional plots). *(default: "./")* 33 | - **-R, --round-of-digits**: Number of decimal places to round values to. *(default: 4)* 34 | - **--selected-vars**: List of variables to compare between the datasets. *(default: ["10u", "10v", "2d", "2t"])* 35 | - **--save-plots (optional)**: Enable this flag to save an image of the dataset grids. 36 | 37 | Example: 38 | -------- 39 | 40 | .. code:: console 41 | 42 | $ anemoi-datasets compare-lam aifs-ea-an-oper-0001-mars-n320-1979-2022-6h-v6.zarr metno-meps-archive-det-opendap-2p5km-2020-2023-6h-v1.zarr -D 10 -O "./" -R 4 --selected-vars 2t msl --save-plots 43 | 44 | Argparse integration: 45 | --------------------- 46 | 47 | .. argparse:: 48 | :module: anemoi.datasets.__main__ 49 | :func: create_parser 50 | :prog: anemoi-datasets 51 | :path: compare-lam 52 | -------------------------------------------------------------------------------- /docs/cli/compare.rst: -------------------------------------------------------------------------------- 1 | .. _compare_command: 2 | 3 | Compare Command 4 | =============== 5 | 6 | Use this command to compare two datasets. 7 | 8 | The command will run a quick comparison of the two datasets and output a summary of the differences. 9 | 10 | .. warning:: 11 | 12 | This command will not compare the data in the datasets, only some of the metadata. 13 | Subsequent versions of this command may include more detailed comparisons. 14 | 15 | 16 | .. argparse:: 17 | :module: anemoi.datasets.__main__ 18 | :func: create_parser 19 | :prog: anemoi-datasets 20 | :path: compare 21 | -------------------------------------------------------------------------------- /docs/cli/copy.rst: -------------------------------------------------------------------------------- 1 | .. _copy_command: 2 | 3 | Copy Command 4 | ============ 5 | 6 | 7 | Copying a dataset from one location to another can be error-prone and 8 | time-consuming. This command-line script allows for incremental copying. 9 | When the copying process fails, it can be resumed. It can be used to copy 10 | files from a local directory to a remote server, or from a remote server to a 11 | local directory, as long as there is a Zarr backend to read and write the data. 12 | 13 | The script uses multiple threads to make the process faster. However, it is 14 | important to consider that making parallel requests to the same server may 15 | not be ideal, for instance if the server internally uses a limited number of 16 | threads to handle requests. 17 | 18 | The option to rechunk the data is available, which can be useful when the 19 | data is stored on a platform that does not support having many small files 20 | or many files in the same directory. However, keep in mind that rechunking 21 | has a huge impact on the performance when reading the data: the chunk pattern 22 | for the source dataset has been defined for good reasons, and changing it is 23 | very likely to have a negative impact on the performance. 24 |
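To make the copy workflow concrete, here is a minimal usage sketch (hedged: the source path and destination URL are illustrative, the exact positional arguments should be checked against ``anemoi-datasets copy --help``, and only ``--resume`` is taken from the warning below):

.. code:: console

   # Start, or resume, an incremental copy of a local dataset to object
   # storage, keeping the chunking of the source dataset.
   $ anemoi-datasets copy --resume dataset.zarr s3://my-bucket/dataset.zarr

If the transfer is interrupted, re-running the same command with ``--resume`` continues from where it stopped instead of starting over.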
25 | .. warning:: 26 | 27 | When resuming the copying process (using ``--resume``), calling the script with the same arguments for ``--block-size`` and ``--rechunk`` is recommended. 28 | Using different values for these arguments to resume copying the same dataset may lead to unexpected behavior. 29 | 30 | 31 | .. argparse:: 32 | :module: anemoi.datasets.__main__ 33 | :func: create_parser 34 | :prog: anemoi-datasets 35 | :path: copy 36 | -------------------------------------------------------------------------------- /docs/cli/create.rst: -------------------------------------------------------------------------------- 1 | .. _create_command: 2 | 3 | Create Command 4 | ============== 5 | 6 | Use this command to create a dataset from a recipe file. 7 | The syntax of the recipe file is described in :doc:`building datasets <../datasets/building/introduction>`. 8 | 9 | .. argparse:: 10 | :module: anemoi.datasets.__main__ 11 | :func: create_parser 12 | :prog: anemoi-datasets 13 | :path: create 14 | -------------------------------------------------------------------------------- /docs/cli/grib-index.rst: -------------------------------------------------------------------------------- 1 | .. _grib-index_command: 2 | 3 | Grib-index Command 4 | ================== 5 | 6 | The `grib-index` command is used to create an index file for GRIB files. The index file is then used 7 | by the `grib-index` :ref:`source `. 8 | 9 | The command will recursively scan the directories provided and open all the GRIB files found. It will 10 | then create an index file for each GRIB file, which will be used to read the data. 11 | 12 | .. code:: bash 13 | 14 | anemoi-datasets grib-index --index index.db /path1/to/grib/files /path2/to/grib/files 15 | 16 | 17 | See :ref:`grib_flavour` for more information about GRIB flavours. 18 | 19 | 20 | .. argparse:: 21 | :module: anemoi.datasets.__main__ 22 | :func: create_parser 23 | :prog: anemoi-datasets 24 | :path: grib-index 25 | -------------------------------------------------------------------------------- /docs/cli/inspect.rst: -------------------------------------------------------------------------------- 1 | .. _inspect_command: 2 | 3 | Inspect Command 4 | =============== 5 | 6 | 7 | Anemoi datasets are stored in Zarr format and can be located on a local file system or on a remote server. 8 | The `inspect` command is used to inspect the contents of a dataset. 9 | This command will output the metadata of the dataset, including the variables, dimensions, and attributes. 10 | 11 | .. code:: console 12 | 13 | $ anemoi-datasets inspect dataset.zarr 14 | 15 | 16 | which will output something like the following. The output should be self-explanatory. 17 | 18 | .. literalinclude:: ../datasets/yaml/building1.txt 19 | :language: console 20 | 21 | .. argparse:: 22 | :module: anemoi.datasets.__main__ 23 | :func: create_parser 24 | :prog: anemoi-datasets 25 | :path: inspect 26 | -------------------------------------------------------------------------------- /docs/cli/introduction.rst: -------------------------------------------------------------------------------- 1 | ..
_cli-introduction: 2 | 3 | ################## 4 | Command line tool 5 | ################## 6 | 7 | When you install the `anemoi-datasets` package, this will also install a command-line tool 8 | called ``anemoi-datasets``, which can be used to manage the Zarr datasets. 9 | 10 | The tool can provide help with the ``--help`` option: 11 | 12 | .. code-block:: bash 13 | 14 | % anemoi-datasets --help 15 | 16 | The commands are: 17 | 18 | - :ref:`Create Command <create_command>` 19 | - :ref:`Copy Command <copy_command>` 20 | - :ref:`Inspect Command <inspect_command>` 21 | - :ref:`Compare Command <compare_command>` 22 | - :ref:`Scan Command <scan_command>` 23 | - :ref:`Compare LAM Command <compare_lam_command>` 24 | -------------------------------------------------------------------------------- /docs/cli/patch.rst: -------------------------------------------------------------------------------- 1 | .. _patch_command: 2 | 3 | Patch Command 4 | ============= 5 | 6 | Use this command to patch the metadata of a given dataset. 7 | 8 | .. argparse:: 9 | :module: anemoi.datasets.__main__ 10 | :func: create_parser 11 | :prog: anemoi-datasets 12 | :path: patch 13 | -------------------------------------------------------------------------------- /docs/cli/scan.rst: -------------------------------------------------------------------------------- 1 | .. _scan_command: 2 | 3 | Scan Command 4 | ============ 5 | 6 | Use this command to scan for GRIB files. 7 | 8 | .. argparse:: 9 | :module: anemoi.datasets.__main__ 10 | :func: create_parser 11 | :prog: anemoi-datasets 12 | :path: scan 13 | -------------------------------------------------------------------------------- /docs/datasets/building/advanced-options.rst: -------------------------------------------------------------------------------- 1 | ################## 2 | Advanced Options 3 | ################## 4 | -------------------------------------------------------------------------------- /docs/datasets/building/filters.rst: -------------------------------------------------------------------------------- 1 | .. _filters: 2 | 3 | ######### 4 | Filters 5 | ######### 6 | 7 | .. warning:: 8 | 9 | This is still a work-in-progress. Some of the filters may be renamed 10 | later. 11 | 12 | Filters are used to modify the data or metadata in a dataset. 13 | 14 | .. toctree:: 15 | :maxdepth: 1 16 | 17 | filters/select 18 | filters/orog_to_z 19 | filters/regrid 20 | filters/rename 21 | filters/rotate_winds 22 | filters/sum 23 | filters/unrotate_winds 24 | filters/wz_to_w 25 | filters/noop 26 | filters/empty 27 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/empty.rst: -------------------------------------------------------------------------------- 1 | ####### 2 | empty 3 | ####### 4 | 5 | The ``empty`` filter is for debugging purposes. It always returns an 6 | empty set of fields. 7 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/noop.rst: -------------------------------------------------------------------------------- 1 | ###### 2 | noop 3 | ###### 4 | 5 | The ``noop`` filter is for debugging purposes. It returns its input 6 | unchanged. 7 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/orog_to_z.rst: -------------------------------------------------------------------------------- 1 | ########### 2 | orog_to_z 3 | ########### 4 | 5 | The ``orog_to_z`` filter converts orography (in metres) to surface 6 | geopotential (m^2/s^2) using the equation: 7 | 8 | ..
math:: 9 | 10 | z &= g \cdot \textrm{orog}\\ 11 | g &= 9.80665\ m \cdot s^{-2} 12 | 13 | This filter must follow a source that provides orography, which is 14 | replaced by the surface geopotential (an orography of 1000 m, for example, yields z ≈ 9806.65 m^2/s^2). 15 | 16 | .. literalinclude:: yaml/orog_to_z.yaml 17 | :language: yaml 18 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/regrid.rst: -------------------------------------------------------------------------------- 1 | ######## 2 | regrid 3 | ######## 4 | 5 | When building a dataset for a specific model, it is possible that the 6 | source grid or resolution does not fit the needs. In that case, it is 7 | possible to add a filter to interpolate the data to a target grid. The 8 | filter is part of the ``anemoi-transform`` package. It will call the 9 | ``interpolate`` function from `earthkit-regrid 10 | `_ if 11 | the keys ``method``, ``in_grid`` and ``out_grid`` are provided and if a 12 | `pre-generated matrix 13 | `_ 14 | exists for this transformation. Otherwise, it is possible to provide a 15 | ``regrid matrix`` previously generated with ``anemoi-transform 16 | make-regrid-matrix``. The generated matrix is an NPZ file containing the 17 | input/output coordinates, the indices, and the weights of the 18 | interpolation. 19 | 20 | ``regrid`` is a :ref:`filter <filters>` that must follow a :ref:`source 21 | <sources>` or another filter in a :ref:`building-pipe` operation. 22 | 23 | .. literalinclude:: yaml/regrid1.yaml 24 | :language: yaml 25 | 26 | .. literalinclude:: yaml/regrid2.yaml 27 | :language: yaml 28 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/rename.rst: -------------------------------------------------------------------------------- 1 | ######## 2 | rename 3 | ######## 4 | 5 | When combining several sources, it is common to have different values 6 | for a given attribute to represent the same concept. For example, 7 | ``temperature_850hPa`` and ``t_850`` are two different ways to represent 8 | the temperature at 850 hPa. The ``rename`` filter allows renaming a key 9 | to another key. It is a :ref:`filter <filters>` that must follow a 10 | :ref:`source <sources>` or another filter in a :ref:`building-pipe` 11 | operation. 12 | 13 | .. literalinclude:: yaml/rename.yaml 14 | :language: yaml 15 | 16 | .. note:: 17 | 18 | The ``rename`` filter was primarily designed to rename the ``param`` 19 | attribute, but any key can be renamed. The ``rename`` filter can take 20 | several renaming keys. 21 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/rotate_winds.rst: -------------------------------------------------------------------------------- 1 | ############## 2 | rotate_winds 3 | ############## 4 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/select.rst: -------------------------------------------------------------------------------- 1 | ######## 2 | select 3 | ######## 4 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/sum.rst: -------------------------------------------------------------------------------- 1 | ##### 2 | sum 3 | ##### 4 | 5 | The ``sum`` filter computes the sum over multiple variables. This can be 6 | useful for computing total precipitation from its components (snow, 7 | rain) or summing the components of total column-integrated water.
This 8 | filter must follow a source that provides the list of variables to be 9 | summed. These variables are removed by the filter and replaced by a 10 | single summed variable. 11 | 12 | .. literalinclude:: yaml/sum.yaml 13 | :language: yaml 14 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/unrotate_winds.rst: -------------------------------------------------------------------------------- 1 | ############### 2 | unrotate_winds 3 | ############### 4 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/wz_to_w.rst: -------------------------------------------------------------------------------- 1 | ######### 2 | wz_to_w 3 | ######### 4 | 5 | The ``wz_to_w`` filter converts geometric vertical velocity (provided in 6 | m/s) to vertical velocity in pressure coordinates (Pa/s). This filter 7 | must follow a source that provides geometric vertical velocity. 8 | Geometric vertical velocity is removed by the filter, and pressure 9 | vertical velocity is added. 10 | 11 | .. literalinclude:: yaml/wz_to_w.yaml 12 | :language: yaml 13 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/yaml/orog_to_z.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | pipe: 3 | - source: 4 | # mars, grib, netcdf, etc. 5 | # source attributes here 6 | # ... 7 | # Must load an orography variable 8 | 9 | - orog_to_z: 10 | orog: orog # Name of orography (input) variable 11 | z: z # Name of z (output) variable 12 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/yaml/regrid1.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | pipe: 3 | - source: 4 | # mars, grib, netcdf, etc. 5 | # source attributes here 6 | # ... 7 | 8 | - regrid: 9 | method: nearest 10 | in_grid: o32 11 | out_grid: o48 12 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/yaml/regrid2.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | pipe: 3 | - source: 4 | # mars, grib, netcdf, etc. 5 | # source attributes here 6 | # ... 7 | 8 | - regrid: 9 | matrix: /path/to/regrid/matrix.npz 10 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/yaml/rename.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | pipe: 3 | - source: 4 | # mars, grib, netcdf, etc. 5 | # source attributes here 6 | # ... 7 | 8 | - rename: 9 | param: 10 | # Map old `param` names to new ones 11 | temperature_2m: 2t 12 | temperature_850hPa: t_850 13 | # ... 14 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/yaml/sum.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | pipe: 3 | - source: 4 | # mars, grib, netcdf, etc. 5 | # source attributes here 6 | # ... 
7 | # Must load the variables to be summed 8 | 9 | - sum: 10 | params: 11 | # List of input variables 12 | - variable1 13 | - variable2 14 | - variable3 15 | output: variable_total # Name of output variable 16 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/yaml/wz_to_w.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | pipe: 3 | - source: 4 | # mars, grib, netcdf, etc. 5 | # source attributes here 6 | # ... 7 | # Must load geometric vertical velocity 8 | 9 | - wz_to_w: 10 | wz: wz # Name of geometric vertical velocity (input) variable 11 | w: w # Name of pressure vertical velocity (output) variable 12 | -------------------------------------------------------------------------------- /docs/datasets/building/handling-missing-dates.rst: -------------------------------------------------------------------------------- 1 | ######################## 2 | Handling missing dates 3 | ######################## 4 | 5 | By default, the package will raise an error if there are missing dates. 6 | 7 | Missing dates can be handled by specifying a list of dates in the 8 | configuration file. The dates should be in the same format as the dates 9 | in the time series. The missing dates will be filled with ``np.nan`` 10 | values. 11 | 12 | .. literalinclude:: ../yaml/missing_dates.yaml 13 | :language: yaml 14 | 15 | *Anemoi* will ignore the missing dates when computing the 16 | :ref:`statistics `. 17 | 18 | You can retrieve the list of indices corresponding to the missing dates by 19 | accessing the ``missing`` attribute of the dataset object. 20 | 21 | .. code:: python 22 | 23 | print(ds.missing) 24 | 25 | If you access a missing index, the dataset will raise a 26 | ``MissingDateError``. 27 | -------------------------------------------------------------------------------- /docs/datasets/building/handling-missing-values.rst: -------------------------------------------------------------------------------- 1 | ######################### 2 | Handling missing values 3 | ######################### 4 | 5 | When handling data for machine learning models, missing values (`NaNs`) 6 | can pose a challenge, as models require complete data to operate 7 | effectively and may crash otherwise. Ideally, we anticipate having 8 | complete data in all fields. 9 | 10 | However, there are scenarios where `NaNs` naturally occur, such as with 11 | variables only relevant on land or at sea. This happens for sea surface 12 | temperature (`sst`), for example. In such cases, the default behaviour 13 | is to reject data with `NaNs` as invalid. To accommodate `NaNs` and 14 | accurately compute statistics based on them, you can include the 15 | ``allow_nans`` key in the configuration. 16 | 17 | Here's an example of how to implement it: 18 | 19 | .. literalinclude:: ../yaml/nan.yaml 20 | :language: yaml 21 | -------------------------------------------------------------------------------- /docs/datasets/building/naming-variables.rst: -------------------------------------------------------------------------------- 1 | .. _naming-variables: 2 | 3 | ################## 4 | Naming Variables 5 | ################## 6 | 7 | *************** 8 | Rename Filter 9 | *************** 10 | 11 | The rename filter is used to rename variables in a dataset. 12 | 13 | .. _remapping_option: 14 | 15 | ****************** 16 | Remapping Option 17 | ****************** 18 | 19 | TODO. 
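Pending proper documentation of this option, the sketch below shows the kind of remapping commonly used in recipes to build variable names from the ``param`` and ``levelist`` keys; the ``output.remapping`` key and the ``{param}_{levelist}`` template are assumptions and should be checked against your version of anemoi-datasets:

.. code:: yaml

   output:
     remapping:
       param_level: "{param}_{levelist}"

With such a remapping, temperature at 850 hPa is named ``t_850``, while single-level variables such as ``2t`` keep their ``param`` name.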
20 | -------------------------------------------------------------------------------- /docs/datasets/building/operations.rst: -------------------------------------------------------------------------------- 1 | .. _operations: 2 | 3 | ############ 4 | Operations 5 | ############ 6 | 7 | Operations are blocks of YAML code that translate a list of dates into 8 | fields. 9 | 10 | .. _building-join: 11 | 12 | ****** 13 | join 14 | ****** 15 | 16 | The join is the process of combining data from several sources. Each 17 | source is expected to provide different variables for the same dates. 18 | 19 | .. literalinclude:: ../yaml/input.yaml 20 | :language: yaml 21 | 22 | .. _building-concat: 23 | 24 | ******** 25 | concat 26 | ******** 27 | 28 | Concatenation is the process of combining different sets of operations 29 | that handle different dates. This is typically used to build a dataset 30 | that spans several years, when several sources are involved, each 31 | providing a different period. 32 | 33 | .. literalinclude:: ../yaml/concat.yaml 34 | :language: yaml 35 | 36 | .. _building-pipe: 37 | 38 | ****** 39 | pipe 40 | ****** 41 | 42 | The pipe is the process of transforming fields using :ref:`filters 43 | `. The first step of a pipe is typically a source, a join, or 44 | another pipe. The following steps are filters. 45 | 46 | .. literalinclude:: ../yaml/pipe.yaml 47 | :language: yaml 48 | -------------------------------------------------------------------------------- /docs/datasets/building/sources.rst: -------------------------------------------------------------------------------- 1 | .. _sources: 2 | 3 | ######### 4 | Sources 5 | ######### 6 | 7 | The source is a software component that, given a list of dates and 8 | variables, will return the corresponding fields. 9 | 10 | A `source` is responsible for reading data from the source and 11 | converting it to a set of fields. A `source` is also responsible for 12 | handling the metadata of the data, such as the variable names, and more. 13 | 14 | An example of a source is ECMWF’s MARS archive, a collection of GRIB or 15 | NetCDF files, etc. 16 | 17 | The following `sources` are currently available: 18 | 19 | .. toctree:: 20 | :maxdepth: 1 21 | 22 | sources/accumulations 23 | sources/anemoi-dataset 24 | sources/cds 25 | sources/eccc-fstd 26 | sources/forcings 27 | sources/grib 28 | sources/grib-index 29 | sources/hindcasts 30 | sources/mars 31 | sources/netcdf 32 | sources/opendap 33 | sources/recentre 34 | sources/repeated-dates 35 | sources/xarray-based 36 | sources/xarray-kerchunk 37 | sources/xarray-zarr 38 | sources/zenodo 39 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/anemoi-dataset.rst: -------------------------------------------------------------------------------- 1 | .. _anemoi-dataset_source: 2 | 3 | ################ 4 | anemoi-dataset 5 | ################ 6 | 7 | .. admonition:: Experimental 8 | :class: important 9 | 10 | This source is experimental and may change in the future. 11 | 12 | An anemoi-dataset can be a source for a dataset: 13 | 14 | .. literalinclude:: yaml/anemoi-dataset.yaml 15 | :language: yaml 16 | 17 | The parameters are the same as those used in the ``open_dataset`` 18 | function, which allows you to subset and combine datasets. See 19 | :ref:`opening-datasets` for more information. 
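For orientation, the recipe above corresponds roughly to the following ``open_dataset`` call; this is a sketch, where ``dataset1`` and ``dataset2`` are placeholder names and the dictionary form of the arguments is an assumption:

.. code:: python

   from anemoi.datasets import open_dataset

   # Join two datasets on the variable dimension, then subset by time
   ds = open_dataset(
       join=[
           {"dataset": "dataset1", "select": ["z_500", "t_500", "u_500", "v_500"], "frequency": "6h"},
           {"dataset": "dataset2", "select": ["msl", "2t", "10u", "10v"], "frequency": "6h"},
       ],
       start=2000,
       end=2001,
   )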
20 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/cds.rst: -------------------------------------------------------------------------------- 1 | .. _cds_source: 2 | 3 | ##### 4 | cds 5 | ##### 6 | 7 | For users outside of the ECMWF organisation, it is possible to access 8 | ERA5 data through the Copernicus Climate Data Store ``cdsapi`` instead. 9 | 10 | The steps to set up a CDS API account are detailed `here 11 | `_. 12 | 13 | The only difference from the previous MARS recipes is the addition of 14 | the ``use_cdsapi_dataset`` key: 15 | 16 | .. literalinclude:: yaml/mars-cds.yaml 17 | :language: yaml 18 | 19 | This process can take some time because of the high demand on the CDS 20 | server. 21 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/eccc-fstd.rst: -------------------------------------------------------------------------------- 1 | ########### 2 | eccc-fstd 3 | ########### 4 | 5 | To read files in the standard format used at Environment and Climate 6 | Change Canada (ECCC), the following source can be used: 7 | 8 | .. literalinclude:: yaml/eccc-fstd.yaml 9 | :language: yaml 10 | 11 | The recipe will build a dataset from a standard file using the 12 | ``fstd2nc`` xarray plugin. 13 | 14 | The ``fstd2nc`` dependency is not part of the default anemoi-datasets 15 | installation and has to be installed following the `fstd2nc project 16 | description `_. 17 | 18 | See :ref:`create-cf-data` for more information. 19 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/grib-index.rst: -------------------------------------------------------------------------------- 1 | .. _grib-index_source: 2 | 3 | ############ 4 | grib-index 5 | ############ 6 | 7 | The `grib-index` source is used to read GRIB files with the help of an 8 | index file created with the `grib-index` :ref:`command 9 | <grib-index_command>`. 10 | 11 | See :ref:`create-grib-data` for more information. 12 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/grib.rst: -------------------------------------------------------------------------------- 1 | .. _grib_source: 2 | 3 | ###### 4 | grib 5 | ###### 6 | 7 | To read all the GRIB messages from a file, use the following: 8 | 9 | .. literalinclude:: yaml/grib1.yaml 10 | :language: yaml 11 | 12 | You can also read specific GRIB messages by specifying them using the 13 | MARS language (excluding the keywords `date`, `time`, and `step`, as 14 | well as any post-processing options, such as `grid` or `area`): 15 | 16 | .. literalinclude:: yaml/grib2.yaml 17 | :language: yaml 18 | 19 | You can also read a collection of GRIB files, using Unix shell 20 | wildcards_: 21 | 22 | .. literalinclude:: yaml/grib3.yaml 23 | :language: yaml 24 | 25 | You can also use the requested `date` to build the filenames. For 26 | example, if the GRIB files containing the requested data are named 27 | according to the following pattern: ``/path/to/YYYY/MM/YYYYMMDDHH.grib`` 28 | with `YYYY` being the year, `MM` the month, `DD` the day, and `HH` the 29 | hour, you can use the following configuration: 30 | 31 | .. literalinclude:: yaml/grib4.yaml 32 | :language: yaml 33 | 34 | The patterns between the curly brackets are replaced by the values of 35 | the `date`, formatted according to the Python strftime_ method. 36 | 37 | See :ref:`create-grib-data` for more information. 
38 | 39 | .. note:: 40 | 41 | You can combine all the above options when selecting GRIB messages 42 | from a file. 43 | 44 | .. _strftime: https://python.readthedocs.io/en/latest/library/datetime.html#strftime-and-strptime-behavior 45 | 46 | .. _wildcards: https://en.wikipedia.org/wiki/Glob_(programming) 47 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/hindcasts.rst: -------------------------------------------------------------------------------- 1 | ########### 2 | hindcasts 3 | ########### 4 | 5 | .. note:: 6 | 7 | The `hindcasts` source is currently using the `mars` source 8 | internally. This will be changed in the future. 9 | 10 | Hindcast data, also known as reforecasts, are unique because they are 11 | run for a specific day of the year (such as 1 January or 11 June) for 12 | multiple years. So, for a given reference date like 2022-05-12, we can 13 | have hindcasts for 2001-05-12, 2002-05-12, 2003-05-12, and so on. This 14 | is useful in many cases. For more details, please refer to the ECMWF 15 | documentation. 16 | 17 | The `hindcasts` source has a special argument called `reference_year`, 18 | which represents the year of the reference date. Based on the requested 19 | valid datetime and on the `reference_year`, the `hindcasts` source will 20 | calculate the `hdate`, `date`, and `time` appropriately. 21 | 22 | For example, if the `reference_year` is 2022, then the data for 23 | 2002-05-12 will use data with `hdate=2002-05-12` and `date=2022-05-12`. 24 | 25 | .. literalinclude:: yaml/hindcasts.yaml 26 | :language: yaml 27 | 28 | Using `step` in the `hindcasts` source is implemented and works as 29 | expected. 30 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/mars.rst: -------------------------------------------------------------------------------- 1 | ###### 2 | mars 3 | ###### 4 | 5 | The ``mars`` source will retrieve the data from the ECMWF MARS archive. 6 | For that, you need to have an ECMWF account and build your dataset on 7 | one of the Centre's computers, or use the ``ecmwfapi`` Python package. 8 | 9 | The `yaml` block can contain any keys that follow the `MARS language 10 | specification`_, with the exception of the ``date``, ``time``, and 11 | ``step`` keywords. 12 | 13 | The missing keys will be filled with the default values, as defined in 14 | the MARS language specification. 15 | 16 | .. literalinclude:: yaml/mars1.yaml 17 | :language: yaml 18 | 19 | Data from several level types must be requested in separate requests, 20 | combined with the ``join`` command. 21 | 22 | .. literalinclude:: yaml/mars2.yaml 23 | :language: yaml 24 | 25 | See :ref:`naming-variables` for information on how to name the variables 26 | when mixing single-level and multi-level variables in the same dataset. 27 | 28 | .. _mars language specification: https://confluence.ecmwf.int/display/UDOC/MARS+user+documentation 29 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/netcdf.rst: -------------------------------------------------------------------------------- 1 | ######## 2 | netcdf 3 | ######## 4 | 5 | In the examples below, we explain how to create an anemoi dataset from 6 | one or more netCDF files. 7 | 8 | .. 
literalinclude:: yaml/netcdf.yaml 9 | :language: yaml 10 | 11 | The netCDF source uses `Xarray 12 | `_ internally to access 13 | the data, and assumes that the netcdf files follow the `CF conventions 14 | `_. You can also read a collection of netCDF 15 | files, using Unix shell `wildcards 16 | `_. 17 | 18 | .. warning:: 19 | 20 | We are aware of instances in which the creation of an anemoi dataset 21 | from a netCDF source does not work as expected due to missing 22 | information in the files' metadata that anemoi-datasets expects. 23 | anemoi-datasets' internal routines do their best to infer the missing 24 | information, but in some cases it is not possible. If you encounter 25 | this or similar issues, please open an issue in the anemoi-datasets 26 | repository. 27 | 28 | See :ref:`create-cf-data` for more information. 29 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/opendap.rst: -------------------------------------------------------------------------------- 1 | ######### 2 | opendap 3 | ######### 4 | 5 | .. literalinclude:: yaml/opendap.yaml 6 | :language: yaml 7 | 8 | See :ref:`create-cf-data` for more information. 9 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/recentre.rst: -------------------------------------------------------------------------------- 1 | .. _recentre: 2 | 3 | ########## 4 | recentre 5 | ########## 6 | 7 | Perturbations refer to the small variations centred around a nominal 8 | value of a parameter. When dealing with `ensemble forecasting`_, the 9 | perturbations are related to the difference between `ensemble members` 10 | and their given `centre`. 11 | 12 | The `recentre` function computes a set of new ensemble members centred 13 | on a different centre from previous ensemble members using the following 14 | formula: 15 | 16 | .. math:: 17 | 18 | members_{new} = centre + ( members - \overline{members} ) 19 | 20 | Additionally, some variables must be non-negative to have a physical 21 | meaning (e.g. accumulated variables or `specific humidity`). To ensure 22 | this, positive clipping is performed using the alternative formula: 23 | 24 | .. math:: 25 | 26 | members_{new} = max(0, centre + ( members - \overline{members} )) 27 | 28 | The current implementation enforces that the following variables are 29 | positive when using the `recentre` function: 30 | 31 | +----------+------------------------------+ 32 | | Variable | Description | 33 | +==========+==============================+ 34 | | q | `Specific humidity`_ | 35 | +----------+------------------------------+ 36 | | cp | `Convective precipitation`_ | 37 | +----------+------------------------------+ 38 | | lsp | `Large-scale precipitation`_ | 39 | +----------+------------------------------+ 40 | | tp | `Total precipitation`_ | 41 | +----------+------------------------------+ 42 | 43 | It uses the following arguments: 44 | 45 | members 46 | A :ref:`reference ` to the ensemble members. 47 | 48 | centre 49 | A :ref:`reference ` to the new centre requested. 50 | 51 | Examples: 52 | 53 | .. literalinclude:: yaml/recentre.yaml 54 | :language: yaml 55 | 56 | .. _convective precipitation: https://codes.ecmwf.int/grib/param-db/?id=143 57 | 58 | .. _ensemble forecasting: https://www.ecmwf.int/en/elibrary/75394-ensemble-forecasting 59 | 60 | .. _large-scale precipitation: https://codes.ecmwf.int/grib/param-db/?id=142 61 | 62 | .. 
_specific humidity: https://codes.ecmwf.int/grib/param-db/?id=133 63 | 64 | .. _total precipitation: https://codes.ecmwf.int/grib/param-db/?id=228 65 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/repeated-dates.rst: -------------------------------------------------------------------------------- 1 | ################ 2 | repeated-dates 3 | ################ 4 | 5 | The `repeated-dates` source is used to repeat a single source multiple 6 | times, so that its data are present on multiple dates. A simple example 7 | of this is when you have a source that contains a constant field, such 8 | as orography or bathymetry, that you want to have repeated on all the 9 | dates of the dataset. 10 | 11 | The general format of the `repeated-dates` source is: 12 | 13 | .. literalinclude:: yaml/repeated-dates1.yaml 14 | :language: yaml 15 | 16 | where ``source`` is any of the :ref:`operations <operations>` or 17 | :ref:`sources <sources>` described in the previous sections. The 18 | ``mode`` parameter can be one of the following: 19 | 20 | ********** 21 | constant 22 | ********** 23 | 24 | .. literalinclude:: yaml/repeated-dates2.yaml 25 | :language: yaml 26 | 27 | ************* 28 | climatology 29 | ************* 30 | 31 | .. literalinclude:: yaml/repeated-dates3.yaml 32 | :language: yaml 33 | 34 | ********* 35 | closest 36 | ********* 37 | 38 | .. literalinclude:: yaml/repeated-dates4.yaml 39 | :language: yaml 40 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/xarray-based.rst: -------------------------------------------------------------------------------- 1 | ###################### 2 | xarray-based Sources 3 | ###################### 4 | 5 | More generally, you can specify any valid xarray.open_dataset_ arguments 6 | as the source, and anemoi-datasets will try to build a dataset from it. 7 | Examples of valid xarray.open_dataset_ arguments include: netCDF, Zarr, 8 | OpenDAP, etc. 9 | 10 | .. literalinclude:: yaml/xarray-based.yaml 11 | :language: yaml 12 | 13 | See :ref:`create-cf-data` for more information. 14 | 15 | .. _cf conventions: http://cfconventions.org/ 16 | 17 | .. _wildcards: https://en.wikipedia.org/wiki/Glob_(programming) 18 | 19 | .. _xarray: https://docs.xarray.dev/en/stable/index.html 20 | 21 | .. 
_xarray.open_dataset: https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html 22 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/xarray-kerchunk.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import fsspec 4 | import tqdm 5 | from kerchunk.combine import MultiZarrToZarr 6 | from kerchunk.hdf import SingleHdf5ToZarr 7 | 8 | # Anonymous (unauthenticated) access to the public bucket 9 | fs = fsspec.filesystem("s3", anon=True) 10 | 11 | pattern = "s3://nsf-ncar-era5/e5.oper.an.pl/202403/e5.oper.an.pl.*.ll025sc.2024????00_2024????23.nc" 12 | 13 | jsons = [] 14 | 15 | # Build a kerchunk index for each netCDF/HDF5 file matching the pattern 16 | for file in tqdm.tqdm(fs.glob(pattern)): 17 | with fs.open(file, "rb", anon=True) as f: 18 | h5chunks = SingleHdf5ToZarr(f, file) 19 | jsons.append(h5chunks.translate()) 20 | 21 | # Combine the per-file indices into a single virtual Zarr store, 22 | # concatenated along the time dimension 23 | mzz = MultiZarrToZarr( 24 | jsons, 25 | remote_protocol="s3", 26 | remote_options={"anon": True}, 27 | concat_dims=["time"], 28 | identical_dims=["latitude", "longitude"], 29 | ) 30 | 31 | # Save the combined index, to be referenced by the recipe 32 | with open("combined.json", "w") as f: 33 | json.dump(mzz.translate(), f) 34 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/xarray-kerchunk.rst: -------------------------------------------------------------------------------- 1 | ################# 2 | xarray-kerchunk 3 | ################# 4 | 5 | .. literalinclude:: yaml/xarray-kerchunk.yaml 6 | :language: yaml 7 | 8 | The code below is inspired by the `kerchunk tutorial`_, and makes use of 9 | a subset of the `ERA5 dataset available on AWS`_. You may need to 10 | install the relevant packages before running the code below. 11 | 12 | .. literalinclude:: xarray-kerchunk.py 13 | :language: python 14 | 15 | See :ref:`create-cf-data` for more information. 16 | 17 | .. _era5 dataset available on aws: https://registry.opendata.aws/ecmwf-era5/ 18 | 19 | .. _kerchunk tutorial: https://fsspec.github.io/kerchunk/tutorial.html 20 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/xarray-zarr.rst: -------------------------------------------------------------------------------- 1 | ############# 2 | xarray-zarr 3 | ############# 4 | 5 | Here is an example recipe that builds a dataset using one of the many 6 | regridded versions of ERA5 hosted by Google in an Analysis-Ready, 7 | Cloud-Optimised format. See `here 8 | `_ for more 9 | information. 10 | 11 | .. literalinclude:: yaml/xarray-zarr.yaml 12 | :language: yaml 13 | 14 | Note that, unlike the ``mars`` examples, there is no need to include a 15 | ``grid`` specification. Additionally, to sub-select the vertical levels, 16 | it is necessary to use the :ref:`join <building-join>` operation to join 17 | separate lists containing 2D variables and 3D variables. If all vertical 18 | levels are desired, then it is acceptable to specify a single source. 19 | 20 | See :ref:`create-cf-data` for more information. 
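Before writing such a recipe, it can help to inspect the store directly with Xarray to check the available variable names and vertical levels. This is a sketch, assuming ``xarray``, ``zarr`` and ``gcsfs`` are installed:

.. code:: python

   import xarray as xr

   # Open the store lazily; no data is downloaded at this point
   ds = xr.open_zarr(
       "gs://gcp-public-data-arco-era5/ar/1959-2022-6h-128x64_equiangular_conservative.zarr"
   )

   # Names to use for the recipe's `param` and `level` keys
   print(list(ds.data_vars))
   print(ds["temperature"].coords["level"].values)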
21 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/accumulations1.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | accumulations: 3 | accumulation_period: 6 4 | class: ea 5 | param: [ tp, cp, sf ] 6 | levtype: sfc 7 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/accumulations2.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | accumulations: 3 | accumulation_period: [ 6, 12 ] 4 | class: od 5 | param: [ tp, cp, sf ] 6 | levtype: sfc 7 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/anemoi-dataset.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | anemoi-dataset: 3 | join: 4 | - dataset: dataset1 5 | select: [ z_500, t_500, u_500, v_500 ] 6 | frequency: 6h 7 | - dataset: dataset2 8 | select: [ msl, 2t, 10u, 10v ] 9 | frequency: 6h 10 | start: 2000 11 | end: 2001 12 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/eccc-fstd.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | eccc-fstd: 3 | path: /path/to/data 4 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/forcings.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | join: 3 | - source1: 4 | args1: value1 5 | args2: value2 6 | - forcings: 7 | template: ${input.join.0.source1} 8 | param: 9 | - insolation 10 | - cos_julian_day 11 | - sin_julian_day 12 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/grib1.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | grib: 3 | path: /path/to/data.grib 4 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/grib2.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | grib: 3 | path: /path/to/data.grib 4 | param: [ u, v ] 5 | levelist: [ 1000, 850, 500 ] 6 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/grib3.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | grib: 3 | path: /path/to/*.grib 4 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/grib4.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | grib: 3 | path: /path/to/{date:strftime(%Y)}/{date:strftime(%m)}/{date:strftime(%Y%m%d%H)}.grib 4 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/hindcasts.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | input: 3 | hindcasts: 4 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/mars-cds.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | mars: 3 | use_cdsapi_dataset: "reanalysis-era5-complete" 4 | class: 
ea 5 | levtype: sfc 6 | param: [ 10u, 10v, 2t, msl ] 7 | grid: n320 8 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/mars1.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | mars: 3 | levtype: sfc 4 | param: [ 2t, msl ] 5 | grid: [ 0.25, 0.25 ] 6 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/mars2.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | join: 3 | - mars: 4 | levtype: sfc 5 | param: [ 2t, msl ] 6 | grid: [ 0.25, 0.25 ] 7 | - mars: 8 | levtype: pl 9 | param: [ u, v ] 10 | grid: [ 0.25, 0.25 ] 11 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/netcdf.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | netcdf: 3 | path: /path/to/data.nc 4 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/opendap.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | opendap: 3 | url: https://... 4 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/recentre.yaml: -------------------------------------------------------------------------------- 1 | data_sources: 2 | members_source: 3 | mars: 4 | class: ea 5 | expver: "0001" 6 | grid: 20.0/20.0 7 | levtype: sfc 8 | param: [ 10u, 10v, 2t ] 9 | type: an 10 | stream: enda 11 | number: [ 1, 2, 3, 4, 5, 6, 7, 8, 9 ] 12 | 13 | center_source: 14 | mars: 15 | class: ea 16 | expver: "0001" 17 | grid: 20.0/20.0 18 | levtype: sfc 19 | param: [ 10u, 10v, 2t ] 20 | type: an 21 | stream: oper 22 | 23 | input: 24 | recentre: 25 | centre: ${data_sources.center_source} 26 | members: ${data_sources.members_source} 27 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/repeated-dates1.yaml: -------------------------------------------------------------------------------- 1 | 2 | repeated-dates: 3 | mode: mode 4 | # ... parameters related to the mode ... 5 | source: 6 | # ... a source definition ... 
7 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/repeated-dates2.yaml: -------------------------------------------------------------------------------- 1 | repeated-dates: 2 | mode: constant 3 | source: 4 | xarray-zarr: 5 | url: dem.zarr 6 | variable: dem 7 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/repeated-dates3.yaml: -------------------------------------------------------------------------------- 1 | repeated-dates: 2 | mode: climatology 3 | year: 2019 4 | day: 15 5 | source: 6 | grib: 7 | path: some/path/to/data.grib 8 | param: [ some_param ] 9 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/repeated-dates4.yaml: -------------------------------------------------------------------------------- 1 | repeated-dates: 2 | mode: closest 3 | frequency: 24h 4 | maximum: 30d 5 | skip_all_nans: true 6 | source: 7 | grib: 8 | path: path/to/data.grib 9 | param: [ some_param ] 10 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/xarray-based.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | xarray-based-source: # netcdf, zarr, opendap, etc. 3 | group: mygroup 4 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/xarray-kerchunk.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2024-03-24T00:00:00 3 | end: 2024-03-24T03:00:00 4 | frequency: 1h 5 | 6 | input: 7 | xarray-kerchunk: 8 | json: combined.json 9 | param: T 10 | level: [ 1000, 50 ] 11 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/xarray-zarr.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2021-01-01T00:00:00 3 | end: 2021-01-10T12:00:00 4 | frequency: 6h 5 | 6 | input: 7 | join: 8 | - xarray-zarr: 9 | url: "gs://gcp-public-data-arco-era5/ar/1959-2022-6h-128x64_equiangular_conservative.zarr" 10 | param: 11 | - surface_pressure 12 | - 2m_temperature 13 | - 10m_u_component_of_wind 14 | - 10m_v_component_of_wind 15 | 16 | - xarray-zarr: 17 | url: "gs://gcp-public-data-arco-era5/ar/1959-2022-6h-128x64_equiangular_conservative.zarr" 18 | param: 19 | - temperature 20 | level: 21 | - 1000 22 | - 500 23 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/zenodo.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2021-06-28 3 | end: 2021-06-29 4 | 5 | input: 6 | zenodo: 7 | record_id: 6470725 8 | file_key: "{param}_INCA_202106280700.nc" 9 | param: [ RR, PN, PT ] 10 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/zenodo.rst: -------------------------------------------------------------------------------- 1 | ######## 2 | zenodo 3 | ######## 4 | 5 | https://zenodo.org/records/6470725 6 | 7 | .. 
literalinclude:: yaml/zenodo.yaml 8 | :language: yaml 9 | -------------------------------------------------------------------------------- /docs/datasets/building/syntax.yaml: -------------------------------------------------------------------------------- 1 | description: 2 | 3 | name: 4 | 5 | licence: CC-BY-4.0 6 | 7 | attribution: 8 | 9 | citation: 10 | 11 | dates: 12 | start: 13 | end: 14 | frequency: 15 | 16 | build: 17 | 18 | 19 | input: 20 | 21 | 22 | output: 23 | -------------------------------------------------------------------------------- /docs/datasets/introduction.rst: -------------------------------------------------------------------------------- 1 | .. _datasets-introduction: 2 | 3 | ####################################### 4 | What can you do with anemoi-datasets? 5 | ####################################### 6 | 7 | anemoi-datasets is a framework designed to facilitate the creation and 8 | utilisation of machine learning-optimised meteorological datasets. It 9 | offers tools for both building your own datasets and using existing 10 | datasets. 11 | 12 | - :ref:`Building Your Own Datasets <building-introduction>`: Learn how 13 | to create customised datasets tailored to your specific needs, 14 | including data sourcing, filtering, and processing techniques. 15 | 16 | - :ref:`Using Existing Datasets <using-introduction>`: Discover methods 17 | to access, manipulate, and analyse pre-existing datasets for various 18 | applications. 19 | 20 | For detailed guidance, refer to the respective sections in the 21 | documentation. 22 | -------------------------------------------------------------------------------- /docs/datasets/using/configuration.rst: -------------------------------------------------------------------------------- 1 | .. _configuration: 2 | 3 | ############### 4 | Configuration 5 | ############### 6 | 7 | When the ``open_dataset`` function is called with a string that does not 8 | end with ``.zarr`` or ``.zip``, it is considered a dataset name and not 9 | a path or a URL. 10 | 11 | In that case, the *Anemoi* configuration is read from 12 | ``~/.config/anemoi/settings.toml``. Below is an example of such a 13 | configuration: 14 | 15 | .. literalinclude:: configuration.toml 16 | :language: toml 17 | 18 | Then, the name passed to ``open_dataset`` is used to look for a possible 19 | path or URL: 20 | 21 | - If the name is listed in the ``[datasets.named]`` section, the corresponding 22 | path is used. 23 | - Otherwise, the suffix ``.zarr`` is added to the name, and the file is 24 | searched at every location listed in the ``path`` list. 25 | 26 | See :ref:`miscellaneous` to modify the list of named datasets and the 27 | path temporarily. 28 | -------------------------------------------------------------------------------- /docs/datasets/using/configuration.toml: -------------------------------------------------------------------------------- 1 | [datasets] 2 | path = [ 3 | "/home/mlx/ai-ml/datasets/stable", 4 | "/home/mlx/ai-ml/datasets/experimental", 5 | "/home/mlx/ai-ml/datasets/testing", 6 | "s3://ml-datasets", 7 | ] 8 | 9 | [datasets.named] 10 | test = "/home/mlx/test-dataset.zarr" 11 | -------------------------------------------------------------------------------- /docs/datasets/using/ensembles.rst: -------------------------------------------------------------------------------- 1 | .. _selecting-members: 2 | 3 | ################### 4 | Selecting members 5 | ################### 6 | 7 | This section describes how to subset data that are part of an ensemble. 
8 | To combine ensembles, see :ref:`ensembles` in the 9 | :ref:`combining-datasets` section. 10 | 11 | .. _number: 12 | 13 | If a dataset is an ensemble, you can select one or more specific members 14 | using the `number` option. You can also use ``numbers`` (which is an 15 | alias for ``number``), and ``member`` (or ``members``). The difference 16 | between the two is that ``number`` is **1-based**, whereas ``member`` is 17 | **0-based**. 18 | 19 | Select a single element: 20 | 21 | .. code:: python 22 | 23 | ds = open_dataset( 24 | dataset, 25 | number=1, 26 | ) 27 | 28 | ... or a list: 29 | 30 | .. code:: python 31 | 32 | ds = open_dataset( 33 | dataset, 34 | number=[1, 3, 5], 35 | ) 36 | -------------------------------------------------------------------------------- /docs/datasets/using/introduction.rst: -------------------------------------------------------------------------------- 1 | .. _using-introduction: 2 | 3 | ########################### 4 | Using an existing dataset 5 | ########################### 6 | 7 | An *Anemoi* dataset is a thin wrapper around a zarr_ store that is 8 | optimised for training data-driven weather forecasting models. It is 9 | organised in such a way that I/O operations are minimised (see 10 | :ref:`overview`). 11 | 12 | .. _zarr: https://zarr.readthedocs.io/ 13 | 14 | To open a dataset, you can use the `open_dataset` function: 15 | 16 | .. code:: python 17 | 18 | from anemoi.datasets import open_dataset 19 | 20 | ds = open_dataset("path/to/dataset.zarr") 21 | 22 | You can then access the data in the dataset using the `ds` object as if 23 | it were a NumPy array. 24 | 25 | .. code:: python 26 | 27 | print(ds.shape) 28 | 29 | print(len(ds)) 30 | 31 | print(ds[0]) 32 | 33 | print(ds[10:20]) 34 | 35 | One of the main features of the *anemoi-datasets* package is the ability 36 | to subset or combine datasets. 37 | 38 | .. code:: python 39 | 40 | from anemoi.datasets import open_dataset 41 | 42 | ds = open_dataset("path/to/dataset.zarr", start=2000, end=2020) 43 | 44 | In that case, a dataset is created that only contains the data between 45 | the years 2000 and 2020. Combining is done by passing multiple paths to 46 | the `open_dataset` function: 47 | 48 | .. code:: python 49 | 50 | from anemoi.datasets import open_dataset 51 | 52 | ds = open_dataset("path/to/dataset1.zarr", "path/to/dataset2.zarr") 53 | 54 | In the latter case, the datasets are combined along the time dimension 55 | or the variable dimension, depending on the datasets' structure. 56 | 57 | .. toctree:: 58 | :maxdepth: 1 59 | :hidden: 60 | :caption: Using datasets 61 | 62 | opening 63 | methods 64 | subsetting 65 | combining 66 | selecting 67 | ensembles 68 | grids 69 | zip 70 | statistics 71 | missing 72 | other 73 | matching 74 | miscellaneous 75 | configuration 76 | -------------------------------------------------------------------------------- /docs/datasets/using/matching.rst: -------------------------------------------------------------------------------- 1 | .. _using-matching: 2 | 3 | ##################### 4 | Matching attributes 5 | ##################### 6 | 7 | When :ref:`combining datasets <combining-datasets>` with operations like 8 | :ref:`concat`, :ref:`join`, :ref:`ensembles` or :ref:`grids`, some of 9 | the attributes of the input datasets must match, such as the list of 10 | variables for `concat` or the `dates` and `frequency` for `join`. 11 | 12 | You can let the package automatically adjust the attributes of the input 13 | datasets using the `adjust` keyword, to adjust one of the attributes: 14 | 15 | .. 
code:: python 16 | 17 | ds = open_dataset( 18 | join=[dataset1, dataset2], 19 | adjust="frequency", 20 | ) 21 | 22 | or more than one attribute: 23 | 24 | .. code:: python 25 | 26 | ds = open_dataset( 27 | join=[dataset1, dataset2], 28 | adjust=["start", "end", "frequency"], 29 | ) 30 | 31 | You can also use `dates` as a shortcut for the above. This is equivalent 32 | to: 33 | 34 | .. code:: python 35 | 36 | ds = open_dataset(join=[dataset1, dataset2], adjust="dates") 37 | 38 | To use the common set of variables, use: 39 | 40 | .. code:: python 41 | 42 | ds = open_dataset(concat=[dataset1, dataset2], adjust="variables") 43 | 44 | To match all the attributes: 45 | 46 | .. code:: python 47 | 48 | ds = open_dataset( 49 | cutout=[dataset1, dataset2], 50 | adjust="all", 51 | ) 52 | -------------------------------------------------------------------------------- /docs/datasets/using/miscellaneous.rst: -------------------------------------------------------------------------------- 1 | .. _miscellaneous: 2 | 3 | ######################### 4 | Miscellaneous functions 5 | ######################### 6 | 7 | The two functions below can be used to temporarily modify the 8 | :ref:`configuration ` so that the packages can find named 9 | datasets at given locations. 10 | 11 | Use ``add_dataset_path`` to add a path to the list of paths where the 12 | package searches for datasets: 13 | 14 | .. _add_dataset_path: 15 | 16 | .. code:: python 17 | 18 | from anemoi.datasets import add_dataset_path 19 | from anemoi.datasets import open_dataset 20 | 21 | add_dataset_path("https://object-store.os-api.cci1.ecmwf.int/ml-examples/") 22 | 23 | ds = open_dataset("an-oper-2023-2023-2p5-6h-v1") 24 | 25 | Use ``add_named_dataset`` to add a named dataset to the list of named 26 | datasets: 27 | 28 | .. _add_named_dataset: 29 | 30 | .. code:: python 31 | 32 | from anemoi.datasets import add_named_dataset 33 | from anemoi.datasets import open_dataset 34 | 35 | add_named_dataset( 36 | "example-dataset", 37 | "https://object-store.os-api.cci1.ecmwf.int/ml-examples/an-oper-2023-2023-2p5-6h-v1.zarr", 38 | ) 39 | 40 | ds = open_dataset("example-dataset") 41 | -------------------------------------------------------------------------------- /docs/datasets/using/other.rst: -------------------------------------------------------------------------------- 1 | .. _selecting-other: 2 | 3 | ################## 4 | Other operations 5 | ################## 6 | 7 | .. warning:: 8 | 9 | The operations described in this section do not check that their 10 | inputs are compatible. 11 | 12 | ******* 13 | chain 14 | ******* 15 | 16 | .. code:: python 17 | 18 | ds = open_dataset(chain=[dataset1, dataset2, ...]) 19 | 20 | The `chain` operation is used to combine multiple datasets into a single 21 | dataset. The datasets are combined by concatenating the data arrays 22 | along the first dimension (dates). This is similar to the :ref:`concat` 23 | operation, but no checks are done to see if the datasets are compatible. 24 | This means that the shape of the arrays returned when iterating or 25 | indexing may be different. 26 | 27 | This operation is identical to Python's :py:func:`itertools.chain` 28 | function. 29 | 30 | ********* 31 | shuffle 32 | ********* 33 | 34 | .. code:: python 35 | 36 | ds = open_dataset(dataset, shuffle=True) 37 | 38 | The `shuffle` operation is used to shuffle the data in the dataset along 39 | the first dimension (dates). 
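For instance, iterating over a shuffled dataset visits the dates in a random order; a minimal sketch, where the path is a placeholder:

.. code:: python

   from anemoi.datasets import open_dataset

   ds = open_dataset("path/to/dataset.zarr", shuffle=True)

   # The first dimension (dates) is traversed in a random order
   for sample in ds:
       ...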
40 | -------------------------------------------------------------------------------- /docs/datasets/using/statistics.rst: -------------------------------------------------------------------------------- 1 | .. _selecting-statistics: 2 | 3 | ############ 4 | Statistics 5 | ############ 6 | 7 | When combining datasets, the statistics are not recomputed. Instead, the 8 | statistics of the first dataset encountered are returned by the 9 | ``statistics`` property. 10 | 11 | You can change that behaviour by using the `statistics` option to select 12 | a specific dataset from which to get the statistics: 13 | 14 | .. code:: python 15 | 16 | ds = open_dataset(dataset, statistics=other_dataset) 17 | 18 | # Will return the statistics of "other_dataset" 19 | 20 | print(ds.statistics) 21 | -------------------------------------------------------------------------------- /docs/datasets/using/subsetting.rst: -------------------------------------------------------------------------------- 1 | .. _subsetting-datasets: 2 | 3 | ##################### 4 | Subsetting datasets 5 | ##################### 6 | 7 | Subsetting is the action of filtering the dataset by its first dimension 8 | (dates). 9 | 10 | .. _start: 11 | 12 | ******* 13 | start 14 | ******* 15 | 16 | This option lets you subset the dataset by time. You can pass a date or 17 | a string: 18 | 19 | .. code:: python 20 | 21 | open_dataset(dataset, start=1980) 22 | 23 | .. _end: 24 | 25 | ***** 26 | end 27 | ***** 28 | 29 | As for the start option, you can pass a date or a string: 30 | 31 | .. code:: python 32 | 33 | open_dataset(dataset, end="2020-12-31") 34 | 35 | The following are equivalent ways of describing ``start`` or ``end``: 36 | 37 | - ``2020`` and ``"2020"`` 38 | - ``202306``, ``"202306"`` and ``"2023-06"`` 39 | - ``20200301``, ``"20200301"`` and ``"2020-03-01"`` 40 | 41 | Note that the ``start="2020"`` is equivalent to ``start="2020-01-01"`` 42 | while ``end="2020"`` is equivalent to ``end="2020-12-31"``. 43 | 44 | Note also how the ``frequency`` of the dataset will change how the 45 | ``end`` option is interpreted: 46 | 47 | - ``end="2020"`` with a ``frequency`` of one hour is equivalent to ``end="2020-12-31 23:00:00"`` 48 | - ``end="2020"`` with a ``frequency`` of 6 hours is equivalent to ``end="2020-12-31 18:00:00"`` 49 | 50 | .. _frequency: 51 | 52 | *********** 53 | frequency 54 | *********** 55 | 56 | You can change the frequency of the dataset by passing a string with: 57 | 58 | .. code:: python 59 | 60 | ds = open_dataset(dataset, frequency="6h") 61 | 62 | The new frequency must be a multiple of the original frequency. 63 | 64 | To artificially increase the frequency, you can use the 65 | ``interpolate_frequency`` option. This will create new dates in the 66 | dataset by linearly interpolating the data values between the original 67 | dates. 68 | 69 | .. 
code:: python 70 | 71 | ds = open_dataset(dataset, interpolate_frequency="10m") 72 | -------------------------------------------------------------------------------- /docs/datasets/yaml/Makefile: -------------------------------------------------------------------------------- 1 | YAML := $(wildcard building*.yaml) 2 | 3 | TARGETS := $(YAML:.yaml=.txt) 4 | 5 | all: $(TARGETS) 6 | 7 | %.zarr: %.yaml 8 | anemoi-datasets create $< $@ --overwrite 9 | 10 | %.txt: %.zarr 11 | ln -sf $< dataset.zarr 12 | anemoi-datasets inspect dataset.zarr > $@ 13 | rm -f dataset.zarr 14 | 15 | 16 | clean:: 17 | rm -fr *.zarr 18 | 19 | .SUFFIXES: .zarr .yaml .txt 20 | .PRECIOUS: %.zarr 21 | -------------------------------------------------------------------------------- /docs/datasets/yaml/building1.txt: -------------------------------------------------------------------------------- 1 | ┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈ 2 | 📦 Path : dataset.zarr 3 | 🔢 Format version: 0.20.0 4 | 5 | 📅 Start : 2024-01-01 00:00 6 | 📅 End : 2024-01-01 18:00 7 | ⏰ Frequency : 6h 8 | 🚫 Missing : 0 9 | 🌎 Resolution : 1.0 10 | 🌎 Field shape: [181, 360] 11 | 12 | 📐 Shape : 4 × 5 × 1 × 65,160 (5 MiB) 13 | 💽 Size : 2.7 MiB (2,858,121) 14 | 📁 Files : 34 15 | 16 | Index │ Variable │ Min │ Max │ Mean │ Stdev 17 | ──────┼──────────┼──────────┼─────────┼───────────┼───────── 18 | 0 │ 10u │ -24.3116 │ 25.79 │ 0.0595319 │ 5.5856 19 | 1 │ 10v │ -21.2397 │ 21.851 │ -0.270924 │ 4.23947 20 | 2 │ 2t │ 214.979 │ 319.111 │ 277.775 │ 19.9318 21 | 3 │ lsm │ 0 │ 1 │ 0.335152 │ 0.464236 22 | 4 │ msl │ 95708.5 │ 104284 │ 100867 │ 1452.67 23 | ──────┴──────────┴──────────┴─────────┴───────────┴───────── 24 | 🔋 Dataset ready, last update 2 hours ago. 25 | 📊 Statistics ready. 
26 | -------------------------------------------------------------------------------- /docs/datasets/yaml/building1.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2024-01-01T00:00:00Z 3 | end: 2024-01-01T18:00:00Z 4 | frequency: 6h 5 | 6 | input: 7 | mars: 8 | param: [2t, msl, 10u, 10v, lsm] 9 | levtype: sfc 10 | grid: [1, 1] 11 | -------------------------------------------------------------------------------- /docs/datasets/yaml/building2.txt: -------------------------------------------------------------------------------- 1 | ┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈ 2 | 📦 Path : dataset.zarr 3 | 🔢 Format version: 0.20.0 4 | 5 | 📅 Start : 2024-01-01 00:00 6 | 📅 End : 2024-01-01 18:00 7 | ⏰ Frequency : 6h 8 | 🚫 Missing : 0 9 | 🌎 Resolution : 1.0 10 | 🌎 Field shape: [181, 360] 11 | 12 | 📐 Shape : 4 × 11 × 1 × 65,160 (10.9 MiB) 13 | 💽 Size : 5.7 MiB (5,995,688) 14 | 📁 Files : 34 15 | 16 | Index │ Variable │ Min │ Max │ Mean │ Stdev 17 | ──────┼──────────┼─────────────┼─────────────┼─────────────┼──────────── 18 | 0 │ 10u │ -24.3116 │ 25.79 │ 0.0595319 │ 5.5856 19 | 1 │ 10v │ -21.2397 │ 21.851 │ -0.270924 │ 4.23947 20 | 2 │ 2t │ 214.979 │ 319.111 │ 277.775 │ 19.9318 21 | 3 │ lsm │ 0 │ 1 │ 0.335152 │ 0.464236 22 | 4 │ msl │ 95708.5 │ 104284 │ 100867 │ 1452.67 23 | 5 │ q_100 │ 8.95676e-07 │ 5.19827e-06 │ 2.78594e-06 │ 5.39734e-07 24 | 6 │ q_50 │ 1.89449e-06 │ 3.41429e-06 │ 3.00331e-06 │ 1.11219e-07 25 | 7 │ t_100 │ 186.33 │ 233.74 │ 209.958 │ 12.4899 26 | 8 │ t_50 │ 191.921 │ 241.239 │ 213.774 │ 12.3492 27 | 9 │ z_100 │ 146865 │ 163937 │ 157791 │ 4962.71 28 | 10 │ z_50 │ 186876 │ 204383 │ 199752 │ 4158.18 29 | ──────┴──────────┴─────────────┴─────────────┴─────────────┴──────────── 30 | 🔋 Dataset ready, last update 19 seconds ago. 31 | 📊 Statistics ready. 
32 | -------------------------------------------------------------------------------- /docs/datasets/yaml/building2.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2024-01-01T00:00:00Z 3 | end: 2024-01-01T18:00:00Z 4 | frequency: 6h 5 | 6 | input: 7 | join: 8 | - mars: 9 | param: [2t, msl, 10u, 10v, lsm] 10 | levtype: sfc 11 | grid: [1, 1] 12 | - mars: 13 | param: [q, t, z] 14 | levtype: pl 15 | level: [50, 100] 16 | grid: [1, 1] 17 | -------------------------------------------------------------------------------- /docs/datasets/yaml/building3.txt: -------------------------------------------------------------------------------- 1 | ┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈ 2 | 📦 Path : dataset.zarr 3 | 🔢 Format version: 0.20.0 4 | 5 | 📅 Start : 2024-01-01 00:00 6 | 📅 End : 2024-01-01 18:00 7 | ⏰ Frequency : 6h 8 | 🚫 Missing : 0 9 | 🌎 Resolution : 1.0 10 | 🌎 Field shape: [181, 360] 11 | 12 | 📐 Shape : 4 × 8 × 1 × 65,160 (8 MiB) 13 | 💽 Size : 3.1 MiB (3,283,650) 14 | 📁 Files : 34 15 | 16 | Index │ Variable │ Min │ Max │ Mean │ Stdev 17 | ──────┼──────────────┼─────────────┼──────────┼───────────┼───────── 18 | 0 │ 10u │ -24.3116 │ 25.79 │ 0.0595319 │ 5.5856 19 | 1 │ 10v │ -21.2397 │ 21.851 │ -0.270924 │ 4.23947 20 | 2 │ 2t │ 214.979 │ 319.111 │ 277.775 │ 19.9318 21 | 3 │ cos_latitude │ 6.12323e-17 │ 1 │ 0.633086 │ 0.310546 22 | 4 │ insolation │ 0 │ 0.999995 │ 0.231949 │ 0.299927 23 | 5 │ lsm │ 0 │ 1 │ 0.335152 │ 0.464236 24 | 6 │ msl │ 95708.5 │ 104284 │ 100867 │ 1452.67 25 | 7 │ sin_latitude │ -1 │ 1 │ 0 │ 0.709057 26 | ──────┴──────────────┴─────────────┴──────────┴───────────┴───────── 27 | 🔋 Dataset ready, last update 17 seconds ago. 28 | 📊 Statistics ready. 29 | -------------------------------------------------------------------------------- /docs/datasets/yaml/building3.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2024-01-01T00:00:00Z 3 | end: 2024-01-01T18:00:00Z 4 | frequency: 6h 5 | input: 6 | join: 7 | - mars: 8 | param: [2t, msl, 10u, 10v, lsm] 9 | levtype: sfc 10 | grid: [1, 1] 11 | - mars: 12 | param: [q, t, z] 13 | levtype: pl 14 | level: [50, 100] 15 | grid: [1, 1] 16 | - forcings: 17 | template: ${input.join.0.mars} 18 | param: 19 | - cos_latitude 20 | - sin_latitude 21 | - insolation 22 | -------------------------------------------------------------------------------- /docs/datasets/yaml/concat.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | concat: 3 | - dates: 4 | start: 2020-12-30 00:00:00 5 | end: 2021-01-01 12:00:00 6 | frequency: 12h 7 | 8 | source1: 9 | - args 10 | 11 | - dates: 12 | start: 2021-01-02 00:00:00 13 | end: 2021-01-03 12:00:00 14 | frequency: 12h 15 | 16 | source2: 17 | - args 18 | -------------------------------------------------------------------------------- /docs/datasets/yaml/hindcasts.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | hindcasts: 3 | levtype: sfc 4 | param: [2t, msl] 5 | grid: [0.25, 0.25] 6 | stream: enfh 7 | type: cf 8 | reference_year: 2022 9 | -------------------------------------------------------------------------------- /docs/datasets/yaml/input.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | join: 3 | - source1: 4 | key: date 5 | - source2 6 | - ... 
7 | -------------------------------------------------------------------------------- /docs/datasets/yaml/missing_dates.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2017-01-01 00:00:00 3 | end: 2017-01-31 23:00:00 4 | frequency: 1h 5 | 6 | missing: 7 | - 2017-01-02 00:00:00 8 | - 2017-01-03 00:00:00 9 | -------------------------------------------------------------------------------- /docs/datasets/yaml/nan.yaml: -------------------------------------------------------------------------------- 1 | statistics: 2 | allow_nans: [sst, ci] 3 | -------------------------------------------------------------------------------- /docs/datasets/yaml/pipe.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | pipe: 3 | - source 4 | - filter1 5 | - filter2 6 | - ... 7 | -------------------------------------------------------------------------------- /docs/dev/contributing.rst: -------------------------------------------------------------------------------- 1 | .. _dev-contributing: 2 | 3 | #################### 4 | General guidelines 5 | #################### 6 | 7 | Thank you for your interest in Anemoi Datasets! Please follow the 8 | :ref:`general Anemoi contributing guidelines 9 | `. 10 | 11 | These include general guidelines for contributions to Anemoi, 12 | instructions on setting up a development environment, and guidelines on 13 | collaboration on GitHub, writing documentation, testing, and code style. 14 | 15 | ************ 16 | Unit tests 17 | ************ 18 | 19 | Anemoi Datasets includes unit tests that can be executed locally using 20 | ``pytest``. For more information on testing, please refer to the 21 | :ref:`general Anemoi testing guidelines 22 | `. 23 | -------------------------------------------------------------------------------- /docs/howtos/create/03-constant-fields.rst: -------------------------------------------------------------------------------- 1 | .. 
_constant-data: 2 | 3 | ################# 4 | Constant fields 5 | ################# 6 | 7 | (Coming soon) 8 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/grib-flavour1.yaml: -------------------------------------------------------------------------------- 1 | - - levtype: sfc 2 | - levelist: null 3 | 4 | - - { discipline: 0, parameterCategory: 1, parameterNumber: 201 } 5 | - param: csf 6 | 7 | - - { discipline: 0, parameterCategory: 1, parameterNumber: 64 } 8 | - param: tcwv 9 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/grib-flavour2.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2023-01-01T00:00:00 3 | end: 2023-01-02T18:00:00 4 | frequency: 6h 5 | 6 | 7 | input: 8 | grib: 9 | path: /path/to/input.grib 10 | flavour: /path/to/flavour.yaml 11 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/grib-flavour3.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2023-01-01T00:00:00 3 | end: 2023-01-02T18:00:00 4 | frequency: 6h 5 | 6 | 7 | input: 8 | grib: 9 | path: /path/to/input.grib 10 | flavour: 11 | - - levtype: sfc 12 | - levelist: null 13 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/grib-flavour4.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2023-01-01T00:00:00 3 | end: 2023-01-02T18:00:00 4 | frequency: 6h 5 | 6 | flavour: &flavour 7 | - - levtype: sfc 8 | - levelist: null 9 | 10 | input: 11 | join: 12 | - grib: 13 | path: /path/to/input.grib 14 | flavour: *flavour 15 | param: [ z, t, u, v ] 16 | levelist: [ 1000, 850, 500 ] 17 | levtype: pl 18 | 19 | - grib: 20 | path: /path/to/input2.grib 21 | flavour: *flavour 22 | param: [ 2t, msl ] 23 | levtype: sfc 24 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/grib-recipe1.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2023-01-01T00:00:00 3 | end: 2023-01-02T18:00:00 4 | frequency: 6h 5 | 6 | 7 | input: 8 | grib: 9 | path: /path/to/input.grib 10 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/grib-recipe2.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2023-01-01T00:00:00 3 | end: 2023-01-02T18:00:00 4 | frequency: 6h 5 | 6 | 7 | input: 8 | grib: 9 | path: /path/to/data-{param}-{date:strftime(%Y%m%d%H)}.grib 10 | param: [2t, 10u, 10v] 11 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/grib-recipe3.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2023-01-01T00:00:00 3 | end: 2023-01-02T18:00:00 4 | frequency: 6h 5 | 6 | 7 | input: 8 | grib-index: 9 | index: /path/to/index.db 10 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/grib-recipe4.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2023-01-01T00:00:00 3 | end: 2023-01-02T18:00:00 4 | frequency: 6h 5 | 6 | 7 | input: 8 | grib-index: 9 | index: /path/to/index.db 10 | param: [2t, 10u, 10v] 11 | 
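12 | # Note (illustrative): when "param" is given, only the listed fields are 13 | # read from the pre-built index; other indexed GRIB messages are ignored.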
-------------------------------------------------------------------------------- /docs/howtos/create/yaml/grib-recipe5.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2023-01-01T00:00:00 3 | end: 2023-01-02T18:00:00 4 | frequency: 6h 5 | 6 | input: 7 | join: 8 | - grib: 9 | path: /path/to/input.grib 10 | param: [z, t, u, v] 11 | levelist: [1000, 850, 500] 12 | levtype: pl 13 | 14 | - grib: 15 | path: /path/to/input2.grib 16 | param: [2t, msl] 17 | levtype: sfc 18 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/netcdf1.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2023-01-01T00:00:00 3 | end: 2023-01-02T18:00:00 4 | frequency: 6h 5 | 6 | 7 | input: 8 | netcdf: 9 | path: /path/to/input.nc 10 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/opendap1.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2023-01-01T00:00:00 3 | end: 2023-01-02T18:00:00 4 | frequency: 6h 5 | 6 | input: 7 | opendap: 8 | url: https://www.example.com/path/to/input.nc 9 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/xarray-flavour1.yaml: -------------------------------------------------------------------------------- 1 | rules: 2 | latitude: 3 | name: grid_yt 4 | level: 5 | name: pfull 6 | longitude: 7 | name: grid_xt 8 | time: 9 | name: time 10 | 11 | levtype: pl 12 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/xarray-patch1.yaml: -------------------------------------------------------------------------------- 1 | 2 | dates: 3 | start: 2023-01-01T00:00:00 4 | end: 2023-01-02T18:00:00 5 | frequency: 6h 6 | 7 | input: 8 | netcdf: 9 | path: /path/to/input.nc 10 | patch: 11 | coordinates: [ nav_lat, nav_lon ] 12 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/xarray-patch2.yaml: -------------------------------------------------------------------------------- 1 | attributes: 2 | variable: 3 | attribute1: value1 4 | attribute2: value2 5 | 6 | coordinates: [ x, y ] 7 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/zarr1.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2023-01-01T00:00:00 3 | end: 2023-01-02T18:00:00 4 | frequency: 6h 5 | 6 | input: 7 | xarray-zarr: 8 | url: https://www.example.com/path/to/input.zarr 9 | -------------------------------------------------------------------------------- /docs/howtos/introduction.rst: -------------------------------------------------------------------------------- 1 | ######### 2 | How-Tos 3 | ######### 4 | 5 | How-Tos are a collection of guides that help you to use anemoi-datasets 6 | in different ways. They are not exhaustive, but they should give you a 7 | good starting point. 8 | 9 | ************************** 10 | Creating anemoi-datasets 11 | ************************** 12 | 13 | - :ref:`create-grib-data` 14 | - :ref:`create-cf-data` 15 | - :ref:`constant-data` 16 | 17 | .. 
toctree:: 18 |    :maxdepth: 1 19 |    :glob: 20 |    :hidden: 21 |    :caption: Creating anemoi-datasets 22 | 23 |    create/* 24 | 25 | *********************** 26 |  Using anemoi-datasets 27 | *********************** 28 | 29 | - :ref:`interpolate-step` 30 | - :ref:`complement-step` 31 | 32 | .. toctree:: 33 |    :maxdepth: 1 34 |    :glob: 35 |    :hidden: 36 |    :caption: Using anemoi-datasets 37 | 38 |    usage/* 39 | -------------------------------------------------------------------------------- /docs/howtos/usage/01-interpolate-step-dataset-combination.rst: -------------------------------------------------------------------------------- 1 | .. _interpolate-step: 2 | 3 | ###################################################### 4 |  Combine datasets with different timestep frequencies 5 | ###################################################### 6 | 7 | Here we explain how to combine two existing datasets with different 8 | timestep frequencies. In this example, we consider two datasets: 9 | ``dataset-3h`` with an inherent temporal frequency of 3h and 10 | ``dataset-24h`` with an inherent temporal frequency of 24h. The goal is 11 | to combine the two datasets into a dataset with a temporal frequency of 12 | either 3h or 24h. 13 | 14 | ********************************* 15 |  Interpolate to higher frequency 16 | ********************************* 17 | 18 | In this case, we will use the ``interpolate_frequency`` option to bring 19 | ``dataset-24h`` to the 3h timestep of ``dataset-3h``. 20 | 21 | .. literalinclude:: code/interpolate1.py 22 |    :language: python 23 | 24 | or in the config file: 25 | 26 | .. literalinclude:: yaml/interpolate1.yaml 27 |    :language: yaml 28 | 29 | The ``adjust`` option handles the case where the start or end dates of 30 | the two datasets do not match exactly. 31 | 32 | *************************** 33 |  Sample to lower frequency 34 | *************************** 35 | 36 | This case is straightforward; we can just specify the required 24h 37 | frequency for ``dataset-3h``. 38 | 39 | .. literalinclude:: code/interpolate2.py 40 |    :language: python 41 | 42 | or for the config file: 43 | 44 | .. literalinclude:: yaml/interpolate2.yaml 45 |    :language: yaml 46 | -------------------------------------------------------------------------------- /docs/howtos/usage/02-coutout-complement-combination.rst: -------------------------------------------------------------------------------- 1 | .. _complement-step: 2 | 3 | ############################################## 4 |  Combining cutout with complementing datasets 5 | ############################################## 6 | 7 | Here we explain how to combine a cutout with a complementing dataset. 8 | 9 | **************************** 10 |  Interpolate to cutout grid 11 | **************************** 12 | 13 | In this case, we will use a ``lam-dataset`` on a different grid that 14 | contains just one variable (``tp`` in the example below) and a 15 | ``global-dataset``. We want to interpolate the ``global-dataset`` to the 16 | grid that results from the cutout operation. 17 | 18 | .. literalinclude:: code/cutout-complement1.py 19 |    :language: python 20 | 21 | or for the config file: 22 | 23 | .. literalinclude:: yaml/cutout-complement1.yaml 24 |    :language: yaml 25 | 26 | The ``adjust`` option handles the case where the start or end dates of 27 | the two datasets do not match exactly.
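28 | 29 | Once the combined dataset is open, a quick sanity check is to inspect it. This is a minimal sketch, reusing the ``ds`` object from the Python example above: 30 | 31 | .. code:: python 32 | 33 |    print(ds.variables)  # variables of the cutout plus the complemented ones 34 |    print(ds.shape)  # (dates, variables, ensembles, grid points) 35 |    print(len(ds.latitudes))  # number of grid points of the combined grid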
36 | -------------------------------------------------------------------------------- /docs/howtos/usage/code/cutout-complement1.py: -------------------------------------------------------------------------------- 1 | from anemoi.datasets import open_dataset 2 | 3 | ds = open_dataset( 4 |     complement={ 5 |         "cutout": [ 6 |             "lam-dataset", 7 |             { 8 |                 "dataset": "global-dataset", 9 |                 "select": ["tp"], 10 |             }, 11 |         ], 12 |         "min_distance_km": 1, 13 |         "adjust": "dates", 14 |     }, 15 |     source="global-dataset", 16 |     interpolation="nearest", 17 | ) 18 | -------------------------------------------------------------------------------- /docs/howtos/usage/code/interpolate1.py: -------------------------------------------------------------------------------- 1 | from anemoi.datasets import open_dataset 2 | 3 | ds = open_dataset( 4 |     dataset={ 5 |         "join": [ 6 |             { 7 |                 "dataset": "dataset-3h", 8 |                 "frequency": "3h", 9 |             }, 10 |             { 11 |                 "dataset": "dataset-24h", 12 |                 "interpolate_frequency": "3h", 13 |             }, 14 |         ], 15 |         "adjust": "dates", 16 |     }, 17 |     start="2004-01-01", 18 |     end="2023-01-01", 19 | ) 20 | -------------------------------------------------------------------------------- /docs/howtos/usage/code/interpolate2.py: -------------------------------------------------------------------------------- 1 | from anemoi.datasets import open_dataset 2 | 3 | ds = open_dataset( 4 |     dataset={ 5 |         "join": [ 6 |             { 7 |                 "dataset": "dataset-3h", 8 |                 "frequency": "24h", 9 |             }, 10 |             { 11 |                 "dataset": "dataset-24h", 12 |                 "frequency": "24h", 13 |             }, 14 |         ], 15 |         "adjust": "dates", 16 |     }, 17 |     start="2004-01-01", 18 |     end="2023-01-01", 19 | ) 20 | -------------------------------------------------------------------------------- /docs/howtos/usage/yaml/cutout-complement1.yaml: -------------------------------------------------------------------------------- 1 | dataset: 2 |   complement: 3 |     dataset: 4 |       cutout: 5 |         - lam-dataset 6 |         - dataset: global-dataset 7 |           select: [ tp ] 8 |       min_distance_km: 1 9 |       adjust: dates 10 |   source: global-dataset 11 |   interpolation: nearest 12 | -------------------------------------------------------------------------------- /docs/howtos/usage/yaml/interpolate1.yaml: -------------------------------------------------------------------------------- 1 | dataset: 2 |   join: 3 |     - dataset: dataset-3h 4 |       frequency: 3h 5 |     - dataset: dataset-24h 6 |       interpolate_frequency: 3h 7 |   adjust: dates 8 | start: 2004-01-01 9 | end: 2023-01-01 10 | -------------------------------------------------------------------------------- /docs/howtos/usage/yaml/interpolate2.yaml: -------------------------------------------------------------------------------- 1 | dataset: 2 |   join: 3 |     - dataset: dataset-3h 4 |       frequency: 24h 5 |     - dataset: dataset-24h 6 |       frequency: 24h 7 | start: 2004-01-01 8 | end: 2023-01-01 9 | -------------------------------------------------------------------------------- /docs/installing.rst: -------------------------------------------------------------------------------- 1 | .. _installing: 2 | 3 | ############ 4 |  Installing 5 | ############ 6 | 7 | **************** 8 |  Python Version 9 | **************** 10 | 11 | - Python (>= 3.9) 12 | 13 | We require at least Python 3.9. 14 | 15 | ************** 16 |  Installation 17 | ************** 18 | 19 | Environments 20 | ============ 21 | 22 | We currently do not provide a conda build of anemoi-datasets, so the 23 | suggested installation is through Python virtual environments. 24 | 25 | For Linux, the process to make and use a venv is as follows: 26 | 27 | ..
code:: bash 28 | 29 |    python -m venv /path/to/my/venv 30 |    source /path/to/my/venv/bin/activate 31 | 32 | Instructions 33 | ============ 34 | 35 | To install the package, you can use the following command: 36 | 37 | .. code:: bash 38 | 39 |    python -m pip install anemoi-datasets 40 | 41 | If you are interested in creating datasets, you can install the package 42 | with the following command: 43 | 44 | .. code:: bash 45 | 46 |    python -m pip install "anemoi-datasets[create]" 47 | 48 | For an editable install of anemoi-datasets, you can use the following 49 | command. In this case, changes that you make to the anemoi-datasets code 50 | will be reflected in the installed package without having to reinstall 51 | it. 52 | 53 | .. code:: bash 54 | 55 |    python -m pip install -e . 56 | 57 | We also maintain other dependency sets for different subsets of 58 | functionality: 59 | 60 | .. code:: bash 61 | 62 |    python -m pip install "anemoi-datasets[docs]" # Install optional dependencies for generating docs 63 | 64 | .. literalinclude:: ../pyproject.toml 65 |    :language: toml 66 |    :start-at: optional-dependencies.all 67 |    :end-before: urls.Changelog 68 | 69 | ********************** 70 |  Development versions 71 | ********************** 72 | 73 | To install the most recent development version, install from GitHub: 74 | 75 | .. code:: bash 76 | 77 |    $ python -m pip install git+https://github.com/ecmwf/anemoi-datasets.git 78 | 79 | ********* 80 |  Testing 81 | ********* 82 | 83 | To run the test suite after installing anemoi-datasets, install (via 84 | PyPI) `pytest `__ and run ``pytest`` from the root 85 | of the anemoi-datasets repository. 86 | -------------------------------------------------------------------------------- /docs/modules/dataset.rst: -------------------------------------------------------------------------------- 1 | .. _dataset-autodoc: 2 | 3 | ######### 4 |  Dataset 5 | ######### 6 | 7 | .. automodule:: anemoi.datasets.data.dataset 8 |    :members: 9 |    :no-undoc-members: 10 |    :show-inheritance: 11 | -------------------------------------------------------------------------------- /docs/modules/filters.rst: -------------------------------------------------------------------------------- 1 | ######### 2 |  Filters 3 | ######### 4 | 5 | .. include:: ../_api/anemoi.datasets.create.filters.rst 6 | -------------------------------------------------------------------------------- /docs/modules/sources.rst: -------------------------------------------------------------------------------- 1 | ######### 2 |  Sources 3 | ######### 4 | 5 | .. include:: ../_api/anemoi.datasets.create.sources.rst 6 | -------------------------------------------------------------------------------- /docs/pptx/images.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/pptx/images.pptx -------------------------------------------------------------------------------- /docs/scripts/api_build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Build the API reference pages with sphinx-apidoc. 3 | set -e 4 | 5 | script_dir=$(dirname "${BASH_SOURCE[0]}") 6 | docs_dir="$script_dir/.."
7 | source_dir="$script_dir/../../src/" 8 | 9 | 10 | trap 'rm -f $source_dir/anemoi/__init__.py' EXIT 11 | 12 | touch "$source_dir/anemoi/__init__.py" 13 | sphinx-apidoc -M -f -o "$docs_dir/_api" "$source_dir/anemoi" -t "$docs_dir/_templates/apidoc" 14 | -------------------------------------------------------------------------------- /docs/usage/yaml/aifs-ea-an-oper-0001-mars-o48-2020-2021-6h-v1.yaml: -------------------------------------------------------------------------------- 1 | name: era5-o48-2020-2021-6h-v1 2 | 3 | description: Low resolution reduced dataset for documentation purposes 4 | 5 | attribution: ECMWF/C3S 6 | 7 | licence: CC-BY-4.0 8 | 9 | dates: 10 | start: '2020-01-01T00:00:00' 11 | end: '2021-12-31T23:00:00' 12 | frequency: 6h 13 | 14 | input: 15 | join: 16 | - mars: 17 | use_cdsapi_dataset: "reanalysis-era5-complete" 18 | class: ea 19 | expver: '0001' 20 | grid: o48 21 | levtype: sfc 22 | param: 23 | - 10u 24 | - 10v 25 | - 2d 26 | - 2t 27 | - lsm 28 | - msl 29 | - sdor 30 | - skt 31 | - slor 32 | - sp 33 | - tcw 34 | - z 35 | - mars: 36 | use_cdsapi_dataset: "reanalysis-era5-complete" 37 | class: ea 38 | expver: '0001' 39 | grid: o48 40 | level: 41 | - 250 42 | - 500 43 | - 850 44 | - 1000 45 | levtype: pl 46 | param: 47 | - u 48 | - v 49 | - q 50 | - t 51 | - z 52 | - accumulations: 53 | use_cdsapi_dataset: "reanalysis-era5-complete" 54 | accumulation_period: 6 55 | class: ea 56 | expver: '0001' 57 | grid: o48 58 | param: 59 | - cp 60 | - tp 61 | - constants: 62 | param: 63 | - cos_latitude 64 | - cos_longitude 65 | - sin_latitude 66 | - sin_longitude 67 | - cos_julian_day 68 | - cos_local_time 69 | - sin_julian_day 70 | - sin_local_time 71 | - insolation 72 | template: ${input.join.0.mars} 73 | -------------------------------------------------------------------------------- /docs/using/code/trimedge1_.py: -------------------------------------------------------------------------------- 1 | ds = open_dataset(dataset1, trim_edge=(3, 10, 4, 2)) 2 | -------------------------------------------------------------------------------- /src/anemoi/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 
9 | 10 | from typing import List 11 | 12 | from .data import MissingDateError 13 | from .data import add_dataset_path 14 | from .data import add_named_dataset 15 | from .data import list_dataset_names 16 | from .data import open_dataset 17 | 18 | try: 19 | # NOTE: the `_version.py` file must not be present in the git repository 20 | # as it is generated by setuptools at install time 21 | from ._version import __version__ # type: ignore 22 | except ImportError: # pragma: no cover 23 | # Local copy or not installed with setuptools 24 | __version__ = "999" 25 | 26 | __all__: List[str] = [ 27 | "add_dataset_path", 28 | "add_named_dataset", 29 | "list_dataset_names", 30 | "MissingDateError", 31 | "open_dataset", 32 | "__version__", 33 | ] 34 | -------------------------------------------------------------------------------- /src/anemoi/datasets/__main__.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | from typing import Any 11 | 12 | from anemoi.utils.cli import cli_main 13 | from anemoi.utils.cli import make_parser 14 | 15 | from . import __version__ 16 | from .commands import COMMANDS 17 | 18 | 19 | # For read-the-docs 20 | def create_parser() -> Any: 21 | """Create the argument parser for the CLI. 22 | 23 | Returns 24 | ------- 25 | Any 26 | The argument parser instance. 27 | """ 28 | return make_parser(__doc__, COMMANDS) 29 | 30 | 31 | def main() -> None: 32 | """The main entry point for the CLI application.""" 33 | cli_main(__version__, __doc__, COMMANDS) 34 | 35 | 36 | if __name__ == "__main__": 37 | main() 38 | -------------------------------------------------------------------------------- /src/anemoi/datasets/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import os 11 | 12 | from anemoi.utils.cli import Command 13 | from anemoi.utils.cli import Failed 14 | from anemoi.utils.cli import register_commands 15 | 16 | __all__ = ["Command"] 17 | 18 | COMMANDS = register_commands( 19 | os.path.dirname(__file__), 20 | __name__, 21 | lambda x: x.command(), 22 | lambda name, error: Failed(name, error), 23 | ) 24 | -------------------------------------------------------------------------------- /src/anemoi/datasets/commands/cleanup.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import logging 11 | import time 12 | from typing import Any 13 | 14 | from anemoi.utils.humanize import seconds_to_human 15 | 16 | from anemoi.datasets.commands.create import task 17 | 18 | from . import Command 19 | 20 | LOG = logging.getLogger(__name__) 21 | 22 | 23 | class Cleanup(Command): 24 | """Create a dataset, step by step.""" 25 | 26 | internal = True 27 | timestamp = True 28 | 29 | def add_arguments(self, subparser: Any) -> None: 30 | """Add command line arguments to the parser. 31 | 32 | Parameters 33 | ---------- 34 | subparser : Any 35 | The argument parser. 36 | """ 37 | subparser.add_argument("path", help="Path to store the created data.") 38 | subparser.add_argument( 39 | "--delta", 40 | help="Compute statistics tendencies on a given time delta, if possible. Must be a multiple of the frequency.", 41 | nargs="+", 42 | ) 43 | 44 | def run(self, args: Any) -> None: 45 | """Execute the cleanup command. 46 | 47 | Parameters 48 | ---------- 49 | args : Any 50 | The command line arguments. 51 | """ 52 | options = vars(args) 53 | options.pop("command") 54 | now = time.time() 55 | step = self.__class__.__name__.lower() 56 | 57 | if "version" in options: 58 | options.pop("version") 59 | 60 | if "debug" in options: 61 | options.pop("debug") 62 | 63 | task(step, options) 64 | 65 | LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}") 66 | 67 | 68 | command = Cleanup 69 | -------------------------------------------------------------------------------- /src/anemoi/datasets/commands/finalise-additions.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import logging 11 | import time 12 | from typing import Any 13 | 14 | from anemoi.utils.humanize import seconds_to_human 15 | 16 | from anemoi.datasets.commands.create import task 17 | 18 | from . import Command 19 | 20 | LOG = logging.getLogger(__name__) 21 | 22 | 23 | class FinaliseAdditions(Command): 24 | """Create a dataset, step by step.""" 25 | 26 | internal = True 27 | timestamp = True 28 | 29 | def add_arguments(self, command_parser: Any) -> None: 30 | """Add command line arguments to the parser. 31 | 32 | Parameters 33 | ---------- 34 | command_parser : Any 35 | The argument parser instance to which arguments will be added. 36 | """ 37 | command_parser.add_argument( 38 | "--delta", 39 | help="Compute statistics tendencies on a given time delta, if possible. Must be a multiple of the frequency.", 40 | nargs="+", 41 | ) 42 | 43 | command_parser.add_argument("path", help="Path to store the created data.") 44 | command_parser.add_argument("--trace", action="store_true") 45 | 46 | def run(self, args: Any) -> None: 47 | """Execute the command with the given arguments. 48 | 49 | Parameters 50 | ---------- 51 | args : Any 52 | The arguments passed to the command. 
53 | """ 54 | options = vars(args) 55 | options.pop("command") 56 | step = "finalise-additions" 57 | now = time.time() 58 | 59 | if "version" in options: 60 | options.pop("version") 61 | 62 | if "debug" in options: 63 | options.pop("debug") 64 | 65 | task(step, options) 66 | 67 | LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}") 68 | 69 | 70 | command = FinaliseAdditions 71 | -------------------------------------------------------------------------------- /src/anemoi/datasets/commands/finalise.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import logging 11 | import time 12 | from typing import Any 13 | 14 | from anemoi.utils.humanize import seconds_to_human 15 | 16 | from anemoi.datasets.commands.create import task 17 | 18 | from . import Command 19 | 20 | LOG = logging.getLogger(__name__) 21 | 22 | 23 | class Finalise(Command): 24 | """Create a dataset, step by step.""" 25 | 26 | internal = True 27 | timestamp = True 28 | 29 | def add_arguments(self, command_parser: Any) -> None: 30 | """Add arguments to the command parser. 31 | 32 | Parameters 33 | ---------- 34 | command_parser : Any 35 | The command parser to which arguments will be added. 36 | """ 37 | command_parser.add_argument("path", help="Path to store the created data.") 38 | command_parser.add_argument("--trace", action="store_true") 39 | 40 | def run(self, args: Any) -> None: 41 | """Execute the finalise command. 42 | 43 | Parameters 44 | ---------- 45 | args : Any 46 | The arguments passed to the command. 47 | """ 48 | options = vars(args) 49 | options.pop("command") 50 | now = time.time() 51 | step = "finalise" 52 | 53 | if "version" in options: 54 | options.pop("version") 55 | 56 | if "debug" in options: 57 | options.pop("debug") 58 | 59 | task(step, options) 60 | 61 | LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}") 62 | 63 | 64 | command = Finalise 65 | -------------------------------------------------------------------------------- /src/anemoi/datasets/commands/init-additions.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import logging 11 | import time 12 | from typing import Any 13 | 14 | from anemoi.utils.humanize import seconds_to_human 15 | 16 | from anemoi.datasets.commands.create import task 17 | 18 | from . import Command 19 | 20 | LOG = logging.getLogger(__name__) 21 | 22 | 23 | class InitAdditions(Command): 24 | """Create a dataset, step by step.""" 25 | 26 | internal = True 27 | timestamp = True 28 | 29 | def add_arguments(self, command_parser: Any) -> None: 30 | """Add command line arguments to the parser. 
31 | 32 | Parameters 33 | ---------- 34 | command_parser : Any 35 | The argument parser instance. 36 | """ 37 | command_parser.add_argument( 38 | "--delta", 39 | help="Compute statistics tendencies on a given time delta, if possible. Must be a multiple of the frequency.", 40 | nargs="+", 41 | ) 42 | 43 | command_parser.add_argument("path", help="Path to store the created data.") 44 | command_parser.add_argument("--trace", action="store_true") 45 | 46 | def run(self, args: Any) -> None: 47 | """Execute the command with the given arguments. 48 | 49 | Parameters 50 | ---------- 51 | args : Any 52 | The command line arguments. 53 | """ 54 | options = vars(args) 55 | options.pop("command") 56 | step = "init-additions" 57 | now = time.time() 58 | 59 | if "version" in options: 60 | options.pop("version") 61 | 62 | if "debug" in options: 63 | options.pop("debug") 64 | 65 | task(step, options) 66 | 67 | LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}") 68 | 69 | 70 | command = InitAdditions 71 | -------------------------------------------------------------------------------- /src/anemoi/datasets/commands/load-additions.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import logging 11 | import time 12 | from typing import Any 13 | 14 | from anemoi.utils.humanize import seconds_to_human 15 | 16 | from anemoi.datasets.commands.create import task 17 | 18 | from . import Command 19 | 20 | LOG = logging.getLogger(__name__) 21 | 22 | 23 | class LoadAdditions(Command): 24 | """Create a dataset, step by step.""" 25 | 26 | internal = True 27 | timestamp = True 28 | 29 | def add_arguments(self, subparser: Any) -> None: 30 | """Add command line arguments to the parser. 31 | 32 | Parameters 33 | ---------- 34 | subparser : Any 35 | The argument parser. 36 | """ 37 | subparser.add_argument("--parts", nargs="+", help="Only load the specified parts of the dataset.") 38 | subparser.add_argument( 39 | "--delta", 40 | help="Compute statistics tendencies on a given time delta, if possible. Must be a multiple of the frequency.", 41 | nargs="+", 42 | ) 43 | 44 | subparser.add_argument("path", help="Path to store the created data.") 45 | subparser.add_argument("--trace", action="store_true") 46 | 47 | def run(self, args: Any) -> None: 48 | """Execute the command with the given arguments. 49 | 50 | Parameters 51 | ---------- 52 | args : Any 53 | The command line arguments. 54 | """ 55 | options = vars(args) 56 | options.pop("command") 57 | now = time.time() 58 | step = "load-additions" 59 | 60 | if "version" in options: 61 | options.pop("version") 62 | 63 | if "debug" in options: 64 | options.pop("debug") 65 | 66 | task(step, options) 67 | 68 | LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}") 69 | 70 | 71 | command = LoadAdditions 72 | -------------------------------------------------------------------------------- /src/anemoi/datasets/commands/load.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 
2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import logging 11 | import time 12 | from typing import Any 13 | 14 | from anemoi.utils.humanize import seconds_to_human 15 | 16 | from anemoi.datasets.commands.create import task 17 | 18 | from . import Command 19 | 20 | LOG = logging.getLogger(__name__) 21 | 22 | 23 | class Load(Command): 24 | """Create a dataset, step by step.""" 25 | 26 | internal = True 27 | timestamp = True 28 | 29 | def add_arguments(self, subparser: Any) -> None: 30 | """Add arguments to the command parser. 31 | 32 | Parameters 33 | ---------- 34 | subparser : Any 35 | The command parser. 36 | """ 37 | subparser.add_argument("--parts", nargs="+", help="Only load the specified parts of the dataset.") 38 | # subparser.add_argument( 39 | # "--delta", 40 | # help="Compute statistics tendencies on a given time delta, if possible. Must be a multiple of the frequency.", 41 | # ) 42 | 43 | subparser.add_argument("path", help="Path to store the created data.") 44 | subparser.add_argument("--cache", help="Location to store the downloaded data.", metavar="DIR") 45 | subparser.add_argument("--trace", action="store_true") 46 | 47 | def run(self, args: Any) -> None: 48 | """Run the command. 49 | 50 | Parameters 51 | ---------- 52 | args : Any 53 | The command arguments. 54 | """ 55 | options = vars(args) 56 | options.pop("command") 57 | now = time.time() 58 | step = "load" 59 | 60 | if "version" in options: 61 | options.pop("version") 62 | 63 | if "debug" in options: 64 | options.pop("debug") 65 | 66 | task(step, options) 67 | 68 | LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}") 69 | 70 | 71 | command = Load 72 | -------------------------------------------------------------------------------- /src/anemoi/datasets/commands/patch.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import logging 11 | import time 12 | from typing import Any 13 | 14 | from anemoi.utils.humanize import seconds_to_human 15 | 16 | from anemoi.datasets.commands.create import task 17 | 18 | from . import Command 19 | 20 | LOG = logging.getLogger(__name__) 21 | 22 | 23 | class Patch(Command): 24 | """Create a dataset, step by step.""" 25 | 26 | internal = True 27 | timestamp = True 28 | 29 | def add_arguments(self, parser: Any) -> None: 30 | """Add command-line arguments to the parser. 31 | 32 | Parameters 33 | ---------- 34 | parser : Any 35 | The argument parser instance. 36 | """ 37 | parser.add_argument("path", help="Path to store the created data.") 38 | 39 | def run(self, args: Any) -> None: 40 | """Execute the patch command. 41 | 42 | Parameters 43 | ---------- 44 | args : Any 45 | The command-line arguments. 
46 | """ 47 | options = vars(args) 48 | options.pop("command") 49 | now = time.time() 50 | step = self.__class__.__name__.lower() 51 | 52 | if "version" in options: 53 | options.pop("version") 54 | 55 | if "debug" in options: 56 | options.pop("debug") 57 | 58 | task(step, options) 59 | 60 | LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}") 61 | 62 | 63 | command = Patch 64 | -------------------------------------------------------------------------------- /src/anemoi/datasets/commands/publish.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import logging 11 | from typing import Any 12 | 13 | from . import Command 14 | 15 | LOG = logging.getLogger(__name__) 16 | 17 | 18 | class Publish(Command): 19 | """Publish a dataset.""" 20 | 21 | # This is a command that is used to publish a dataset. 22 | # it is a class, inheriting from Command. 23 | 24 | internal = True 25 | timestamp = True 26 | 27 | def add_arguments(self, parser: Any) -> None: 28 | """Add arguments to the command parser. 29 | 30 | Parameters 31 | ---------- 32 | parser : Any 33 | The command parser to which arguments are added. 34 | """ 35 | parser.add_argument("path", help="Path of the dataset to publish.") 36 | 37 | def run(self, args: Any) -> None: 38 | """Execute the publish command. 39 | 40 | Parameters 41 | ---------- 42 | args : Any 43 | The arguments passed to the command. 44 | """ 45 | try: 46 | from anemoi.registry import publish_dataset 47 | except ImportError: 48 | LOG.error("anemoi-registry is not installed. Please install it to use this command.") 49 | return 50 | 51 | publish_dataset(args.path) 52 | 53 | 54 | command = Publish 55 | -------------------------------------------------------------------------------- /src/anemoi/datasets/compute/__init__.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/filter.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2025- Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 
9 | 10 | from abc import ABC 11 | from abc import abstractmethod 12 | from typing import Any 13 | 14 | import earthkit.data as ekd 15 | 16 | 17 | class Filter(ABC): 18 | """A base class for filters.""" 19 | 20 | def __init__(self, context: Any, *args: Any, **kwargs: Any) -> None: 21 | """Initialise the filter. 22 | 23 | Parameters 24 | ---------- 25 | context : Any 26 | The context in which the filter is created. 27 | *args : tuple 28 | Positional arguments. 29 | **kwargs : dict 30 | Keyword arguments. 31 | """ 32 | 33 | self.context = context 34 | 35 | @abstractmethod 36 | def execute(self, data: ekd.FieldList) -> ekd.FieldList: 37 | """Execute the filter. 38 | 39 | Parameters 40 | ---------- 41 | data : ekd.FieldList 42 | The input data. 43 | 44 | Returns 45 | ------- 46 | ekd.FieldList 47 | The output data. 48 | """ 49 | 50 | pass 51 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/filters/__init__.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | # 10 | 11 | from typing import Any 12 | 13 | from anemoi.utils.registry import Registry 14 | 15 | filter_registry = Registry(__name__) 16 | 17 | 18 | def create_filter(context: Any, config: Any) -> Any: 19 | """Create a filter based on the provided configuration. 20 | 21 | Parameters 22 | ---------- 23 | context : Any 24 | The context in which the filter is created. 25 | config : Any 26 | The configuration for the filter. 27 | 28 | Returns 29 | ------- 30 | Any 31 | The created filter. 32 | """ 33 | return filter_registry.from_config(config, context) 34 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/filters/empty.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | 11 | from typing import Any 12 | 13 | import earthkit.data as ekd 14 | from anemoi.transform.fields import new_empty_fieldlist 15 | 16 | from .legacy import legacy_filter 17 | 18 | 19 | @legacy_filter(__file__) 20 | def execute(context: Any, input: ekd.FieldList, **kwargs: Any) -> ekd.FieldList: 21 | """Create a pipeline that returns an empty result. 22 | 23 | Parameters 24 | ---------- 25 | context : Any 26 | The context in which the function is executed. 27 | input : List[Any] 28 | List of input fields. 29 | **kwargs : Any 30 | Additional keyword arguments. 31 | 32 | Returns 33 | ------- 34 | Any 35 | An empty result. 
36 | """ 37 | return new_empty_fieldlist() 38 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/filters/noop.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | from typing import Any 11 | 12 | import earthkit.data as ekd 13 | 14 | from .legacy import legacy_filter 15 | 16 | 17 | @legacy_filter(__file__) 18 | def execute(context: Any, input: ekd.FieldList, *args: Any, **kwargs: Any) -> ekd.FieldList: 19 | """No operation filter that returns the input as is. 20 | 21 | Parameters 22 | ---------- 23 | context : Any 24 | The context in which the function is executed. 25 | input : ekd.FieldList 26 | List of input fields. 27 | *args : Any 28 | Additional arguments. 29 | **kwargs : Any 30 | Additional keyword arguments. 31 | 32 | Returns 33 | ------- 34 | List[Any] 35 | The input list of fields. 36 | """ 37 | return input 38 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/filters/orog_to_z.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | from collections import defaultdict 11 | from typing import Any 12 | from typing import Dict 13 | 14 | import earthkit.data as ekd 15 | from anemoi.transform.fields import new_field_from_numpy 16 | from anemoi.transform.fields import new_fieldlist_from_list 17 | 18 | from .legacy import legacy_filter 19 | 20 | 21 | @legacy_filter(__file__) 22 | def execute(context: Any, input: ekd.FieldList, orog: str, z: str = "z") -> ekd.FieldList: 23 | """Convert orography [m] to z (geopotential height). 24 | 25 | Parameters 26 | ---------- 27 | context : Any 28 | The context in which the function is executed. 29 | input : FieldList 30 | List of input fields. 31 | orog : str 32 | Orography parameter. 33 | z : str, optional 34 | Geopotential height parameter. Defaults to "z". 35 | 36 | Returns 37 | ------- 38 | FieldList 39 | List of fields with geopotential height. 
40 | """ 41 | result = [] 42 | processed_fields: Dict[tuple, Dict[str, Any]] = defaultdict(dict) 43 | 44 | for f in input: 45 | key = f.metadata(namespace="mars") 46 | param = key.pop("param") 47 | if param == orog: 48 | key = tuple(key.items()) 49 | 50 | if param in processed_fields[key]: 51 | raise ValueError(f"Duplicate field {param} for {key}") 52 | 53 | output = f.to_numpy(flatten=True) * 9.80665 54 | result.append(new_field_from_numpy(f, output, param=z)) 55 | else: 56 | result.append(f) 57 | 58 | return new_fieldlist_from_list(result) 59 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/filters/sum.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | from collections import defaultdict 11 | from typing import Any 12 | from typing import Dict 13 | from typing import Hashable 14 | from typing import List 15 | from typing import Tuple 16 | 17 | import earthkit.data as ekd 18 | from anemoi.transform.fields import new_field_from_numpy 19 | from anemoi.transform.fields import new_fieldlist_from_list 20 | 21 | from .legacy import legacy_filter 22 | 23 | 24 | @legacy_filter(__file__) 25 | def execute(context: Any, input: ekd.FieldList, params: List[str], output: str) -> ekd.FieldList: 26 | """Computes the sum over a set of variables. 27 | 28 | Args: 29 | context (Any): The execution context. 30 | input (List[Any]): The list of input fields. 31 | params (List[str]): The list of parameters to sum over. 32 | output (str): The name for the output field. 33 | 34 | Returns: 35 | ekd.FieldList: The resulting FieldArray with summed fields. 36 | """ 37 | result = [] 38 | 39 | needed_fields: Dict[Tuple[Hashable, ...], Dict[str, ekd.Field]] = defaultdict(dict) 40 | 41 | for f in input: 42 | key = f.metadata(namespace="mars") 43 | param = key.pop("param") 44 | if param in params: 45 | key = tuple(key.items()) 46 | 47 | if param in needed_fields[key]: 48 | raise ValueError(f"Duplicate field {param} for {key}") 49 | 50 | needed_fields[key][param] = f 51 | else: 52 | result.append(f) 53 | 54 | for keys, values in needed_fields.items(): 55 | 56 | if len(values) != len(params): 57 | raise ValueError("Missing fields") 58 | 59 | s = None 60 | for k, v in values.items(): 61 | c = v.to_numpy(flatten=True) 62 | if s is None: 63 | s = c 64 | else: 65 | s += c 66 | result.append(new_field_from_numpy(values[list(values.keys())[0]], s, param=output)) 67 | 68 | return new_fieldlist_from_list(result) 69 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/filters/transform.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2025 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | from typing import Any 11 | from typing import Dict 12 | 13 | import earthkit.data as ekd 14 | 15 | from ..filter import Filter 16 | 17 | 18 | class TransformFilter(Filter): 19 | """Calls filters from anemoi.transform.filters 20 | 21 | Parameters 22 | ---------- 23 | context : Any 24 | The context in which the filter is created. 25 | name : str 26 | The name of the filter. 27 | config : Dict[str, Any] 28 | The configuration for the filter. 29 | """ 30 | 31 | def __init__(self, context: Any, name: str, config: Dict[str, Any]) -> None: 32 | 33 | from anemoi.transform.filters import create_filter 34 | 35 | self.name = name 36 | self.transform_filter = create_filter(context, config) 37 | 38 | def execute(self, input: ekd.FieldList) -> ekd.FieldList: 39 | """Execute the transformation filter. 40 | 41 | Parameters 42 | ---------- 43 | input : ekd.FieldList 44 | The input data to be transformed. 45 | 46 | Returns 47 | ------- 48 | ekd.FieldList 49 | The transformed data. 50 | """ 51 | return self.transform_filter.forward(input) 52 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/input/empty.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import logging 11 | from functools import cached_property 12 | from typing import List 13 | 14 | from earthkit.data import FieldList 15 | 16 | from .misc import assert_fieldlist 17 | from .result import Result 18 | from .trace import trace_datasource 19 | 20 | LOG = logging.getLogger(__name__) 21 | 22 | 23 | class EmptyResult(Result): 24 | """Class to represent an empty result in the dataset creation process.""" 25 | 26 | empty = True 27 | 28 | def __init__(self, context: object, action_path: list, dates: object) -> None: 29 | """Initializes an EmptyResult instance. 30 | 31 | Parameters 32 | ---------- 33 | context : object 34 | The context object. 35 | action_path : list 36 | The action path. 37 | dates : object 38 | The dates object. 39 | """ 40 | super().__init__(context, action_path + ["empty"], dates) 41 | 42 | @cached_property 43 | @assert_fieldlist 44 | @trace_datasource 45 | def datasource(self) -> FieldList: 46 | """Returns an empty datasource.""" 47 | from earthkit.data import from_source 48 | 49 | return from_source("empty") 50 | 51 | @property 52 | def variables(self) -> List[str]: 53 | """Returns an empty list of variables.""" 54 | return [] 55 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/input/pipe.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import json 11 | import logging 12 | from typing import Any 13 | 14 | from .action import Action 15 | from .action import action_factory 16 | from .step import step_factory 17 | from .trace import trace_select 18 | 19 | LOG = logging.getLogger(__name__) 20 | 21 | 22 | class PipeAction(Action): 23 | """A class to represent a pipeline of actions.""" 24 | 25 | def __init__(self, context: Any, action_path: list, *configs: dict) -> None: 26 | """Initialize the PipeAction. 27 | 28 | Parameters 29 | ---------- 30 | context : Any 31 | The context for the action. 32 | action_path : list 33 | The path of the action. 34 | configs : dict 35 | The configurations for the actions. 36 | """ 37 | super().__init__(context, action_path, *configs) 38 | if len(configs) <= 1: 39 | raise ValueError( 40 | f"PipeAction requires at least two actions, got {len(configs)}\n{json.dumps(configs, indent=2)}" 41 | ) 42 | 43 | current: Any = action_factory(configs[0], context, action_path + ["0"]) 44 | for i, c in enumerate(configs[1:]): 45 | current = step_factory(c, context, action_path + [str(i + 1)], previous_step=current) 46 | self.last_step: Any = current 47 | 48 | @trace_select 49 | def select(self, group_of_dates: Any) -> Any: 50 | """Select data based on the group of dates. 51 | 52 | Parameters 53 | ---------- 54 | group_of_dates : Any 55 | The group of dates to select data for. 56 | 57 | Returns 58 | ------- 59 | Any 60 | The selected data. 61 | """ 62 | return self.last_step.select(group_of_dates) 63 | 64 | def __repr__(self) -> str: 65 | """Return a string representation of the PipeAction.""" 66 | return f"PipeAction({self.last_step})" 67 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/size.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | 11 | import logging 12 | import os 13 | from typing import Dict 14 | from typing import Optional 15 | 16 | import tqdm 17 | from anemoi.utils.humanize import bytes_to_human 18 | 19 | LOG = logging.getLogger(__name__) 20 | 21 | 22 | def compute_directory_sizes(path: str) -> Optional[Dict[str, int]]: 23 | """Computes the total size and number of files in a directory. 24 | 25 | Parameters 26 | ---------- 27 | path : str 28 | The path to the directory. 29 | 30 | Returns 31 | ------- 32 | dict of str to int or None 33 | A dictionary with the total size and number of files, or None if the path is not a directory. 
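34 | 35 |     Examples 36 |     -------- 37 |     A hypothetical run on a small dataset (illustrative values only): 38 | 39 |     >>> compute_directory_sizes("dataset.zarr")  # doctest: +SKIP 40 |     {'total_size': 3283650, 'total_number_of_files': 34}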
34 | """ 35 | if not os.path.isdir(path): 36 | return None 37 | 38 | size, n = 0, 0 39 | bar = tqdm.tqdm(iterable=os.walk(path), desc=f"Computing size of {path}") 40 | for dirpath, _, filenames in bar: 41 | for filename in filenames: 42 | file_path = os.path.join(dirpath, filename) 43 | size += os.path.getsize(file_path) 44 | n += 1 45 | 46 | LOG.info(f"Total size: {bytes_to_human(size)}") 47 | LOG.info(f"Total number of files: {n}") 48 | 49 | return dict(total_size=size, total_number_of_files=n) 50 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/source.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2025- Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | from abc import ABC 11 | from abc import abstractmethod 12 | 13 | import earthkit.data as ekd 14 | 15 | from anemoi.datasets.create.typing import DateList 16 | 17 | 18 | class Source(ABC): 19 | """Represents a data source with a given context.""" 20 | 21 | emoji = "📦" # For tracing 22 | 23 | def __init__(self, context: any, *args: tuple, **kwargs: dict): 24 | """Initialise the source. 25 | Parameters 26 | ---------- 27 | context : Any 28 | The context for the data source. 29 | *args : tuple 30 | Additional positional arguments. 31 | **kwargs : dict 32 | Additional keyword arguments. 33 | """ 34 | self.context = context 35 | 36 | @abstractmethod 37 | def execute(self, dates: DateList) -> ekd.FieldList: 38 | """Execute the filter. 39 | 40 | Parameters 41 | ---------- 42 | dates : DateList 43 | The input dates. 44 | 45 | Returns 46 | ------- 47 | ekd.FieldList 48 | The output data. 49 | """ 50 | 51 | pass 52 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/__init__.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import logging 11 | from typing import Any 12 | 13 | from anemoi.utils.registry import Registry 14 | 15 | LOG = logging.getLogger(__name__) 16 | 17 | 18 | source_registry = Registry(__name__) 19 | 20 | 21 | def create_source(context: Any, config: Any) -> Any: 22 | """Create a source based on the provided configuration. 23 | 24 | Parameters 25 | ---------- 26 | context : Any 27 | The context in which the source is created. 28 | config : Any 29 | The configuration for the source. 30 | 31 | Returns 32 | ------- 33 | Any 34 | The created source. 
35 | """ 36 | return source_registry.from_config(config, context) 37 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/anemoi_dataset.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2025 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import numpy as np 11 | 12 | from .legacy import legacy_source 13 | 14 | 15 | @legacy_source(__file__) 16 | def execute(context, dates, params=None, **kwargs): 17 | import earthkit.data as ekd 18 | 19 | from anemoi.datasets import open_dataset 20 | 21 | ds = open_dataset(**kwargs) 22 | # dates_to_index = {date: i for i, date in enumerate(ds.dates)} 23 | 24 | indices = [] 25 | for date in dates: 26 | idx = np.where(ds.dates == date)[0] 27 | if len(idx) == 0: 28 | continue 29 | indices.append((int(idx[0]), date)) 30 | 31 | vars = ds.variables 32 | if params is None: 33 | params = vars 34 | 35 | if not isinstance(params, (list, tuple, set)): 36 | params = [params] 37 | 38 | params = set(params) 39 | results = [] 40 | 41 | ensemble = ds.shape[2] > 1 42 | latitudes = ds.latitudes 43 | longitudes = ds.longitudes 44 | 45 | for idx, date in indices: 46 | 47 | metadata = dict(valid_datetime=date, latitudes=latitudes, longitudes=longitudes) 48 | 49 | for j, y in enumerate(ds[idx]): 50 | 51 | param = vars[j] 52 | if param not in params: 53 | continue 54 | 55 | # metadata['name'] = param 56 | # metadata['param_level'] = param 57 | metadata["param"] = param 58 | 59 | for k, e in enumerate(y): 60 | if ensemble: 61 | metadata["number"] = k + 1 62 | 63 | metadata["values"] = e 64 | 65 | results.append(metadata.copy()) 66 | 67 | print(results[0].keys()) 68 | 69 | # "list-of-dicts" does support resolution 70 | results = ekd.from_source("list-of-dicts", results) 71 | 72 | # return new_fieldlist_from_list([new_field_from_latitudes_longitudes(x, latitudes, longitudes) for x in results]) 73 | return results 74 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/constants.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | from typing import Any 11 | from typing import Dict 12 | from typing import List 13 | 14 | from earthkit.data import from_source 15 | 16 | from .legacy import legacy_source 17 | 18 | 19 | @legacy_source(__file__) 20 | def constants(context: Any, dates: List[str], template: Dict[str, Any], param: str) -> Any: 21 | """Deprecated function to retrieve constants data. 22 | 23 | Parameters 24 | ---------- 25 | context : Any 26 | The context object for tracing. 27 | dates : list of str 28 | List of dates for which data is required. 
29 | template : dict of str to Any 30 | Template dictionary for the data source. 31 | param : str 32 | Parameter to retrieve. 33 | 34 | Returns 35 | ------- 36 | Any 37 | Data retrieved from the source. 38 | """ 39 | from warnings import warn 40 | 41 | warn( 42 | "The source `constants` is deprecated, use `forcings` instead.", 43 | DeprecationWarning, 44 | stacklevel=2, 45 | ) 46 | context.trace("✅", f"from_source(constants, {template}, {param})") 47 | if len(template) == 0: 48 | raise ValueError("Forcings template is empty.") 49 | 50 | return from_source("forcings", source_or_dataset=template, date=dates, param=param) 51 | 52 | 53 | execute: Any = constants 54 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/eccc_fstd.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2025 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | 11 | from . import source_registry 12 | from .xarray import XarraySourceBase 13 | 14 | 15 | @source_registry.register("eccc_fstd") 16 | class XarrayECCCSource(XarraySourceBase): 17 | """An Xarray data source that uses the `fstd` engine.""" 18 | 19 | emoji = "🍁" 20 | options = {"engine": "fstd"} 21 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/empty.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | 11 | from typing import Any 12 | from typing import List 13 | 14 | import earthkit.data as ekd 15 | 16 | from .legacy import legacy_source 17 | 18 | 19 | @legacy_source(__file__) 20 | def execute(context: Any, dates: List[str], **kwargs: Any) -> ekd.FieldList: 21 | """Executes the loading of an empty data source. 22 | 23 | Parameters 24 | ---------- 25 | context : object 26 | The context in which the function is executed. 27 | dates : list 28 | List of dates for which data is to be loaded. 29 | **kwargs : dict 30 | Additional keyword arguments. 31 | 32 | Returns 33 | ------- 34 | ekd.FieldList 35 | Loaded empty data source. 36 | """ 37 | return ekd.from_source("empty") 38 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/forcings.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | from typing import Any 11 | from typing import List 12 | 13 | from earthkit.data import from_source 14 | 15 | from .legacy import legacy_source 16 | 17 | 18 | @legacy_source(__file__) 19 | def forcings(context: Any, dates: List[str], template: Any, param: str) -> Any: 20 | """Loads forcing data from a specified source. 21 | 22 | Parameters 23 | ---------- 24 | context : object 25 | The context in which the function is executed. 26 | dates : list 27 | List of dates for which data is to be loaded. 28 | template : Any 29 | Template FieldList for the data source. 30 | param : str 31 | Parameter for the data source. 32 | 33 | Returns 34 | ------- 35 | object 36 | Loaded forcing data. 37 | """ 38 | context.trace("✅", f"from_source(forcings, {template}, {param})") 39 | return from_source("forcings", source_or_dataset=template, date=dates, param=param) 40 | 41 | 42 | execute = forcings 43 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/netcdf.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | 11 | from typing import Any 12 | from typing import List 13 | 14 | import earthkit.data as ekd 15 | 16 | from .legacy import legacy_source 17 | from .xarray import load_many 18 | 19 | 20 | @legacy_source(__file__) 21 | def execute(context: Any, dates: List[str], path: str, *args: Any, **kwargs: Any) -> ekd.FieldList: 22 | """Execute the loading of multiple NetCDF files. 23 | 24 | Parameters 25 | ---------- 26 | context : object 27 | The context in which the function is executed. 28 | dates : list 29 | List of dates for which data is to be loaded. 30 | path : str 31 | Path to the directory containing the NetCDF files. 32 | *args : tuple 33 | Additional positional arguments. 34 | **kwargs : dict 35 | Additional keyword arguments. 36 | 37 | Returns 38 | ------- 39 | object 40 | The loaded data. 41 | """ 42 | return load_many("📁", context, dates, path, *args, **kwargs) 43 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/opendap.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction.
9 | 10 | 11 | from typing import Any 12 | from typing import Dict 13 | from typing import List 14 | 15 | import earthkit.data as ekd 16 | 17 | from .legacy import legacy_source 18 | from .xarray import load_many 19 | 20 | 21 | @legacy_source(__file__) 22 | def execute(context: Dict[str, Any], dates: List[str], url: str, *args: Any, **kwargs: Any) -> ekd.FieldList: 23 | """Execute the data loading process from an OpenDAP source. 24 | 25 | Parameters 26 | ---------- 27 | context : dict 28 | The context in which the function is executed. 29 | dates : list 30 | List of dates for which data is to be loaded. 31 | url : str 32 | The URL of the OpenDAP source. 33 | *args : tuple 34 | Additional positional arguments. 35 | **kwargs : dict 36 | Additional keyword arguments. 37 | 38 | Returns 39 | ------- 40 | ekd.FieldList 41 | The loaded data. 42 | """ 43 | return load_many("🌐", context, dates, url, *args, **kwargs) 44 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/patterns.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import datetime 11 | import glob 12 | from typing import Any 13 | from typing import Generator 14 | from typing import List 15 | from typing import Tuple 16 | 17 | from earthkit.data.utils.patterns import Pattern 18 | 19 | 20 | def _expand(paths: List[str]) -> Generator[str, None, None]: 21 | """Expand the given paths to include all matching file paths. 22 | 23 | Parameters 24 | ---------- 25 | paths : List[str] 26 | List of paths to expand. 27 | 28 | Returns 29 | ------- 30 | Generator[str] 31 | Expanded file paths. 32 | """ 33 | if not isinstance(paths, list): 34 | paths = [paths] 35 | 36 | for path in paths: 37 | if path.startswith("file://"): 38 | path = path[7:] 39 | 40 | if path.startswith("http://"): 41 | yield path 42 | continue 43 | 44 | if path.startswith("https://"): 45 | yield path 46 | continue 47 | 48 | cnt = 0 49 | for p in glob.glob(path): 50 | yield p 51 | cnt += 1 52 | if cnt == 0: 53 | yield path 54 | 55 | 56 | def iterate_patterns( 57 | path: str, dates: List[datetime.datetime], **kwargs: Any 58 | ) -> Generator[Tuple[str, List[str]], None, None]: 59 | """Iterate over patterns and expand them with given dates and additional keyword arguments. 60 | 61 | Parameters 62 | ---------- 63 | path : str 64 | The pattern path to iterate over. 65 | dates : List[datetime.datetime] 66 | List of datetime objects to substitute in the pattern. 67 | **kwargs : Any 68 | Additional keyword arguments to substitute in the pattern. 69 | 70 | Returns 71 | ------- 72 | Generator[Tuple[str, List[str]]] 73 | The expanded path and list of ISO formatted dates.
74 | """ 75 | given_paths = path if isinstance(path, list) else [path] 76 | 77 | dates = [d.isoformat() for d in dates] 78 | if len(dates) > 0: 79 | kwargs["date"] = dates 80 | 81 | for path in given_paths: 82 | paths = Pattern(path).substitute(allow_extra=True, **kwargs) 83 | for path in _expand(paths): 84 | yield path, dates 85 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/source.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | from datetime import datetime 11 | from typing import Any 12 | from typing import Dict 13 | from typing import List 14 | from typing import Optional 15 | 16 | from earthkit.data import from_source 17 | 18 | from anemoi.datasets.create.utils import to_datetime_list 19 | 20 | from .legacy import legacy_source 21 | 22 | 23 | @legacy_source(__file__) 24 | def source(context: Optional[Any], dates: List[datetime], **kwargs: Any) -> Any: 25 | """Generates a source based on the provided context, dates, and additional keyword arguments. 26 | 27 | Parameters 28 | ---------- 29 | context : Optional[Any] 30 | The context in which the source is generated. 31 | dates : List[datetime] 32 | A list of datetime objects representing the dates. 33 | **kwargs : Any 34 | Additional keyword arguments for the source generation. 35 | 36 | Returns 37 | ------- 38 | Any 39 | The generated source. 40 | """ 41 | name = kwargs.pop("name") 42 | context.trace("✅", f"from_source({name}, {dates}, {kwargs}") 43 | if kwargs["date"] == "$from_dates": 44 | kwargs["date"] = list({d.strftime("%Y%m%d") for d in dates}) 45 | if kwargs["time"] == "$from_dates": 46 | kwargs["time"] = list({d.strftime("%H%M") for d in dates}) 47 | return from_source(name, **kwargs) 48 | 49 | 50 | execute = source 51 | 52 | if __name__ == "__main__": 53 | import yaml 54 | 55 | config: Dict[str, Any] = yaml.safe_load( 56 | """ 57 | name: mars 58 | class: ea 59 | expver: '0001' 60 | grid: 20.0/20.0 61 | levtype: sfc 62 | param: [2t] 63 | number: [0, 1] 64 | date: $from_dates 65 | time: $from_dates 66 | """ 67 | ) 68 | dates: List[str] = yaml.safe_load("[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]") 69 | dates = to_datetime_list(dates) 70 | 71 | for f in source(None, dates, **config): 72 | print(f, f.to_numpy().mean()) 73 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/xarray_kerchunk.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | 11 | from . 
import source_registry 12 | from .xarray import XarraySourceBase 13 | 14 | 15 | @source_registry.register("xarray_kerchunk") 16 | class XarrayKerchunkSource(XarraySourceBase): 17 | """An Xarray data source that uses the `kerchunk` engine.""" 18 | 19 | emoji = "🧱" 20 | 21 | def __init__(self, context, json, *args, **kwargs: dict): 22 | super().__init__(context, *args, **kwargs) 23 | 24 | self.path_or_url = "reference://" 25 | 26 | self.options = { 27 | "engine": "zarr", 28 | "backend_kwargs": { 29 | "consolidated": False, 30 | "storage_options": { 31 | "fo": json, 32 | "remote_protocol": "s3", 33 | "remote_options": {"anon": True}, 34 | }, 35 | }, 36 | } 37 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/xarray_support/README.md: -------------------------------------------------------------------------------- 1 | The code under this directory will be migrated to earthkit-data in the future 2 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/xarray_support/patch.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | 11 | import logging 12 | from typing import Any 13 | from typing import Dict 14 | from typing import List 15 | 16 | import xarray as xr 17 | 18 | LOG = logging.getLogger(__name__) 19 | 20 | 21 | def patch_attributes(ds: xr.Dataset, attributes: Dict[str, Dict[str, Any]]) -> Any: 22 | """Patch the attributes of the dataset. 23 | 24 | Parameters 25 | ---------- 26 | ds : xr.Dataset 27 | The dataset to patch. 28 | attributes : Dict[str, Dict[str, Any]] 29 | The attributes to patch. 30 | 31 | Returns 32 | ------- 33 | Any 34 | The patched dataset. 35 | """ 36 | for name, value in attributes.items(): 37 | variable = ds[name] 38 | variable.attrs.update(value) 39 | 40 | return ds 41 | 42 | 43 | def patch_coordinates(ds: xr.Dataset, coordinates: List[str]) -> Any: 44 | """Patch the coordinates of the dataset. 45 | 46 | Parameters 47 | ---------- 48 | ds : xr.Dataset 49 | The dataset to patch. 50 | coordinates : List[str] 51 | The coordinates to patch. 52 | 53 | Returns 54 | ------- 55 | Any 56 | The patched dataset. 57 | """ 58 | for name in coordinates: 59 | ds = ds.assign_coords({name: ds[name]}) 60 | 61 | return ds 62 | 63 | 64 | PATCHES = { 65 | "attributes": patch_attributes, 66 | "coordinates": patch_coordinates, 67 | } 68 | 69 | 70 | def patch_dataset(ds: xr.Dataset, patch: Dict[str, Dict[str, Any]]) -> Any: 71 | """Patch the dataset. 72 | 73 | Parameters 74 | ---------- 75 | ds : xr.Dataset 76 | The dataset to patch. 77 | patch : Dict[str, Dict[str, Any]] 78 | The patch to apply. 79 | 80 | Returns 81 | ------- 82 | Any 83 | The patched dataset. 
84 | """ 85 | for what, values in patch.items(): 86 | if what not in PATCHES: 87 | raise ValueError(f"Unknown patch type {what!r}") 88 | 89 | ds = PATCHES[what](ds, values) 90 | 91 | return ds 92 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/xarray_zarr.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | from typing import Any 11 | from typing import List 12 | 13 | import earthkit.data as ekd 14 | 15 | from .legacy import legacy_source 16 | from .xarray import load_many 17 | 18 | 19 | @legacy_source(__file__) 20 | def execute(context: Any, dates: List[str], url: str, *args: Any, **kwargs: Any) -> ekd.FieldList: 21 | """Execute the data loading process. 22 | 23 | Parameters 24 | ---------- 25 | context : Any 26 | The context in which the execution occurs. 27 | dates : List[str] 28 | List of dates for which data is to be loaded. 29 | url : str 30 | The URL from which data is to be loaded. 31 | *args : tuple 32 | Additional positional arguments. 33 | **kwargs : dict 34 | Additional keyword arguments. 35 | 36 | Returns 37 | ------- 38 | ekd.FieldList 39 | The loaded data. 40 | """ 41 | return load_many("🇿", context, dates, url, *args, **kwargs) 42 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/zenodo.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | 11 | from typing import Any 12 | from typing import Dict 13 | from typing import List 14 | 15 | import earthkit.data as ekd 16 | from earthkit.data.core.fieldlist import MultiFieldList 17 | from earthkit.data.sources.url import download_and_cache 18 | 19 | from .legacy import legacy_source 20 | from .patterns import iterate_patterns 21 | from .xarray import load_one 22 | 23 | 24 | @legacy_source(__file__) 25 | def execute(context: Any, dates: Any, record_id: str, file_key: str, *args: Any, **kwargs: Any) -> ekd.FieldList: 26 | """Executes the download and processing of files from Zenodo. 27 | 28 | Parameters 29 | ---------- 30 | context : Any 31 | The context in which the function is executed. 32 | dates : Any 33 | The dates for which the data is required. 34 | record_id : str 35 | The Zenodo record ID. 36 | file_key : str 37 | The key to identify the file. 38 | *args : Any 39 | Additional arguments. 40 | **kwargs : Any 41 | Additional keyword arguments. 42 | 43 | Returns 44 | ------- 45 | MultiFieldList 46 | A list of fields loaded from the downloaded files. 
47 | """ 48 | import requests 49 | 50 | result: List[Any] = [] 51 | 52 | URLPATTERN = "https://zenodo.org/api/records/{record_id}" 53 | url = URLPATTERN.format(record_id=record_id) 54 | r = requests.get(url) 55 | r.raise_for_status() 56 | record: Dict[str, Any] = r.json() 57 | 58 | urls: Dict[str, str] = {} 59 | for file in record["files"]: 60 | urls[file["key"]] = file["links"]["self"] 61 | 62 | for url, dates in iterate_patterns(file_key, dates, **kwargs): 63 | if url not in urls: 64 | continue 65 | 66 | path = download_and_cache(urls[url]) 67 | result.append(load_one("?", context, dates, path, options={}, flavour=None, **kwargs)) 68 | 69 | return MultiFieldList(result) 70 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/typing.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2025- Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import datetime 11 | from typing import List 12 | 13 | Date = datetime.datetime 14 | 15 | DateList = List[Date] 16 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/writer.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | 11 | import logging 12 | from typing import Any 13 | 14 | import numpy as np 15 | from numpy.typing import NDArray 16 | 17 | LOG = logging.getLogger(__name__) 18 | 19 | 20 | class ViewCacheArray: 21 | """A class that provides a caching mechanism for writing to a NumPy-like array. 22 | 23 | The is initialised with a NumPy-like array, a shape and a list to reindex the first 24 | dimension. The array is used to store the final data, while the cache is used to 25 | temporarily store the data before flushing it to the array. 26 | 27 | The `flush` method copies the contents of the cache to the final array. 28 | """ 29 | 30 | def __init__(self, array: NDArray[Any], *, shape: tuple[int, ...], indexes: list[int]): 31 | """Initialize the ViewCacheArray. 32 | 33 | Parameters 34 | ---------- 35 | array : NDArray[Any] 36 | The NumPy-like array to store the final data. 37 | shape : tuple[int, ...] 38 | The shape of the cache array. 39 | indexes : list[int] 40 | List to reindex the first dimension. 41 | """ 42 | assert len(indexes) == shape[0], (len(indexes), shape[0]) 43 | self.array = array 44 | self.dtype = array.dtype 45 | self.cache = np.full(shape, np.nan, dtype=self.dtype) 46 | self.indexes = indexes 47 | 48 | def __setitem__(self, key: tuple[int, ...], value: NDArray[Any]) -> None: 49 | """Set the value in the cache array at the specified key. 50 | 51 | Parameters 52 | ---------- 53 | key : tuple[int, ...] 54 | The index key to set the value. 
55 | value : NDArray[Any] 56 | The value to set in the cache array. 57 | """ 58 | self.cache[key] = value 59 | 60 | def flush(self) -> None: 61 | """Copy the contents of the cache to the final array.""" 62 | for i in range(self.cache.shape[0]): 63 | global_i = self.indexes[i] 64 | self.array[global_i] = self.cache[i] 65 | -------------------------------------------------------------------------------- /src/anemoi/datasets/data/debug.css: -------------------------------------------------------------------------------- 1 | table.dataset td { 2 | vertical-align: top; 3 | text-align: left !important; 4 | } 5 | 6 | table.dataset span.dataset { 7 | font-weight: bold !important; 8 | } 9 | 10 | table.dataset span.values { 11 | font-style: italic !important; 12 | } 13 | -------------------------------------------------------------------------------- /src/anemoi/datasets/data/observations/multi.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import logging 11 | import os 12 | 13 | from anemoi.datasets.data import open_dataset 14 | 15 | LOG = logging.getLogger(__name__) 16 | 17 | 18 | class LegacyDatasets: 19 | def __init__(self, paths, start=None, end=None, **kwargs): 20 | self.paths = paths 21 | 22 | if not start or not end: 23 | print( 24 | "❌❌ Warning: start and end not provided, using the earliest first and latest last dates of the datasets" 25 | ) 26 | lst = [self._open_dataset(p, **kwargs) for p in paths] 27 | start = min([d.dates[0] for d in lst]) 28 | end = max([d.dates[-1] for d in lst]) 29 | 30 | self._datasets = { 31 | os.path.basename(p).split(".")[0]: self._open_dataset(p, start=start, end=end, padding="empty") 32 | for p in paths 33 | } 34 | 35 | first = list(self._datasets.values())[0] 36 | for name, dataset in self._datasets.items(): 37 | if dataset.dates[0] != first.dates[0] or dataset.dates[-1] != first.dates[-1]: 38 | raise ValueError("Datasets have different start and end times") 39 | if dataset.frequency != first.frequency: 40 | raise ValueError("Datasets have different frequencies") 41 | 42 | self._keys = self._datasets.keys 43 | 44 | self._first = list(self._datasets.values())[0] 45 | 46 | def _open_dataset(self, p, **kwargs): 47 | if p.startswith("observations-"): 48 | return open_dataset(observations=p, **kwargs) 49 | else: 50 | print("❗ Opening non-observations dataset:", p) 51 | return open_dataset(p, **kwargs) 52 | 53 | def items(self): 54 | return self._datasets.items() 55 | 56 | @property 57 | def dates(self): 58 | return self._first.dates 59 | 60 | def __len__(self): 61 | return len(self._first) 62 | 63 | def __getitem__(self, i): 64 | return {k: d[i] for k, d in self._datasets.items()} 65 | -------------------------------------------------------------------------------- /src/anemoi/datasets/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | -------------------------------------------------------------------------------- /tests/create-perturbations-full.yaml: -------------------------------------------------------------------------------- 1 | description: "develop version of the dataset for a few days and a few variables, once data on mars is cached it should take a few seconds to generate the dataset" 2 | dataset_status: testing 3 | purpose: aifs 4 | name: create-perturbations 5 | config_format_version: 2 6 | 7 | common: 8 | mars_request_sfc: &common_sfc 9 | name: mars 10 | class: ea 11 | date: $datetime_format($dates,%Y%m%d) 12 | time: $datetime_format($dates,%H%M) 13 | expver: '0001' 14 | grid: 20.0/20.0 15 | levtype: sfc 16 | #param: [2t] 17 | param: [10u, 10v, 2d, 2t, lsm, msl, sdor, skt, slor, sp, tcw, z] 18 | mars_request_pl: &common_pl 19 | name: mars 20 | class: ea 21 | date: $datetime_format($dates,%Y%m%d) 22 | time: $datetime_format($dates,%H%M) 23 | expver: '0001' 24 | grid: 20.0/20.0 25 | levtype: pl 26 | #param: [q] 27 | # level: [50, 100] 28 | param: [q, t, u, v, w, z] 29 | level: [50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000] 30 | dates: &dates_anchor 31 | start: 2020-12-30 00:00:00 32 | end: 2021-01-03 12:00:00 33 | frequency: 12h 34 | 35 | dates: 36 | <<: *dates_anchor 37 | 38 | input: 39 | dates: 40 | <<: *dates_anchor 41 | join: 42 | - function: 43 | name: perturbations 44 | ensembles: 45 | <<: *common_sfc 46 | stream: enda 47 | type: an 48 | number: 0/to/9 49 | 50 | center: 51 | <<: *common_sfc 52 | stream: oper 53 | type: an 54 | 55 | mean: 56 | <<: *common_sfc 57 | stream: enda 58 | type: em 59 | 60 | - function: 61 | name: perturbations 62 | ensembles: 63 | <<: *common_pl 64 | stream: enda 65 | type: an 66 | number: 0/to/9 67 | 68 | center: 69 | <<: *common_pl 70 | stream: oper 71 | type: an 72 | 73 | mean: 74 | <<: *common_pl 75 | stream: enda 76 | type: em 77 | 78 | statistics: 79 | end: 2021 80 | -------------------------------------------------------------------------------- /tests/create-shift.yaml: -------------------------------------------------------------------------------- 1 | description: "develop version of the dataset for a few days and a few variables, once data on mars is cached it should take a few seconds to generate the dataset" 2 | dataset_status: testing 3 | purpose: aifs 4 | name: test-small 5 | config_format_version: 2 6 | 7 | common: 8 | mars_request: &mars_request 9 | expver: "0001" 10 | class: ea 11 | grid: 20./20.
12 | 13 | dates: 14 | start: 2020-12-30 00:00:00 15 | end: 2021-01-03 12:00:00 16 | frequency: 12h 17 | 18 | input: 19 | join: 20 | - mars: 21 | <<: *mars_request 22 | param: [2t] 23 | levtype: sfc 24 | stream: oper 25 | type: an 26 | 27 | - forcings: 28 | template: ${input.join.0.mars} 29 | param: 30 | - insolation 31 | 32 | - date_shift: 33 | delta: -25 34 | forcings: 35 | template: ${input.join.0.mars} 36 | param: 37 | - insolation 38 | 39 | statistics: 40 | end: 2021 41 | -------------------------------------------------------------------------------- /tests/create/accumulation.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2021-01-10 18:00:00 3 | #start: 2021-01-10 19:00:00 4 | end: 2021-01-12 12:00:00 5 | frequency: 6h 6 | 7 | input: 8 | accumulations: 9 | expver: "0001" 10 | class: ea 11 | 12 | stream: oper 13 | #stream: enda 14 | 15 | grid: 20./20. 16 | #grid: o96 17 | levtype: sfc 18 | param: [ tp , cp] 19 | # accumulation_period: [0, 6] 20 | accumulation_period: 24 21 | -------------------------------------------------------------------------------- /tests/create/concat.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2020-12-30 00:00:00 3 | end: 2021-01-03 12:00:00 4 | frequency: 12h 5 | 6 | common: 7 | mars_request: &mars_request 8 | expver: "0001" 9 | class: ea 10 | grid: 20./20. 11 | levtype: sfc 12 | stream: oper 13 | type: an 14 | param: [2t] 15 | 16 | input: 17 | concat: 18 | - dates: 19 | start: 2020-12-30 00:00:00 20 | end: 2021-01-01 12:00:00 21 | frequency: 12h 22 | mars: 23 | <<: *mars_request 24 | - dates: 25 | start: 2021-01-02 00:00:00 26 | end: 2021-01-03 12:00:00 27 | frequency: 12h 28 | mars: 29 | <<: *mars_request 30 | 31 | statistics: 32 | end: 2021 33 | -------------------------------------------------------------------------------- /tests/create/join.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | mars_request: &mars_request 3 | expver: "0001" 4 | class: ea 5 | grid: 20./20. 6 | 7 | dates: 8 | start: 2020-12-30 00:00:00 9 | end: 2021-01-03 12:00:00 10 | frequency: 12h 11 | 12 | input: 13 | join: 14 | - mars: 15 | <<: *mars_request 16 | param: [2t] 17 | levtype: sfc 18 | stream: oper 19 | type: an 20 | 21 | - mars: 22 | <<: *mars_request 23 | param: [q, t] 24 | levtype: pl 25 | level: [50, 100] 26 | stream: oper 27 | type: an 28 | 29 | - accumulations: 30 | <<: *mars_request 31 | levtype: sfc 32 | param: [cp, tp] 33 | # accumulation_period: 6h 34 | 35 | - forcings: 36 | template: ${input.join.0.mars} 37 | param: 38 | - cos_latitude 39 | 40 | naming_scheme: "{param}_{levelist}{level_units}_{accumulation_period}" 41 | 42 | statistics: 43 | end: 2021 44 | -------------------------------------------------------------------------------- /tests/create/missing.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | mars_request: &mars_request 3 | expver: "0001" 4 | class: ea 5 | grid: 20./20.
6 | 7 | dates: 8 | start: 2020-12-30 00:00:00 9 | end: 2021-01-03 12:00:00 10 | frequency: 12h 11 | missing: ['2020-12-30 12:00:00', '2021-01-03 00:00:00'] 12 | 13 | data_sources: 14 | - mars: 15 | <<: *mars_request 16 | param: [2t] 17 | levtype: sfc 18 | stream: oper 19 | type: an 20 | 21 | input: 22 | forcings: 23 | template: ${data_sources.0.mars} 24 | param: 25 | - cos_latitude 26 | #- sin_latitude 27 | 28 | statistics: 29 | end: 2021-01-02 30 | -------------------------------------------------------------------------------- /tests/create/nan.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2020-12-30 00:00:00 3 | end: 2021-01-03 12:00:00 4 | frequency: 12h 5 | 6 | input: 7 | mars: 8 | expver: "0001" 9 | class: ea 10 | grid: 20./20. 11 | param: [2t, sst] 12 | levtype: sfc 13 | stream: oper 14 | type: an 15 | 16 | statistics: 17 | end: 2020 18 | allow_nans: [sst] 19 | -------------------------------------------------------------------------------- /tests/create/pipe.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | mars_request: &mars_request 3 | expver: "0001" 4 | class: ea 5 | grid: 20./20. 6 | 7 | dates: &dates_anchor 8 | start: 2020-12-30 00:00:00 9 | end: 2021-01-03 12:00:00 10 | frequency: 12h 11 | 12 | input: 13 | join: 14 | - mars: 15 | <<: *mars_request 16 | param: [2t] 17 | levtype: sfc 18 | 19 | - pipe: 20 | - mars: 21 | <<: *mars_request 22 | param: [q, t] 23 | levtype: pl 24 | level: [50, 100] 25 | stream: oper 26 | type: an 27 | - filter: 28 | param: [q] 29 | - filter: 30 | level: [50] 31 | 32 | - accumulations: 33 | <<: *mars_request 34 | param: [cp, tp] 35 | 36 | - forcings: 37 | template: ${input.join.0.mars} 38 | param: 39 | - cos_latitude 40 | 41 | statistics: 42 | end: 2021 43 | -------------------------------------------------------------------------------- /tests/create/recentre.yaml: -------------------------------------------------------------------------------- 1 | 2 | dates: 3 | start: 2021-01-01 00:00:00 4 | #start: 2020-12-30 00:00:00 5 | end: 2021-01-03 12:00:00 6 | frequency: 12h 7 | 8 | build: 9 | group_by: monthly 10 | 11 | common: 12 | global: &global 13 | class: ea 14 | expver: "0001" 15 | grid: 20.0/20.0 16 | sfc: &sfc 17 | <<: *global 18 | levtype: sfc 19 | # param: [2t] 20 | param: [10u, 10v, 2d, 2t, lsm, msl, sdor, skt, slor, sp, tcw, z] 21 | acc: &acc 22 | <<: *global 23 | levtype: sfc 24 | #param: [tp] 25 | param: [cp, tp] 26 | pl: &pl 27 | <<: *global 28 | levtype: pl 29 | #param: [q] 30 | #level: [50] 31 | param: [q, t, u, v, w, z] 32 | level: [50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000] 33 | 34 | ensembles: &ensembles 35 | stream: enda 36 | type: an 37 | number: [1, 2, 4] 38 | # number: [1, 2, 3, 4, 5, 6, 7, 8, 9] 39 | centre: &centre 40 | stream: oper 41 | type: an 42 | 43 | 44 | data_sources: 45 | ensembles: 46 | join: 47 | - mars: 48 | <<: *ensembles 49 | <<: *sfc 50 | - mars: 51 | <<: *ensembles 52 | <<: *pl 53 | - accumulations: 54 | <<: *ensembles 55 | <<: *acc 56 | centre: 57 | join: 58 | - mars: 59 | <<: *centre 60 | <<: *sfc 61 | - mars: 62 | <<: *centre 63 | <<: *pl 64 | - accumulations: 65 | <<: *centre 66 | <<: *acc 67 | 68 | input: 69 | join: 70 | - recentre: 71 | # the ensemble data which has one additional dimension 72 | members: ${data_sources.ensembles} 73 | # the new centre of the data 74 | centre: ${data_sources.centre} 75 | - forcings: 76 | template: ${input.join.0.recentre} 77 | param: 78 | - 
cos_latitude 79 | - cos_longitude 80 | - sin_latitude 81 | - sin_longitude 82 | - cos_julian_day 83 | - cos_local_time 84 | - sin_julian_day 85 | - sin_local_time 86 | - insolation 87 | -------------------------------------------------------------------------------- /tests/create/regrid.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2020-12-30 00:00:00 3 | end: 2021-01-03 12:00:00 4 | frequency: 12h 5 | 6 | input: 7 | join: 8 | - mars: 9 | expver: "0001" 10 | class: ea 11 | grid: o48 12 | param: [ 2t ] 13 | levtype: sfc 14 | stream: oper 15 | type: an 16 | - pipe: 17 | - mars: 18 | expver: "0001" 19 | class: ea 20 | grid: o32 21 | param: [ z ] 22 | levtype: pl 23 | level: [ 500 ] 24 | stream: oper 25 | type: an 26 | - regrid: 27 | # method: linear 28 | method: nearest 29 | in_grid: o32 30 | out_grid: o48 31 | -------------------------------------------------------------------------------- /tests/create/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eux 3 | NAME=${1:-join} 4 | 5 | anemoi-datasets create-step init $NAME.yaml $NAME.zarr --overwrite 6 | anemoi-datasets create-step load $NAME.zarr --part 1/2 7 | anemoi-datasets create-step load $NAME.zarr --part 2/2 8 | 9 | anemoi-datasets create-step statistics $NAME.zarr 10 | anemoi-datasets create-step size $NAME.zarr 11 | # anemoi-datasets create-step finalise $NAME.zarr 12 | 13 | anemoi-datasets create-step patch $NAME.zarr 14 | 15 | anemoi-datasets create-step init-additions $NAME.zarr --delta 12h 16 | anemoi-datasets create-step run-additions $NAME.zarr --part 1/2 --delta 12h 17 | anemoi-datasets create-step run-additions $NAME.zarr --part 2/2 --delta 12h 18 | anemoi-datasets create-step finalise-additions $NAME.zarr --delta 12h 19 | 20 | anemoi-datasets create-step cleanup $NAME.zarr 21 | -------------------------------------------------------------------------------- /tests/test_indexing.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 
9 | 10 | 11 | import numpy as np 12 | 13 | from anemoi.datasets.data.indexing import length_to_slices 14 | 15 | 16 | def test_length_to_slices() -> None: 17 | """Test the length_to_slices function with various inputs.""" 18 | lengths = [5, 7, 11, 13] 19 | datasets = [np.random.rand(n) for n in lengths] 20 | total = sum(lengths) 21 | 22 | combined = np.concatenate(datasets) 23 | 24 | for start in range(total): 25 | for stop in range(start, total): 26 | for step in range(1, stop - start + 1): 27 | index = slice(start, stop, step) 28 | print(index) 29 | slices = length_to_slices(index, lengths) 30 | result = [d[i] for (d, i) in zip(datasets, slices) if i is not None] 31 | result = np.concatenate(result) 32 | 33 | if (combined[index].shape != result.shape) or not (combined[index] == result).all(): 34 | print(index) 35 | print(combined[index]) 36 | print(result) 37 | print(slices) 38 | assert (combined[index] == result).all(), index 39 | 40 | 41 | if __name__ == "__main__": 42 | test_length_to_slices() 43 | -------------------------------------------------------------------------------- /tests/xarray/test_opendap.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | 11 | import xarray as xr 12 | from anemoi.utils.testing import skip_if_offline 13 | from anemoi.utils.testing import skip_slow_tests 14 | 15 | from anemoi.datasets.create.sources.xarray import XarrayFieldList 16 | from anemoi.datasets.testing import assert_field_list 17 | 18 | 19 | @skip_if_offline 20 | @skip_slow_tests 21 | def test_opendap() -> None: 22 | """Test loading and validating the opendap dataset.""" 23 | ds = xr.open_dataset( 24 | "https://thredds.met.no/thredds/dodsC/meps25epsarchive/2023/01/01/meps_det_2_5km_20230101T00Z.nc", 25 | ) 26 | 27 | fs = XarrayFieldList.from_xarray(ds) 28 | assert_field_list(fs, 79529, "2023-01-01T00:00:00", "2023-01-03T18:00:00") 29 | 30 | 31 | if __name__ == "__main__": 32 | for name, obj in list(globals().items()): 33 | if name.startswith("test_") and callable(obj): 34 | print(f"Running {name}...") 35 | obj() 36 | -------------------------------------------------------------------------------- /tools/.gitignore: -------------------------------------------------------------------------------- 1 | *.png 2 | -------------------------------------------------------------------------------- /tools/build-obs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import logging 4 | import os 5 | import shutil 6 | 7 | import tqdm 8 | 9 | from anemoi.datasets import open_dataset 10 | 11 | LOG = logging.getLogger(__name__) 12 | 13 | 14 | def main(): 15 | parser = argparse.ArgumentParser(description="open a dataset and build a new one") 16 | parser.add_argument("input", help="input dataset") 17 | parser.add_argument("output", help="output dataset") 18 | parser.add_argument("--backend", help="backend to use", type=str, default="npz1") 19 | parser.add_argument("--overwrite", help="overwrite output directory if it exists", action="store_true") 20 | args = 
parser.parse_args() 21 | build(**vars(args)) 22 | 23 | 24 | def build(input, output, backend, overwrite=False): 25 | ds = open_dataset(input, backend=backend) 26 | print(f"Using dataset {ds} as input") 27 | print(f"{input} backend is '{ds.metadata['backend']}'") 28 | print(f"Dataset has {len(ds)} records, from {ds.start_date} to {ds.end_date}") 29 | print(f"Converting dataset to {output} using new backend '{backend}'") 30 | 31 | from anemoi.datasets.data.records.backends import writer_backend_factory 32 | 33 | if os.path.exists(output): 34 | if overwrite: 35 | LOG.warning(f"Output directory {output} already exists, removing it") 36 | shutil.rmtree(output) 37 | else: 38 | raise FileExistsError(f"Output directory {output} already exists, use --overwrite to remove it") 39 | writer = writer_backend_factory(backend, output) 40 | 41 | for i in tqdm.tqdm(range(len(ds))): 42 | writer.write(i, ds[i]) 43 | 44 | writer.write_statistics(ds.statistics) 45 | 46 | metadata = ds.metadata.copy() 47 | metadata["backend"] = backend 48 | writer.write_metadata(metadata) 49 | 50 | 51 | if __name__ == "__main__": 52 | main() 53 | -------------------------------------------------------------------------------- /tools/check-obs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import logging 4 | 5 | import numpy as np 6 | 7 | from anemoi.datasets import open_dataset 8 | 9 | LOG = logging.getLogger(__name__) 10 | 11 | 12 | def main(): 13 | parser = argparse.ArgumentParser(description="open two datasets and compare them") 14 | parser.add_argument("dataset", help="dataset to check") 15 | parser.add_argument("reference", help="reference dataset") 16 | args = parser.parse_args() 17 | compare(args.dataset, args.reference) 18 | 19 | 20 | def _compare_nested_dicts(a, b): 21 | if isinstance(a, dict) and isinstance(b, dict): 22 | if a.keys() != b.keys(): 23 | return False 24 | return all(_compare_nested_dicts(a[k], b[k]) for k in a) 25 | elif isinstance(a, np.ndarray) and isinstance(b, np.ndarray): 26 | if a.shape != b.shape: 27 | return False 28 | return np.array_equal(a, b) 29 | assert False, f"Unsupported types for comparison: {type(a)} and {type(b)}" 30 | 31 | 32 | def compare(input, reference): 33 | ds = open_dataset(input) 34 | ref = open_dataset(reference) 35 | 36 | if len(ds) != len(ref): 37 | raise ValueError(f"Datasets have different lengths: {len(ds)} != {len(ref)}") 38 | 39 | for i in range(len(ds)): 40 | if ds[i] != ref[i]: 41 | raise ValueError(f"Datasets differ at index {i}: {ds[i]} != {ref[i]}") 42 | if ds.dates[i] != ref.dates[i]: 43 | raise ValueError(f"Dates differ at index {i}: {ds.dates[i]} != {ref.dates[i]}") 44 | print("✅ Data and dates are identical") 45 | 46 | ds_metadata = ds.metadata.copy() 47 | ref_metadata = ref.metadata.copy() 48 | ds_metadata.pop("backend", None) 49 | ref_metadata.pop("backend", None) 50 | if ds_metadata != ref_metadata: 51 | raise ValueError("Metadata differs between datasets (excluding backend)") 52 | print("✅ Metadata is identical") 53 | 54 | if not _compare_nested_dicts(ds.statistics, ref.statistics): 55 | raise ValueError("Statistics differ between datasets") 56 | print("✅ Statistics are identical") 57 | 58 | 59 | if __name__ == "__main__": 60 | main() 61 | -------------------------------------------------------------------------------- /tools/examples/Makefile: -------------------------------------------------------------------------------- 1 | YAML := $(wildcard *.yaml) 2 | 3 | TARGETS 
:= $(YAML:.yaml=.zarr) 4 | 5 | all: $(TARGETS) 6 | 7 | %.zarr: %.yaml 8 | anemoi-datasets create $< $@ --overwrite 9 | 10 | 11 | clean:: 12 | rm -fr *.zarr *.sync *.statistics 13 | 14 | .SUFFIXES: .zarr .yaml 15 | -------------------------------------------------------------------------------- /tools/examples/an-oper-2023-2023-2p5-6h-v1.yaml: -------------------------------------------------------------------------------- 1 | description: "Example for the anemoi documentation" 2 | name: an-oper-2023-2023-2p5-6h-v1 3 | licence: CC-BY-4.0 4 | attribution: ECMWF 5 | 6 | dates: 7 | start: 2023-01-01 00:00:00 8 | end: 2023-12-31 18:00:00 9 | frequency: 6h 10 | 11 | input: 12 | 13 | join: 14 | - mars: 15 | grid: [2.5, 2.5] 16 | levtype: sfc 17 | param: [10u, 10v, 2d, 2t, lsm, msl, sdor, skt, slor, sp, tcw, z] 18 | 19 | - mars: 20 | grid: [2.5, 2.5] 21 | levtype: pl 22 | param: [q, t, u, v, w, z] 23 | level: [50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000] 24 | 25 | - accumulations: 26 | grid: [2.5, 2.5] 27 | param: [cp, tp] 28 | levtype: sfc 29 | 30 | - constants: 31 | template: ${input.join.0.mars} 32 | param: 33 | - cos_latitude 34 | - cos_longitude 35 | - sin_latitude 36 | - sin_longitude 37 | - cos_julian_day 38 | - cos_local_time 39 | - sin_julian_day 40 | - sin_local_time 41 | - insolation 42 | -------------------------------------------------------------------------------- /tools/grids/Makefile: -------------------------------------------------------------------------------- 1 | YAML := $(wildcard *.yaml) 2 | 3 | TARGETS := $(YAML:.yaml=.zarr) 4 | 5 | all: $(TARGETS) 6 | 7 | %.zarr: %.yaml 8 | anemoi-datasets create $< $@ --overwrite 9 | 10 | 11 | clean:: 12 | rm -fr *.zarr 13 | 14 | .SUFFIXES: .zarr .yaml 15 | -------------------------------------------------------------------------------- /tools/grids/grids1.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | mars_request: &mars_request 3 | expver: "0001" 4 | grid: 1/1 5 | 6 | dates: 7 | start: 2024-01-01 00:00:00 8 | end: 2024-01-01 18:00:00 9 | frequency: 6h 10 | 11 | input: 12 | join: 13 | - mars: 14 | <<: *mars_request 15 | param: [2t, 10u, 10v, lsm] 16 | levtype: sfc 17 | stream: oper 18 | type: an 19 | - mars: 20 | <<: *mars_request 21 | param: [q, t, z] 22 | levtype: pl 23 | level: [50, 100] 24 | stream: oper 25 | type: an 26 | - accumulations: 27 | <<: *mars_request 28 | levtype: sfc 29 | param: [cp, tp] 30 | - forcings: 31 | template: ${input.join.0.mars} 32 | param: 33 | - cos_latitude 34 | - sin_latitude 35 | 36 | output: 37 | order_by: [valid_datetime, param_level, number] 38 | remapping: 39 | param_level: "{param}_{levelist}" 40 | statistics: param_level 41 | -------------------------------------------------------------------------------- /tools/grids/grids2.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | mars_request: &mars_request 3 | expver: "0001" 4 | grid: 0.5/0.5 5 | area: [28, 0, -14, 40] 6 | rotation: [-20, -40] 7 | 8 | dates: 9 | start: 2024-01-01 00:00:00 10 | end: 2024-01-01 18:00:00 11 | frequency: 6h 12 | 13 | input: 14 | join: 15 | - mars: 16 | <<: *mars_request 17 | param: [2t, 10u, 10v, lsm] 18 | levtype: sfc 19 | stream: oper 20 | type: an 21 | - mars: 22 | <<: *mars_request 23 | param: [q, t, z] 24 | levtype: pl 25 | level: [50, 100] 26 | stream: oper 27 | type: an 28 | - accumulations: 29 | <<: *mars_request 30 | levtype: sfc 31 | param: [cp, tp] 32 | - forcings: 33 | template: 
${input.join.0.mars} 34 | param: 35 | - cos_latitude 36 | - sin_latitude 37 | 38 | output: 39 | order_by: [valid_datetime, param_level, number] 40 | remapping: 41 | param_level: "{param}_{levelist}" 42 | statistics: param_level 43 | -------------------------------------------------------------------------------- /tools/grids/grids3.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | mars_request: &mars_request 3 | expver: "0001" 4 | grid: 0.25/0.25 5 | area: [40, 25, 20, 60] 6 | rotation: [-20, -40] 7 | 8 | dates: 9 | start: 2024-01-01 00:00:00 10 | end: 2024-01-01 18:00:00 11 | frequency: 6h 12 | 13 | input: 14 | join: 15 | - mars: 16 | <<: *mars_request 17 | param: [2t, 10u, 10v, lsm] 18 | levtype: sfc 19 | stream: oper 20 | type: an 21 | - mars: 22 | <<: *mars_request 23 | param: [q, t, z] 24 | levtype: pl 25 | level: [50, 100] 26 | stream: oper 27 | type: an 28 | - accumulations: 29 | <<: *mars_request 30 | levtype: sfc 31 | param: [cp, tp] 32 | - forcings: 33 | template: ${input.join.0.mars} 34 | param: 35 | - cos_latitude 36 | - sin_latitude 37 | 38 | output: 39 | order_by: [valid_datetime, param_level, number] 40 | remapping: 41 | param_level: "{param}_{levelist}" 42 | statistics: param_level 43 | -------------------------------------------------------------------------------- /tools/grids/grids4.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | mars_request: &mars_request 3 | expver: "0001" 4 | grid: 0.5/0.5 5 | area: [30, 90, 10, 120] 6 | 7 | dates: 8 | start: 2024-01-01 00:00:00 9 | end: 2024-01-01 18:00:00 10 | frequency: 6h 11 | 12 | input: 13 | join: 14 | - mars: 15 | <<: *mars_request 16 | param: [2t, 10u, 10v, lsm] 17 | levtype: sfc 18 | stream: oper 19 | type: an 20 | - mars: 21 | <<: *mars_request 22 | param: [q, t, z] 23 | levtype: pl 24 | level: [50, 100] 25 | stream: oper 26 | type: an 27 | - accumulations: 28 | <<: *mars_request 29 | levtype: sfc 30 | param: [cp, tp] 31 | - forcings: 32 | template: ${input.join.0.mars} 33 | param: 34 | - cos_latitude 35 | - sin_latitude 36 | 37 | output: 38 | order_by: [valid_datetime, param_level, number] 39 | remapping: 40 | param_level: "{param}_{levelist}" 41 | statistics: param_level 42 | -------------------------------------------------------------------------------- /tools/grids/grids5.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | mars_request: &mars_request 3 | expver: "0001" 4 | grid: 0.2/0.2 5 | area: [25, 100, 20, 105] 6 | 7 | dates: 8 | start: 2024-01-01 00:00:00 9 | end: 2024-01-01 18:00:00 10 | frequency: 6h 11 | 12 | input: 13 | join: 14 | - mars: 15 | <<: *mars_request 16 | param: [2t, 10u, 10v, lsm] 17 | levtype: sfc 18 | stream: oper 19 | type: an 20 | - mars: 21 | <<: *mars_request 22 | param: [q, t, z] 23 | levtype: pl 24 | level: [50, 100] 25 | stream: oper 26 | type: an 27 | - accumulations: 28 | <<: *mars_request 29 | levtype: sfc 30 | param: [cp, tp] 31 | - forcings: 32 | template: ${input.join.0.mars} 33 | param: 34 | - cos_latitude 35 | - sin_latitude 36 | 37 | output: 38 | order_by: [valid_datetime, param_level, number] 39 | remapping: 40 | param_level: "{param}_{levelist}" 41 | statistics: param_level 42 | -------------------------------------------------------------------------------- /tools/grids/grids6.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | mars_request: &mars_request 3 | expver: 
"0001" 4 | grid: 10/10 5 | area: [90, -40, -40, 180] 6 | 7 | dates: 8 | start: 2024-01-01 00:00:00 9 | end: 2024-01-01 18:00:00 10 | frequency: 6h 11 | 12 | input: 13 | join: 14 | - mars: 15 | <<: *mars_request 16 | param: [2t, 10u, 10v, lsm] 17 | levtype: sfc 18 | stream: oper 19 | type: an 20 | - mars: 21 | <<: *mars_request 22 | param: [q, t, z] 23 | levtype: pl 24 | level: [50, 100] 25 | stream: oper 26 | type: an 27 | - accumulations: 28 | <<: *mars_request 29 | levtype: sfc 30 | param: [cp, tp] 31 | - forcings: 32 | template: ${input.join.0.mars} 33 | param: 34 | - cos_latitude 35 | - sin_latitude 36 | 37 | output: 38 | order_by: [valid_datetime, param_level, number] 39 | remapping: 40 | param_level: "{param}_{levelist}" 41 | statistics: param_level 42 | -------------------------------------------------------------------------------- /tools/grids/grids7.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | mars_request: &mars_request 3 | expver: "0001" 4 | grid: 2/2 5 | area: [90, -40, -40, 180] 6 | 7 | dates: 8 | start: 2024-01-01 00:00:00 9 | end: 2024-01-01 18:00:00 10 | frequency: 6h 11 | 12 | input: 13 | join: 14 | - mars: 15 | <<: *mars_request 16 | param: [2t, 10u, 10v, lsm] 17 | levtype: sfc 18 | stream: oper 19 | type: an 20 | - mars: 21 | <<: *mars_request 22 | param: [q, t, z] 23 | levtype: pl 24 | level: [50, 100] 25 | stream: oper 26 | type: an 27 | - accumulations: 28 | <<: *mars_request 29 | levtype: sfc 30 | param: [cp, tp] 31 | - forcings: 32 | template: ${input.join.0.mars} 33 | param: 34 | - cos_latitude 35 | - sin_latitude 36 | 37 | output: 38 | order_by: [valid_datetime, param_level, number] 39 | remapping: 40 | param_level: "{param}_{levelist}" 41 | statistics: param_level 42 | -------------------------------------------------------------------------------- /tools/make-sample-dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # (C) Copyright 2024 Anemoi contributors. 3 | # 4 | # This software is licensed under the terms of the Apache Licence Version 2.0 5 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 6 | # 7 | # In applying this licence, ECMWF does not waive the privileges and immunities 8 | # granted to it by virtue of its status as an intergovernmental organisation 9 | # nor does it submit to any jurisdiction. 10 | 11 | 12 | import argparse 13 | import os 14 | import shutil 15 | 16 | import xarray as xr 17 | 18 | parser = argparse.ArgumentParser(description="Create a sample dataset") 19 | parser.add_argument("input", type=str, help="Input file name") 20 | parser.add_argument("output", type=str, help="Output file name") 21 | args = parser.parse_args() 22 | 23 | if os.path.exists(args.output): 24 | if os.path.isdir(args.output): 25 | shutil.rmtree(args.output) 26 | else: 27 | os.unlink(args.output) 28 | 29 | if args.input.endswith(".zarr"): 30 | ds = xr.open_zarr(args.input) 31 | else: 32 | ds = xr.open_dataset(args.input) 33 | 34 | if args.output.endswith(".zarr"): 35 | ds.to_zarr(args.output, consolidated=True) 36 | else: 37 | ds.to_netcdf(args.output) 38 | -------------------------------------------------------------------------------- /tools/upload-sample-dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # (C) Copyright 2024 Anemoi contributors. 
3 | # 4 | # This software is licensed under the terms of the Apache Licence Version 2.0 5 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 6 | # 7 | # In applying this licence, ECMWF does not waive the privileges and immunities 8 | # granted to it by virtue of its status as an intergovernmental organisation 9 | # nor does it submit to any jurisdiction. 10 | 11 | 12 | import argparse 13 | import logging 14 | import os 15 | 16 | from anemoi.utils.remote import transfer 17 | 18 | LOG = logging.getLogger(__name__) 19 | 20 | logging.basicConfig(level=logging.INFO) 21 | 22 | parser = argparse.ArgumentParser(description="Upload sample dataset to S3") 23 | parser.add_argument("--bucket", type=str, help="S3 target path", default="s3://ml-tests/test-data/") 24 | parser.add_argument("source", type=str, help="Path to the sample dataset") 25 | parser.add_argument("target", type=str, help="Target path, relative to the bucket or a full s3:// URL") 26 | parser.add_argument("--overwrite", action="store_true", help="Overwrite existing data") 27 | 28 | args = parser.parse_args() 29 | 30 | source = args.source 31 | target = args.target 32 | bucket = args.bucket 33 | 34 | assert os.path.exists(source), f"Source {source} does not exist" 35 | 36 | if not target.startswith("s3://"): 37 | if target.startswith("/"): 38 | target = target[1:] 39 | if bucket.endswith("/"): 40 | bucket = bucket[:-1] 41 | target = os.path.join(bucket, target) 42 | 43 | LOG.info(f"Uploading {source} to {target}") 44 | transfer(source, target, overwrite=args.overwrite) 45 | LOG.info("Upload complete") 46 | --------------------------------------------------------------------------------
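
A minimal usage sketch, not a file from this repository: it assumes a dataset has first been built from one of the test recipes above, e.g. `anemoi-datasets create tests/create/join.yaml join.zarr` (the invocation used by tools/examples/Makefile and tests/create/run.sh), and then opens it with `open_dataset`, the same entry point used by tools/build-obs.py. Only attributes exercised elsewhere in this tree are used (`dates`, `variables`, `latitudes`, `statistics`, integer indexing as in sources/anemoi_dataset.py); the file name `join.zarr` is an assumption.

# Sketch only: assumes `join.zarr` was created from tests/create/join.yaml.
from anemoi.datasets import open_dataset

ds = open_dataset("join.zarr")

# Dates, variables and grid, as read by sources/anemoi_dataset.py above.
print(f"{len(ds)} samples from {ds.dates[0]} to {ds.dates[-1]}")
print(f"variables: {ds.variables}")
print(f"grid points: {len(ds.latitudes)}")

# Each item is indexed as (variable, ensemble member, grid point), matching
# the nested loops over ds[idx] in sources/anemoi_dataset.py.
print(f"first sample shape: {ds[0].shape}")

# Per-variable statistics, computed by the `statistics` step in run.sh and
# copied verbatim by tools/build-obs.py.
print(f"means: {ds.statistics['mean']}")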