├── .gitattributes ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ └── bug_report.md ├── ci-hpc-config.yml ├── dependabot.yml ├── labeler.yml ├── pull_request_template.md └── workflows │ ├── downstream-ci-hpc.yml │ ├── pr-conventional-commit.yml │ ├── pr-label-conventional-commits.yml │ ├── pr-label-file-based.yml │ ├── pr-label-public.yml │ ├── push-to-private.yml │ ├── python-publish.yml │ ├── python-pull-request.yml │ ├── readthedocs-pr-update.yml │ └── release-please.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── .release-please-config.json ├── .release-please-manifest.json ├── .vscode └── spellright.dict ├── 03-constant-fields.rst ├── CHANGELOG.md ├── CONTRIBUTORS.md ├── LICENSE ├── README.md ├── docs ├── Makefile ├── _static │ ├── 2t_map_example.png │ ├── area-1.png │ ├── concat.png │ ├── cutout-1.png │ ├── cutout-2.png │ ├── cutout-3.png │ ├── cutout-4.png │ ├── cutout-5.png │ ├── cutout-6.png │ ├── join.png │ ├── logo.png │ ├── overlay.png │ ├── schemas │ │ ├── matrix.excalidraw │ │ ├── matrix.png │ │ ├── overview.excalidraw │ │ ├── overview.png │ │ ├── recipe.excalidraw │ │ └── recipe.png │ ├── skip-missing.png │ ├── style.css │ ├── thinning-after.png │ └── thinning-before.png ├── _templates │ ├── .gitkeep │ └── apidoc │ │ └── package.rst.jinja ├── adr │ └── adr-1.md ├── apply-fmt.sh ├── check-index.sh ├── cli │ ├── compare-lam.rst │ ├── compare.rst │ ├── copy.rst │ ├── create.rst │ ├── grib-index.rst │ ├── inspect.rst │ ├── introduction.rst │ ├── patch.rst │ └── scan.rst ├── conf.py ├── datasets │ ├── building │ │ ├── advanced-options.rst │ │ ├── filters.rst │ │ ├── filters │ │ │ ├── empty.rst │ │ │ ├── noop.rst │ │ │ ├── orog_to_z.rst │ │ │ ├── regrid.rst │ │ │ ├── rename.rst │ │ │ ├── rotate_winds.rst │ │ │ ├── select.rst │ │ │ ├── sum.rst │ │ │ ├── unrotate_winds.rst │ │ │ ├── wz_to_w.rst │ │ │ └── yaml │ │ │ │ ├── orog_to_z.yaml │ │ │ │ ├── regrid1.yaml │ │ │ │ ├── regrid2.yaml │ │ │ │ ├── rename.yaml │ │ │ │ ├── sum.yaml │ │ │ │ └── wz_to_w.yaml │ │ ├── handling-missing-dates.rst │ │ ├── handling-missing-values.rst │ │ ├── incremental.rst │ │ ├── introduction.rst │ │ ├── naming-conventions.rst │ │ ├── naming-variables.rst │ │ ├── operations.rst │ │ ├── sources.rst │ │ ├── sources │ │ │ ├── accumulations.rst │ │ │ ├── anemoi-dataset.rst │ │ │ ├── cds.rst │ │ │ ├── eccc-fstd.rst │ │ │ ├── forcings.rst │ │ │ ├── grib-index.rst │ │ │ ├── grib.rst │ │ │ ├── hindcasts.rst │ │ │ ├── mars.rst │ │ │ ├── netcdf.rst │ │ │ ├── opendap.rst │ │ │ ├── recentre.rst │ │ │ ├── repeated-dates.rst │ │ │ ├── xarray-based.rst │ │ │ ├── xarray-kerchunk.py │ │ │ ├── xarray-kerchunk.rst │ │ │ ├── xarray-zarr.rst │ │ │ ├── yaml │ │ │ │ ├── accumulations1.yaml │ │ │ │ ├── accumulations2.yaml │ │ │ │ ├── anemoi-dataset.yaml │ │ │ │ ├── eccc-fstd.yaml │ │ │ │ ├── forcings.yaml │ │ │ │ ├── grib1.yaml │ │ │ │ ├── grib2.yaml │ │ │ │ ├── grib3.yaml │ │ │ │ ├── grib4.yaml │ │ │ │ ├── hindcasts.yaml │ │ │ │ ├── mars-cds.yaml │ │ │ │ ├── mars1.yaml │ │ │ │ ├── mars2.yaml │ │ │ │ ├── netcdf.yaml │ │ │ │ ├── opendap.yaml │ │ │ │ ├── recentre.yaml │ │ │ │ ├── repeated-dates1.yaml │ │ │ │ ├── repeated-dates2.yaml │ │ │ │ ├── repeated-dates3.yaml │ │ │ │ ├── repeated-dates4.yaml │ │ │ │ ├── xarray-based.yaml │ │ │ │ ├── xarray-kerchunk.yaml │ │ │ │ ├── xarray-zarr.yaml │ │ │ │ └── zenodo.yaml │ │ │ └── zenodo.rst │ │ ├── statistics.rst │ │ └── syntax.yaml │ ├── introduction.rst │ ├── using │ │ ├── combining.rst │ │ ├── configuration.rst │ │ ├── configuration.toml │ │ ├── ensembles.rst │ │ 
├── grids.rst │ │ ├── introduction.rst │ │ ├── matching.rst │ │ ├── methods.rst │ │ ├── miscellaneous.rst │ │ ├── missing.rst │ │ ├── opening.rst │ │ ├── other.rst │ │ ├── selecting.rst │ │ ├── statistics.rst │ │ ├── subsetting.rst │ │ └── zip.rst │ └── yaml │ │ ├── Makefile │ │ ├── building1.txt │ │ ├── building1.yaml │ │ ├── building2.txt │ │ ├── building2.yaml │ │ ├── building3.txt │ │ ├── building3.yaml │ │ ├── concat.yaml │ │ ├── hindcasts.yaml │ │ ├── input.yaml │ │ ├── missing_dates.yaml │ │ ├── nan.yaml │ │ └── pipe.yaml ├── dev │ └── contributing.rst ├── howtos │ ├── create │ │ ├── 01-grib-data.rst │ │ ├── 02-cf-data.rst │ │ ├── 03-constant-fields.rst │ │ └── yaml │ │ │ ├── grib-flavour1.yaml │ │ │ ├── grib-flavour2.yaml │ │ │ ├── grib-flavour3.yaml │ │ │ ├── grib-flavour4.yaml │ │ │ ├── grib-recipe1.yaml │ │ │ ├── grib-recipe2.yaml │ │ │ ├── grib-recipe3.yaml │ │ │ ├── grib-recipe4.yaml │ │ │ ├── grib-recipe5.yaml │ │ │ ├── netcdf1.yaml │ │ │ ├── opendap1.yaml │ │ │ ├── xarray-flavour1.yaml │ │ │ ├── xarray-patch1.yaml │ │ │ ├── xarray-patch2.yaml │ │ │ └── zarr1.yaml │ ├── introduction.rst │ └── usage │ │ ├── 01-interpolate-step-dataset-combination.rst │ │ ├── 02-coutout-complement-combination.rst │ │ ├── code │ │ ├── cutout-complement1.py │ │ ├── interpolate1.py │ │ └── interpolate2.py │ │ └── yaml │ │ ├── cutout-complement1.yaml │ │ ├── interpolate1.yaml │ │ └── interpolate2.yaml ├── index.rst ├── installing.rst ├── modules │ ├── dataset.rst │ ├── filters.rst │ └── sources.rst ├── overview.rst ├── pptx │ └── images.pptx ├── scripts │ └── api_build.sh ├── usage │ ├── getting_started.rst │ └── yaml │ │ └── aifs-ea-an-oper-0001-mars-o48-2020-2021-6h-v1.yaml └── using │ └── code │ └── trimedge1_.py ├── pyproject.toml ├── src └── anemoi │ └── datasets │ ├── __init__.py │ ├── __main__.py │ ├── check.py │ ├── commands │ ├── __init__.py │ ├── check.py │ ├── cleanup.py │ ├── compare-lam.py │ ├── compare.py │ ├── copy.py │ ├── create.py │ ├── finalise-additions.py │ ├── finalise.py │ ├── grib-index.py │ ├── init-additions.py │ ├── init.py │ ├── inspect.py │ ├── load-additions.py │ ├── load.py │ ├── patch.py │ ├── publish.py │ └── scan.py │ ├── compute │ ├── __init__.py │ └── recentre.py │ ├── create │ ├── __init__.py │ ├── check.py │ ├── chunks.py │ ├── config.py │ ├── filter.py │ ├── filters │ │ ├── __init__.py │ │ ├── empty.py │ │ ├── legacy.py │ │ ├── noop.py │ │ ├── orog_to_z.py │ │ ├── pressure_level_relative_humidity_to_specific_humidity.py │ │ ├── pressure_level_specific_humidity_to_relative_humidity.py │ │ ├── rename.py │ │ ├── rotate_winds.py │ │ ├── single_level_dewpoint_to_relative_humidity.py │ │ ├── single_level_relative_humidity_to_dewpoint.py │ │ ├── single_level_relative_humidity_to_specific_humidity.py │ │ ├── single_level_specific_humidity_to_relative_humidity.py │ │ ├── speeddir_to_uv.py │ │ ├── sum.py │ │ ├── transform.py │ │ ├── unrotate_winds.py │ │ ├── uv_to_speeddir.py │ │ └── wz_to_w.py │ ├── input │ │ ├── __init__.py │ │ ├── action.py │ │ ├── concat.py │ │ ├── context.py │ │ ├── data_sources.py │ │ ├── empty.py │ │ ├── filter.py │ │ ├── function.py │ │ ├── join.py │ │ ├── misc.py │ │ ├── pipe.py │ │ ├── repeated_dates.py │ │ ├── result.py │ │ ├── step.py │ │ ├── template.py │ │ └── trace.py │ ├── patch.py │ ├── persistent.py │ ├── size.py │ ├── source.py │ ├── sources │ │ ├── __init__.py │ │ ├── accumulations.py │ │ ├── accumulations2.py │ │ ├── anemoi_dataset.py │ │ ├── constants.py │ │ ├── eccc_fstd.py │ │ ├── empty.py │ │ ├── forcings.py │ │ ├── grib.py │ │ 
├── grib_index.py │ │ ├── hindcasts.py │ │ ├── legacy.py │ │ ├── mars.py │ │ ├── netcdf.py │ │ ├── opendap.py │ │ ├── patterns.py │ │ ├── recentre.py │ │ ├── source.py │ │ ├── tendencies.py │ │ ├── xarray.py │ │ ├── xarray_kerchunk.py │ │ ├── xarray_support │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── coordinates.py │ │ │ ├── field.py │ │ │ ├── fieldlist.py │ │ │ ├── flavour.py │ │ │ ├── grid.py │ │ │ ├── metadata.py │ │ │ ├── patch.py │ │ │ ├── time.py │ │ │ └── variable.py │ │ ├── xarray_zarr.py │ │ └── zenodo.py │ ├── statistics │ │ ├── __init__.py │ │ └── summary.py │ ├── testing.py │ ├── typing.py │ ├── utils.py │ ├── writer.py │ └── zarr.py │ ├── data │ ├── __init__.py │ ├── complement.py │ ├── concat.py │ ├── dataset.py │ ├── debug.css │ ├── debug.py │ ├── ensemble.py │ ├── fill_missing.py │ ├── forwards.py │ ├── grids.py │ ├── indexing.py │ ├── interpolate.py │ ├── join.py │ ├── masked.py │ ├── merge.py │ ├── misc.py │ ├── missing.py │ ├── observations │ │ ├── __init__.py │ │ ├── legacy_obs_dataset.py │ │ └── multi.py │ ├── padded.py │ ├── records │ │ ├── __init__.py │ │ └── backends │ │ │ └── __init__.py │ ├── rescale.py │ ├── select.py │ ├── statistics.py │ ├── stores.py │ ├── subset.py │ ├── unchecked.py │ └── xy.py │ ├── dates │ ├── __init__.py │ └── groups.py │ ├── grids.py │ ├── testing.py │ └── utils │ └── __init__.py ├── tests ├── create-perturbations-full.yaml ├── create-shift.yaml ├── create │ ├── accumulation.yaml │ ├── concat.yaml │ ├── join.yaml │ ├── missing.yaml │ ├── nan.yaml │ ├── pipe.yaml │ ├── recentre.yaml │ ├── regrid.yaml │ ├── run.sh │ ├── test_create.py │ └── test_sources.py ├── test_chunks.py ├── test_data.py ├── test_data_gridded.py ├── test_dates.py ├── test_indexing.py ├── test_records.py └── xarray │ ├── test_netcdf.py │ ├── test_opendap.py │ ├── test_samples.py │ └── test_zarr.py └── tools ├── .gitignore ├── build-obs.py ├── check-obs.py ├── examples ├── Makefile └── an-oper-2023-2023-2p5-6h-v1.yaml ├── grids ├── Makefile ├── grids.ipynb ├── grids1.yaml ├── grids2.yaml ├── grids3.yaml ├── grids4.yaml ├── grids5.yaml ├── grids6.yaml ├── grids7.yaml └── grids_multilam.ipynb ├── make-sample-dataset.py └── upload-sample-dataset.py /.gitattributes: -------------------------------------------------------------------------------- 1 | CHANGELOG.md merge=union 2 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Workflows 2 | /.github/ @ecmwf/AnemoiSecurity 3 | 4 | # Project configs 5 | /pyproject.toml @ecmwf/AnemoiSecurity 6 | /.pre-commit-config.yaml @ecmwf/AnemoiSecurity 7 | /.release-please-config.json @ecmwf/AnemoiSecurity 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve. **❗ IMPORTANT:** If you have difficulties creating a dataset from any source, please provide us with a sample data file or a URL to the source data. 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | **Describe the bug** 10 | A clear and concise description of what the bug is. 11 | 12 | **Version number** 13 | I am using the following versions/branch/sha1 of the anemoi packages 14 | (alternatively the output of `pip freeze`) 15 | 16 | **To Reproduce** 17 | Steps to reproduce the behavior: 18 | 1.
Go to '...' 19 | 2. Run this '....' 20 | 3. See error 21 | 22 | **URL to sample input data** 23 | Provide a URL to sample input data, or attach a file to this report if it is small enough. 24 | 25 | **Expected behavior** 26 | A clear and concise description of what you expected to happen. 27 | 28 | **Screenshots** 29 | If applicable, add screenshots to help explain your problem. 30 | 31 | **Additional context** 32 | Add any other context about the problem here. 33 | -------------------------------------------------------------------------------- /.github/ci-hpc-config.yml: -------------------------------------------------------------------------------- 1 | build: 2 | python: '3.10' 3 | modules: 4 | - ninja 5 | python_dependencies: 6 | - ecmwf/anemoi-utils@develop 7 | parallel: 64 8 | pytest_cmd: | 9 | python -m pytest -vv -m 'not notebook and not no_cache_init' --cov=. --cov-report=xml 10 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "pip" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | # - package-ecosystem: "github-actions" 8 | # directory: "/" 9 | # schedule: 10 | # interval: "monthly" 11 | -------------------------------------------------------------------------------- /.github/labeler.yml: -------------------------------------------------------------------------------- 1 | # This is the configuration file for the labeler action. 2 | # It assigns labels to pull requests based on the files changed in the PR. 3 | # See more here: https://github.com/actions/labeler 4 | dependencies: 5 | - changed-files: 6 | - any-glob-to-any-file: 7 | - "**/requirements.txt" 8 | - "**/setup.py" 9 | - "**/pyproject.toml" 10 | - "**/Pipfile" 11 | - "**/Pipfile.lock" 12 | - "**/requirements/*.txt" 13 | - "**/requirements/*.in" 14 | 15 | documentation: 16 | - changed-files: 17 | - any-glob-to-any-file: 18 | - "**/docs/**/*" 19 | - "*.md" 20 | - "*.rst" 21 | 22 | config: 23 | - changed-files: 24 | - any-glob-to-any-file: 25 | - "**/src/**/config/**/*" 26 | - "**/src/anemoi/inference/config.py" 27 | 28 | CI/CD: 29 | - changed-files: 30 | - any-glob-to-any-file: 31 | - "**/.pre-commit-config.yaml" 32 | - ".github/**/*" 33 | - "tox.ini" 34 | - ".coveragerc" 35 | 36 | tests: 37 | - changed-files: 38 | - any-glob-to-any-file: 39 | - "**/tests/**/*" 40 | - "**/test/**/*" 41 | - "**/test_*.py" 42 | - "**/test.py" 43 | - "**/conftest.py" 44 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Description 2 | 3 | 4 | ## What problem does this change solve? 5 | 6 | 7 | ## What issue or task does this change relate to? 8 | 9 | 10 | ## Additional notes ## 11 | 12 | 13 | ***As a contributor to the Anemoi framework, please ensure that your changes include unit tests, updates to any affected dependencies and documentation, and have been tested in a parallel setting (i.e., with multiple GPUs). As a reviewer, you are also responsible for verifying these aspects and requesting changes if they are not adequately addressed.
For guidelines about those please refer to https://anemoi.readthedocs.io/en/latest/*** 14 | -------------------------------------------------------------------------------- /.github/workflows/downstream-ci-hpc.yml: -------------------------------------------------------------------------------- 1 | # This workflow triggers tests on dependent packages. 2 | # The dependency tree itself is defined in ecmwf/downstream-ci/ 3 | name: Test downstream dependent packages 4 | 5 | on: 6 | # Trigger the workflow on push to main or develop, except tag creation 7 | push: 8 | branches: 9 | - 'main' 10 | - 'develop' 11 | tags-ignore: 12 | - '**' 13 | paths-ignore: 14 | - "docs/**" 15 | - "CHANGELOG.md" 16 | - "README.md" 17 | 18 | # Trigger the workflow on pull request 19 | pull_request: 20 | paths-ignore: 21 | - "docs/**" 22 | - "CHANGELOG.md" 23 | - "README.md" 24 | 25 | # Trigger the workflow manually 26 | workflow_dispatch: ~ 27 | 28 | # Trigger after public PR approved for CI 29 | pull_request_target: 30 | types: [labeled] 31 | paths-ignore: 32 | - "docs/**" 33 | - "CHANGELOG.md" 34 | - "README.md" 35 | 36 | jobs: 37 | # Run CI including downstream packages on self-hosted runners 38 | downstream-ci: 39 | name: downstream-ci 40 | if: ${{ !github.event.pull_request.head.repo.fork && github.event.action != 'labeled' || github.event.label.name == 'approved-for-ci' }} 41 | uses: ecmwf/downstream-ci/.github/workflows/downstream-ci.yml@main 42 | with: 43 | anemoi-datasets: ecmwf/anemoi-datasets@${{ github.event.pull_request.head.sha || github.sha }} 44 | codecov_upload: true 45 | # Only run on fedora 46 | skip_matrix_jobs: | 47 | gnu@debian-11 48 | gnu@rocky-8.6 49 | clang@rocky-8.6 50 | gnu@ubuntu-22.04 51 | secrets: inherit 52 | 53 | # # Build downstream packages on HPC 54 | # downstream-ci-hpc: 55 | # name: downstream-ci-hpc 56 | # if: ${{ !github.event.pull_request.head.repo.fork && github.event.action != 'labeled' || github.event.label.name == 'approved-for-ci' }} 57 | # uses: ecmwf/downstream-ci/.github/workflows/downstream-ci-hpc.yml@main 58 | # with: 59 | # anemoi-datasets: ecmwf/anemoi-datasets@${{ github.event.pull_request.head.sha || github.sha }} 60 | # secrets: inherit 61 | -------------------------------------------------------------------------------- /.github/workflows/pr-conventional-commit.yml: -------------------------------------------------------------------------------- 1 | # This workflow ensures that the PR title follows the Conventional Commit format. 2 | name: "[PR] Ensure Conventional Commit Title" 3 | 4 | on: 5 | pull_request_target: 6 | types: 7 | - opened 8 | - edited 9 | - synchronize 10 | - reopened 11 | 12 | permissions: 13 | pull-requests: read 14 | 15 | jobs: 16 | main: 17 | name: Validate PR title 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: amannn/action-semantic-pull-request@v5 21 | env: 22 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 23 | -------------------------------------------------------------------------------- /.github/workflows/pr-label-conventional-commits.yml: -------------------------------------------------------------------------------- 1 | # This workflow assigns labels to a pull request based on the Conventional Commits format. 2 | # This is necessary for release-please to work properly. 
3 | name: "[PR] Label Conventional Commits" 4 | 5 | on: 6 | pull_request: 7 | branches: [main] 8 | types: 9 | [opened, reopened, labeled, unlabeled] 10 | 11 | permissions: 12 | pull-requests: write 13 | 14 | jobs: 15 | assign-labels: 16 | runs-on: ubuntu-latest 17 | name: Assign labels in pull request 18 | if: github.event.pull_request.merged == false 19 | steps: 20 | - uses: actions/checkout@v3 21 | - name: Assign labels from Conventional Commits 22 | id: action-assign-labels 23 | uses: mauroalderete/action-assign-labels@v1 24 | with: 25 | pull-request-number: ${{ github.event.pull_request.number }} 26 | github-token: ${{ secrets.GITHUB_TOKEN }} 27 | conventional-commits: | 28 | conventional-commits: 29 | - type: 'fix' 30 | nouns: ['FIX', 'Fix', 'fix', 'FIXED', 'Fixed', 'fixed'] 31 | labels: ['bug'] 32 | - type: 'feature' 33 | nouns: ['FEATURE', 'Feature', 'feature', 'FEAT', 'Feat', 'feat'] 34 | labels: ['enhancement'] 35 | - type: 'breaking_change' 36 | nouns: ['BREAKING CHANGE', 'BREAKING', 'MAJOR'] 37 | labels: ['breaking change'] 38 | - type: 'documentation' 39 | nouns: ['doc','docs','docu','document','documentation'] 40 | labels: ['documentation'] 41 | - type: 'build' 42 | nouns: ['build','rebuild','ci'] 43 | labels: ['CI/CD'] 44 | - type: 'config' 45 | nouns: ['config', 'conf', 'configuration'] 46 | labels: ['config'] 47 | maintain-labels-not-matched: true 48 | apply-changes: true 49 | -------------------------------------------------------------------------------- /.github/workflows/pr-label-file-based.yml: -------------------------------------------------------------------------------- 1 | # This workflow assigns labels to a pull request based on the files changed in the PR. 2 | # The labels are defined in the `.github/labels.yml` file. 3 | name: "[PR] Label File-based" 4 | on: 5 | pull_request_target: 6 | types: [opened, synchronize] 7 | 8 | permissions: 9 | contents: read 10 | pull-requests: write 11 | 12 | jobs: 13 | labeler: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Assign labels from file changes 17 | uses: actions/labeler@v5 18 | -------------------------------------------------------------------------------- /.github/workflows/pr-label-public.yml: -------------------------------------------------------------------------------- 1 | # Manage labels of pull requests that originate from forks 2 | name: "[PR] Label Forks" 3 | 4 | on: 5 | pull_request_target: 6 | types: [opened, synchronize] 7 | 8 | jobs: 9 | label: 10 | uses: ecmwf/reusable-workflows/.github/workflows/label-pr.yml@v2 11 | -------------------------------------------------------------------------------- /.github/workflows/push-to-private.yml: -------------------------------------------------------------------------------- 1 | # This workflow pushes changes from a public repository to a private repository. 
2 | name: Push to private repository 3 | 4 | on: 5 | push: 6 | branches: 7 | - main 8 | 9 | jobs: 10 | push_changes: 11 | if: ${{ !contains(github.repository, 'private') }} 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - name: Checkout source repository 16 | uses: actions/checkout@v3 17 | with: 18 | fetch-depth: 0 19 | fetch-tags: true 20 | 21 | - name: Set up Git configuration 22 | run: | 23 | git config user.name "github-actions[bot]" 24 | git config user.email "github-actions[bot]@users.noreply.github.com" 25 | 26 | - name: Setup SSH key 27 | uses: webfactory/ssh-agent@v0.5.0 28 | with: 29 | ssh-private-key: ${{ secrets.KEY_TO_PRIVATE }} 30 | 31 | - name: Push changes to private repository 32 | run: | 33 | git remote add private git@github.com:${{ github.repository }}-private.git 34 | git push --set-upstream private main 35 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package to PyPI 5 | 6 | on: 7 | release: 8 | types: [created] 9 | 10 | jobs: 11 | deploy: 12 | uses: ecmwf/reusable-workflows/.github/workflows/cd-pypi.yml@v2 13 | secrets: inherit 14 | -------------------------------------------------------------------------------- /.github/workflows/python-pull-request.yml: -------------------------------------------------------------------------------- 1 | # This workflow runs pre-commit checks and pytest tests against multiple platforms and Python versions. 2 | name: Code Quality and Testing 3 | 4 | on: 5 | pull_request: 6 | types: [opened, synchronize, reopened] 7 | push: 8 | branches: 9 | - main 10 | schedule: 11 | - cron: "9 2 * * 0" # at 9:02 on sunday 12 | 13 | jobs: 14 | quality: 15 | uses: ecmwf/reusable-workflows/.github/workflows/qa-precommit-run.yml@v2 16 | with: 17 | skip-hooks: "no-commit-to-branch" 18 | 19 | checks: 20 | strategy: 21 | matrix: 22 | python-version: ["3.9", "3.10", "3.11", "3.12"] 23 | uses: ecmwf/reusable-workflows/.github/workflows/qa-pytest-pyproject.yml@v2 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | -------------------------------------------------------------------------------- /.github/workflows/readthedocs-pr-update.yml: -------------------------------------------------------------------------------- 1 | # This workflow adds a link to the experimental documentation build to the PR. 2 | # This does NOT trigger a build of the documentation, this is handled through webhooks. 3 | name: "[PR] Read the Docs Preview" 4 | on: 5 | pull_request_target: 6 | types: 7 | - opened 8 | - synchronize 9 | - reopened 10 | # Execute this action only on PRs that touch 11 | # documentation files. 
12 | paths: 13 | - "docs/**" 14 | 15 | permissions: 16 | pull-requests: write 17 | 18 | jobs: 19 | documentation-links: 20 | runs-on: ubuntu-latest 21 | steps: 22 | - uses: readthedocs/actions/preview@v1 23 | with: 24 | project-slug: "anemoi-datasets" 25 | -------------------------------------------------------------------------------- /.github/workflows/release-please.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses an action to run Release Please to create a release PR. 2 | # It is governed by the config and manifest in the root of the repo. 3 | # For more information see: https://github.com/googleapis/release-please 4 | name: Run Release Please 5 | on: 6 | push: 7 | branches: 8 | - main 9 | - hotfix/* 10 | 11 | permissions: 12 | contents: write 13 | pull-requests: write 14 | 15 | jobs: 16 | release-please: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: googleapis/release-please-action@v4 20 | with: 21 | # this assumes that you have created a personal access token 22 | # (PAT) and configured it as a GitHub action secret named 23 | # `MY_RELEASE_PLEASE_TOKEN` (this secret name is not important). 24 | token: ${{ secrets.RELEASE_PLEASE_TOKEN }} 25 | # optional. customize path to .release-please-config.json 26 | config-file: .release-please-config.json 27 | # Currently releases are done from main 28 | target-branch: ${{ github.ref_name }} 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.so 6 | .Python 7 | __pypackages__/ 8 | 9 | # Distribution / packaging 10 | build/ 11 | develop-eggs/ 12 | dist/ 13 | downloads/ 14 | eggs/ 15 | .eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | wheels/ 22 | share/python-wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # Testing and coverage 29 | htmlcov/ 30 | .tox/ 31 | .nox/ 32 | .coverage 33 | .coverage.* 34 | .cache 35 | nosetests.xml 36 | coverage.xml 37 | *.cover 38 | *.py,cover 39 | .hypothesis/ 40 | .pytest_cache/ 41 | cover/ 42 | 43 | # Documentation 44 | docs/_build/ 45 | docs/_api/ 46 | /site 47 | *.mo 48 | *.pot 49 | 50 | # Environments 51 | .env 52 | .envrc 53 | .venv 54 | env/ 55 | venv/ 56 | ENV/ 57 | env.bak/ 58 | venv.bak/ 59 | 60 | # IDEs 61 | .idea/ 62 | .spyderproject 63 | .spyproject 64 | .ropeproject 65 | .vscode/ 66 | *.code-workspace 67 | *.sublime-project 68 | *.sublime-workspace 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # Type checking 74 | .mypy_cache/ 75 | .dmypy.json 76 | dmypy.json 77 | .pyre/ 78 | .pytype/ 79 | 80 | # Data files 81 | *.grib 82 | *.grib1 83 | *.grib2 84 | *.onnx 85 | *.ckpt 86 | *.npy 87 | *.npz 88 | *.zarr/ 89 | *.nc 90 | *.h5 91 | *.hdf5 92 | *.pkl 93 | *.parquet 94 | *.csv 95 | *.xlsx 96 | *.xls 97 | *.json 98 | *.txt 99 | *.zip 100 | *.db 101 | *.tgz 102 | 103 | # ML artifacts 104 | wandb/ 105 | mlruns/ 106 | lightning_logs/ 107 | *.ckpt 108 | *.pt 109 | *.pth 110 | runs/ 111 | checkpoints/ 112 | 113 | # Temporary and system files 114 | *.swp 115 | *.download 116 | *.out 117 | *.sync 118 | *.dot 119 | *.tmp 120 | *.log 121 | *.log.* 122 | .DS_Store 123 | ~* 124 | tmp/ 125 | temp/ 126 | logs/ 127 | _dev/ 128 | _api/ 129 | ./outputs 130 | *tmp_data/ 131 | 132 | # Project specific 133 | ? 
134 | ?.* 135 | foo 136 | bar 137 | ~$images.pptx 138 | test.py 139 | test.ipynb 140 | _version.py 141 | *.to_upload 142 | tempCodeRunnerFile.python 143 | Untitled-*.py 144 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.11" 7 | # jobs: 8 | # pre_build: 9 | # - bash docs/scripts/api_build.sh 10 | 11 | sphinx: 12 | configuration: docs/conf.py 13 | 14 | python: 15 | install: 16 | - method: pip 17 | path: . 18 | extra_requirements: 19 | - docs 20 | -------------------------------------------------------------------------------- /.release-please-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "release-type": "python", 3 | "bump-minor-pre-major": true, 4 | "bump-patch-for-minor-pre-major": true, 5 | "separate-pull-requests": true, 6 | "always-update": true, 7 | "changelog-type": "default", 8 | "include-component-in-tag": false, 9 | "include-v-in-tag": false, 10 | "draft-pull-request": true, 11 | "pull-request-title-pattern": "chore${scope}: Release${component} ${version}", 12 | "pull-request-header": ":robot: Automated Release PR\n\nThis PR was created by `release-please` to prepare the next release. Once merged:\n\n1. A new version tag will be created\n2. A GitHub release will be published\n3. The changelog will be updated\n\nChanges to be included in the next release:", 13 | "pull-request-footer": "> [!IMPORTANT]\n> Please do not change the PR title, manifest file, or any other automatically generated content in this PR unless you understand the implications. Changes here can break the release process.\n> :warning: Merging this PR will:\n> - Create a new release\n> - Trigger deployment pipelines\n> - Update package versions\n\n **Before merging:**\n - Ensure all tests pass\n - Review the changelog carefully\n - Get required approvals\n\n [Release-please documentation](https://github.com/googleapis/release-please)", 14 | "packages": { 15 | ".": { 16 | "package-name": "anemoi-datasets" 17 | } 18 | }, 19 | "plugins": [ 20 | { 21 | "type": "sentence-case" 22 | } 23 | ], 24 | "$schema": "https://raw.githubusercontent.com/googleapis/release-please/main/schemas/config.json" 25 | } 26 | -------------------------------------------------------------------------------- /.release-please-manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | ".": "0.5.24" 3 | } 4 | -------------------------------------------------------------------------------- /.vscode/spellright.dict: -------------------------------------------------------------------------------- 1 | Anemoi 2 | zarr 3 | literalinclude 4 | -------------------------------------------------------------------------------- /03-constant-fields.rst: -------------------------------------------------------------------------------- 1 | ######################## 2 | Adding constant fields 3 | ######################## 4 | -------------------------------------------------------------------------------- /CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | ## How to Contribute 2 | 3 | Please see the [read the docs](https://anemoi.readthedocs.io/en/latest/dev/contributing.html). 4 | 5 | 6 | ## Contributors 7 | 8 | Thank you to all the wonderful people who have contributed to Anemoi. 
Contributions can come in many forms, including code, documentation, bug reports, feature suggestions, design, and more. A list of code-based contributors can be found [here](https://github.com/ecmwf/anemoi-datasets/graphs/contributors). 9 | 10 | 11 | ## Contributing Organisations 12 | 13 | Significant contributions have been made by the following organisations: [DWD](https://www.dwd.de/), [FMI](https://www.ilmatieteenlaitos.fi/), [KNMI](https://www.knmi.nl), [MET Norway](https://www.met.no/), [MeteoSwiss](https://www.meteoswiss.admin.ch/), [RMI](https://www.meteo.be/) & [ECMWF](https://www.ecmwf.int/) 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # anemoi-datasets 2 | 3 | **DISCLAIMER** 4 | This project is **BETA** and will be **Experimental** for the foreseeable future. 5 | Interfaces and functionality are likely to change, and the project itself may be scrapped. 6 | **DO NOT** use this software in any project/software that is operational. 7 | 8 | 9 | 10 | ## Documentation 11 | 12 | The documentation can be found at https://anemoi-datasets.readthedocs.io/. 13 | 14 | ## Install 15 | 16 | Install via `pip` with: 17 | 18 | ``` 19 | $ pip install anemoi-datasets 20 | ``` 21 | 22 | ## License 23 | 24 | ``` 25 | Copyright 2024-2025, Anemoi Contributors. 26 | 27 | Licensed under the Apache License, Version 2.0 (the "License"); 28 | you may not use this file except in compliance with the License. 29 | You may obtain a copy of the License at 30 | 31 | http://www.apache.org/licenses/LICENSE-2.0 32 | 33 | Unless required by applicable law or agreed to in writing, software 34 | distributed under the License is distributed on an "AS IS" BASIS, 35 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 36 | See the License for the specific language governing permissions and 37 | limitations under the License. 38 | 39 | In applying this licence, ECMWF does not waive the privileges and immunities 40 | granted to it by virtue of its status as an intergovernmental organisation 41 | nor does it submit to any jurisdiction. 42 | ``` 43 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env make -f 2 | 3 | # Minimal makefile for Sphinx documentation 4 | # 5 | 6 | # You can set these variables from the command line, and also 7 | # from the environment for the first two. 8 | SPHINXOPTS ?= 9 | SPHINXBUILD ?= sphinx-build 10 | SOURCEDIR = . 11 | BUILDDIR = _build 12 | 13 | # Put it first so that "make" without argument is like "make help". 14 | help: 15 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 16 | 17 | .PHONY: help Makefile 18 | 19 | # Catch-all target: route all unknown targets to Sphinx using the new 20 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
21 | %: Makefile 22 | bash $(SOURCEDIR)/scripts/api_build.sh 23 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 24 | -------------------------------------------------------------------------------- /docs/_static/2t_map_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/2t_map_example.png -------------------------------------------------------------------------------- /docs/_static/area-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/area-1.png -------------------------------------------------------------------------------- /docs/_static/concat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/concat.png -------------------------------------------------------------------------------- /docs/_static/cutout-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/cutout-1.png -------------------------------------------------------------------------------- /docs/_static/cutout-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/cutout-2.png -------------------------------------------------------------------------------- /docs/_static/cutout-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/cutout-3.png -------------------------------------------------------------------------------- /docs/_static/cutout-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/cutout-4.png -------------------------------------------------------------------------------- /docs/_static/cutout-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/cutout-5.png -------------------------------------------------------------------------------- /docs/_static/cutout-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/cutout-6.png -------------------------------------------------------------------------------- /docs/_static/join.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/join.png -------------------------------------------------------------------------------- /docs/_static/logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/logo.png -------------------------------------------------------------------------------- /docs/_static/overlay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/overlay.png -------------------------------------------------------------------------------- /docs/_static/schemas/matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/schemas/matrix.png -------------------------------------------------------------------------------- /docs/_static/schemas/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/schemas/overview.png -------------------------------------------------------------------------------- /docs/_static/schemas/recipe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/schemas/recipe.png -------------------------------------------------------------------------------- /docs/_static/skip-missing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/skip-missing.png -------------------------------------------------------------------------------- /docs/_static/style.css: -------------------------------------------------------------------------------- 1 | .wy-side-nav-search { 2 | background-color: #f7f7f7; 3 | } 4 | 5 | /*There is a clash between xarray notebook styles and readthedoc*/ 6 | 7 | .rst-content dl.xr-attrs dt { 8 | all: revert; 9 | font-size: 95%; 10 | white-space: nowrap; 11 | } 12 | 13 | .rst-content dl.xr-attrs dd { 14 | font-size: 95%; 15 | } 16 | 17 | .xr-wrap { 18 | font-size: 85%; 19 | } 20 | 21 | .wy-table-responsive table td, .wy-table-responsive table th { 22 | white-space: inherit; 23 | } 24 | 25 | /* 26 | .wy-table-responsive table td, 27 | .wy-table-responsive table th { 28 | white-space: normal !important; 29 | vertical-align: top !important; 30 | } 31 | 32 | .wy-table-responsive { 33 | margin-bottom: 24px; 34 | max-width: 100%; 35 | overflow: visible; 36 | } */ 37 | 38 | /* Hide notebooks warnings */ 39 | .nboutput .stderr { 40 | display: none; 41 | } 42 | 43 | /* 44 | Set logo size 45 | */ 46 | .wy-side-nav-search .wy-dropdown > a img.logo, .wy-side-nav-search > a img.logo { 47 | width: 200px; 48 | } 49 | -------------------------------------------------------------------------------- /docs/_static/thinning-after.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/thinning-after.png -------------------------------------------------------------------------------- /docs/_static/thinning-before.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_static/thinning-before.png -------------------------------------------------------------------------------- /docs/_templates/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/_templates/.gitkeep -------------------------------------------------------------------------------- /docs/_templates/apidoc/package.rst.jinja: -------------------------------------------------------------------------------- 1 | {%- macro automodule(modname, options) -%} 2 | .. automodule:: {{ modname }} 3 | {%- for option in options %} 4 | :{{ option }}: 5 | {%- endfor %} 6 | {%- endmacro %} 7 | 8 | {%- macro toctree(docnames) -%} 9 | .. toctree:: 10 | :maxdepth: {{ maxdepth }} 11 | {% for docname in docnames %} 12 | {{ docname }} 13 | {%- endfor %} 14 | {%- endmacro %} 15 | 16 | {%- if is_namespace %} 17 | {{- pkgname.split(".")[1:] | join(".") | e | heading }} 18 | {% else %} 19 | {{- pkgname.split(".")[1:] | join(" ") | e | heading }} 20 | {% endif %} 21 | 22 | {%- if is_namespace %} 23 | .. py:module:: {{ pkgname }} 24 | {% endif %} 25 | 26 | {%- if modulefirst and not is_namespace %} 27 | {{ automodule(["anemoi", pkgname] | join("."), [""]) }} 28 | {% endif %} 29 | 30 | {%- if subpackages %} 31 | Subpackages 32 | ----------- 33 | 34 | {{ toctree(subpackages) }} 35 | {% endif %} 36 | 37 | {%- if submodules %} 38 | {% if separatemodules %} 39 | {{ toctree(submodules) }} 40 | {% else %} 41 | {%- for submodule in submodules %} 42 | {% if show_headings %} 43 | {{- submodule.split(".")[2:] | join(".") | e | heading(2) }} 44 | {% endif %} 45 | {{ automodule(["anemoi", submodule] | join("."), automodule_options) }} 46 | {% endfor %} 47 | {%- endif %} 48 | {%- endif %} 49 | 50 | {%- if not modulefirst and not is_namespace %} 51 | Module contents 52 | --------------- 53 | 54 | {{ automodule(pkgname, automodule_options) }} 55 | {% endif %} 56 | -------------------------------------------------------------------------------- /docs/apply-fmt.sh: -------------------------------------------------------------------------------- 1 | : 2 | for n in $(find . -name '*.rst') 3 | do 4 | rstfmt $n 5 | done 6 | -------------------------------------------------------------------------------- /docs/check-index.sh: -------------------------------------------------------------------------------- 1 | : 2 | # See https://github.com/vscode-restructuredtext/vscode-restructuredtext/issues/280 3 | for n in $(find . -name '*.rst') 4 | do 5 | m=$(echo $n | sed 's/\.rst//' | sed 's,^\./,,') 6 | egrep ":doc:.$m" index.rst > /dev/null || echo $m 7 | done 8 | -------------------------------------------------------------------------------- /docs/cli/compare-lam.rst: -------------------------------------------------------------------------------- 1 | .. _compare_lam_command: 2 | 3 | Compare-LAM Command 4 | =================== 5 | 6 | Compare statistics of two datasets. 7 | This command compares the statistics of each variable in two datasets **only in the overlapping area** between the two. 8 | 9 | Example use cases: 10 | ------------------ 11 | - **Stretched Grid** 12 | - **Boundary LAM** 13 | 14 | In both cases, it is necessary to check the alignment between the variables of the local dataset and those of the global dataset. 
15 | Both datasets will coexist on the same grid, and statistical coherence is essential for training stability. 16 | 17 | The `compare-lam` command outputs a table comparing dataset statistics in **HTML format**. 18 | Additionally, a plot of the dataset grids can be displayed and saved if requested. 19 | 20 | Usage: 21 | ****** 22 | .. code:: console 23 | 24 | $ anemoi-datasets compare-lam dataset1 dataset2 -D num_dates -O outpath -R round_ndigits --selected-vars var1 var2 ... [--save-plots] 25 | 26 | Arguments: 27 | ---------- 28 | 29 | - **dataset1**: Path to the first dataset (the global dataset). 30 | - **dataset2**: Path to the second dataset (the LAM dataset). 31 | - **-D, --num-of-dates**: Number of time steps (datapoints) to compare. *(default: 10)* 32 | - **-O, --outpath**: Path to store the output table (and optional plots). *(default: "./")* 33 | - **-R, --round-of-digits**: Number of decimal places to round values to. *(default: 4)* 34 | - **--selected-vars**: List of variables to compare between the datasets. *(default: ["10u", "10v", "2d", "2t"])* 35 | - **--save-plots (optional)**: Enable this flag to save an image of the dataset grids. 36 | 37 | Example: 38 | -------- 39 | 40 | .. code:: console 41 | 42 | $ anemoi-datasets compare-lam aifs-ea-an-oper-0001-mars-n320-1979-2022-6h-v6.zarr metno-meps-archive-det-opendap-2p5km-2020-2023-6h-v1.zarr -D 10 -O "./" -R 4 --selected-vars 2t msl --save-plots 43 | 44 | Argparse integration: 45 | --------------------- 46 | 47 | .. argparse:: 48 | :module: anemoi.datasets.__main__ 49 | :func: create_parser 50 | :prog: anemoi-datasets 51 | :path: compare-lam 52 | -------------------------------------------------------------------------------- /docs/cli/compare.rst: -------------------------------------------------------------------------------- 1 | .. _compare_command: 2 | 3 | Compare Command 4 | =============== 5 | 6 | Use this command to compare two datasets. 7 | 8 | The command will run a quick comparison of the two datasets and output a summary of the differences. 9 | 10 | .. warning:: 11 | 12 | This command will not compare the data in the datasets, only some of the metadata. 13 | Subsequent versions of this command may include more detailed comparisons. 14 | 15 | 16 | .. argparse:: 17 | :module: anemoi.datasets.__main__ 18 | :func: create_parser 19 | :prog: anemoi-datasets 20 | :path: compare 21 | -------------------------------------------------------------------------------- /docs/cli/copy.rst: -------------------------------------------------------------------------------- 1 | .. _copy_command: 2 | 3 | Copy Command 4 | ============ 5 | 6 | 7 | Copying a dataset from one location to another can be error-prone and 8 | time-consuming. This command-line script allows for incremental copying. 9 | When the copying process fails, it can be resumed. It can be used to copy 10 | files from a local directory to a remote server, or from a remote server to a 11 | local directory, as long as there is a Zarr backend to read and write the data. 12 | 13 | The script uses multiple threads to make the process faster. However, it is 14 | important to consider that making parallel requests to the same server may 15 | not be ideal, for instance if the server internally uses a limited number of 16 | threads to handle requests. 17 | 18 | The option to rechunk the data is available, which can be useful when the 19 | data is stored on a platform that does not support having many small files 20 | or many files in the same directory. However, keep in mind that rechunking 21 | has a huge impact on the performance when reading the data: the chunk pattern 22 | for the source dataset has been defined for good reasons, and changing it is 23 | very likely to have a negative impact on the performance. 24 |
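To make the copy workflow concrete, here is a minimal usage sketch (hedged: the source path and destination URL are illustrative, the exact positional arguments should be checked against ``anemoi-datasets copy --help``, and only ``--resume`` is taken from the warning below):

.. code:: console

   # Start, or resume, an incremental copy of a local dataset to object
   # storage, keeping the chunking of the source dataset.
   $ anemoi-datasets copy --resume dataset.zarr s3://my-bucket/dataset.zarr

If the transfer is interrupted, re-running the same command with ``--resume`` continues from where it stopped instead of starting over.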
25 | .. warning:: 26 | 27 | When resuming the copying process (using ``--resume``), calling the script with the same arguments for ``--block-size`` and ``--rechunk`` is recommended. 28 | Using different values for these arguments to resume copying the same dataset may lead to unexpected behavior. 29 | 30 | 31 | .. argparse:: 32 | :module: anemoi.datasets.__main__ 33 | :func: create_parser 34 | :prog: anemoi-datasets 35 | :path: copy 36 | -------------------------------------------------------------------------------- /docs/cli/create.rst: -------------------------------------------------------------------------------- 1 | .. _create_command: 2 | 3 | Create Command 4 | ============== 5 | 6 | Use this command to create a dataset from a recipe file. 7 | The syntax of the recipe file is described in :doc:`building datasets <../datasets/building/introduction>`. 8 | 9 | .. argparse:: 10 | :module: anemoi.datasets.__main__ 11 | :func: create_parser 12 | :prog: anemoi-datasets 13 | :path: create 14 | -------------------------------------------------------------------------------- /docs/cli/grib-index.rst: -------------------------------------------------------------------------------- 1 | .. _grib-index_command: 2 | 3 | Grib-index Command 4 | ================== 5 | 6 | The `grib-index` command is used to create an index file for GRIB files. The index file is then used 7 | by the `grib-index` :ref:`source `. 8 | 9 | The command will recursively scan the directories provided and open all the GRIB files found. It will 10 | then create an index file for each GRIB file, which will be used to read the data. 11 | 12 | .. code:: bash 13 | 14 | anemoi-datasets grib-index --index index.db /path1/to/grib/files /path2/to/grib/files 15 | 16 | 17 | See :ref:`grib_flavour` for more information about GRIB flavours. 18 | 19 | 20 | .. argparse:: 21 | :module: anemoi.datasets.__main__ 22 | :func: create_parser 23 | :prog: anemoi-datasets 24 | :path: grib-index 25 | -------------------------------------------------------------------------------- /docs/cli/inspect.rst: -------------------------------------------------------------------------------- 1 | .. _inspect_command: 2 | 3 | Inspect Command 4 | =============== 5 | 6 | 7 | Anemoi datasets are stored in Zarr format and can be located on a local file system or on a remote server. 8 | The `inspect` command is used to inspect the contents of a dataset. 9 | This command will output the metadata of the dataset, including the variables, dimensions, and attributes. 10 | 11 | .. code:: console 12 | 13 | $ anemoi-datasets inspect dataset.zarr 14 | 15 | 16 | which will output something like the following. The output should be self-explanatory. 17 | 18 | .. literalinclude:: ../datasets/yaml/building1.txt 19 | :language: console 20 | 21 | .. argparse:: 22 | :module: anemoi.datasets.__main__ 23 | :func: create_parser 24 | :prog: anemoi-datasets 25 | :path: inspect 26 | -------------------------------------------------------------------------------- /docs/cli/introduction.rst: -------------------------------------------------------------------------------- 1 | ..
_cli-introduction: 2 | 3 | ################## 4 | Command line tool 5 | ################## 6 | 7 | When you install the `anemoi-datasets` package, this will also install a command-line tool 8 | called ``anemoi-datasets``, which can be used to manage the Zarr datasets. 9 | 10 | The tool can provide help with the ``--help`` option: 11 | 12 | .. code-block:: bash 13 | 14 | % anemoi-datasets --help 15 | 16 | The commands are: 17 | 18 | - :ref:`Create Command <create_command>` 19 | - :ref:`Copy Command <copy_command>` 20 | - :ref:`Inspect Command <inspect_command>` 21 | - :ref:`Compare Command <compare_command>` 22 | - :ref:`Scan Command <scan_command>` 23 | - :ref:`Compare LAM Command <compare_lam_command>` 24 | -------------------------------------------------------------------------------- /docs/cli/patch.rst: -------------------------------------------------------------------------------- 1 | .. _patch_command: 2 | 3 | Patch Command 4 | ============= 5 | 6 | Use this command to patch the metadata of a given dataset. 7 | 8 | .. argparse:: 9 | :module: anemoi.datasets.__main__ 10 | :func: create_parser 11 | :prog: anemoi-datasets 12 | :path: patch 13 | -------------------------------------------------------------------------------- /docs/cli/scan.rst: -------------------------------------------------------------------------------- 1 | .. _scan_command: 2 | 3 | Scan Command 4 | ============ 5 | 6 | Use this command to scan for GRIB files. 7 | 8 | .. argparse:: 9 | :module: anemoi.datasets.__main__ 10 | :func: create_parser 11 | :prog: anemoi-datasets 12 | :path: scan 13 | -------------------------------------------------------------------------------- /docs/datasets/building/advanced-options.rst: -------------------------------------------------------------------------------- 1 | ################## 2 | Advanced Options 3 | ################## 4 | -------------------------------------------------------------------------------- /docs/datasets/building/filters.rst: -------------------------------------------------------------------------------- 1 | .. _filters: 2 | 3 | ######### 4 | Filters 5 | ######### 6 | 7 | .. warning:: 8 | 9 | This is still a work-in-progress. Some of the filters may be renamed 10 | later. 11 | 12 | Filters are used to modify the data or metadata in a dataset. 13 | 14 | .. toctree:: 15 | :maxdepth: 1 16 | 17 | filters/select 18 | filters/orog_to_z 19 | filters/regrid 20 | filters/rename 21 | filters/rotate_winds 22 | filters/sum 23 | filters/unrotate_winds 24 | filters/wz_to_w 25 | filters/noop 26 | filters/empty 27 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/empty.rst: -------------------------------------------------------------------------------- 1 | ####### 2 | empty 3 | ####### 4 | 5 | The ``empty`` filter is for debugging purposes. It always returns an 6 | empty set of fields. 7 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/noop.rst: -------------------------------------------------------------------------------- 1 | ###### 2 | noop 3 | ###### 4 | 5 | The ``noop`` filter is for debugging purposes. It returns its input 6 | unchanged. 7 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/orog_to_z.rst: -------------------------------------------------------------------------------- 1 | ########### 2 | orog_to_z 3 | ########### 4 | 5 | The ``orog_to_z`` filter converts orography (in metres) to surface 6 | geopotential (m^2/s^2) using the equation: 7 | 8 | ..
math:: 9 | 10 | z &= g \cdot \textrm{orog}\\ 11 | g &= 9.80665\ m \cdot s^{-2} 12 | 13 | This filter must follow a source that provides orography, which is 14 | replaced by the surface geopotential (an orography of 1000 m, for example, yields z ≈ 9806.65 m^2/s^2). 15 | 16 | .. literalinclude:: yaml/orog_to_z.yaml 17 | :language: yaml 18 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/regrid.rst: -------------------------------------------------------------------------------- 1 | ######## 2 | regrid 3 | ######## 4 | 5 | When building a dataset for a specific model, it is possible that the 6 | source grid or resolution does not fit the needs. In that case, it is 7 | possible to add a filter to interpolate the data to a target grid. The 8 | filter is part of the ``anemoi-transform`` package. It will call the 9 | ``interpolate`` function from `earthkit-regrid 10 | `_ if 11 | the keys ``method``, ``in_grid`` and ``out_grid`` are provided and if a 12 | `pre-generated matrix 13 | `_ 14 | exists for this transformation. Otherwise, it is possible to provide a 15 | ``regrid matrix`` previously generated with ``anemoi-transform 16 | make-regrid-matrix``. The generated matrix is an NPZ file containing the 17 | input/output coordinates, the indices, and the weights of the 18 | interpolation. 19 | 20 | ``regrid`` is a :ref:`filter <filters>` that must follow a :ref:`source 21 | <sources>` or another filter in a :ref:`building-pipe` operation. 22 | 23 | .. literalinclude:: yaml/regrid1.yaml 24 | :language: yaml 25 | 26 | .. literalinclude:: yaml/regrid2.yaml 27 | :language: yaml 28 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/rename.rst: -------------------------------------------------------------------------------- 1 | ######## 2 | rename 3 | ######## 4 | 5 | When combining several sources, it is common to have different values 6 | for a given attribute to represent the same concept. For example, 7 | ``temperature_850hPa`` and ``t_850`` are two different ways to represent 8 | the temperature at 850 hPa. The ``rename`` filter allows renaming a key 9 | to another key. It is a :ref:`filter <filters>` that must follow a 10 | :ref:`source <sources>` or another filter in a :ref:`building-pipe` 11 | operation. 12 | 13 | .. literalinclude:: yaml/rename.yaml 14 | :language: yaml 15 | 16 | .. note:: 17 | 18 | The ``rename`` filter was primarily designed to rename the ``param`` 19 | attribute, but any key can be renamed. The ``rename`` filter can take 20 | several renaming keys. 21 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/rotate_winds.rst: -------------------------------------------------------------------------------- 1 | ############## 2 | rotate_winds 3 | ############## 4 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/select.rst: -------------------------------------------------------------------------------- 1 | ######## 2 | select 3 | ######## 4 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/sum.rst: -------------------------------------------------------------------------------- 1 | ##### 2 | sum 3 | ##### 4 | 5 | The ``sum`` filter computes the sum over multiple variables. This can be 6 | useful for computing total precipitation from its components (snow, 7 | rain) or summing the components of total column-integrated water.
This 8 | filter must follow a source that provides the list of variables to be 9 | summed. These variables are removed by the filter and replaced by a 10 | single summed variable. 11 | 12 | .. literalinclude:: yaml/sum.yaml 13 | :language: yaml 14 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/unrotate_winds.rst: -------------------------------------------------------------------------------- 1 | ############### 2 | unrotate_winds 3 | ############### 4 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/wz_to_w.rst: -------------------------------------------------------------------------------- 1 | ######### 2 | wz_to_w 3 | ######### 4 | 5 | The ``wz_to_w`` filter converts geometric vertical velocity (provided in 6 | m/s) to vertical velocity in pressure coordinates (Pa/s). This filter 7 | must follow a source that provides geometric vertical velocity. 8 | Geometric vertical velocity is removed by the filter, and pressure 9 | vertical velocity is added. 10 | 11 | .. literalinclude:: yaml/wz_to_w.yaml 12 | :language: yaml 13 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/yaml/orog_to_z.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | pipe: 3 | - source: 4 | # mars, grib, netcdf, etc. 5 | # source attributes here 6 | # ... 7 | # Must load an orography variable 8 | 9 | - orog_to_z: 10 | orog: orog # Name of orography (input) variable 11 | z: z # Name of z (output) variable 12 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/yaml/regrid1.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | pipe: 3 | - source: 4 | # mars, grib, netcdf, etc. 5 | # source attributes here 6 | # ... 7 | 8 | - regrid: 9 | method: nearest 10 | in_grid: o32 11 | out_grid: o48 12 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/yaml/regrid2.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | pipe: 3 | - source: 4 | # mars, grib, netcdf, etc. 5 | # source attributes here 6 | # ... 7 | 8 | - regrid: 9 | matrix: /path/to/regrid/matrix.npz 10 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/yaml/rename.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | pipe: 3 | - source: 4 | # mars, grib, netcdf, etc. 5 | # source attributes here 6 | # ... 7 | 8 | - rename: 9 | param: 10 | # Map old `param` names to new ones 11 | temperature_2m: 2t 12 | temperature_850hPa: t_850 13 | # ... 14 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/yaml/sum.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | pipe: 3 | - source: 4 | # mars, grib, netcdf, etc. 5 | # source attributes here 6 | # ... 
7 | # Must load the variables to be summed 8 | 9 | - sum: 10 | params: 11 | # List of input variables 12 | - variable1 13 | - variable2 14 | - variable3 15 | output: variable_total # Name of output variable 16 | -------------------------------------------------------------------------------- /docs/datasets/building/filters/yaml/wz_to_w.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | pipe: 3 | - source: 4 | # mars, grib, netcdf, etc. 5 | # source attributes here 6 | # ... 7 | # Must load geometric vertical velocity 8 | 9 | - wz_to_w: 10 | wz: wz # Name of geometric vertical velocity (input) variable 11 | w: w # Name of pressure vertical velocity (output) variable 12 | -------------------------------------------------------------------------------- /docs/datasets/building/handling-missing-dates.rst: -------------------------------------------------------------------------------- 1 | ######################## 2 | Handling missing dates 3 | ######################## 4 | 5 | By default, the package will raise an error if there are missing dates. 6 | 7 | Missing dates can be handled by specifying a list of dates in the 8 | configuration file. The dates should be in the same format as the dates 9 | in the time series. The missing dates will be filled with ``np.nan`` 10 | values. 11 | 12 | .. literalinclude:: ../yaml/missing_dates.yaml 13 | :language: yaml 14 | 15 | *Anemoi* will ignore the missing dates when computing the 16 | :ref:`statistics `. 17 | 18 | You can retrieve the list of indices corresponding to the missing dates by 19 | accessing the ``missing`` attribute of the dataset object. 20 | 21 | .. code:: python 22 | 23 | print(ds.missing) 24 | 25 | If you access a missing index, the dataset will raise a 26 | ``MissingDateError``. 27 | -------------------------------------------------------------------------------- /docs/datasets/building/handling-missing-values.rst: -------------------------------------------------------------------------------- 1 | ######################### 2 | Handling missing values 3 | ######################### 4 | 5 | When handling data for machine learning models, missing values (`NaNs`) 6 | can pose a challenge, as models require complete data to operate 7 | effectively and may crash otherwise. Ideally, we anticipate having 8 | complete data in all fields. 9 | 10 | However, there are scenarios where `NaNs` naturally occur, such as with 11 | variables only relevant on land or at sea. This happens for sea surface 12 | temperature (`sst`), for example. In such cases, the default behaviour 13 | is to reject data with `NaNs` as invalid. To accommodate `NaNs` and 14 | accurately compute statistics based on them, you can include the 15 | ``allow_nans`` key in the configuration. 16 | 17 | Here's an example of how to implement it: 18 | 19 | .. literalinclude:: ../yaml/nan.yaml 20 | :language: yaml 21 | -------------------------------------------------------------------------------- /docs/datasets/building/naming-variables.rst: -------------------------------------------------------------------------------- 1 | .. _naming-variables: 2 | 3 | ################## 4 | Naming Variables 5 | ################## 6 | 7 | *************** 8 | Rename Filter 9 | *************** 10 | 11 | The rename filter is used to rename variables in a dataset. 12 | 13 | .. _remapping_option: 14 | 15 | ****************** 16 | Remapping Option 17 | ****************** 18 | 19 | TODO. 
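Pending proper documentation of this option, the sketch below shows the kind of remapping commonly used in recipes to build variable names from the ``param`` and ``levelist`` keys; the ``output.remapping`` key and the ``{param}_{levelist}`` template are assumptions and should be checked against your version of anemoi-datasets:

.. code:: yaml

   output:
     remapping:
       param_level: "{param}_{levelist}"

With such a remapping, temperature at 850 hPa is named ``t_850``, while single-level variables such as ``2t`` keep their ``param`` name.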
20 | -------------------------------------------------------------------------------- /docs/datasets/building/operations.rst: -------------------------------------------------------------------------------- 1 | .. _operations: 2 | 3 | ############ 4 | Operations 5 | ############ 6 | 7 | Operations are blocks of YAML code that translate a list of dates into 8 | fields. 9 | 10 | .. _building-join: 11 | 12 | ****** 13 | join 14 | ****** 15 | 16 | The join is the process of combining data from several sources. Each 17 | source is expected to provide different variables for the same dates. 18 | 19 | .. literalinclude:: ../yaml/input.yaml 20 | :language: yaml 21 | 22 | .. _building-concat: 23 | 24 | ******** 25 | concat 26 | ******** 27 | 28 | Concatenation is the process of combining different sets of operations 29 | that handle different dates. This is typically used to build a dataset 30 | that spans several years, when several sources are involved, each 31 | providing a different period. 32 | 33 | .. literalinclude:: ../yaml/concat.yaml 34 | :language: yaml 35 | 36 | .. _building-pipe: 37 | 38 | ****** 39 | pipe 40 | ****** 41 | 42 | The pipe is the process of transforming fields using :ref:`filters 43 | `. The first step of a pipe is typically a source, a join, or 44 | another pipe. The following steps are filters. 45 | 46 | .. literalinclude:: ../yaml/pipe.yaml 47 | :language: yaml 48 | -------------------------------------------------------------------------------- /docs/datasets/building/sources.rst: -------------------------------------------------------------------------------- 1 | .. _sources: 2 | 3 | ######### 4 | Sources 5 | ######### 6 | 7 | The source is a software component that, given a list of dates and 8 | variables, will return the corresponding fields. 9 | 10 | A `source` is responsible for reading data from the source and 11 | converting it to a set of fields. A `source` is also responsible for 12 | handling the metadata of the data, such as the variable names, and more. 13 | 14 | An example of a source is ECMWF’s MARS archive, a collection of GRIB or 15 | NetCDF files, etc. 16 | 17 | The following `sources` are currently available: 18 | 19 | .. toctree:: 20 | :maxdepth: 1 21 | 22 | sources/accumulations 23 | sources/anemoi-dataset 24 | sources/cds 25 | sources/eccc-fstd 26 | sources/forcings 27 | sources/grib 28 | sources/grib-index 29 | sources/hindcasts 30 | sources/mars 31 | sources/netcdf 32 | sources/opendap 33 | sources/recentre 34 | sources/repeated-dates 35 | sources/xarray-based 36 | sources/xarray-kerchunk 37 | sources/xarray-zarr 38 | sources/zenodo 39 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/anemoi-dataset.rst: -------------------------------------------------------------------------------- 1 | .. _anemoi-dataset_source: 2 | 3 | ################ 4 | anemoi-dataset 5 | ################ 6 | 7 | .. admonition:: Experimental 8 | :class: important 9 | 10 | This source is experimental and may change in the future. 11 | 12 | An anemoi-dataset can be a source for a dataset: 13 | 14 | .. literalinclude:: yaml/anemoi-dataset.yaml 15 | :language: yaml 16 | 17 | The parameters are the same as those used in the ``open_dataset`` 18 | function, which allows you to subset and combine datasets. See 19 | :ref:`opening-datasets` for more information. 
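For orientation, the recipe above corresponds roughly to the following ``open_dataset`` call; this is a sketch, where ``dataset1`` and ``dataset2`` are placeholder names and the dictionary form of the arguments is an assumption:

.. code:: python

   from anemoi.datasets import open_dataset

   # Join two datasets on the variable dimension, then subset by time
   ds = open_dataset(
       join=[
           {"dataset": "dataset1", "select": ["z_500", "t_500", "u_500", "v_500"], "frequency": "6h"},
           {"dataset": "dataset2", "select": ["msl", "2t", "10u", "10v"], "frequency": "6h"},
       ],
       start=2000,
       end=2001,
   )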
20 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/cds.rst: -------------------------------------------------------------------------------- 1 | .. _cds_source: 2 | 3 | ##### 4 | cds 5 | ##### 6 | 7 | For users outside of the ECMWF organisation, it is possible to access 8 | ERA5 data through the Copernicus Climate Data Store ``cdsapi`` instead. 9 | 10 | The steps to set up a CDS API account are detailed `here 11 | `_. 12 | 13 | The only difference from the previous MARS recipes is the addition of 14 | the ``use_cdsapi_dataset`` key: 15 | 16 | .. literalinclude:: yaml/mars-cds.yaml 17 | :language: yaml 18 | 19 | This process can take some time because of the high demand on the CDS 20 | server. 21 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/eccc-fstd.rst: -------------------------------------------------------------------------------- 1 | ########### 2 | eccc-fstd 3 | ########### 4 | 5 | To read files in the standard format used at Environment and Climate 6 | Change Canada (ECCC), the following source can be used: 7 | 8 | .. literalinclude:: yaml/eccc-fstd.yaml 9 | :language: yaml 10 | 11 | The recipe will build a dataset from a standard file using the 12 | ``fstd2nc`` xarray plugin. 13 | 14 | The ``fstd2nc`` dependency is not part of the default anemoi-datasets 15 | installation and has to be installed following the `fstd2nc project 16 | description `_. 17 | 18 | See :ref:`create-cf-data` for more information. 19 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/grib-index.rst: -------------------------------------------------------------------------------- 1 | .. _grib-index_source: 2 | 3 | ############ 4 | grib-index 5 | ############ 6 | 7 | The `grib-index` source is used to read GRIB files with the help of an 8 | index file created with the `grib-index` :ref:`command 9 | <grib-index_command>`. 10 | 11 | See :ref:`create-grib-data` for more information. 12 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/grib.rst: -------------------------------------------------------------------------------- 1 | .. _grib_source: 2 | 3 | ###### 4 | grib 5 | ###### 6 | 7 | To read all the GRIB messages from a file, use the following: 8 | 9 | .. literalinclude:: yaml/grib1.yaml 10 | :language: yaml 11 | 12 | You can also read specific GRIB messages by specifying them using the 13 | MARS language (excluding the keywords `date`, `time`, and `step`, as 14 | well as any post-processing options, such as `grid` or `area`): 15 | 16 | .. literalinclude:: yaml/grib2.yaml 17 | :language: yaml 18 | 19 | You can also read a collection of GRIB files, using Unix shell 20 | wildcards_: 21 | 22 | .. literalinclude:: yaml/grib3.yaml 23 | :language: yaml 24 | 25 | You can also use the requested `date` to build the filenames. For 26 | example, if the GRIB files containing the requested data are named 27 | according to the following pattern: ``/path/to/YYYY/MM/YYYYMMDDHH.grib`` 28 | with `YYYY` being the year, `MM` the month, `DD` the day, and `HH` the 29 | hour, you can use the following configuration: 30 | 31 | .. literalinclude:: yaml/grib4.yaml 32 | :language: yaml 33 | 34 | The patterns between the curly brackets are replaced by the values of 35 | the `date`, formatted according to the Python strftime_ method. 36 | 37 | See :ref:`create-grib-data` for more information. 
38 | 39 | .. note:: 40 | 41 | You can combine all the above options when selecting GRIB messages 42 | from a file. 43 | 44 | .. _strftime: https://python.readthedocs.io/en/latest/library/datetime.html#strftime-and-strptime-behavior 45 | 46 | .. _wildcards: https://en.wikipedia.org/wiki/Glob_(programming) 47 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/hindcasts.rst: -------------------------------------------------------------------------------- 1 | ########### 2 | hindcasts 3 | ########### 4 | 5 | .. note:: 6 | 7 | The `hindcasts` source is currently using the `mars` source 8 | internally. This will be changed in the future. 9 | 10 | Hindcast data, also known as reforecasts, are unique because they are 11 | run for a specific day of the year (such as 1 January or 11 June) for 12 | multiple years. So, for a given reference date like 2022-05-12, we can 13 | have hindcasts for 2001-05-12, 2002-05-12, 2003-05-12, and so on. This 14 | is useful in many cases. For more details, please refer to the ECMWF 15 | documentation. 16 | 17 | The `hindcasts` source has a special argument called `reference_year`, 18 | which represents the year of the reference date. Based on the requested 19 | valid datetime and on the `reference_year`, the `hindcasts` source will 20 | calculate the `hdate`, `date`, and `time` appropriately. 21 | 22 | For example, if the `reference_year` is 2022, then the data for 23 | 2002-05-12 will use data with `hdate=2002-05-12` and `date=2022-05-12`. 24 | 25 | .. literalinclude:: yaml/hindcasts.yaml 26 | :language: yaml 27 | 28 | Using `step` in the `hindcasts` source is implemented and works as 29 | expected. 30 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/mars.rst: -------------------------------------------------------------------------------- 1 | ###### 2 | mars 3 | ###### 4 | 5 | The ``mars`` source will retrieve the data from the ECMWF MARS archive. 6 | For that, you need to have an ECMWF account and build your dataset on 7 | one of the Centre's computers, or use the ``ecmwfapi`` Python package. 8 | 9 | The `yaml` block can contain any keys that follow the `MARS language 10 | specification`_, with the exception of the ``date``, ``time``, and 11 | ``step`` keywords. 12 | 13 | The missing keys will be filled with the default values, as defined in 14 | the MARS language specification. 15 | 16 | .. literalinclude:: yaml/mars1.yaml 17 | :language: yaml 18 | 19 | Data from several level types must be requested in separate requests, 20 | combined with the ``join`` command. 21 | 22 | .. literalinclude:: yaml/mars2.yaml 23 | :language: yaml 24 | 25 | See :ref:`naming-variables` for information on how to name the variables 26 | when mixing single-level and multi-level variables in the same dataset. 27 | 28 | .. _mars language specification: https://confluence.ecmwf.int/display/UDOC/MARS+user+documentation 29 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/netcdf.rst: -------------------------------------------------------------------------------- 1 | ######## 2 | netcdf 3 | ######## 4 | 5 | In the examples below, we explain how to create an anemoi dataset from 6 | one or more netCDF files. 7 | 8 | .. 
literalinclude:: yaml/netcdf.yaml 9 | :language: yaml 10 | 11 | The netCDF source uses `Xarray 12 | `_ internally to access 13 | the data, and assumes that the netcdf files follow the `CF conventions 14 | `_. You can also read a collection of netCDF 15 | files, using Unix shell `wildcards 16 | `_. 17 | 18 | .. warning:: 19 | 20 | We are aware of instances in which the creation of an anemoi dataset 21 | from a netCDF source does not work as expected due to missing 22 | information in the files' metadata that anemoi-datasets expects. 23 | anemoi-datasets' internal routines do their best to infer the missing 24 | information, but in some cases it is not possible. If you encounter 25 | this or similar issues, please open an issue in the anemoi-datasets 26 | repository. 27 | 28 | See :ref:`create-cf-data` for more information. 29 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/opendap.rst: -------------------------------------------------------------------------------- 1 | ######### 2 | opendap 3 | ######### 4 | 5 | .. literalinclude:: yaml/opendap.yaml 6 | :language: yaml 7 | 8 | See :ref:`create-cf-data` for more information. 9 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/recentre.rst: -------------------------------------------------------------------------------- 1 | .. _recentre: 2 | 3 | ########## 4 | recentre 5 | ########## 6 | 7 | Perturbations refer to the small variations centred around a nominal 8 | value of a parameter. When dealing with `ensemble forecasting`_, the 9 | perturbations are related to the difference between `ensemble members` 10 | and their given `centre`. 11 | 12 | The `recentre` function computes a set of new ensemble members centred 13 | on a different centre from previous ensemble members using the following 14 | formula: 15 | 16 | .. math:: 17 | 18 | members_{new} = centre + ( members - \overline{members} ) 19 | 20 | Additionally, some variables must be non-negative to have a physical 21 | meaning (e.g. accumulated variables or `specific humidity`). To ensure 22 | this, positive clipping is performed using the alternative formula: 23 | 24 | .. math:: 25 | 26 | members_{new} = max(0, centre + ( members - \overline{members} )) 27 | 28 | The current implementation enforces that the following variables are 29 | positive when using the `recentre` function: 30 | 31 | +----------+------------------------------+ 32 | | Variable | Description | 33 | +==========+==============================+ 34 | | q | `Specific humidity`_ | 35 | +----------+------------------------------+ 36 | | cp | `Convective precipitation`_ | 37 | +----------+------------------------------+ 38 | | lsp | `Large-scale precipitation`_ | 39 | +----------+------------------------------+ 40 | | tp | `Total precipitation`_ | 41 | +----------+------------------------------+ 42 | 43 | It uses the following arguments: 44 | 45 | members 46 | A :ref:`reference ` to the ensemble members. 47 | 48 | centre 49 | A :ref:`reference ` to the new centre requested. 50 | 51 | Examples: 52 | 53 | .. literalinclude:: yaml/recentre.yaml 54 | :language: yaml 55 | 56 | .. _convective precipitation: https://codes.ecmwf.int/grib/param-db/?id=143 57 | 58 | .. _ensemble forecasting: https://www.ecmwf.int/en/elibrary/75394-ensemble-forecasting 59 | 60 | .. _large-scale precipitation: https://codes.ecmwf.int/grib/param-db/?id=142 61 | 62 | .. 
_specific humidity: https://codes.ecmwf.int/grib/param-db/?id=133 63 | 64 | .. _total precipitation: https://codes.ecmwf.int/grib/param-db/?id=228 65 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/repeated-dates.rst: -------------------------------------------------------------------------------- 1 | ################ 2 | repeated-dates 3 | ################ 4 | 5 | The `repeated-dates` source is used to repeat a single source multiple 6 | times, so that its data are present on multiple dates. A simple example 7 | of this is when you have a source that contains a constant field, such 8 | as orography or bathymetry, that you want to have repeated on all the 9 | dates of the dataset. 10 | 11 | The general format of the `repeated-dates` source is: 12 | 13 | .. literalinclude:: yaml/repeated-dates1.yaml 14 | :language: yaml 15 | 16 | where ``source`` is any of the :ref:`operations <operations>` or 17 | :ref:`sources <sources>` described in the previous sections. The 18 | ``mode`` parameter can be one of the following: 19 | 20 | ********** 21 | constant 22 | ********** 23 | 24 | .. literalinclude:: yaml/repeated-dates2.yaml 25 | :language: yaml 26 | 27 | ************* 28 | climatology 29 | ************* 30 | 31 | .. literalinclude:: yaml/repeated-dates3.yaml 32 | :language: yaml 33 | 34 | ********* 35 | closest 36 | ********* 37 | 38 | .. literalinclude:: yaml/repeated-dates4.yaml 39 | :language: yaml 40 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/xarray-based.rst: -------------------------------------------------------------------------------- 1 | ###################### 2 | xarray-based Sources 3 | ###################### 4 | 5 | More generally, you can specify any valid xarray.open_dataset_ arguments 6 | as the source, and anemoi-datasets will try to build a dataset from it. 7 | Examples of valid xarray.open_dataset_ arguments include: netCDF, Zarr, 8 | OpenDAP, etc. 9 | 10 | .. literalinclude:: yaml/xarray-based.yaml 11 | :language: yaml 12 | 13 | See :ref:`create-cf-data` for more information. 14 | 15 | .. _cf conventions: http://cfconventions.org/ 16 | 17 | .. _wildcards: https://en.wikipedia.org/wiki/Glob_(programming) 18 | 19 | .. _xarray: https://docs.xarray.dev/en/stable/index.html 20 | 21 | .. 
_xarray.open_dataset: https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html 22 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/xarray-kerchunk.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import fsspec 4 | import tqdm 5 | from kerchunk.combine import MultiZarrToZarr 6 | from kerchunk.hdf import SingleHdf5ToZarr 7 | 8 | # Anonymous (unauthenticated) access to the public bucket 9 | fs = fsspec.filesystem("s3", anon=True) 10 | 11 | pattern = "s3://nsf-ncar-era5/e5.oper.an.pl/202403/e5.oper.an.pl.*.ll025sc.2024????00_2024????23.nc" 12 | 13 | jsons = [] 14 | 15 | # Build a kerchunk index for each netCDF/HDF5 file matching the pattern 16 | for file in tqdm.tqdm(fs.glob(pattern)): 17 | with fs.open(file, "rb", anon=True) as f: 18 | h5chunks = SingleHdf5ToZarr(f, file) 19 | jsons.append(h5chunks.translate()) 20 | 21 | # Combine the per-file indices into a single virtual Zarr store, 22 | # concatenated along the time dimension 23 | mzz = MultiZarrToZarr( 24 | jsons, 25 | remote_protocol="s3", 26 | remote_options={"anon": True}, 27 | concat_dims=["time"], 28 | identical_dims=["latitude", "longitude"], 29 | ) 30 | 31 | # Save the combined index, to be referenced by the recipe 32 | with open("combined.json", "w") as f: 33 | json.dump(mzz.translate(), f) 34 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/xarray-kerchunk.rst: -------------------------------------------------------------------------------- 1 | ################# 2 | xarray-kerchunk 3 | ################# 4 | 5 | .. literalinclude:: yaml/xarray-kerchunk.yaml 6 | :language: yaml 7 | 8 | The code below is inspired by the `kerchunk tutorial`_, and makes use of 9 | a subset of the `ERA5 dataset available on AWS`_. You may need to 10 | install the relevant packages before running the code below. 11 | 12 | .. literalinclude:: xarray-kerchunk.py 13 | :language: python 14 | 15 | See :ref:`create-cf-data` for more information. 16 | 17 | .. _era5 dataset available on aws: https://registry.opendata.aws/ecmwf-era5/ 18 | 19 | .. _kerchunk tutorial: https://fsspec.github.io/kerchunk/tutorial.html 20 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/xarray-zarr.rst: -------------------------------------------------------------------------------- 1 | ############# 2 | xarray-zarr 3 | ############# 4 | 5 | Here is an example recipe that builds a dataset using one of the many 6 | regridded versions of ERA5 hosted by Google in an Analysis-Ready, 7 | Cloud-Optimised format. See `here 8 | `_ for more 9 | information. 10 | 11 | .. literalinclude:: yaml/xarray-zarr.yaml 12 | :language: yaml 13 | 14 | Note that, unlike the ``mars`` examples, there is no need to include a 15 | ``grid`` specification. Additionally, to sub-select the vertical levels, 16 | it is necessary to use the :ref:`join <building-join>` operation to join 17 | separate lists containing 2D variables and 3D variables. If all vertical 18 | levels are desired, then it is acceptable to specify a single source. 19 | 20 | See :ref:`create-cf-data` for more information. 
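Before writing such a recipe, it can help to inspect the store directly with Xarray to check the available variable names and vertical levels. This is a sketch, assuming ``xarray``, ``zarr`` and ``gcsfs`` are installed:

.. code:: python

   import xarray as xr

   # Open the store lazily; no data is downloaded at this point
   ds = xr.open_zarr(
       "gs://gcp-public-data-arco-era5/ar/1959-2022-6h-128x64_equiangular_conservative.zarr"
   )

   # Names to use for the recipe's `param` and `level` keys
   print(list(ds.data_vars))
   print(ds["temperature"].coords["level"].values)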
21 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/accumulations1.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | accumulations: 3 | accumulation_period: 6 4 | class: ea 5 | param: [ tp, cp, sf ] 6 | levtype: sfc 7 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/accumulations2.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | accumulations: 3 | accumulation_period: [ 6, 12 ] 4 | class: od 5 | param: [ tp, cp, sf ] 6 | levtype: sfc 7 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/anemoi-dataset.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | anemoi-dataset: 3 | join: 4 | - dataset: dataset1 5 | select: [ z_500, t_500, u_500, v_500 ] 6 | frequency: 6h 7 | - dataset: dataset2 8 | select: [ msl, 2t, 10u, 10v ] 9 | frequency: 6h 10 | start: 2000 11 | end: 2001 12 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/eccc-fstd.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | eccc-fstd: 3 | path: /path/to/data 4 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/forcings.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | join: 3 | - source1: 4 | args1: value1 5 | args2: value2 6 | - forcings: 7 | template: ${input.join.0.source1} 8 | param: 9 | - insolation 10 | - cos_julian_day 11 | - sin_julian_day 12 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/grib1.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | grib: 3 | path: /path/to/data.grib 4 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/grib2.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | grib: 3 | path: /path/to/data.grib 4 | param: [ u, v ] 5 | levelist: [ 1000, 850, 500 ] 6 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/grib3.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | grib: 3 | path: /path/to/*.grib 4 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/grib4.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | grib: 3 | path: /path/to/{date:strftime(%Y)}/{date:strftime(%m)}/{date:strftime(%Y%m%d%H)}.grib 4 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/hindcasts.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | input: 3 | hindcasts: 4 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/mars-cds.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | mars: 3 | use_cdsapi_dataset: "reanalysis-era5-complete" 4 | class: 
ea 5 | levtype: sfc 6 | param: [ 10u, 10v, 2t, msl ] 7 | grid: n320 8 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/mars1.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | mars: 3 | levtype: sfc 4 | param: [ 2t, msl ] 5 | grid: [ 0.25, 0.25 ] 6 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/mars2.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | join: 3 | - mars: 4 | levtype: sfc 5 | param: [ 2t, msl ] 6 | grid: [ 0.25, 0.25 ] 7 | - mars: 8 | levtype: pl 9 | param: [ u, v ] 10 | grid: [ 0.25, 0.25 ] 11 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/netcdf.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | netcdf: 3 | path: /path/to/data.nc 4 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/opendap.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | opendap: 3 | url: https://... 4 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/recentre.yaml: -------------------------------------------------------------------------------- 1 | data_sources: 2 | members_source: 3 | mars: 4 | class: ea 5 | expver: "0001" 6 | grid: 20.0/20.0 7 | levtype: sfc 8 | param: [ 10u, 10v, 2t ] 9 | type: an 10 | stream: enda 11 | number: [ 1, 2, 3, 4, 5, 6, 7, 8, 9 ] 12 | 13 | center_source: 14 | mars: 15 | class: ea 16 | expver: "0001" 17 | grid: 20.0/20.0 18 | levtype: sfc 19 | param: [ 10u, 10v, 2t ] 20 | type: an 21 | stream: oper 22 | 23 | input: 24 | recentre: 25 | centre: ${data_sources.center_source} 26 | members: ${data_sources.members_source} 27 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/repeated-dates1.yaml: -------------------------------------------------------------------------------- 1 | 2 | repeated-dates: 3 | mode: mode 4 | # ... parameters related to the mode ... 5 | source: 6 | # ... a source definition ... 
7 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/repeated-dates2.yaml: -------------------------------------------------------------------------------- 1 | repeated-dates: 2 | mode: constant 3 | source: 4 | xarray-zarr: 5 | url: dem.zarr 6 | variable: dem 7 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/repeated-dates3.yaml: -------------------------------------------------------------------------------- 1 | repeated-dates: 2 | mode: climatology 3 | year: 2019 4 | day: 15 5 | source: 6 | grib: 7 | path: some/path/to/data.grib 8 | param: [ some_param ] 9 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/repeated-dates4.yaml: -------------------------------------------------------------------------------- 1 | repeated-dates: 2 | mode: closest 3 | frequency: 24h 4 | maximum: 30d 5 | skip_all_nans: true 6 | source: 7 | grib: 8 | path: path/to/data.grib 9 | param: [ some_param ] 10 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/xarray-based.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | xarray-based-source: # netcdf, zarr, opendap, etc. 3 | group: mygroup 4 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/xarray-kerchunk.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2024-03-24T00:00:00 3 | end: 2024-03-24T03:00:00 4 | frequency: 1h 5 | 6 | input: 7 | xarray-kerchunk: 8 | json: combined.json 9 | param: T 10 | level: [ 1000, 50 ] 11 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/xarray-zarr.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2021-01-01T00:00:00 3 | end: 2021-01-10T12:00:00 4 | frequency: 6h 5 | 6 | input: 7 | join: 8 | - xarray-zarr: 9 | url: "gs://gcp-public-data-arco-era5/ar/1959-2022-6h-128x64_equiangular_conservative.zarr" 10 | param: 11 | - surface_pressure 12 | - 2m_temperature 13 | - 10m_u_component_of_wind 14 | - 10m_v_component_of_wind 15 | 16 | - xarray-zarr: 17 | url: "gs://gcp-public-data-arco-era5/ar/1959-2022-6h-128x64_equiangular_conservative.zarr" 18 | param: 19 | - temperature 20 | level: 21 | - 1000 22 | - 500 23 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/yaml/zenodo.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2021-06-28 3 | end: 2021-06-29 4 | 5 | input: 6 | zenodo: 7 | record_id: 6470725 8 | file_key: "{param}_INCA_202106280700.nc" 9 | param: [ RR, PN, PT ] 10 | -------------------------------------------------------------------------------- /docs/datasets/building/sources/zenodo.rst: -------------------------------------------------------------------------------- 1 | ######## 2 | zenodo 3 | ######## 4 | 5 | https://zenodo.org/records/6470725 6 | 7 | .. 
literalinclude:: yaml/zenodo.yaml 8 | :language: yaml 9 | -------------------------------------------------------------------------------- /docs/datasets/building/syntax.yaml: -------------------------------------------------------------------------------- 1 | description: 2 | 3 | name: 4 | 5 | licence: CC-BY-4.0 6 | 7 | attribution: 8 | 9 | citation: 10 | 11 | dates: 12 | start: 13 | end: 14 | frequency: 15 | 16 | build: 17 | 18 | 19 | input: 20 | 21 | 22 | output: 23 | -------------------------------------------------------------------------------- /docs/datasets/introduction.rst: -------------------------------------------------------------------------------- 1 | .. _datasets-introduction: 2 | 3 | ####################################### 4 | What can you do with anemoi-datasets? 5 | ####################################### 6 | 7 | anemoi-datasets is a framework designed to facilitate the creation and 8 | utilisation of machine learning-optimised meteorological datasets. It 9 | offers tools for both building your own datasets and using existing 10 | datasets. 11 | 12 | - :ref:`Building Your Own Datasets <building-introduction>`: Learn how 13 | to create customised datasets tailored to your specific needs, 14 | including data sourcing, filtering, and processing techniques. 15 | 16 | - :ref:`Using Existing Datasets <using-introduction>`: Discover methods 17 | to access, manipulate, and analyse pre-existing datasets for various 18 | applications. 19 | 20 | For detailed guidance, refer to the respective sections in the 21 | documentation. 22 | -------------------------------------------------------------------------------- /docs/datasets/using/configuration.rst: -------------------------------------------------------------------------------- 1 | .. _configuration: 2 | 3 | ############### 4 | Configuration 5 | ############### 6 | 7 | When the ``open_dataset`` function is called with a string that does not 8 | end with ``.zarr`` or ``.zip``, it is considered a dataset name and not 9 | a path or a URL. 10 | 11 | In that case, the *Anemoi* configuration is read from 12 | ``~/.config/anemoi/settings.toml``. Below is an example of such a 13 | configuration: 14 | 15 | .. literalinclude:: configuration.toml 16 | :language: toml 17 | 18 | Then, the name passed to ``open_dataset`` is used to look for a possible 19 | path or URL: 20 | 21 | - If the name is listed in the ``[datasets.named]`` section, the corresponding 22 | path is used. 23 | - Otherwise, the suffix ``.zarr`` is added to the name, and the file is 24 | searched at every location listed in the ``path`` list. 25 | 26 | See :ref:`miscellaneous` to modify the list of named datasets and the 27 | path temporarily. 28 | -------------------------------------------------------------------------------- /docs/datasets/using/configuration.toml: -------------------------------------------------------------------------------- 1 | [datasets] 2 | path = [ 3 | "/home/mlx/ai-ml/datasets/stable", 4 | "/home/mlx/ai-ml/datasets/experimental", 5 | "/home/mlx/ai-ml/datasets/testing", 6 | "s3://ml-datasets", 7 | ] 8 | 9 | [datasets.named] 10 | test = "/home/mlx/test-dataset.zarr" 11 | -------------------------------------------------------------------------------- /docs/datasets/using/ensembles.rst: -------------------------------------------------------------------------------- 1 | .. _selecting-members: 2 | 3 | ################### 4 | Selecting members 5 | ################### 6 | 7 | This section describes how to subset data that are part of an ensemble. 
8 | To combine ensembles, see :ref:`ensembles` in the 9 | :ref:`combining-datasets` section. 10 | 11 | .. _number: 12 | 13 | If a dataset is an ensemble, you can select one or more specific members 14 | using the `number` option. You can also use ``numbers`` (which is an 15 | alias for ``number``), and ``member`` (or ``members``). The difference 16 | between the two is that ``number`` is **1-based**, whereas ``member`` is 17 | **0-based**. 18 | 19 | Select a single element: 20 | 21 | .. code:: python 22 | 23 | ds = open_dataset( 24 | dataset, 25 | number=1, 26 | ) 27 | 28 | ... or a list: 29 | 30 | .. code:: python 31 | 32 | ds = open_dataset( 33 | dataset, 34 | number=[1, 3, 5], 35 | ) 36 | -------------------------------------------------------------------------------- /docs/datasets/using/introduction.rst: -------------------------------------------------------------------------------- 1 | .. _using-introduction: 2 | 3 | ########################### 4 | Using an existing dataset 5 | ########################### 6 | 7 | An *Anemoi* dataset is a thin wrapper around a zarr_ store that is 8 | optimised for training data-driven weather forecasting models. It is 9 | organised in such a way that I/O operations are minimised (see 10 | :ref:`overview`). 11 | 12 | .. _zarr: https://zarr.readthedocs.io/ 13 | 14 | To open a dataset, you can use the `open_dataset` function: 15 | 16 | .. code:: python 17 | 18 | from anemoi.datasets import open_dataset 19 | 20 | ds = open_dataset("path/to/dataset.zarr") 21 | 22 | You can then access the data in the dataset using the `ds` object as if 23 | it were a NumPy array. 24 | 25 | .. code:: python 26 | 27 | print(ds.shape) 28 | 29 | print(len(ds)) 30 | 31 | print(ds[0]) 32 | 33 | print(ds[10:20]) 34 | 35 | One of the main features of the *anemoi-datasets* package is the ability 36 | to subset or combine datasets. 37 | 38 | .. code:: python 39 | 40 | from anemoi.datasets import open_dataset 41 | 42 | ds = open_dataset("path/to/dataset.zarr", start=2000, end=2020) 43 | 44 | In that case, a dataset is created that only contains the data between 45 | the years 2000 and 2020. Combining is done by passing multiple paths to 46 | the `open_dataset` function: 47 | 48 | .. code:: python 49 | 50 | from anemoi.datasets import open_dataset 51 | 52 | ds = open_dataset("path/to/dataset1.zarr", "path/to/dataset2.zarr") 53 | 54 | In the latter case, the datasets are combined along the time dimension 55 | or the variable dimension, depending on the datasets' structure. 56 | 57 | .. toctree:: 58 | :maxdepth: 1 59 | :hidden: 60 | :caption: Using datasets 61 | 62 | opening 63 | methods 64 | subsetting 65 | combining 66 | selecting 67 | ensembles 68 | grids 69 | zip 70 | statistics 71 | missing 72 | other 73 | matching 74 | miscellaneous 75 | configuration 76 | -------------------------------------------------------------------------------- /docs/datasets/using/matching.rst: -------------------------------------------------------------------------------- 1 | .. _using-matching: 2 | 3 | ##################### 4 | Matching attributes 5 | ##################### 6 | 7 | When :ref:`combining datasets <combining-datasets>` with operations like 8 | :ref:`concat`, :ref:`join`, :ref:`ensembles` or :ref:`grids`, some of 9 | the attributes of the input datasets must match, such as the list of 10 | variables for `concat` or the `dates` and `frequency` for `join`. 11 | 12 | You can let the package automatically adjust the attributes of the input 13 | datasets using the `adjust` keyword, to adjust one of the attributes: 14 | 15 | .. 
code:: python 16 | 17 | ds = open_dataset( 18 | join=[dataset1, dataset2], 19 | adjust="frequency", 20 | ) 21 | 22 | or more than one attribute: 23 | 24 | .. code:: python 25 | 26 | ds = open_dataset( 27 | join=[dataset1, dataset2], 28 | adjust=["start", "end", "frequency"], 29 | ) 30 | 31 | You can also use `dates` as a shortcut for the above. This is equivalent 32 | to: 33 | 34 | .. code:: python 35 | 36 | ds = open_dataset(join=[dataset1, dataset2], adjust="dates") 37 | 38 | To use the common set of variables, use: 39 | 40 | .. code:: python 41 | 42 | ds = open_dataset(concat=[dataset1, dataset2], adjust="variables") 43 | 44 | To match all the attributes: 45 | 46 | .. code:: python 47 | 48 | ds = open_dataset( 49 | cutout=[dataset1, dataset2], 50 | adjust="all", 51 | ) 52 | -------------------------------------------------------------------------------- /docs/datasets/using/miscellaneous.rst: -------------------------------------------------------------------------------- 1 | .. _miscellaneous: 2 | 3 | ######################### 4 | Miscellaneous functions 5 | ######################### 6 | 7 | The two functions below can be used to temporarily modify the 8 | :ref:`configuration ` so that the packages can find named 9 | datasets at given locations. 10 | 11 | Use ``add_dataset_path`` to add a path to the list of paths where the 12 | package searches for datasets: 13 | 14 | .. _add_dataset_path: 15 | 16 | .. code:: python 17 | 18 | from anemoi.datasets import add_dataset_path 19 | from anemoi.datasets import open_dataset 20 | 21 | add_dataset_path("https://object-store.os-api.cci1.ecmwf.int/ml-examples/") 22 | 23 | ds = open_dataset("an-oper-2023-2023-2p5-6h-v1") 24 | 25 | Use ``add_named_dataset`` to add a named dataset to the list of named 26 | datasets: 27 | 28 | .. _add_named_dataset: 29 | 30 | .. code:: python 31 | 32 | from anemoi.datasets import add_named_dataset 33 | from anemoi.datasets import open_dataset 34 | 35 | add_named_dataset( 36 | "example-dataset", 37 | "https://object-store.os-api.cci1.ecmwf.int/ml-examples/an-oper-2023-2023-2p5-6h-v1.zarr", 38 | ) 39 | 40 | ds = open_dataset("example-dataset") 41 | -------------------------------------------------------------------------------- /docs/datasets/using/other.rst: -------------------------------------------------------------------------------- 1 | .. _selecting-other: 2 | 3 | ################## 4 | Other operations 5 | ################## 6 | 7 | .. warning:: 8 | 9 | The operations described in this section do not check that their 10 | inputs are compatible. 11 | 12 | ******* 13 | chain 14 | ******* 15 | 16 | .. code:: python 17 | 18 | ds = open_dataset(chain=[dataset1, dataset2, ...]) 19 | 20 | The `chain` operation is used to combine multiple datasets into a single 21 | dataset. The datasets are combined by concatenating the data arrays 22 | along the first dimension (dates). This is similar to the :ref:`concat` 23 | operation, but no checks are done to see if the datasets are compatible. 24 | This means that the shape of the arrays returned when iterating or 25 | indexing may be different. 26 | 27 | This operation is identical to Python's :py:func:`itertools.chain` 28 | function. 29 | 30 | ********* 31 | shuffle 32 | ********* 33 | 34 | .. code:: python 35 | 36 | ds = open_dataset(dataset, shuffle=True) 37 | 38 | The `shuffle` operation is used to shuffle the data in the dataset along 39 | the first dimension (dates). 
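For instance, iterating over a shuffled dataset visits the dates in a random order; a minimal sketch, where the path is a placeholder:

.. code:: python

   from anemoi.datasets import open_dataset

   ds = open_dataset("path/to/dataset.zarr", shuffle=True)

   # The first dimension (dates) is traversed in a random order
   for sample in ds:
       ...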
40 | -------------------------------------------------------------------------------- /docs/datasets/using/statistics.rst: -------------------------------------------------------------------------------- 1 | .. _selecting-statistics: 2 | 3 | ############ 4 | Statistics 5 | ############ 6 | 7 | When combining datasets, the statistics are not recomputed. Instead, the 8 | statistics of the first dataset encountered are returned by the 9 | ``statistics`` property. 10 | 11 | You can change that behaviour by using the `statistics` option to select 12 | a specific dataset from which to get the statistics: 13 | 14 | .. code:: python 15 | 16 | ds = open_dataset(dataset, statistics=other_dataset) 17 | 18 | # Will return the statistics of "other_dataset" 19 | 20 | print(ds.statistics) 21 | -------------------------------------------------------------------------------- /docs/datasets/using/subsetting.rst: -------------------------------------------------------------------------------- 1 | .. _subsetting-datasets: 2 | 3 | ##################### 4 | Subsetting datasets 5 | ##################### 6 | 7 | Subsetting is the action of filtering the dataset by its first dimension 8 | (dates). 9 | 10 | .. _start: 11 | 12 | ******* 13 | start 14 | ******* 15 | 16 | This option lets you subset the dataset by time. You can pass a date or 17 | a string: 18 | 19 | .. code:: python 20 | 21 | open_dataset(dataset, start=1980) 22 | 23 | .. _end: 24 | 25 | ***** 26 | end 27 | ***** 28 | 29 | As for the start option, you can pass a date or a string: 30 | 31 | .. code:: python 32 | 33 | open_dataset(dataset, end="2020-12-31") 34 | 35 | The following are equivalent ways of describing ``start`` or ``end``: 36 | 37 | - ``2020`` and ``"2020"`` 38 | - ``202306``, ``"202306"`` and ``"2023-06"`` 39 | - ``20200301``, ``"20200301"`` and ``"2020-03-01"`` 40 | 41 | Note that the ``start="2020"`` is equivalent to ``start="2020-01-01"`` 42 | while ``end="2020"`` is equivalent to ``end="2020-12-31"``. 43 | 44 | Note also how the ``frequency`` of the dataset will change how the 45 | ``end`` option is interpreted: 46 | 47 | - ``end="2020"`` with a ``frequency`` of one hour is equivalent to ``end="2020-12-31 23:00:00"`` 48 | - ``end="2020"`` with a ``frequency`` of 6 hours is equivalent to ``end="2020-12-31 18:00:00"`` 49 | 50 | .. _frequency: 51 | 52 | *********** 53 | frequency 54 | *********** 55 | 56 | You can change the frequency of the dataset by passing a string with: 57 | 58 | .. code:: python 59 | 60 | ds = open_dataset(dataset, frequency="6h") 61 | 62 | The new frequency must be a multiple of the original frequency. 63 | 64 | To artificially increase the frequency, you can use the 65 | ``interpolate_frequency`` option. This will create new dates in the 66 | dataset by linearly interpolating the data values between the original 67 | dates. 68 | 69 | .. 
code:: python 70 | 71 | ds = open_dataset(dataset, interpolate_frequency="10m") 72 | -------------------------------------------------------------------------------- /docs/datasets/yaml/Makefile: -------------------------------------------------------------------------------- 1 | YAML := $(wildcard building*.yaml) 2 | 3 | TARGETS := $(YAML:.yaml=.txt) 4 | 5 | all: $(TARGETS) 6 | 7 | %.zarr: %.yaml 8 | anemoi-datasets create $< $@ --overwrite 9 | 10 | %.txt: %.zarr 11 | ln -sf $< dataset.zarr 12 | anemoi-datasets inspect dataset.zarr > $@ 13 | rm -f dataset.zarr 14 | 15 | 16 | clean:: 17 | rm -fr *.zarr 18 | 19 | .SUFFIXES: .zarr .yaml .txt 20 | .PRECIOUS: %.zarr 21 | -------------------------------------------------------------------------------- /docs/datasets/yaml/building1.txt: -------------------------------------------------------------------------------- 1 | ┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈ 2 | 📦 Path : dataset.zarr 3 | 🔢 Format version: 0.20.0 4 | 5 | 📅 Start : 2024-01-01 00:00 6 | 📅 End : 2024-01-01 18:00 7 | ⏰ Frequency : 6h 8 | 🚫 Missing : 0 9 | 🌎 Resolution : 1.0 10 | 🌎 Field shape: [181, 360] 11 | 12 | 📐 Shape : 4 × 5 × 1 × 65,160 (5 MiB) 13 | 💽 Size : 2.7 MiB (2,858,121) 14 | 📁 Files : 34 15 | 16 | Index │ Variable │ Min │ Max │ Mean │ Stdev 17 | ──────┼──────────┼──────────┼─────────┼───────────┼───────── 18 | 0 │ 10u │ -24.3116 │ 25.79 │ 0.0595319 │ 5.5856 19 | 1 │ 10v │ -21.2397 │ 21.851 │ -0.270924 │ 4.23947 20 | 2 │ 2t │ 214.979 │ 319.111 │ 277.775 │ 19.9318 21 | 3 │ lsm │ 0 │ 1 │ 0.335152 │ 0.464236 22 | 4 │ msl │ 95708.5 │ 104284 │ 100867 │ 1452.67 23 | ──────┴──────────┴──────────┴─────────┴───────────┴───────── 24 | 🔋 Dataset ready, last update 2 hours ago. 25 | 📊 Statistics ready. 
26 | -------------------------------------------------------------------------------- /docs/datasets/yaml/building1.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2024-01-01T00:00:00Z 3 | end: 2024-01-01T18:00:00Z 4 | frequency: 6h 5 | 6 | input: 7 | mars: 8 | param: [2t, msl, 10u, 10v, lsm] 9 | levtype: sfc 10 | grid: [1, 1] 11 | -------------------------------------------------------------------------------- /docs/datasets/yaml/building2.txt: -------------------------------------------------------------------------------- 1 | ┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈ 2 | 📦 Path : dataset.zarr 3 | 🔢 Format version: 0.20.0 4 | 5 | 📅 Start : 2024-01-01 00:00 6 | 📅 End : 2024-01-01 18:00 7 | ⏰ Frequency : 6h 8 | 🚫 Missing : 0 9 | 🌎 Resolution : 1.0 10 | 🌎 Field shape: [181, 360] 11 | 12 | 📐 Shape : 4 × 11 × 1 × 65,160 (10.9 MiB) 13 | 💽 Size : 5.7 MiB (5,995,688) 14 | 📁 Files : 34 15 | 16 | Index │ Variable │ Min │ Max │ Mean │ Stdev 17 | ──────┼──────────┼─────────────┼─────────────┼─────────────┼──────────── 18 | 0 │ 10u │ -24.3116 │ 25.79 │ 0.0595319 │ 5.5856 19 | 1 │ 10v │ -21.2397 │ 21.851 │ -0.270924 │ 4.23947 20 | 2 │ 2t │ 214.979 │ 319.111 │ 277.775 │ 19.9318 21 | 3 │ lsm │ 0 │ 1 │ 0.335152 │ 0.464236 22 | 4 │ msl │ 95708.5 │ 104284 │ 100867 │ 1452.67 23 | 5 │ q_100 │ 8.95676e-07 │ 5.19827e-06 │ 2.78594e-06 │ 5.39734e-07 24 | 6 │ q_50 │ 1.89449e-06 │ 3.41429e-06 │ 3.00331e-06 │ 1.11219e-07 25 | 7 │ t_100 │ 186.33 │ 233.74 │ 209.958 │ 12.4899 26 | 8 │ t_50 │ 191.921 │ 241.239 │ 213.774 │ 12.3492 27 | 9 │ z_100 │ 146865 │ 163937 │ 157791 │ 4962.71 28 | 10 │ z_50 │ 186876 │ 204383 │ 199752 │ 4158.18 29 | ──────┴──────────┴─────────────┴─────────────┴─────────────┴──────────── 30 | 🔋 Dataset ready, last update 19 seconds ago. 31 | 📊 Statistics ready. 
32 | -------------------------------------------------------------------------------- /docs/datasets/yaml/building2.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2024-01-01T00:00:00Z 3 | end: 2024-01-01T18:00:00Z 4 | frequency: 6h 5 | 6 | input: 7 | join: 8 | - mars: 9 | param: [2t, msl, 10u, 10v, lsm] 10 | levtype: sfc 11 | grid: [1, 1] 12 | - mars: 13 | param: [q, t, z] 14 | levtype: pl 15 | level: [50, 100] 16 | grid: [1, 1] 17 | -------------------------------------------------------------------------------- /docs/datasets/yaml/building3.txt: -------------------------------------------------------------------------------- 1 | ┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈ 2 | 📦 Path : dataset.zarr 3 | 🔢 Format version: 0.20.0 4 | 5 | 📅 Start : 2024-01-01 00:00 6 | 📅 End : 2024-01-01 18:00 7 | ⏰ Frequency : 6h 8 | 🚫 Missing : 0 9 | 🌎 Resolution : 1.0 10 | 🌎 Field shape: [181, 360] 11 | 12 | 📐 Shape : 4 × 8 × 1 × 65,160 (8 MiB) 13 | 💽 Size : 3.1 MiB (3,283,650) 14 | 📁 Files : 34 15 | 16 | Index │ Variable │ Min │ Max │ Mean │ Stdev 17 | ──────┼──────────────┼─────────────┼──────────┼───────────┼───────── 18 | 0 │ 10u │ -24.3116 │ 25.79 │ 0.0595319 │ 5.5856 19 | 1 │ 10v │ -21.2397 │ 21.851 │ -0.270924 │ 4.23947 20 | 2 │ 2t │ 214.979 │ 319.111 │ 277.775 │ 19.9318 21 | 3 │ cos_latitude │ 6.12323e-17 │ 1 │ 0.633086 │ 0.310546 22 | 4 │ insolation │ 0 │ 0.999995 │ 0.231949 │ 0.299927 23 | 5 │ lsm │ 0 │ 1 │ 0.335152 │ 0.464236 24 | 6 │ msl │ 95708.5 │ 104284 │ 100867 │ 1452.67 25 | 7 │ sin_latitude │ -1 │ 1 │ 0 │ 0.709057 26 | ──────┴──────────────┴─────────────┴──────────┴───────────┴───────── 27 | 🔋 Dataset ready, last update 17 seconds ago. 28 | 📊 Statistics ready. 29 | -------------------------------------------------------------------------------- /docs/datasets/yaml/building3.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2024-01-01T00:00:00Z 3 | end: 2024-01-01T18:00:00Z 4 | frequency: 6h 5 | input: 6 | join: 7 | - mars: 8 | param: [2t, msl, 10u, 10v, lsm] 9 | levtype: sfc 10 | grid: [1, 1] 11 | - mars: 12 | param: [q, t, z] 13 | levtype: pl 14 | level: [50, 100] 15 | grid: [1, 1] 16 | - forcings: 17 | template: ${input.join.0.mars} 18 | param: 19 | - cos_latitude 20 | - sin_latitude 21 | - insolation 22 | -------------------------------------------------------------------------------- /docs/datasets/yaml/concat.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | concat: 3 | - dates: 4 | start: 2020-12-30 00:00:00 5 | end: 2021-01-01 12:00:00 6 | frequency: 12h 7 | 8 | source1: 9 | - args 10 | 11 | - dates: 12 | start: 2021-01-02 00:00:00 13 | end: 2021-01-03 12:00:00 14 | frequency: 12h 15 | 16 | source2: 17 | - args 18 | -------------------------------------------------------------------------------- /docs/datasets/yaml/hindcasts.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | hindcasts: 3 | levtype: sfc 4 | param: [2t, msl] 5 | grid: [0.25, 0.25] 6 | stream: enfh 7 | type: cf 8 | reference_year: 2022 9 | -------------------------------------------------------------------------------- /docs/datasets/yaml/input.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | join: 3 | - source1: 4 | key: date 5 | - source2 6 | - ... 
7 | -------------------------------------------------------------------------------- /docs/datasets/yaml/missing_dates.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2017-01-01 00:00:00 3 | end: 2017-01-31 23:00:00 4 | frequency: 1h 5 | 6 | missing: 7 | - 2017-01-02 00:00:00 8 | - 2017-01-03 00:00:00 9 | -------------------------------------------------------------------------------- /docs/datasets/yaml/nan.yaml: -------------------------------------------------------------------------------- 1 | statistics: 2 | allow_nans: [sst, ci] 3 | -------------------------------------------------------------------------------- /docs/datasets/yaml/pipe.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | pipe: 3 | - source 4 | - filter1 5 | - filter2 6 | - ... 7 | -------------------------------------------------------------------------------- /docs/dev/contributing.rst: -------------------------------------------------------------------------------- 1 | .. _dev-contributing: 2 | 3 | #################### 4 | General guidelines 5 | #################### 6 | 7 | Thank you for your interest in Anemoi Datasets! Please follow the 8 | :ref:`general Anemoi contributing guidelines 9 | `. 10 | 11 | These include general guidelines for contributions to Anemoi, 12 | instructions on setting up a development environment, and guidelines on 13 | collaboration on GitHub, writing documentation, testing, and code style. 14 | 15 | ************ 16 | Unit tests 17 | ************ 18 | 19 | Anemoi Datasets includes unit tests that can be executed locally using 20 | ``pytest``. For more information on testing, please refer to the 21 | :ref:`general Anemoi testing guidelines 22 | `. 23 | -------------------------------------------------------------------------------- /docs/howtos/create/03-constant-fields.rst: -------------------------------------------------------------------------------- 1 | .. 
_constant-data: 2 | 3 | ################# 4 | Constant fields 5 | ################# 6 | 7 | (Coming soon) 8 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/grib-flavour1.yaml: -------------------------------------------------------------------------------- 1 | - - levtype: sfc 2 | - levelist: null 3 | 4 | - - { discipline: 0, parameterCategory: 1, parameterNumber: 201 } 5 | - param: csf 6 | 7 | - - { discipline: 0, parameterCategory: 1, parameterNumber: 64 } 8 | - param: tcwv 9 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/grib-flavour2.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2023-01-01T00:00:00 3 | end: 2023-01-02T18:00:00 4 | frequency: 6h 5 | 6 | 7 | input: 8 | grib: 9 | path: /path/to/input.grib 10 | flavour: /path/to/flavour.yaml 11 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/grib-flavour3.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2023-01-01T00:00:00 3 | end: 2023-01-02T18:00:00 4 | frequency: 6h 5 | 6 | 7 | input: 8 | grib: 9 | path: /path/to/input.grib 10 | flavour: 11 | - - levtype: sfc 12 | - levelist: null 13 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/grib-flavour4.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2023-01-01T00:00:00 3 | end: 2023-01-02T18:00:00 4 | frequency: 6h 5 | 6 | flavour: &flavour 7 | - - levtype: sfc 8 | - levelist: null 9 | 10 | input: 11 | join: 12 | - grib: 13 | path: /path/to/input.grib 14 | flavour: *flavour 15 | param: [ z, t, u, v ] 16 | levelist: [ 1000, 850, 500 ] 17 | levtype: pl 18 | 19 | - grib: 20 | path: /path/to/input2.grib 21 | flavour: *flavour 22 | param: [ 2t, msl ] 23 | levtype: sfc 24 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/grib-recipe1.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2023-01-01T00:00:00 3 | end: 2023-01-02T18:00:00 4 | frequency: 6h 5 | 6 | 7 | input: 8 | grib: 9 | path: /path/to/input.grib 10 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/grib-recipe2.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2023-01-01T00:00:00 3 | end: 2023-01-02T18:00:00 4 | frequency: 6h 5 | 6 | 7 | input: 8 | grib: 9 | path: /path/to/data-{param}-{date:strftime(%Y%m%d%H)}.grib 10 | param: [2t, 10u, 10v] 11 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/grib-recipe3.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2023-01-01T00:00:00 3 | end: 2023-01-02T18:00:00 4 | frequency: 6h 5 | 6 | 7 | input: 8 | grib-index: 9 | index: /path/to/index.db 10 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/grib-recipe4.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2023-01-01T00:00:00 3 | end: 2023-01-02T18:00:00 4 | frequency: 6h 5 | 6 | 7 | input: 8 | grib-index: 9 | index: /path/to/index.db 10 | param: [2t, 10u, 10v] 11 | 
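12 | # Note (illustrative): when "param" is given, only the listed fields are 13 | # read from the pre-built index; other indexed GRIB messages are ignored.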
-------------------------------------------------------------------------------- /docs/howtos/create/yaml/grib-recipe5.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2023-01-01T00:00:00 3 | end: 2023-01-02T18:00:00 4 | frequency: 6h 5 | 6 | input: 7 | join: 8 | - grib: 9 | path: /path/to/input.grib 10 | param: [z, t, u, v] 11 | levelist: [1000, 850, 500] 12 | levtype: pl 13 | 14 | - grib: 15 | path: /path/to/input2.grib 16 | param: [2t, msl] 17 | levtype: sfc 18 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/netcdf1.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2023-01-01T00:00:00 3 | end: 2023-01-02T18:00:00 4 | frequency: 6h 5 | 6 | 7 | input: 8 | netcdf: 9 | path: /path/to/input.nc 10 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/opendap1.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2023-01-01T00:00:00 3 | end: 2023-01-02T18:00:00 4 | frequency: 6h 5 | 6 | input: 7 | opendap: 8 | url: https://www.example.com/path/to/input.nc 9 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/xarray-flavour1.yaml: -------------------------------------------------------------------------------- 1 | rules: 2 | latitude: 3 | name: grid_yt 4 | level: 5 | name: pfull 6 | longitude: 7 | name: grid_xt 8 | time: 9 | name: time 10 | 11 | levtype: pl 12 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/xarray-patch1.yaml: -------------------------------------------------------------------------------- 1 | 2 | dates: 3 | start: 2023-01-01T00:00:00 4 | end: 2023-01-02T18:00:00 5 | frequency: 6h 6 | 7 | input: 8 | netcdf: 9 | path: /path/to/input.nc 10 | patch: 11 | coordinates: [ nav_lat, nav_lon ] 12 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/xarray-patch2.yaml: -------------------------------------------------------------------------------- 1 | attributes: 2 | variable: 3 | attribute1: value1 4 | attribute2: value2 5 | 6 | coordinates: [ x, y ] 7 | -------------------------------------------------------------------------------- /docs/howtos/create/yaml/zarr1.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2023-01-01T00:00:00 3 | end: 2023-01-02T18:00:00 4 | frequency: 6h 5 | 6 | input: 7 | xarray-zarr: 8 | url: https://www.example.com/path/to/input.zarr 9 | -------------------------------------------------------------------------------- /docs/howtos/introduction.rst: -------------------------------------------------------------------------------- 1 | ######### 2 | How-Tos 3 | ######### 4 | 5 | How-Tos are a collection of guides that help you to use anemoi-datasets 6 | in different ways. They are not exhaustive, but they should give you a 7 | good starting point. 8 | 9 | ************************** 10 | Creating anemoi-datasets 11 | ************************** 12 | 13 | - :ref:`create-grib-data` 14 | - :ref:`create-cf-data` 15 | - :ref:`constant-data` 16 | 17 | .. 
toctree:: 18 |    :maxdepth: 1 19 |    :glob: 20 |    :hidden: 21 |    :caption: Creating anemoi-datasets 22 | 23 |    create/* 24 | 25 | *********************** 26 |  Using anemoi-datasets 27 | *********************** 28 | 29 | - :ref:`interpolate-step` 30 | - :ref:`complement-step` 31 | 32 | .. toctree:: 33 |    :maxdepth: 1 34 |    :glob: 35 |    :hidden: 36 |    :caption: Using anemoi-datasets 37 | 38 |    usage/* 39 | -------------------------------------------------------------------------------- /docs/howtos/usage/01-interpolate-step-dataset-combination.rst: -------------------------------------------------------------------------------- 1 | .. _interpolate-step: 2 | 3 | ###################################################### 4 |  Combine datasets with different timestep frequencies 5 | ###################################################### 6 | 7 | Here we explain how to combine two existing datasets with different 8 | timestep frequencies. In this example, we consider two datasets: 9 | ``dataset-3h`` with an inherent temporal frequency of 3h and 10 | ``dataset-24h`` with an inherent temporal frequency of 24h. The goal is 11 | to combine the two datasets into a dataset with a temporal frequency of 12 | either 3h or 24h. 13 | 14 | ********************************* 15 |  Interpolate to higher frequency 16 | ********************************* 17 | 18 | In this case, we will use the ``interpolate_frequency`` option to bring 19 | ``dataset-24h`` to the 3h timestep of ``dataset-3h``. 20 | 21 | .. literalinclude:: code/interpolate1.py 22 |    :language: python 23 | 24 | or in the config file: 25 | 26 | .. literalinclude:: yaml/interpolate1.yaml 27 |    :language: yaml 28 | 29 | The ``adjust`` option handles the case where the start or end dates of 30 | the two datasets do not match exactly. 31 | 32 | *************************** 33 |  Sample to lower frequency 34 | *************************** 35 | 36 | This case is straightforward; we can just specify the required 24h 37 | frequency for ``dataset-3h``. 38 | 39 | .. literalinclude:: code/interpolate2.py 40 |    :language: python 41 | 42 | or for the config file: 43 | 44 | .. literalinclude:: yaml/interpolate2.yaml 45 |    :language: yaml 46 | -------------------------------------------------------------------------------- /docs/howtos/usage/02-coutout-complement-combination.rst: -------------------------------------------------------------------------------- 1 | .. _complement-step: 2 | 3 | ############################################## 4 |  Combining cutout with complementing datasets 5 | ############################################## 6 | 7 | Here we explain how to combine a cutout with a complementing dataset. 8 | 9 | **************************** 10 |  Interpolate to cutout grid 11 | **************************** 12 | 13 | In this case, we will use a ``lam-dataset`` on a different grid that 14 | contains just one variable (``tp`` in the example below) and a 15 | ``global-dataset``. We want to interpolate the ``global-dataset`` to the 16 | grid that results from the cutout operation. 17 | 18 | .. literalinclude:: code/cutout-complement1.py 19 |    :language: python 20 | 21 | or for the config file: 22 | 23 | .. literalinclude:: yaml/cutout-complement1.yaml 24 |    :language: yaml 25 | 26 | The ``adjust`` option handles the case where the start or end dates of 27 | the two datasets do not match exactly.
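28 | 29 | Once the combined dataset is open, a quick sanity check is to inspect it. This is a minimal sketch, reusing the ``ds`` object from the Python example above: 30 | 31 | .. code:: python 32 | 33 |    print(ds.variables)  # variables of the cutout plus the complemented ones 34 |    print(ds.shape)  # (dates, variables, ensembles, grid points) 35 |    print(len(ds.latitudes))  # number of grid points of the combined grid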
36 | -------------------------------------------------------------------------------- /docs/howtos/usage/code/cutout-complement1.py: -------------------------------------------------------------------------------- 1 | from anemoi.datasets import open_dataset 2 | 3 | ds = open_dataset( 4 |     complement={ 5 |         "cutout": [ 6 |             "lam-dataset", 7 |             { 8 |                 "dataset": "global-dataset", 9 |                 "select": ["tp"], 10 |             }, 11 |         ], 12 |         "min_distance_km": 1, 13 |         "adjust": "dates", 14 |     }, 15 |     source="global-dataset", 16 |     interpolation="nearest", 17 | ) 18 | -------------------------------------------------------------------------------- /docs/howtos/usage/code/interpolate1.py: -------------------------------------------------------------------------------- 1 | from anemoi.datasets import open_dataset 2 | 3 | ds = open_dataset( 4 |     dataset={ 5 |         "join": [ 6 |             { 7 |                 "dataset": "dataset-3h", 8 |                 "frequency": "3h", 9 |             }, 10 |             { 11 |                 "dataset": "dataset-24h", 12 |                 "interpolate_frequency": "3h", 13 |             }, 14 |         ], 15 |         "adjust": "dates", 16 |     }, 17 |     start="2004-01-01", 18 |     end="2023-01-01", 19 | ) 20 | -------------------------------------------------------------------------------- /docs/howtos/usage/code/interpolate2.py: -------------------------------------------------------------------------------- 1 | from anemoi.datasets import open_dataset 2 | 3 | ds = open_dataset( 4 |     dataset={ 5 |         "join": [ 6 |             { 7 |                 "dataset": "dataset-3h", 8 |                 "frequency": "24h", 9 |             }, 10 |             { 11 |                 "dataset": "dataset-24h", 12 |                 "frequency": "24h", 13 |             }, 14 |         ], 15 |         "adjust": "dates", 16 |     }, 17 |     start="2004-01-01", 18 |     end="2023-01-01", 19 | ) 20 | -------------------------------------------------------------------------------- /docs/howtos/usage/yaml/cutout-complement1.yaml: -------------------------------------------------------------------------------- 1 | dataset: 2 |   complement: 3 |     dataset: 4 |       cutout: 5 |         - lam-dataset 6 |         - dataset: global-dataset 7 |           select: [ tp ] 8 |       min_distance_km: 1 9 |       adjust: dates 10 |   source: global-dataset 11 |   interpolation: nearest 12 | -------------------------------------------------------------------------------- /docs/howtos/usage/yaml/interpolate1.yaml: -------------------------------------------------------------------------------- 1 | dataset: 2 |   join: 3 |     - dataset: dataset-3h 4 |       frequency: 3h 5 |     - dataset: dataset-24h 6 |       interpolate_frequency: 3h 7 |   adjust: dates 8 | start: 2004-01-01 9 | end: 2023-01-01 10 | -------------------------------------------------------------------------------- /docs/howtos/usage/yaml/interpolate2.yaml: -------------------------------------------------------------------------------- 1 | dataset: 2 |   join: 3 |     - dataset: dataset-3h 4 |       frequency: 24h 5 |     - dataset: dataset-24h 6 |       frequency: 24h 7 | start: 2004-01-01 8 | end: 2023-01-01 9 | -------------------------------------------------------------------------------- /docs/installing.rst: -------------------------------------------------------------------------------- 1 | .. _installing: 2 | 3 | ############ 4 |  Installing 5 | ############ 6 | 7 | **************** 8 |  Python Version 9 | **************** 10 | 11 | - Python (>= 3.9) 12 | 13 | We require at least Python 3.9. 14 | 15 | ************** 16 |  Installation 17 | ************** 18 | 19 | Environments 20 | ============ 21 | 22 | We currently do not provide a conda build of anemoi-datasets, so the 23 | suggested installation is through Python virtual environments. 24 | 25 | For Linux, the process to make and use a venv is as follows: 26 | 27 | ..
code:: bash 28 | 29 |    python -m venv /path/to/my/venv 30 |    source /path/to/my/venv/bin/activate 31 | 32 | Instructions 33 | ============ 34 | 35 | To install the package, you can use the following command: 36 | 37 | .. code:: bash 38 | 39 |    python -m pip install anemoi-datasets 40 | 41 | If you are interested in creating datasets, you can install the package 42 | with the following command: 43 | 44 | .. code:: bash 45 | 46 |    python -m pip install "anemoi-datasets[create]" 47 | 48 | For an editable install of anemoi-datasets, you can use the following 49 | command. In this case, changes that you make to the anemoi-datasets code 50 | will be reflected in the installed package without having to reinstall 51 | it. 52 | 53 | .. code:: bash 54 | 55 |    python -m pip install -e . 56 | 57 | We also maintain other dependency sets for different subsets of 58 | functionality: 59 | 60 | .. code:: bash 61 | 62 |    python -m pip install "anemoi-datasets[docs]" # Install optional dependencies for generating docs 63 | 64 | .. literalinclude:: ../pyproject.toml 65 |    :language: toml 66 |    :start-at: optional-dependencies.all 67 |    :end-before: urls.Changelog 68 | 69 | ********************** 70 |  Development versions 71 | ********************** 72 | 73 | To install the most recent development version, install from GitHub: 74 | 75 | .. code:: bash 76 | 77 |    $ python -m pip install git+https://github.com/ecmwf/anemoi-datasets.git 78 | 79 | ********* 80 |  Testing 81 | ********* 82 | 83 | To run the test suite after installing anemoi-datasets, install (via 84 | PyPI) `pytest `__ and run ``pytest`` from the root 85 | of the anemoi-datasets repository. 86 | -------------------------------------------------------------------------------- /docs/modules/dataset.rst: -------------------------------------------------------------------------------- 1 | .. _dataset-autodoc: 2 | 3 | ######### 4 |  Dataset 5 | ######### 6 | 7 | .. automodule:: anemoi.datasets.data.dataset 8 |    :members: 9 |    :no-undoc-members: 10 |    :show-inheritance: 11 | -------------------------------------------------------------------------------- /docs/modules/filters.rst: -------------------------------------------------------------------------------- 1 | ######### 2 |  Filters 3 | ######### 4 | 5 | .. include:: ../_api/anemoi.datasets.create.filters.rst 6 | -------------------------------------------------------------------------------- /docs/modules/sources.rst: -------------------------------------------------------------------------------- 1 | ######### 2 |  Sources 3 | ######### 4 | 5 | .. include:: ../_api/anemoi.datasets.create.sources.rst 6 | -------------------------------------------------------------------------------- /docs/pptx/images.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmwf/anemoi-datasets/37afc0d6489f2d6c4b3ce3f9901c40e4cec5c4eb/docs/pptx/images.pptx -------------------------------------------------------------------------------- /docs/scripts/api_build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Build the API reference pages with sphinx-apidoc. 3 | set -e 4 | 5 | script_dir=$(dirname "${BASH_SOURCE[0]}") 6 | docs_dir="$script_dir/.."
7 | source_dir="$script_dir/../../src/" 8 | 9 | 10 | trap 'rm -f $source_dir/anemoi/__init__.py' EXIT 11 | 12 | touch "$source_dir/anemoi/__init__.py" 13 | sphinx-apidoc -M -f -o "$docs_dir/_api" "$source_dir/anemoi" -t "$docs_dir/_templates/apidoc" 14 | -------------------------------------------------------------------------------- /docs/usage/yaml/aifs-ea-an-oper-0001-mars-o48-2020-2021-6h-v1.yaml: -------------------------------------------------------------------------------- 1 | name: era5-o48-2020-2021-6h-v1 2 | 3 | description: Low resolution reduced dataset for documentation purposes 4 | 5 | attribution: ECMWF/C3S 6 | 7 | licence: CC-BY-4.0 8 | 9 | dates: 10 | start: '2020-01-01T00:00:00' 11 | end: '2021-12-31T23:00:00' 12 | frequency: 6h 13 | 14 | input: 15 | join: 16 | - mars: 17 | use_cdsapi_dataset: "reanalysis-era5-complete" 18 | class: ea 19 | expver: '0001' 20 | grid: o48 21 | levtype: sfc 22 | param: 23 | - 10u 24 | - 10v 25 | - 2d 26 | - 2t 27 | - lsm 28 | - msl 29 | - sdor 30 | - skt 31 | - slor 32 | - sp 33 | - tcw 34 | - z 35 | - mars: 36 | use_cdsapi_dataset: "reanalysis-era5-complete" 37 | class: ea 38 | expver: '0001' 39 | grid: o48 40 | level: 41 | - 250 42 | - 500 43 | - 850 44 | - 1000 45 | levtype: pl 46 | param: 47 | - u 48 | - v 49 | - q 50 | - t 51 | - z 52 | - accumulations: 53 | use_cdsapi_dataset: "reanalysis-era5-complete" 54 | accumulation_period: 6 55 | class: ea 56 | expver: '0001' 57 | grid: o48 58 | param: 59 | - cp 60 | - tp 61 | - constants: 62 | param: 63 | - cos_latitude 64 | - cos_longitude 65 | - sin_latitude 66 | - sin_longitude 67 | - cos_julian_day 68 | - cos_local_time 69 | - sin_julian_day 70 | - sin_local_time 71 | - insolation 72 | template: ${input.join.0.mars} 73 | -------------------------------------------------------------------------------- /docs/using/code/trimedge1_.py: -------------------------------------------------------------------------------- 1 | ds = open_dataset(dataset1, trim_edge=(3, 10, 4, 2)) 2 | -------------------------------------------------------------------------------- /src/anemoi/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 
9 | 10 | from typing import List 11 | 12 | from .data import MissingDateError 13 | from .data import add_dataset_path 14 | from .data import add_named_dataset 15 | from .data import list_dataset_names 16 | from .data import open_dataset 17 | 18 | try: 19 | # NOTE: the `_version.py` file must not be present in the git repository 20 | # as it is generated by setuptools at install time 21 | from ._version import __version__ # type: ignore 22 | except ImportError: # pragma: no cover 23 | # Local copy or not installed with setuptools 24 | __version__ = "999" 25 | 26 | __all__: List[str] = [ 27 | "add_dataset_path", 28 | "add_named_dataset", 29 | "list_dataset_names", 30 | "MissingDateError", 31 | "open_dataset", 32 | "__version__", 33 | ] 34 | -------------------------------------------------------------------------------- /src/anemoi/datasets/__main__.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | from typing import Any 11 | 12 | from anemoi.utils.cli import cli_main 13 | from anemoi.utils.cli import make_parser 14 | 15 | from . import __version__ 16 | from .commands import COMMANDS 17 | 18 | 19 | # For read-the-docs 20 | def create_parser() -> Any: 21 | """Create the argument parser for the CLI. 22 | 23 | Returns 24 | ------- 25 | Any 26 | The argument parser instance. 27 | """ 28 | return make_parser(__doc__, COMMANDS) 29 | 30 | 31 | def main() -> None: 32 | """The main entry point for the CLI application.""" 33 | cli_main(__version__, __doc__, COMMANDS) 34 | 35 | 36 | if __name__ == "__main__": 37 | main() 38 | -------------------------------------------------------------------------------- /src/anemoi/datasets/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import os 11 | 12 | from anemoi.utils.cli import Command 13 | from anemoi.utils.cli import Failed 14 | from anemoi.utils.cli import register_commands 15 | 16 | __all__ = ["Command"] 17 | 18 | COMMANDS = register_commands( 19 | os.path.dirname(__file__), 20 | __name__, 21 | lambda x: x.command(), 22 | lambda name, error: Failed(name, error), 23 | ) 24 | -------------------------------------------------------------------------------- /src/anemoi/datasets/commands/cleanup.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import logging 11 | import time 12 | from typing import Any 13 | 14 | from anemoi.utils.humanize import seconds_to_human 15 | 16 | from anemoi.datasets.commands.create import task 17 | 18 | from . import Command 19 | 20 | LOG = logging.getLogger(__name__) 21 | 22 | 23 | class Cleanup(Command): 24 | """Create a dataset, step by step.""" 25 | 26 | internal = True 27 | timestamp = True 28 | 29 | def add_arguments(self, subparser: Any) -> None: 30 | """Add command line arguments to the parser. 31 | 32 | Parameters 33 | ---------- 34 | subparser : Any 35 | The argument parser. 36 | """ 37 | subparser.add_argument("path", help="Path to store the created data.") 38 | subparser.add_argument( 39 | "--delta", 40 | help="Compute statistics tendencies on a given time delta, if possible. Must be a multiple of the frequency.", 41 | nargs="+", 42 | ) 43 | 44 | def run(self, args: Any) -> None: 45 | """Execute the cleanup command. 46 | 47 | Parameters 48 | ---------- 49 | args : Any 50 | The command line arguments. 51 | """ 52 | options = vars(args) 53 | options.pop("command") 54 | now = time.time() 55 | step = self.__class__.__name__.lower() 56 | 57 | if "version" in options: 58 | options.pop("version") 59 | 60 | if "debug" in options: 61 | options.pop("debug") 62 | 63 | task(step, options) 64 | 65 | LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}") 66 | 67 | 68 | command = Cleanup 69 | -------------------------------------------------------------------------------- /src/anemoi/datasets/commands/finalise-additions.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import logging 11 | import time 12 | from typing import Any 13 | 14 | from anemoi.utils.humanize import seconds_to_human 15 | 16 | from anemoi.datasets.commands.create import task 17 | 18 | from . import Command 19 | 20 | LOG = logging.getLogger(__name__) 21 | 22 | 23 | class FinaliseAdditions(Command): 24 | """Create a dataset, step by step.""" 25 | 26 | internal = True 27 | timestamp = True 28 | 29 | def add_arguments(self, command_parser: Any) -> None: 30 | """Add command line arguments to the parser. 31 | 32 | Parameters 33 | ---------- 34 | command_parser : Any 35 | The argument parser instance to which arguments will be added. 36 | """ 37 | command_parser.add_argument( 38 | "--delta", 39 | help="Compute statistics tendencies on a given time delta, if possible. Must be a multiple of the frequency.", 40 | nargs="+", 41 | ) 42 | 43 | command_parser.add_argument("path", help="Path to store the created data.") 44 | command_parser.add_argument("--trace", action="store_true") 45 | 46 | def run(self, args: Any) -> None: 47 | """Execute the command with the given arguments. 48 | 49 | Parameters 50 | ---------- 51 | args : Any 52 | The arguments passed to the command. 
53 | """ 54 | options = vars(args) 55 | options.pop("command") 56 | step = "finalise-additions" 57 | now = time.time() 58 | 59 | if "version" in options: 60 | options.pop("version") 61 | 62 | if "debug" in options: 63 | options.pop("debug") 64 | 65 | task(step, options) 66 | 67 | LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}") 68 | 69 | 70 | command = FinaliseAdditions 71 | -------------------------------------------------------------------------------- /src/anemoi/datasets/commands/finalise.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import logging 11 | import time 12 | from typing import Any 13 | 14 | from anemoi.utils.humanize import seconds_to_human 15 | 16 | from anemoi.datasets.commands.create import task 17 | 18 | from . import Command 19 | 20 | LOG = logging.getLogger(__name__) 21 | 22 | 23 | class Finalise(Command): 24 | """Create a dataset, step by step.""" 25 | 26 | internal = True 27 | timestamp = True 28 | 29 | def add_arguments(self, command_parser: Any) -> None: 30 | """Add arguments to the command parser. 31 | 32 | Parameters 33 | ---------- 34 | command_parser : Any 35 | The command parser to which arguments will be added. 36 | """ 37 | command_parser.add_argument("path", help="Path to store the created data.") 38 | command_parser.add_argument("--trace", action="store_true") 39 | 40 | def run(self, args: Any) -> None: 41 | """Execute the finalise command. 42 | 43 | Parameters 44 | ---------- 45 | args : Any 46 | The arguments passed to the command. 47 | """ 48 | options = vars(args) 49 | options.pop("command") 50 | now = time.time() 51 | step = "finalise" 52 | 53 | if "version" in options: 54 | options.pop("version") 55 | 56 | if "debug" in options: 57 | options.pop("debug") 58 | 59 | task(step, options) 60 | 61 | LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}") 62 | 63 | 64 | command = Finalise 65 | -------------------------------------------------------------------------------- /src/anemoi/datasets/commands/init-additions.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import logging 11 | import time 12 | from typing import Any 13 | 14 | from anemoi.utils.humanize import seconds_to_human 15 | 16 | from anemoi.datasets.commands.create import task 17 | 18 | from . import Command 19 | 20 | LOG = logging.getLogger(__name__) 21 | 22 | 23 | class InitAdditions(Command): 24 | """Create a dataset, step by step.""" 25 | 26 | internal = True 27 | timestamp = True 28 | 29 | def add_arguments(self, command_parser: Any) -> None: 30 | """Add command line arguments to the parser. 
31 | 32 | Parameters 33 | ---------- 34 | command_parser : Any 35 | The argument parser instance. 36 | """ 37 | command_parser.add_argument( 38 | "--delta", 39 | help="Compute statistics tendencies on a given time delta, if possible. Must be a multiple of the frequency.", 40 | nargs="+", 41 | ) 42 | 43 | command_parser.add_argument("path", help="Path to store the created data.") 44 | command_parser.add_argument("--trace", action="store_true") 45 | 46 | def run(self, args: Any) -> None: 47 | """Execute the command with the given arguments. 48 | 49 | Parameters 50 | ---------- 51 | args : Any 52 | The command line arguments. 53 | """ 54 | options = vars(args) 55 | options.pop("command") 56 | step = "init-additions" 57 | now = time.time() 58 | 59 | if "version" in options: 60 | options.pop("version") 61 | 62 | if "debug" in options: 63 | options.pop("debug") 64 | 65 | task(step, options) 66 | 67 | LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}") 68 | 69 | 70 | command = InitAdditions 71 | -------------------------------------------------------------------------------- /src/anemoi/datasets/commands/load-additions.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import logging 11 | import time 12 | from typing import Any 13 | 14 | from anemoi.utils.humanize import seconds_to_human 15 | 16 | from anemoi.datasets.commands.create import task 17 | 18 | from . import Command 19 | 20 | LOG = logging.getLogger(__name__) 21 | 22 | 23 | class LoadAdditions(Command): 24 | """Create a dataset, step by step.""" 25 | 26 | internal = True 27 | timestamp = True 28 | 29 | def add_arguments(self, subparser: Any) -> None: 30 | """Add command line arguments to the parser. 31 | 32 | Parameters 33 | ---------- 34 | subparser : Any 35 | The argument parser. 36 | """ 37 | subparser.add_argument("--parts", nargs="+", help="Only load the specified parts of the dataset.") 38 | subparser.add_argument( 39 | "--delta", 40 | help="Compute statistics tendencies on a given time delta, if possible. Must be a multiple of the frequency.", 41 | nargs="+", 42 | ) 43 | 44 | subparser.add_argument("path", help="Path to store the created data.") 45 | subparser.add_argument("--trace", action="store_true") 46 | 47 | def run(self, args: Any) -> None: 48 | """Execute the command with the given arguments. 49 | 50 | Parameters 51 | ---------- 52 | args : Any 53 | The command line arguments. 54 | """ 55 | options = vars(args) 56 | options.pop("command") 57 | now = time.time() 58 | step = "load-additions" 59 | 60 | if "version" in options: 61 | options.pop("version") 62 | 63 | if "debug" in options: 64 | options.pop("debug") 65 | 66 | task(step, options) 67 | 68 | LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}") 69 | 70 | 71 | command = LoadAdditions 72 | -------------------------------------------------------------------------------- /src/anemoi/datasets/commands/load.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 
2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import logging 11 | import time 12 | from typing import Any 13 | 14 | from anemoi.utils.humanize import seconds_to_human 15 | 16 | from anemoi.datasets.commands.create import task 17 | 18 | from . import Command 19 | 20 | LOG = logging.getLogger(__name__) 21 | 22 | 23 | class Load(Command): 24 | """Create a dataset, step by step.""" 25 | 26 | internal = True 27 | timestamp = True 28 | 29 | def add_arguments(self, subparser: Any) -> None: 30 | """Add arguments to the command parser. 31 | 32 | Parameters 33 | ---------- 34 | subparser : Any 35 | The command parser. 36 | """ 37 | subparser.add_argument("--parts", nargs="+", help="Only load the specified parts of the dataset.") 38 | # subparser.add_argument( 39 | # "--delta", 40 | # help="Compute statistics tendencies on a given time delta, if possible. Must be a multiple of the frequency.", 41 | # ) 42 | 43 | subparser.add_argument("path", help="Path to store the created data.") 44 | subparser.add_argument("--cache", help="Location to store the downloaded data.", metavar="DIR") 45 | subparser.add_argument("--trace", action="store_true") 46 | 47 | def run(self, args: Any) -> None: 48 | """Run the command. 49 | 50 | Parameters 51 | ---------- 52 | args : Any 53 | The command arguments. 54 | """ 55 | options = vars(args) 56 | options.pop("command") 57 | now = time.time() 58 | step = "load" 59 | 60 | if "version" in options: 61 | options.pop("version") 62 | 63 | if "debug" in options: 64 | options.pop("debug") 65 | 66 | task(step, options) 67 | 68 | LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}") 69 | 70 | 71 | command = Load 72 | -------------------------------------------------------------------------------- /src/anemoi/datasets/commands/patch.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import logging 11 | import time 12 | from typing import Any 13 | 14 | from anemoi.utils.humanize import seconds_to_human 15 | 16 | from anemoi.datasets.commands.create import task 17 | 18 | from . import Command 19 | 20 | LOG = logging.getLogger(__name__) 21 | 22 | 23 | class Patch(Command): 24 | """Create a dataset, step by step.""" 25 | 26 | internal = True 27 | timestamp = True 28 | 29 | def add_arguments(self, parser: Any) -> None: 30 | """Add command-line arguments to the parser. 31 | 32 | Parameters 33 | ---------- 34 | parser : Any 35 | The argument parser instance. 36 | """ 37 | parser.add_argument("path", help="Path to store the created data.") 38 | 39 | def run(self, args: Any) -> None: 40 | """Execute the patch command. 41 | 42 | Parameters 43 | ---------- 44 | args : Any 45 | The command-line arguments. 
46 | """ 47 | options = vars(args) 48 | options.pop("command") 49 | now = time.time() 50 | step = self.__class__.__name__.lower() 51 | 52 | if "version" in options: 53 | options.pop("version") 54 | 55 | if "debug" in options: 56 | options.pop("debug") 57 | 58 | task(step, options) 59 | 60 | LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}") 61 | 62 | 63 | command = Patch 64 | -------------------------------------------------------------------------------- /src/anemoi/datasets/commands/publish.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import logging 11 | from typing import Any 12 | 13 | from . import Command 14 | 15 | LOG = logging.getLogger(__name__) 16 | 17 | 18 | class Publish(Command): 19 | """Publish a dataset.""" 20 | 21 | # This is a command that is used to publish a dataset. 22 | # it is a class, inheriting from Command. 23 | 24 | internal = True 25 | timestamp = True 26 | 27 | def add_arguments(self, parser: Any) -> None: 28 | """Add arguments to the command parser. 29 | 30 | Parameters 31 | ---------- 32 | parser : Any 33 | The command parser to which arguments are added. 34 | """ 35 | parser.add_argument("path", help="Path of the dataset to publish.") 36 | 37 | def run(self, args: Any) -> None: 38 | """Execute the publish command. 39 | 40 | Parameters 41 | ---------- 42 | args : Any 43 | The arguments passed to the command. 44 | """ 45 | try: 46 | from anemoi.registry import publish_dataset 47 | except ImportError: 48 | LOG.error("anemoi-registry is not installed. Please install it to use this command.") 49 | return 50 | 51 | publish_dataset(args.path) 52 | 53 | 54 | command = Publish 55 | -------------------------------------------------------------------------------- /src/anemoi/datasets/compute/__init__.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/filter.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2025- Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 
9 | 10 | from abc import ABC 11 | from abc import abstractmethod 12 | from typing import Any 13 | 14 | import earthkit.data as ekd 15 | 16 | 17 | class Filter(ABC): 18 | """A base class for filters.""" 19 | 20 | def __init__(self, context: Any, *args: Any, **kwargs: Any) -> None: 21 | """Initialise the filter. 22 | 23 | Parameters 24 | ---------- 25 | context : Any 26 | The context in which the filter is created. 27 | *args : tuple 28 | Positional arguments. 29 | **kwargs : dict 30 | Keyword arguments. 31 | """ 32 | 33 | self.context = context 34 | 35 | @abstractmethod 36 | def execute(self, data: ekd.FieldList) -> ekd.FieldList: 37 | """Execute the filter. 38 | 39 | Parameters 40 | ---------- 41 | data : ekd.FieldList 42 | The input data. 43 | 44 | Returns 45 | ------- 46 | ekd.FieldList 47 | The output data. 48 | """ 49 | 50 | pass 51 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/filters/__init__.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | # 10 | 11 | from typing import Any 12 | 13 | from anemoi.utils.registry import Registry 14 | 15 | filter_registry = Registry(__name__) 16 | 17 | 18 | def create_filter(context: Any, config: Any) -> Any: 19 | """Create a filter based on the provided configuration. 20 | 21 | Parameters 22 | ---------- 23 | context : Any 24 | The context in which the filter is created. 25 | config : Any 26 | The configuration for the filter. 27 | 28 | Returns 29 | ------- 30 | Any 31 | The created filter. 32 | """ 33 | return filter_registry.from_config(config, context) 34 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/filters/empty.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | 11 | from typing import Any 12 | 13 | import earthkit.data as ekd 14 | from anemoi.transform.fields import new_empty_fieldlist 15 | 16 | from .legacy import legacy_filter 17 | 18 | 19 | @legacy_filter(__file__) 20 | def execute(context: Any, input: ekd.FieldList, **kwargs: Any) -> ekd.FieldList: 21 | """Create a pipeline that returns an empty result. 22 | 23 | Parameters 24 | ---------- 25 | context : Any 26 | The context in which the function is executed. 27 | input : List[Any] 28 | List of input fields. 29 | **kwargs : Any 30 | Additional keyword arguments. 31 | 32 | Returns 33 | ------- 34 | Any 35 | An empty result. 
36 | """ 37 | return new_empty_fieldlist() 38 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/filters/noop.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | from typing import Any 11 | 12 | import earthkit.data as ekd 13 | 14 | from .legacy import legacy_filter 15 | 16 | 17 | @legacy_filter(__file__) 18 | def execute(context: Any, input: ekd.FieldList, *args: Any, **kwargs: Any) -> ekd.FieldList: 19 | """No operation filter that returns the input as is. 20 | 21 | Parameters 22 | ---------- 23 | context : Any 24 | The context in which the function is executed. 25 | input : ekd.FieldList 26 | List of input fields. 27 | *args : Any 28 | Additional arguments. 29 | **kwargs : Any 30 | Additional keyword arguments. 31 | 32 | Returns 33 | ------- 34 | List[Any] 35 | The input list of fields. 36 | """ 37 | return input 38 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/filters/orog_to_z.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | from collections import defaultdict 11 | from typing import Any 12 | from typing import Dict 13 | 14 | import earthkit.data as ekd 15 | from anemoi.transform.fields import new_field_from_numpy 16 | from anemoi.transform.fields import new_fieldlist_from_list 17 | 18 | from .legacy import legacy_filter 19 | 20 | 21 | @legacy_filter(__file__) 22 | def execute(context: Any, input: ekd.FieldList, orog: str, z: str = "z") -> ekd.FieldList: 23 | """Convert orography [m] to z (geopotential height). 24 | 25 | Parameters 26 | ---------- 27 | context : Any 28 | The context in which the function is executed. 29 | input : FieldList 30 | List of input fields. 31 | orog : str 32 | Orography parameter. 33 | z : str, optional 34 | Geopotential height parameter. Defaults to "z". 35 | 36 | Returns 37 | ------- 38 | FieldList 39 | List of fields with geopotential height. 
40 | """ 41 | result = [] 42 | processed_fields: Dict[tuple, Dict[str, Any]] = defaultdict(dict) 43 | 44 | for f in input: 45 | key = f.metadata(namespace="mars") 46 | param = key.pop("param") 47 | if param == orog: 48 | key = tuple(key.items()) 49 | 50 | if param in processed_fields[key]: 51 | raise ValueError(f"Duplicate field {param} for {key}") 52 | 53 | output = f.to_numpy(flatten=True) * 9.80665 54 | result.append(new_field_from_numpy(f, output, param=z)) 55 | else: 56 | result.append(f) 57 | 58 | return new_fieldlist_from_list(result) 59 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/filters/sum.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | from collections import defaultdict 11 | from typing import Any 12 | from typing import Dict 13 | from typing import Hashable 14 | from typing import List 15 | from typing import Tuple 16 | 17 | import earthkit.data as ekd 18 | from anemoi.transform.fields import new_field_from_numpy 19 | from anemoi.transform.fields import new_fieldlist_from_list 20 | 21 | from .legacy import legacy_filter 22 | 23 | 24 | @legacy_filter(__file__) 25 | def execute(context: Any, input: ekd.FieldList, params: List[str], output: str) -> ekd.FieldList: 26 | """Computes the sum over a set of variables. 27 | 28 | Args: 29 | context (Any): The execution context. 30 | input (List[Any]): The list of input fields. 31 | params (List[str]): The list of parameters to sum over. 32 | output (str): The name for the output field. 33 | 34 | Returns: 35 | ekd.FieldList: The resulting FieldArray with summed fields. 36 | """ 37 | result = [] 38 | 39 | needed_fields: Dict[Tuple[Hashable, ...], Dict[str, ekd.Field]] = defaultdict(dict) 40 | 41 | for f in input: 42 | key = f.metadata(namespace="mars") 43 | param = key.pop("param") 44 | if param in params: 45 | key = tuple(key.items()) 46 | 47 | if param in needed_fields[key]: 48 | raise ValueError(f"Duplicate field {param} for {key}") 49 | 50 | needed_fields[key][param] = f 51 | else: 52 | result.append(f) 53 | 54 | for keys, values in needed_fields.items(): 55 | 56 | if len(values) != len(params): 57 | raise ValueError("Missing fields") 58 | 59 | s = None 60 | for k, v in values.items(): 61 | c = v.to_numpy(flatten=True) 62 | if s is None: 63 | s = c 64 | else: 65 | s += c 66 | result.append(new_field_from_numpy(values[list(values.keys())[0]], s, param=output)) 67 | 68 | return new_fieldlist_from_list(result) 69 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/filters/transform.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2025 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | from typing import Any 11 | from typing import Dict 12 | 13 | import earthkit.data as ekd 14 | 15 | from ..filter import Filter 16 | 17 | 18 | class TransformFilter(Filter): 19 | """Calls filters from anemoi.transform.filters 20 | 21 | Parameters 22 | ---------- 23 | context : Any 24 | The context in which the filter is created. 25 | name : str 26 | The name of the filter. 27 | config : Dict[str, Any] 28 | The configuration for the filter. 29 | """ 30 | 31 | def __init__(self, context: Any, name: str, config: Dict[str, Any]) -> None: 32 | 33 | from anemoi.transform.filters import create_filter 34 | 35 | self.name = name 36 | self.transform_filter = create_filter(context, config) 37 | 38 | def execute(self, input: ekd.FieldList) -> ekd.FieldList: 39 | """Execute the transformation filter. 40 | 41 | Parameters 42 | ---------- 43 | input : ekd.FieldList 44 | The input data to be transformed. 45 | 46 | Returns 47 | ------- 48 | ekd.FieldList 49 | The transformed data. 50 | """ 51 | return self.transform_filter.forward(input) 52 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/input/empty.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import logging 11 | from functools import cached_property 12 | from typing import List 13 | 14 | from earthkit.data import FieldList 15 | 16 | from .misc import assert_fieldlist 17 | from .result import Result 18 | from .trace import trace_datasource 19 | 20 | LOG = logging.getLogger(__name__) 21 | 22 | 23 | class EmptyResult(Result): 24 | """Class to represent an empty result in the dataset creation process.""" 25 | 26 | empty = True 27 | 28 | def __init__(self, context: object, action_path: list, dates: object) -> None: 29 | """Initializes an EmptyResult instance. 30 | 31 | Parameters 32 | ---------- 33 | context : object 34 | The context object. 35 | action_path : list 36 | The action path. 37 | dates : object 38 | The dates object. 39 | """ 40 | super().__init__(context, action_path + ["empty"], dates) 41 | 42 | @cached_property 43 | @assert_fieldlist 44 | @trace_datasource 45 | def datasource(self) -> FieldList: 46 | """Returns an empty datasource.""" 47 | from earthkit.data import from_source 48 | 49 | return from_source("empty") 50 | 51 | @property 52 | def variables(self) -> List[str]: 53 | """Returns an empty list of variables.""" 54 | return [] 55 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/input/pipe.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import json 11 | import logging 12 | from typing import Any 13 | 14 | from .action import Action 15 | from .action import action_factory 16 | from .step import step_factory 17 | from .trace import trace_select 18 | 19 | LOG = logging.getLogger(__name__) 20 | 21 | 22 | class PipeAction(Action): 23 | """A class to represent a pipeline of actions.""" 24 | 25 | def __init__(self, context: Any, action_path: list, *configs: dict) -> None: 26 | """Initialize the PipeAction. 27 | 28 | Parameters 29 | ---------- 30 | context : Any 31 | The context for the action. 32 | action_path : list 33 | The path of the action. 34 | configs : dict 35 | The configurations for the actions. 36 | """ 37 | super().__init__(context, action_path, *configs) 38 | if len(configs) <= 1: 39 | raise ValueError( 40 | f"PipeAction requires at least two actions, got {len(configs)}\n{json.dumps(configs, indent=2)}" 41 | ) 42 | 43 | current: Any = action_factory(configs[0], context, action_path + ["0"]) 44 | for i, c in enumerate(configs[1:]): 45 | current = step_factory(c, context, action_path + [str(i + 1)], previous_step=current) 46 | self.last_step: Any = current 47 | 48 | @trace_select 49 | def select(self, group_of_dates: Any) -> Any: 50 | """Select data based on the group of dates. 51 | 52 | Parameters 53 | ---------- 54 | group_of_dates : Any 55 | The group of dates to select data for. 56 | 57 | Returns 58 | ------- 59 | Any 60 | The selected data. 61 | """ 62 | return self.last_step.select(group_of_dates) 63 | 64 | def __repr__(self) -> str: 65 | """Return a string representation of the PipeAction.""" 66 | return f"PipeAction({self.last_step})" 67 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/size.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | 11 | import logging 12 | import os 13 | from typing import Dict 14 | from typing import Optional 15 | 16 | import tqdm 17 | from anemoi.utils.humanize import bytes_to_human 18 | 19 | LOG = logging.getLogger(__name__) 20 | 21 | 22 | def compute_directory_sizes(path: str) -> Optional[Dict[str, int]]: 23 | """Computes the total size and number of files in a directory. 24 | 25 | Parameters 26 | ---------- 27 | path : str 28 | The path to the directory. 29 | 30 | Returns 31 | ------- 32 | dict of str to int or None 33 | A dictionary with the total size and number of files, or None if the path is not a directory. 
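34 | 35 |     Examples 36 |     -------- 37 |     A hypothetical run on a small dataset (illustrative values only): 38 | 39 |     >>> compute_directory_sizes("dataset.zarr")  # doctest: +SKIP 40 |     {'total_size': 3283650, 'total_number_of_files': 34}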
34 | """ 35 | if not os.path.isdir(path): 36 | return None 37 | 38 | size, n = 0, 0 39 | bar = tqdm.tqdm(iterable=os.walk(path), desc=f"Computing size of {path}") 40 | for dirpath, _, filenames in bar: 41 | for filename in filenames: 42 | file_path = os.path.join(dirpath, filename) 43 | size += os.path.getsize(file_path) 44 | n += 1 45 | 46 | LOG.info(f"Total size: {bytes_to_human(size)}") 47 | LOG.info(f"Total number of files: {n}") 48 | 49 | return dict(total_size=size, total_number_of_files=n) 50 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/source.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2025- Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | from abc import ABC 11 | from abc import abstractmethod 12 | 13 | import earthkit.data as ekd 14 | 15 | from anemoi.datasets.create.typing import DateList 16 | 17 | 18 | class Source(ABC): 19 | """Represents a data source with a given context.""" 20 | 21 | emoji = "📦" # For tracing 22 | 23 | def __init__(self, context: any, *args: tuple, **kwargs: dict): 24 | """Initialise the source. 25 | Parameters 26 | ---------- 27 | context : Any 28 | The context for the data source. 29 | *args : tuple 30 | Additional positional arguments. 31 | **kwargs : dict 32 | Additional keyword arguments. 33 | """ 34 | self.context = context 35 | 36 | @abstractmethod 37 | def execute(self, dates: DateList) -> ekd.FieldList: 38 | """Execute the filter. 39 | 40 | Parameters 41 | ---------- 42 | dates : DateList 43 | The input dates. 44 | 45 | Returns 46 | ------- 47 | ekd.FieldList 48 | The output data. 49 | """ 50 | 51 | pass 52 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/__init__.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import logging 11 | from typing import Any 12 | 13 | from anemoi.utils.registry import Registry 14 | 15 | LOG = logging.getLogger(__name__) 16 | 17 | 18 | source_registry = Registry(__name__) 19 | 20 | 21 | def create_source(context: Any, config: Any) -> Any: 22 | """Create a source based on the provided configuration. 23 | 24 | Parameters 25 | ---------- 26 | context : Any 27 | The context in which the source is created. 28 | config : Any 29 | The configuration for the source. 30 | 31 | Returns 32 | ------- 33 | Any 34 | The created source. 
35 | """ 36 | return source_registry.from_config(config, context) 37 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/anemoi_dataset.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2025 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import numpy as np 11 | 12 | from .legacy import legacy_source 13 | 14 | 15 | @legacy_source(__file__) 16 | def execute(context, dates, params=None, **kwargs): 17 | import earthkit.data as ekd 18 | 19 | from anemoi.datasets import open_dataset 20 | 21 | ds = open_dataset(**kwargs) 22 | # dates_to_index = {date: i for i, date in enumerate(ds.dates)} 23 | 24 | indices = [] 25 | for date in dates: 26 | idx = np.where(ds.dates == date)[0] 27 | if len(idx) == 0: 28 | continue 29 | indices.append((int(idx[0]), date)) 30 | 31 | vars = ds.variables 32 | if params is None: 33 | params = vars 34 | 35 | if not isinstance(params, (list, tuple, set)): 36 | params = [params] 37 | 38 | params = set(params) 39 | results = [] 40 | 41 | ensemble = ds.shape[2] > 1 42 | latitudes = ds.latitudes 43 | longitudes = ds.longitudes 44 | 45 | for idx, date in indices: 46 | 47 | metadata = dict(valid_datetime=date, latitudes=latitudes, longitudes=longitudes) 48 | 49 | for j, y in enumerate(ds[idx]): 50 | 51 | param = vars[j] 52 | if param not in params: 53 | continue 54 | 55 | # metadata['name'] = param 56 | # metadata['param_level'] = param 57 | metadata["param"] = param 58 | 59 | for k, e in enumerate(y): 60 | if ensemble: 61 | metadata["number"] = k + 1 62 | 63 | metadata["values"] = e 64 | 65 | results.append(metadata.copy()) 66 | 67 | print(results[0].keys()) 68 | 69 | # "list-of-dicts" does support resolution 70 | results = ekd.from_source("list-of-dicts", results) 71 | 72 | # return new_fieldlist_from_list([new_field_from_latitudes_longitudes(x, latitudes, longitudes) for x in results]) 73 | return results 74 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/constants.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | from typing import Any 11 | from typing import Dict 12 | from typing import List 13 | 14 | from earthkit.data import from_source 15 | 16 | from .legacy import legacy_source 17 | 18 | 19 | @legacy_source(__file__) 20 | def constants(context: Any, dates: List[str], template: Dict[str, Any], param: str) -> Any: 21 | """Deprecated function to retrieve constants data. 22 | 23 | Parameters 24 | ---------- 25 | context : Any 26 | The context object for tracing. 27 | dates : list of str 28 | List of dates for which data is required. 
29 | template : dict of str to Any 30 | Template dictionary for the data source. 31 | param : str 32 | Parameter to retrieve. 33 | 34 | Returns 35 | ------- 36 | Any 37 | Data retrieved from the source. 38 | """ 39 | from warnings import warn 40 | 41 | warn( 42 | "The source `constants` is deprecated, use `forcings` instead.", 43 | DeprecationWarning, 44 | stacklevel=2, 45 | ) 46 | context.trace("✅", f"from_source(constants, {template}, {param})") 47 | if len(template) == 0: 48 | raise ValueError("Forcings template is empty.") 49 | 50 | return from_source("forcings", source_or_dataset=template, date=dates, param=param) 51 | 52 | 53 | execute: Any = constants 54 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/eccc_fstd.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2025 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | 11 | from . import source_registry 12 | from .xarray import XarraySourceBase 13 | 14 | 15 | @source_registry.register("eccc_fstd") 16 | class XarrayECCCSource(XarraySourceBase): 17 | """An Xarray data source that uses the `fstd` engine.""" 18 | 19 | emoji = "🍁" 20 | options = {"engine": "fstd"} 21 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/empty.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | 11 | from typing import Any 12 | from typing import List 13 | 14 | import earthkit.data as ekd 15 | 16 | from .legacy import legacy_source 17 | 18 | 19 | @legacy_source(__file__) 20 | def execute(context: Any, dates: List[str], **kwargs: Any) -> ekd.FieldList: 21 | """Executes the loading of an empty data source. 22 | 23 | Parameters 24 | ---------- 25 | context : object 26 | The context in which the function is executed. 27 | dates : list 28 | List of dates for which data is to be loaded. 29 | **kwargs : dict 30 | Additional keyword arguments. 31 | 32 | Returns 33 | ------- 34 | ekd.FieldList 35 | Loaded empty data source. 36 | """ 37 | return ekd.from_source("empty") 38 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/forcings.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | from typing import Any 11 | from typing import List 12 | 13 | from earthkit.data import from_source 14 | 15 | from .legacy import legacy_source 16 | 17 | 18 | @legacy_source(__file__) 19 | def forcings(context: Any, dates: List[str], template: Any, param: str) -> Any: 20 | """Loads forcing data from a specified source. 21 | 22 | Parameters 23 | ---------- 24 | context : object 25 | The context in which the function is executed. 26 | dates : list 27 | List of dates for which data is to be loaded. 28 | template : Any 29 | Template FieldList for the data source. 30 | param : str 31 | Parameter for the data source. 32 | 33 | Returns 34 | ------- 35 | object 36 | Loaded forcing data. 37 | """ 38 | context.trace("✅", f"from_source(forcings, {template}, {param})") 39 | return from_source("forcings", source_or_dataset=template, date=dates, param=param) 40 | 41 | 42 | execute = forcings 43 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/netcdf.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | 11 | from typing import Any 12 | from typing import List 13 | 14 | import earthkit.data as ekd 15 | 16 | from .legacy import legacy_source 17 | from .xarray import load_many 18 | 19 | 20 | @legacy_source(__file__) 21 | def execute(context: Any, dates: List[str], path: str, *args: Any, **kwargs: Any) -> ekd.FieldList: 22 | """Execute the loading of multiple NetCDF files. 23 | 24 | Parameters 25 | ---------- 26 | context : object 27 | The context in which the function is executed. 28 | dates : list 29 | List of dates for which data is to be loaded. 30 | path : str 31 | Path to the directory containing the NetCDF files. 32 | *args : tuple 33 | Additional positional arguments. 34 | **kwargs : dict 35 | Additional keyword arguments. 36 | 37 | Returns 38 | ------- 39 | object 40 | The loaded data. 41 | """ 42 | return load_many("📁", context, dates, path, *args, **kwargs) 43 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/opendap.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction.
9 | 10 | 11 | from typing import Any 12 | from typing import Dict 13 | from typing import List 14 | 15 | import earthkit.data as ekd 16 | 17 | from .legacy import legacy_source 18 | from .xarray import load_many 19 | 20 | 21 | @legacy_source(__file__) 22 | def execute(context: Dict[str, Any], dates: List[str], url: str, *args: Any, **kwargs: Any) -> ekd.FieldList: 23 | """Execute the data loading process from an OpenDAP source. 24 | 25 | Parameters 26 | ---------- 27 | context : dict 28 | The context in which the function is executed. 29 | dates : list 30 | List of dates for which data is to be loaded. 31 | url : str 32 | The URL of the OpenDAP source. 33 | *args : tuple 34 | Additional positional arguments. 35 | **kwargs : dict 36 | Additional keyword arguments. 37 | 38 | Returns 39 | ------- 40 | ekd.FieldList 41 | The loaded data. 42 | """ 43 | return load_many("🌐", context, dates, url, *args, **kwargs) 44 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/patterns.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import datetime 11 | import glob 12 | from typing import Any 13 | from typing import Generator 14 | from typing import List 15 | from typing import Tuple 16 | 17 | from earthkit.data.utils.patterns import Pattern 18 | 19 | 20 | def _expand(paths: List[str]) -> Generator[str, None, None]: 21 | """Expand the given paths to include all matching file paths. 22 | 23 | Parameters 24 | ---------- 25 | paths : List[str] 26 | List of paths to expand. 27 | 28 | Returns 29 | ------- 30 | Generator[str] 31 | Expanded file paths. 32 | """ 33 | if not isinstance(paths, list): 34 | paths = [paths] 35 | 36 | for path in paths: 37 | if path.startswith("file://"): 38 | path = path[7:] 39 | 40 | if path.startswith("http://"): 41 | yield path 42 | continue 43 | 44 | if path.startswith("https://"): 45 | yield path 46 | continue 47 | 48 | cnt = 0 49 | for p in glob.glob(path): 50 | yield p 51 | cnt += 1 52 | if cnt == 0: 53 | yield path 54 | 55 | 56 | def iterate_patterns( 57 | path: str, dates: List[datetime.datetime], **kwargs: Any 58 | ) -> Generator[Tuple[str, List[str]], None, None]: 59 | """Iterate over patterns and expand them with given dates and additional keyword arguments. 60 | 61 | Parameters 62 | ---------- 63 | path : str 64 | The pattern path to iterate over. 65 | dates : List[datetime.datetime] 66 | List of datetime objects to substitute in the pattern. 67 | **kwargs : Any 68 | Additional keyword arguments to substitute in the pattern. 69 | 70 | Returns 71 | ------- 72 | Generator[Tuple[str, List[str]]] 73 | The expanded path and list of ISO formatted dates.
74 | """ 75 | given_paths = path if isinstance(path, list) else [path] 76 | 77 | dates = [d.isoformat() for d in dates] 78 | if len(dates) > 0: 79 | kwargs["date"] = dates 80 | 81 | for path in given_paths: 82 | paths = Pattern(path).substitute(allow_extra=True, **kwargs) 83 | for path in _expand(paths): 84 | yield path, dates 85 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/source.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | from datetime import datetime 11 | from typing import Any 12 | from typing import Dict 13 | from typing import List 14 | from typing import Optional 15 | 16 | from earthkit.data import from_source 17 | 18 | from anemoi.datasets.create.utils import to_datetime_list 19 | 20 | from .legacy import legacy_source 21 | 22 | 23 | @legacy_source(__file__) 24 | def source(context: Optional[Any], dates: List[datetime], **kwargs: Any) -> Any: 25 | """Generates a source based on the provided context, dates, and additional keyword arguments. 26 | 27 | Parameters 28 | ---------- 29 | context : Optional[Any] 30 | The context in which the source is generated. 31 | dates : List[datetime] 32 | A list of datetime objects representing the dates. 33 | **kwargs : Any 34 | Additional keyword arguments for the source generation. 35 | 36 | Returns 37 | ------- 38 | Any 39 | The generated source. 40 | """ 41 | name = kwargs.pop("name") 42 | context.trace("✅", f"from_source({name}, {dates}, {kwargs}") 43 | if kwargs["date"] == "$from_dates": 44 | kwargs["date"] = list({d.strftime("%Y%m%d") for d in dates}) 45 | if kwargs["time"] == "$from_dates": 46 | kwargs["time"] = list({d.strftime("%H%M") for d in dates}) 47 | return from_source(name, **kwargs) 48 | 49 | 50 | execute = source 51 | 52 | if __name__ == "__main__": 53 | import yaml 54 | 55 | config: Dict[str, Any] = yaml.safe_load( 56 | """ 57 | name: mars 58 | class: ea 59 | expver: '0001' 60 | grid: 20.0/20.0 61 | levtype: sfc 62 | param: [2t] 63 | number: [0, 1] 64 | date: $from_dates 65 | time: $from_dates 66 | """ 67 | ) 68 | dates: List[str] = yaml.safe_load("[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]") 69 | dates = to_datetime_list(dates) 70 | 71 | for f in source(None, dates, **config): 72 | print(f, f.to_numpy().mean()) 73 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/xarray_kerchunk.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | 11 | from . 
import source_registry 12 | from .xarray import XarraySourceBase 13 | 14 | 15 | @source_registry.register("xarray_kerchunk") 16 | class XarrayKerchunkSource(XarraySourceBase): 17 | """An Xarray data source that uses the `kerchunk` engine.""" 18 | 19 | emoji = "🧱" 20 | 21 | def __init__(self, context, json, *args, **kwargs: dict): 22 | super().__init__(context, *args, **kwargs) 23 | 24 | self.path_or_url = "reference://" 25 | 26 | self.options = { 27 | "engine": "zarr", 28 | "backend_kwargs": { 29 | "consolidated": False, 30 | "storage_options": { 31 | "fo": json, 32 | "remote_protocol": "s3", 33 | "remote_options": {"anon": True}, 34 | }, 35 | }, 36 | } 37 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/xarray_support/README.md: -------------------------------------------------------------------------------- 1 | The code under this directory will be migrated to earthkit-data in the future 2 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/xarray_support/patch.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | 11 | import logging 12 | from typing import Any 13 | from typing import Dict 14 | from typing import List 15 | 16 | import xarray as xr 17 | 18 | LOG = logging.getLogger(__name__) 19 | 20 | 21 | def patch_attributes(ds: xr.Dataset, attributes: Dict[str, Dict[str, Any]]) -> Any: 22 | """Patch the attributes of the dataset. 23 | 24 | Parameters 25 | ---------- 26 | ds : xr.Dataset 27 | The dataset to patch. 28 | attributes : Dict[str, Dict[str, Any]] 29 | The attributes to patch. 30 | 31 | Returns 32 | ------- 33 | Any 34 | The patched dataset. 35 | """ 36 | for name, value in attributes.items(): 37 | variable = ds[name] 38 | variable.attrs.update(value) 39 | 40 | return ds 41 | 42 | 43 | def patch_coordinates(ds: xr.Dataset, coordinates: List[str]) -> Any: 44 | """Patch the coordinates of the dataset. 45 | 46 | Parameters 47 | ---------- 48 | ds : xr.Dataset 49 | The dataset to patch. 50 | coordinates : List[str] 51 | The coordinates to patch. 52 | 53 | Returns 54 | ------- 55 | Any 56 | The patched dataset. 57 | """ 58 | for name in coordinates: 59 | ds = ds.assign_coords({name: ds[name]}) 60 | 61 | return ds 62 | 63 | 64 | PATCHES = { 65 | "attributes": patch_attributes, 66 | "coordinates": patch_coordinates, 67 | } 68 | 69 | 70 | def patch_dataset(ds: xr.Dataset, patch: Dict[str, Dict[str, Any]]) -> Any: 71 | """Patch the dataset. 72 | 73 | Parameters 74 | ---------- 75 | ds : xr.Dataset 76 | The dataset to patch. 77 | patch : Dict[str, Dict[str, Any]] 78 | The patch to apply. 79 | 80 | Returns 81 | ------- 82 | Any 83 | The patched dataset. 
84 | """ 85 | for what, values in patch.items(): 86 | if what not in PATCHES: 87 | raise ValueError(f"Unknown patch type {what!r}") 88 | 89 | ds = PATCHES[what](ds, values) 90 | 91 | return ds 92 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/xarray_zarr.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | from typing import Any 11 | from typing import List 12 | 13 | import earthkit.data as ekd 14 | 15 | from .legacy import legacy_source 16 | from .xarray import load_many 17 | 18 | 19 | @legacy_source(__file__) 20 | def execute(context: Any, dates: List[str], url: str, *args: Any, **kwargs: Any) -> ekd.FieldList: 21 | """Execute the data loading process. 22 | 23 | Parameters 24 | ---------- 25 | context : Any 26 | The context in which the execution occurs. 27 | dates : List[str] 28 | List of dates for which data is to be loaded. 29 | url : str 30 | The URL from which data is to be loaded. 31 | *args : tuple 32 | Additional positional arguments. 33 | **kwargs : dict 34 | Additional keyword arguments. 35 | 36 | Returns 37 | ------- 38 | ekd.FieldList 39 | The loaded data. 40 | """ 41 | return load_many("🇿", context, dates, url, *args, **kwargs) 42 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/sources/zenodo.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | 11 | from typing import Any 12 | from typing import Dict 13 | from typing import List 14 | 15 | import earthkit.data as ekd 16 | from earthkit.data.core.fieldlist import MultiFieldList 17 | from earthkit.data.sources.url import download_and_cache 18 | 19 | from .legacy import legacy_source 20 | from .patterns import iterate_patterns 21 | from .xarray import load_one 22 | 23 | 24 | @legacy_source(__file__) 25 | def execute(context: Any, dates: Any, record_id: str, file_key: str, *args: Any, **kwargs: Any) -> ekd.FieldList: 26 | """Executes the download and processing of files from Zenodo. 27 | 28 | Parameters 29 | ---------- 30 | context : Any 31 | The context in which the function is executed. 32 | dates : Any 33 | The dates for which the data is required. 34 | record_id : str 35 | The Zenodo record ID. 36 | file_key : str 37 | The key to identify the file. 38 | *args : Any 39 | Additional arguments. 40 | **kwargs : Any 41 | Additional keyword arguments. 42 | 43 | Returns 44 | ------- 45 | MultiFieldList 46 | A list of fields loaded from the downloaded files. 
47 | """ 48 | import requests 49 | 50 | result: List[Any] = [] 51 | 52 | URLPATTERN = "https://zenodo.org/api/records/{record_id}" 53 | url = URLPATTERN.format(record_id=record_id) 54 | r = requests.get(url) 55 | r.raise_for_status() 56 | record: Dict[str, Any] = r.json() 57 | 58 | urls: Dict[str, str] = {} 59 | for file in record["files"]: 60 | urls[file["key"]] = file["links"]["self"] 61 | 62 | for url, dates in iterate_patterns(file_key, dates, **kwargs): 63 | if url not in urls: 64 | continue 65 | 66 | path = download_and_cache(urls[url]) 67 | result.append(load_one("?", context, dates, path, options={}, flavour=None, **kwargs)) 68 | 69 | return MultiFieldList(result) 70 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/typing.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2025- Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import datetime 11 | from typing import List 12 | 13 | Date = datetime.datetime 14 | 15 | DateList = List[Date] 16 | -------------------------------------------------------------------------------- /src/anemoi/datasets/create/writer.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | 11 | import logging 12 | from typing import Any 13 | 14 | import numpy as np 15 | from numpy.typing import NDArray 16 | 17 | LOG = logging.getLogger(__name__) 18 | 19 | 20 | class ViewCacheArray: 21 | """A class that provides a caching mechanism for writing to a NumPy-like array. 22 | 23 | The is initialised with a NumPy-like array, a shape and a list to reindex the first 24 | dimension. The array is used to store the final data, while the cache is used to 25 | temporarily store the data before flushing it to the array. 26 | 27 | The `flush` method copies the contents of the cache to the final array. 28 | """ 29 | 30 | def __init__(self, array: NDArray[Any], *, shape: tuple[int, ...], indexes: list[int]): 31 | """Initialize the ViewCacheArray. 32 | 33 | Parameters 34 | ---------- 35 | array : NDArray[Any] 36 | The NumPy-like array to store the final data. 37 | shape : tuple[int, ...] 38 | The shape of the cache array. 39 | indexes : list[int] 40 | List to reindex the first dimension. 41 | """ 42 | assert len(indexes) == shape[0], (len(indexes), shape[0]) 43 | self.array = array 44 | self.dtype = array.dtype 45 | self.cache = np.full(shape, np.nan, dtype=self.dtype) 46 | self.indexes = indexes 47 | 48 | def __setitem__(self, key: tuple[int, ...], value: NDArray[Any]) -> None: 49 | """Set the value in the cache array at the specified key. 50 | 51 | Parameters 52 | ---------- 53 | key : tuple[int, ...] 54 | The index key to set the value. 
55 | value : NDArray[Any] 56 | The value to set in the cache array. 57 | """ 58 | self.cache[key] = value 59 | 60 | def flush(self) -> None: 61 | """Copy the contents of the cache to the final array.""" 62 | for i in range(self.cache.shape[0]): 63 | global_i = self.indexes[i] 64 | self.array[global_i] = self.cache[i] 65 | -------------------------------------------------------------------------------- /src/anemoi/datasets/data/debug.css: -------------------------------------------------------------------------------- 1 | table.dataset td { 2 | vertical-align: top; 3 | text-align: left !important; 4 | } 5 | 6 | table.dataset span.dataset { 7 | font-weight: bold !important; 8 | } 9 | 10 | table.dataset span.values { 11 | font-style: italic !important; 12 | } 13 | -------------------------------------------------------------------------------- /src/anemoi/datasets/data/observations/multi.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | import logging 11 | import os 12 | 13 | from anemoi.datasets.data import open_dataset 14 | 15 | LOG = logging.getLogger(__name__) 16 | 17 | 18 | class LegacyDatasets: 19 | def __init__(self, paths, start=None, end=None, **kwargs): 20 | self.paths = paths 21 | 22 | if not start or not end: 23 | print( 24 | "❌❌ Warning: start and end not provided, using the earliest first and latest last dates of the datasets" 25 | ) 26 | lst = [self._open_dataset(p, **kwargs) for p in paths] 27 | start = min([d.dates[0] for d in lst]) 28 | end = max([d.dates[-1] for d in lst]) 29 | 30 | self._datasets = { 31 | os.path.basename(p).split(".")[0]: self._open_dataset(p, start=start, end=end, padding="empty") 32 | for p in paths 33 | } 34 | 35 | first = list(self._datasets.values())[0] 36 | for name, dataset in self._datasets.items(): 37 | if dataset.dates[0] != first.dates[0] or dataset.dates[-1] != first.dates[-1]: 38 | raise ValueError("Datasets have different start and end times") 39 | if dataset.frequency != first.frequency: 40 | raise ValueError("Datasets have different frequencies") 41 | 42 | self._keys = self._datasets.keys 43 | 44 | self._first = list(self._datasets.values())[0] 45 | 46 | def _open_dataset(self, p, **kwargs): 47 | if p.startswith("observations-"): 48 | return open_dataset(observations=p, **kwargs) 49 | else: 50 | print("❗ Opening non-observations dataset:", p) 51 | return open_dataset(p, **kwargs) 52 | 53 | def items(self): 54 | return self._datasets.items() 55 | 56 | @property 57 | def dates(self): 58 | return self._first.dates 59 | 60 | def __len__(self): 61 | return len(self._first) 62 | 63 | def __getitem__(self, i): 64 | return {k: d[i] for k, d in self._datasets.items()} 65 | -------------------------------------------------------------------------------- /src/anemoi/datasets/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | -------------------------------------------------------------------------------- /tests/create-perturbations-full.yaml: -------------------------------------------------------------------------------- 1 | description: "develop version of the dataset for a few days and a few variables, once data on mars is cached it should take a few seconds to generate the dataset" 2 | dataset_status: testing 3 | purpose: aifs 4 | name: create-perturbations 5 | config_format_version: 2 6 | 7 | common: 8 | mars_request_sfc: &common_sfc 9 | name: mars 10 | class: ea 11 | date: $datetime_format($dates,%Y%m%d) 12 | time: $datetime_format($dates,%H%M) 13 | expver: '0001' 14 | grid: 20.0/20.0 15 | levtype: sfc 16 | #param: [2t] 17 | param: [10u, 10v, 2d, 2t, lsm, msl, sdor, skt, slor, sp, tcw, z] 18 | mars_request_pl: &common_pl 19 | name: mars 20 | class: ea 21 | date: $datetime_format($dates,%Y%m%d) 22 | time: $datetime_format($dates,%H%M) 23 | expver: '0001' 24 | grid: 20.0/20.0 25 | levtype: pl 26 | #param: [q] 27 | # level: [50, 100] 28 | param: [q, t, u, v, w, z] 29 | level: [50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000] 30 | dates: &dates_anchor 31 | start: 2020-12-30 00:00:00 32 | end: 2021-01-03 12:00:00 33 | frequency: 12h 34 | 35 | dates: 36 | <<: *dates_anchor 37 | 38 | input: 39 | dates: 40 | <<: *dates_anchor 41 | join: 42 | - function: 43 | name: perturbations 44 | ensembles: 45 | <<: *common_sfc 46 | stream: enda 47 | type: an 48 | number: 0/to/9 49 | 50 | center: 51 | <<: *common_sfc 52 | stream: oper 53 | type: an 54 | 55 | mean: 56 | <<: *common_sfc 57 | stream: enda 58 | type: em 59 | 60 | - function: 61 | name: perturbations 62 | ensembles: 63 | <<: *common_pl 64 | stream: enda 65 | type: an 66 | number: 0/to/9 67 | 68 | center: 69 | <<: *common_pl 70 | stream: oper 71 | type: an 72 | 73 | mean: 74 | <<: *common_pl 75 | stream: enda 76 | type: em 77 | 78 | statistics: 79 | end: 2021 80 | -------------------------------------------------------------------------------- /tests/create-shift.yaml: -------------------------------------------------------------------------------- 1 | description: "develop version of the dataset for a few days and a few variables, once data on mars is cached it should take a few seconds to generate the dataset" 2 | dataset_status: testing 3 | purpose: aifs 4 | name: test-small 5 | config_format_version: 2 6 | 7 | common: 8 | mars_request: &mars_request 9 | expver: "0001" 10 | class: ea 11 | grid: 20./20.
12 | 13 | dates: 14 | start: 2020-12-30 00:00:00 15 | end: 2021-01-03 12:00:00 16 | frequency: 12h 17 | 18 | input: 19 | join: 20 | - mars: 21 | <<: *mars_request 22 | param: [2t] 23 | levtype: sfc 24 | stream: oper 25 | type: an 26 | 27 | - forcings: 28 | template: ${input.join.0.mars} 29 | param: 30 | - insolation 31 | 32 | - date_shift: 33 | delta: -25 34 | forcings: 35 | template: ${input.join.0.mars} 36 | param: 37 | - insolation 38 | 39 | statistics: 40 | end: 2021 41 | -------------------------------------------------------------------------------- /tests/create/accumulation.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2021-01-10 18:00:00 3 | #start: 2021-01-10 19:00:00 4 | end: 2021-01-12 12:00:00 5 | frequency: 6h 6 | 7 | input: 8 | accumulations: 9 | expver: "0001" 10 | class: ea 11 | 12 | stream: oper 13 | #stream: enda 14 | 15 | grid: 20./20. 16 | #grid: o96 17 | levtype: sfc 18 | param: [ tp , cp] 19 | # accumulation_period: [0, 6] 20 | accumulation_period: 24 21 | -------------------------------------------------------------------------------- /tests/create/concat.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2020-12-30 00:00:00 3 | end: 2021-01-03 12:00:00 4 | frequency: 12h 5 | 6 | common: 7 | mars_request: &mars_request 8 | expver: "0001" 9 | class: ea 10 | grid: 20./20. 11 | levtype: sfc 12 | stream: oper 13 | type: an 14 | param: [2t] 15 | 16 | input: 17 | concat: 18 | - dates: 19 | start: 2020-12-30 00:00:00 20 | end: 2021-01-01 12:00:00 21 | frequency: 12h 22 | mars: 23 | <<: *mars_request 24 | - dates: 25 | start: 2021-01-02 00:00:00 26 | end: 2021-01-03 12:00:00 27 | frequency: 12h 28 | mars: 29 | <<: *mars_request 30 | 31 | statistics: 32 | end: 2021 33 | -------------------------------------------------------------------------------- /tests/create/join.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | mars_request: &mars_request 3 | expver: "0001" 4 | class: ea 5 | grid: 20./20. 6 | 7 | dates: 8 | start: 2020-12-30 00:00:00 9 | end: 2021-01-03 12:00:00 10 | frequency: 12h 11 | 12 | input: 13 | join: 14 | - mars: 15 | <<: *mars_request 16 | param: [2t] 17 | levtype: sfc 18 | stream: oper 19 | type: an 20 | 21 | - mars: 22 | <<: *mars_request 23 | param: [q, t] 24 | levtype: pl 25 | level: [50, 100] 26 | stream: oper 27 | type: an 28 | 29 | - accumulations: 30 | <<: *mars_request 31 | levtype: sfc 32 | param: [cp, tp] 33 | # accumulation_period: 6h 34 | 35 | - forcings: 36 | template: ${input.join.0.mars} 37 | param: 38 | - cos_latitude 39 | 40 | naming_scheme: "{param}_{levelist}{level_units}_{accumulation_period}" 41 | 42 | statistics: 43 | end: 2021 44 | -------------------------------------------------------------------------------- /tests/create/missing.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | mars_request: &mars_request 3 | expver: "0001" 4 | class: ea 5 | grid: 20./20.
6 | 7 | dates: 8 | start: 2020-12-30 00:00:00 9 | end: 2021-01-03 12:00:00 10 | frequency: 12h 11 | missing: ['2020-12-30 12:00:00', '2021-01-03 00:00:00'] 12 | 13 | data_sources: 14 | - mars: 15 | <<: *mars_request 16 | param: [2t] 17 | levtype: sfc 18 | stream: oper 19 | type: an 20 | 21 | input: 22 | forcings: 23 | template: ${data_sources.0.mars} 24 | param: 25 | - cos_latitude 26 | #- sin_latitude 27 | 28 | statistics: 29 | end: 2021-01-02 30 | -------------------------------------------------------------------------------- /tests/create/nan.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2020-12-30 00:00:00 3 | end: 2021-01-03 12:00:00 4 | frequency: 12h 5 | 6 | input: 7 | mars: 8 | expver: "0001" 9 | class: ea 10 | grid: 20./20. 11 | param: [2t, sst] 12 | levtype: sfc 13 | stream: oper 14 | type: an 15 | 16 | statistics: 17 | end: 2020 18 | allow_nans: [sst] 19 | -------------------------------------------------------------------------------- /tests/create/pipe.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | mars_request: &mars_request 3 | expver: "0001" 4 | class: ea 5 | grid: 20./20. 6 | 7 | dates: &dates_anchor 8 | start: 2020-12-30 00:00:00 9 | end: 2021-01-03 12:00:00 10 | frequency: 12h 11 | 12 | input: 13 | join: 14 | - mars: 15 | <<: *mars_request 16 | param: [2t] 17 | levtype: sfc 18 | 19 | - pipe: 20 | - mars: 21 | <<: *mars_request 22 | param: [q, t] 23 | levtype: pl 24 | level: [50, 100] 25 | stream: oper 26 | type: an 27 | - filter: 28 | param: [q] 29 | - filter: 30 | level: [50] 31 | 32 | - accumulations: 33 | <<: *mars_request 34 | param: [cp, tp] 35 | 36 | - forcings: 37 | template: ${input.join.0.mars} 38 | param: 39 | - cos_latitude 40 | 41 | statistics: 42 | end: 2021 43 | -------------------------------------------------------------------------------- /tests/create/recentre.yaml: -------------------------------------------------------------------------------- 1 | 2 | dates: 3 | start: 2021-01-01 00:00:00 4 | #start: 2020-12-30 00:00:00 5 | end: 2021-01-03 12:00:00 6 | frequency: 12h 7 | 8 | build: 9 | group_by: monthly 10 | 11 | common: 12 | global: &global 13 | class: ea 14 | expver: "0001" 15 | grid: 20.0/20.0 16 | sfc: &sfc 17 | <<: *global 18 | levtype: sfc 19 | # param: [2t] 20 | param: [10u, 10v, 2d, 2t, lsm, msl, sdor, skt, slor, sp, tcw, z] 21 | acc: &acc 22 | <<: *global 23 | levtype: sfc 24 | #param: [tp] 25 | param: [cp, tp] 26 | pl: &pl 27 | <<: *global 28 | levtype: pl 29 | #param: [q] 30 | #level: [50] 31 | param: [q, t, u, v, w, z] 32 | level: [50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000] 33 | 34 | ensembles: &ensembles 35 | stream: enda 36 | type: an 37 | number: [1, 2, 4] 38 | # number: [1, 2, 3, 4, 5, 6, 7, 8, 9] 39 | centre: &centre 40 | stream: oper 41 | type: an 42 | 43 | 44 | data_sources: 45 | ensembles: 46 | join: 47 | - mars: 48 | <<: *ensembles 49 | <<: *sfc 50 | - mars: 51 | <<: *ensembles 52 | <<: *pl 53 | - accumulations: 54 | <<: *ensembles 55 | <<: *acc 56 | centre: 57 | join: 58 | - mars: 59 | <<: *centre 60 | <<: *sfc 61 | - mars: 62 | <<: *centre 63 | <<: *pl 64 | - accumulations: 65 | <<: *centre 66 | <<: *acc 67 | 68 | input: 69 | join: 70 | - recentre: 71 | # the ensemble data which has one additional dimension 72 | members: ${data_sources.ensembles} 73 | # the new centre of the data 74 | centre: ${data_sources.centre} 75 | - forcings: 76 | template: ${input.join.0.recentre} 77 | param: 78 | - 
cos_latitude 79 | - cos_longitude 80 | - sin_latitude 81 | - sin_longitude 82 | - cos_julian_day 83 | - cos_local_time 84 | - sin_julian_day 85 | - sin_local_time 86 | - insolation 87 | -------------------------------------------------------------------------------- /tests/create/regrid.yaml: -------------------------------------------------------------------------------- 1 | dates: 2 | start: 2020-12-30 00:00:00 3 | end: 2021-01-03 12:00:00 4 | frequency: 12h 5 | 6 | input: 7 | join: 8 | - mars: 9 | expver: "0001" 10 | class: ea 11 | grid: o48 12 | param: [ 2t ] 13 | levtype: sfc 14 | stream: oper 15 | type: an 16 | - pipe: 17 | - mars: 18 | expver: "0001" 19 | class: ea 20 | grid: o32 21 | param: [ z ] 22 | levtype: pl 23 | level: [ 500 ] 24 | stream: oper 25 | type: an 26 | - regrid: 27 | # method: linear 28 | method: nearest 29 | in_grid: o32 30 | out_grid: o48 31 | -------------------------------------------------------------------------------- /tests/create/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eux 3 | NAME=${1:-join} 4 | 5 | anemoi-datasets create-step init $NAME.yaml $NAME.zarr --overwrite 6 | anemoi-datasets create-step load $NAME.zarr --part 1/2 7 | anemoi-datasets create-step load $NAME.zarr --part 2/2 8 | 9 | anemoi-datasets create-step statistics $NAME.zarr 10 | anemoi-datasets create-step size $NAME.zarr 11 | # anemoi-datasets create-step finalise $NAME.zarr 12 | 13 | anemoi-datasets create-step patch $NAME.zarr 14 | 15 | anemoi-datasets create-step init-additions $NAME.zarr --delta 12h 16 | anemoi-datasets create-step run-additions $NAME.zarr --part 1/2 --delta 12h 17 | anemoi-datasets create-step run-additions $NAME.zarr --part 2/2 --delta 12h 18 | anemoi-datasets create-step finalise-additions $NAME.zarr --delta 12h 19 | 20 | anemoi-datasets create-step cleanup $NAME.zarr 21 | -------------------------------------------------------------------------------- /tests/test_indexing.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 
9 | 10 | 11 | import numpy as np 12 | 13 | from anemoi.datasets.data.indexing import length_to_slices 14 | 15 | 16 | def test_length_to_slices() -> None: 17 | """Test the length_to_slices function with various inputs.""" 18 | lengths = [5, 7, 11, 13] 19 | datasets = [np.random.rand(n) for n in lengths] 20 | total = sum(lengths) 21 | 22 | combined = np.concatenate(datasets) 23 | 24 | for start in range(total): 25 | for stop in range(start, total): 26 | for step in range(1, stop - start + 1): 27 | index = slice(start, stop, step) 28 | print(index) 29 | slices = length_to_slices(index, lengths) 30 | result = [d[i] for (d, i) in zip(datasets, slices) if i is not None] 31 | result = np.concatenate(result) 32 | 33 | if (combined[index].shape != result.shape) or not (combined[index] == result).all(): 34 | print(index) 35 | print(combined[index]) 36 | print(result) 37 | print(slices) 38 | assert (combined[index] == result).all(), index 39 | 40 | 41 | if __name__ == "__main__": 42 | test_length_to_slices() 43 | -------------------------------------------------------------------------------- /tests/xarray/test_opendap.py: -------------------------------------------------------------------------------- 1 | # (C) Copyright 2024 Anemoi contributors. 2 | # 3 | # This software is licensed under the terms of the Apache Licence Version 2.0 4 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 5 | # 6 | # In applying this licence, ECMWF does not waive the privileges and immunities 7 | # granted to it by virtue of its status as an intergovernmental organisation 8 | # nor does it submit to any jurisdiction. 9 | 10 | 11 | import xarray as xr 12 | from anemoi.utils.testing import skip_if_offline 13 | from anemoi.utils.testing import skip_slow_tests 14 | 15 | from anemoi.datasets.create.sources.xarray import XarrayFieldList 16 | from anemoi.datasets.testing import assert_field_list 17 | 18 | 19 | @skip_if_offline 20 | @skip_slow_tests 21 | def test_opendap() -> None: 22 | """Test loading and validating the opendap dataset.""" 23 | ds = xr.open_dataset( 24 | "https://thredds.met.no/thredds/dodsC/meps25epsarchive/2023/01/01/meps_det_2_5km_20230101T00Z.nc", 25 | ) 26 | 27 | fs = XarrayFieldList.from_xarray(ds) 28 | assert_field_list(fs, 79529, "2023-01-01T00:00:00", "2023-01-03T18:00:00") 29 | 30 | 31 | if __name__ == "__main__": 32 | for name, obj in list(globals().items()): 33 | if name.startswith("test_") and callable(obj): 34 | print(f"Running {name}...") 35 | obj() 36 | -------------------------------------------------------------------------------- /tools/.gitignore: -------------------------------------------------------------------------------- 1 | *.png 2 | -------------------------------------------------------------------------------- /tools/build-obs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import logging 4 | import os 5 | import shutil 6 | 7 | import tqdm 8 | 9 | from anemoi.datasets import open_dataset 10 | 11 | LOG = logging.getLogger(__name__) 12 | 13 | 14 | def main(): 15 | parser = argparse.ArgumentParser(description="open a dataset and build a new one") 16 | parser.add_argument("input", help="input dataset") 17 | parser.add_argument("output", help="output dataset") 18 | parser.add_argument("--backend", help="backend to use", type=str, default="npz1") 19 | parser.add_argument("--overwrite", help="overwrite output directory if it exists", action="store_true") 20 | args = 
parser.parse_args() 21 | build(**vars(args)) 22 | 23 | 24 | def build(input, output, backend, overwrite=False): 25 | ds = open_dataset(input, backend=backend) 26 | print(f"Using dataset {ds} as input") 27 | print(f"{input} backend is '{ds.metadata['backend']}'") 28 | print(f"Dataset has {len(ds)} records, from {ds.start_date} to {ds.end_date}") 29 | print(f"Converting dataset to {output} using new backend '{backend}'") 30 | 31 | from anemoi.datasets.data.records.backends import writer_backend_factory 32 | 33 | if os.path.exists(output): 34 | if overwrite: 35 | LOG.warning(f"Output directory {output} already exists, removing it") 36 | shutil.rmtree(output) 37 | else: 38 | raise FileExistsError(f"Output directory {output} already exists, use --overwrite to remove it") 39 | writer = writer_backend_factory(backend, output) 40 | 41 | for i in tqdm.tqdm(range(len(ds))): 42 | writer.write(i, ds[i]) 43 | 44 | writer.write_statistics(ds.statistics) 45 | 46 | metadata = ds.metadata.copy() 47 | metadata["backend"] = backend 48 | writer.write_metadata(metadata) 49 | 50 | 51 | if __name__ == "__main__": 52 | main() 53 | -------------------------------------------------------------------------------- /tools/check-obs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import logging 4 | 5 | import numpy as np 6 | 7 | from anemoi.datasets import open_dataset 8 | 9 | LOG = logging.getLogger(__name__) 10 | 11 | 12 | def main(): 13 | parser = argparse.ArgumentParser(description="open two datasets and compare them") 14 | parser.add_argument("dataset", help="dataset to check") 15 | parser.add_argument("reference", help="reference dataset") 16 | args = parser.parse_args() 17 | compare(args.dataset, args.reference) 18 | 19 | 20 | def _compare_nested_dicts(a, b): 21 | if isinstance(a, dict) and isinstance(b, dict): 22 | if a.keys() != b.keys(): 23 | return False 24 | return all(_compare_nested_dicts(a[k], b[k]) for k in a) 25 | elif isinstance(a, np.ndarray) and isinstance(b, np.ndarray): 26 | if a.shape != b.shape: 27 | return False 28 | return np.array_equal(a, b) 29 | assert False, f"Unsupported types for comparison: {type(a)} and {type(b)}" 30 | 31 | 32 | def compare(input, reference): 33 | ds = open_dataset(input) 34 | ref = open_dataset(reference) 35 | 36 | if len(ds) != len(ref): 37 | raise ValueError(f"Datasets have different lengths: {len(ds)} != {len(ref)}") 38 | 39 | for i in range(len(ds)): 40 | if ds[i] != ref[i]: 41 | raise ValueError(f"Datasets differ at index {i}: {ds[i]} != {ref[i]}") 42 | if ds.dates[i] != ref.dates[i]: 43 | raise ValueError(f"Dates differ at index {i}: {ds.dates[i]} != {ref.dates[i]}") 44 | print("✅ Data and dates are identical") 45 | 46 | ds_metadata = ds.metadata.copy() 47 | ref_metadata = ref.metadata.copy() 48 | ds_metadata.pop("backend", None) 49 | ref_metadata.pop("backend", None) 50 | if ds_metadata != ref_metadata: 51 | raise ValueError("Metadata differs between datasets (excluding backend)") 52 | print("✅ Metadata is identical") 53 | 54 | if not _compare_nested_dicts(ds.statistics, ref.statistics): 55 | raise ValueError("Statistics differ between datasets") 56 | print("✅ Statistics are identical") 57 | 58 | 59 | if __name__ == "__main__": 60 | main() 61 | -------------------------------------------------------------------------------- /tools/examples/Makefile: -------------------------------------------------------------------------------- 1 | YAML := $(wildcard *.yaml) 2 | 3 | TARGETS 
:= $(YAML:.yaml=.zarr) 4 | 5 | all: $(TARGETS) 6 | 7 | %.zarr: %.yaml 8 | anemoi-datasets create $< $@ --overwrite 9 | 10 | 11 | clean:: 12 | rm -fr *.zarr *.sync *.statistics 13 | 14 | .SUFFIXES: .zarr .yaml 15 | -------------------------------------------------------------------------------- /tools/examples/an-oper-2023-2023-2p5-6h-v1.yaml: -------------------------------------------------------------------------------- 1 | description: "Example for the anemoi documentation" 2 | name: an-oper-2023-2023-2p5-6h-v1 3 | licence: CC-BY-4.0 4 | attribution: ECMWF 5 | 6 | dates: 7 | start: 2023-01-01 00:00:00 8 | end: 2023-12-31 18:00:00 9 | frequency: 6h 10 | 11 | input: 12 | 13 | join: 14 | - mars: 15 | grid: [2.5, 2.5] 16 | levtype: sfc 17 | param: [10u, 10v, 2d, 2t, lsm, msl, sdor, skt, slor, sp, tcw, z] 18 | 19 | - mars: 20 | grid: [2.5, 2.5] 21 | levtype: pl 22 | param: [q, t, u, v, w, z] 23 | level: [50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000] 24 | 25 | - accumulations: 26 | grid: [2.5, 2.5] 27 | param: [cp, tp] 28 | levtype: sfc 29 | 30 | - constants: 31 | template: ${input.join.0.mars} 32 | param: 33 | - cos_latitude 34 | - cos_longitude 35 | - sin_latitude 36 | - sin_longitude 37 | - cos_julian_day 38 | - cos_local_time 39 | - sin_julian_day 40 | - sin_local_time 41 | - insolation 42 | -------------------------------------------------------------------------------- /tools/grids/Makefile: -------------------------------------------------------------------------------- 1 | YAML := $(wildcard *.yaml) 2 | 3 | TARGETS := $(YAML:.yaml=.zarr) 4 | 5 | all: $(TARGETS) 6 | 7 | %.zarr: %.yaml 8 | anemoi-datasets create $< $@ --overwrite 9 | 10 | 11 | clean:: 12 | rm -fr *.zarr 13 | 14 | .SUFFIXES: .zarr .yaml 15 | -------------------------------------------------------------------------------- /tools/grids/grids1.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | mars_request: &mars_request 3 | expver: "0001" 4 | grid: 1/1 5 | 6 | dates: 7 | start: 2024-01-01 00:00:00 8 | end: 2024-01-01 18:00:00 9 | frequency: 6h 10 | 11 | input: 12 | join: 13 | - mars: 14 | <<: *mars_request 15 | param: [2t, 10u, 10v, lsm] 16 | levtype: sfc 17 | stream: oper 18 | type: an 19 | - mars: 20 | <<: *mars_request 21 | param: [q, t, z] 22 | levtype: pl 23 | level: [50, 100] 24 | stream: oper 25 | type: an 26 | - accumulations: 27 | <<: *mars_request 28 | levtype: sfc 29 | param: [cp, tp] 30 | - forcings: 31 | template: ${input.join.0.mars} 32 | param: 33 | - cos_latitude 34 | - sin_latitude 35 | 36 | output: 37 | order_by: [valid_datetime, param_level, number] 38 | remapping: 39 | param_level: "{param}_{levelist}" 40 | statistics: param_level 41 | -------------------------------------------------------------------------------- /tools/grids/grids2.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | mars_request: &mars_request 3 | expver: "0001" 4 | grid: 0.5/0.5 5 | area: [28, 0, -14, 40] 6 | rotation: [-20, -40] 7 | 8 | dates: 9 | start: 2024-01-01 00:00:00 10 | end: 2024-01-01 18:00:00 11 | frequency: 6h 12 | 13 | input: 14 | join: 15 | - mars: 16 | <<: *mars_request 17 | param: [2t, 10u, 10v, lsm] 18 | levtype: sfc 19 | stream: oper 20 | type: an 21 | - mars: 22 | <<: *mars_request 23 | param: [q, t, z] 24 | levtype: pl 25 | level: [50, 100] 26 | stream: oper 27 | type: an 28 | - accumulations: 29 | <<: *mars_request 30 | levtype: sfc 31 | param: [cp, tp] 32 | - forcings: 33 | template: 
${input.join.0.mars} 34 | param: 35 | - cos_latitude 36 | - sin_latitude 37 | 38 | output: 39 | order_by: [valid_datetime, param_level, number] 40 | remapping: 41 | param_level: "{param}_{levelist}" 42 | statistics: param_level 43 | -------------------------------------------------------------------------------- /tools/grids/grids3.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | mars_request: &mars_request 3 | expver: "0001" 4 | grid: 0.25/0.25 5 | area: [40, 25, 20, 60] 6 | rotation: [-20, -40] 7 | 8 | dates: 9 | start: 2024-01-01 00:00:00 10 | end: 2024-01-01 18:00:00 11 | frequency: 6h 12 | 13 | input: 14 | join: 15 | - mars: 16 | <<: *mars_request 17 | param: [2t, 10u, 10v, lsm] 18 | levtype: sfc 19 | stream: oper 20 | type: an 21 | - mars: 22 | <<: *mars_request 23 | param: [q, t, z] 24 | levtype: pl 25 | level: [50, 100] 26 | stream: oper 27 | type: an 28 | - accumulations: 29 | <<: *mars_request 30 | levtype: sfc 31 | param: [cp, tp] 32 | - forcings: 33 | template: ${input.join.0.mars} 34 | param: 35 | - cos_latitude 36 | - sin_latitude 37 | 38 | output: 39 | order_by: [valid_datetime, param_level, number] 40 | remapping: 41 | param_level: "{param}_{levelist}" 42 | statistics: param_level 43 | -------------------------------------------------------------------------------- /tools/grids/grids4.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | mars_request: &mars_request 3 | expver: "0001" 4 | grid: 0.5/0.5 5 | area: [30, 90, 10, 120] 6 | 7 | dates: 8 | start: 2024-01-01 00:00:00 9 | end: 2024-01-01 18:00:00 10 | frequency: 6h 11 | 12 | input: 13 | join: 14 | - mars: 15 | <<: *mars_request 16 | param: [2t, 10u, 10v, lsm] 17 | levtype: sfc 18 | stream: oper 19 | type: an 20 | - mars: 21 | <<: *mars_request 22 | param: [q, t, z] 23 | levtype: pl 24 | level: [50, 100] 25 | stream: oper 26 | type: an 27 | - accumulations: 28 | <<: *mars_request 29 | levtype: sfc 30 | param: [cp, tp] 31 | - forcings: 32 | template: ${input.join.0.mars} 33 | param: 34 | - cos_latitude 35 | - sin_latitude 36 | 37 | output: 38 | order_by: [valid_datetime, param_level, number] 39 | remapping: 40 | param_level: "{param}_{levelist}" 41 | statistics: param_level 42 | -------------------------------------------------------------------------------- /tools/grids/grids5.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | mars_request: &mars_request 3 | expver: "0001" 4 | grid: 0.2/0.2 5 | area: [25, 100, 20, 105] 6 | 7 | dates: 8 | start: 2024-01-01 00:00:00 9 | end: 2024-01-01 18:00:00 10 | frequency: 6h 11 | 12 | input: 13 | join: 14 | - mars: 15 | <<: *mars_request 16 | param: [2t, 10u, 10v, lsm] 17 | levtype: sfc 18 | stream: oper 19 | type: an 20 | - mars: 21 | <<: *mars_request 22 | param: [q, t, z] 23 | levtype: pl 24 | level: [50, 100] 25 | stream: oper 26 | type: an 27 | - accumulations: 28 | <<: *mars_request 29 | levtype: sfc 30 | param: [cp, tp] 31 | - forcings: 32 | template: ${input.join.0.mars} 33 | param: 34 | - cos_latitude 35 | - sin_latitude 36 | 37 | output: 38 | order_by: [valid_datetime, param_level, number] 39 | remapping: 40 | param_level: "{param}_{levelist}" 41 | statistics: param_level 42 | -------------------------------------------------------------------------------- /tools/grids/grids6.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | mars_request: &mars_request 3 | expver: 
"0001" 4 | grid: 10/10 5 | area: [90, -40, -40, 180] 6 | 7 | dates: 8 | start: 2024-01-01 00:00:00 9 | end: 2024-01-01 18:00:00 10 | frequency: 6h 11 | 12 | input: 13 | join: 14 | - mars: 15 | <<: *mars_request 16 | param: [2t, 10u, 10v, lsm] 17 | levtype: sfc 18 | stream: oper 19 | type: an 20 | - mars: 21 | <<: *mars_request 22 | param: [q, t, z] 23 | levtype: pl 24 | level: [50, 100] 25 | stream: oper 26 | type: an 27 | - accumulations: 28 | <<: *mars_request 29 | levtype: sfc 30 | param: [cp, tp] 31 | - forcings: 32 | template: ${input.join.0.mars} 33 | param: 34 | - cos_latitude 35 | - sin_latitude 36 | 37 | output: 38 | order_by: [valid_datetime, param_level, number] 39 | remapping: 40 | param_level: "{param}_{levelist}" 41 | statistics: param_level 42 | -------------------------------------------------------------------------------- /tools/grids/grids7.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | mars_request: &mars_request 3 | expver: "0001" 4 | grid: 2/2 5 | area: [90, -40, -40, 180] 6 | 7 | dates: 8 | start: 2024-01-01 00:00:00 9 | end: 2024-01-01 18:00:00 10 | frequency: 6h 11 | 12 | input: 13 | join: 14 | - mars: 15 | <<: *mars_request 16 | param: [2t, 10u, 10v, lsm] 17 | levtype: sfc 18 | stream: oper 19 | type: an 20 | - mars: 21 | <<: *mars_request 22 | param: [q, t, z] 23 | levtype: pl 24 | level: [50, 100] 25 | stream: oper 26 | type: an 27 | - accumulations: 28 | <<: *mars_request 29 | levtype: sfc 30 | param: [cp, tp] 31 | - forcings: 32 | template: ${input.join.0.mars} 33 | param: 34 | - cos_latitude 35 | - sin_latitude 36 | 37 | output: 38 | order_by: [valid_datetime, param_level, number] 39 | remapping: 40 | param_level: "{param}_{levelist}" 41 | statistics: param_level 42 | -------------------------------------------------------------------------------- /tools/make-sample-dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # (C) Copyright 2024 Anemoi contributors. 3 | # 4 | # This software is licensed under the terms of the Apache Licence Version 2.0 5 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 6 | # 7 | # In applying this licence, ECMWF does not waive the privileges and immunities 8 | # granted to it by virtue of its status as an intergovernmental organisation 9 | # nor does it submit to any jurisdiction. 10 | 11 | 12 | import argparse 13 | import os 14 | import shutil 15 | 16 | import xarray as xr 17 | 18 | parser = argparse.ArgumentParser(description="Create a sample dataset") 19 | parser.add_argument("input", type=str, help="Input file name") 20 | parser.add_argument("output", type=str, help="Output file name") 21 | args = parser.parse_args() 22 | 23 | if os.path.exists(args.output): 24 | if os.path.isdir(args.output): 25 | shutil.rmtree(args.output) 26 | else: 27 | os.unlink(args.output) 28 | 29 | if args.input.endswith(".zarr"): 30 | ds = xr.open_zarr(args.input) 31 | else: 32 | ds = xr.open_dataset(args.input) 33 | 34 | if args.output.endswith(".zarr"): 35 | ds.to_zarr(args.output, consolidated=True) 36 | else: 37 | ds.to_netcdf(args.output) 38 | -------------------------------------------------------------------------------- /tools/upload-sample-dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # (C) Copyright 2024 Anemoi contributors. 
3 | # 4 | # This software is licensed under the terms of the Apache Licence Version 2.0 5 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 6 | # 7 | # In applying this licence, ECMWF does not waive the privileges and immunities 8 | # granted to it by virtue of its status as an intergovernmental organisation 9 | # nor does it submit to any jurisdiction. 10 | 11 | 12 | import argparse 13 | import logging 14 | import os 15 | 16 | from anemoi.utils.remote import transfer 17 | 18 | LOG = logging.getLogger(__name__) 19 | 20 | logging.basicConfig(level=logging.INFO) 21 | 22 | parser = argparse.ArgumentParser(description="Upload sample dataset to S3") 23 | parser.add_argument("--bucket", type=str, help="S3 target path", default="s3://ml-tests/test-data/") 24 | parser.add_argument("source", type=str, help="Path to the sample dataset") 25 | parser.add_argument("target", type=str, help="Target path, relative to the bucket or a full s3:// URL") 26 | parser.add_argument("--overwrite", action="store_true", help="Overwrite existing data") 27 | 28 | args = parser.parse_args() 29 | 30 | source = args.source 31 | target = args.target 32 | bucket = args.bucket 33 | 34 | assert os.path.exists(source), f"Source {source} does not exist" 35 | 36 | if not target.startswith("s3://"): 37 | if target.startswith("/"): 38 | target = target[1:] 39 | if bucket.endswith("/"): 40 | bucket = bucket[:-1] 41 | target = os.path.join(bucket, target) 42 | 43 | LOG.info(f"Uploading {source} to {target}") 44 | transfer(source, target, overwrite=args.overwrite) 45 | LOG.info("Upload complete") 46 | --------------------------------------------------------------------------------
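
A minimal usage sketch, not a file from this repository: it assumes a dataset has first been built from one of the test recipes above, e.g. `anemoi-datasets create tests/create/join.yaml join.zarr` (the invocation used by tools/examples/Makefile and tests/create/run.sh), and then opens it with `open_dataset`, the same entry point used by tools/build-obs.py. Only attributes exercised elsewhere in this tree are used (`dates`, `variables`, `latitudes`, `statistics`, integer indexing as in sources/anemoi_dataset.py); the file name `join.zarr` is an assumption.

# Sketch only: assumes `join.zarr` was created from tests/create/join.yaml.
from anemoi.datasets import open_dataset

ds = open_dataset("join.zarr")

# Dates, variables and grid, as read by sources/anemoi_dataset.py above.
print(f"{len(ds)} samples from {ds.dates[0]} to {ds.dates[-1]}")
print(f"variables: {ds.variables}")
print(f"grid points: {len(ds.latitudes)}")

# Each item is indexed as (variable, ensemble member, grid point), matching
# the nested loops over ds[idx] in sources/anemoi_dataset.py.
print(f"first sample shape: {ds[0].shape}")

# Per-variable statistics, computed by the `statistics` step in run.sh and
# copied verbatim by tools/build-obs.py.
print(f"means: {ds.statistics['mean']}")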