├── .gitattributes ├── .github ├── dependabot.yml ├── pull_request_template.md └── workflows │ ├── ci.yaml │ ├── full_archive_ci.yaml │ └── pythonpublish.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── BLM.png ├── CITATION.cff ├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── ci ├── environment-cloud-test.yml ├── environment-upstream-dev.yml └── environment.yml ├── codecov.yml ├── docs ├── Makefile ├── api.rst ├── conf.py ├── contributor-guide.rst ├── drift_removal.ipynb ├── environment.yml ├── images │ ├── logo.png │ └── workflow_diagram.png ├── index.rst ├── make.bat ├── postprocessing.ipynb ├── regionmask.ipynb ├── tutorial.ipynb └── whats-new.rst ├── notebooks ├── .ipynb_checkpoints │ └── parse_area_gn-checkpoint.ipynb ├── add_more_models.ipynb ├── maintenance_grids.ipynb ├── metric_parse_improvement.ipynb ├── parse_area_gn.ipynb ├── test.yaml └── testing_various_issues.ipynb ├── pyproject.toml ├── readthedocs.yml ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── test_drift_removal.py ├── test_grids.py ├── test_postprocessing.py ├── test_preprocessing.py ├── test_preprocessing_cloud.py ├── test_regionmask.py └── test_utils.py └── xmip ├── __init__.py ├── drift_removal.py ├── grids.py ├── postprocessing.py ├── preprocessing.py ├── regionmask.py ├── specs └── staggered_grid_config.yaml └── utils.py /.gitattributes: -------------------------------------------------------------------------------- 1 | xmip/_version.py export-subst 2 | * text=auto eol=lf 3 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: 'github-actions' 4 | directory: '/' 5 | schedule: 6 | interval: 'monthly' 7 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | - [ ] Closes #xxxx 4 | - [ ] Tests added 5 | - [ ] Passes `pre-commit run --all-files` 6 | - [ ] User visible changes (including notable bug fixes) are documented in `whats-new.rst` 7 | - [ ] New functions/methods are listed in `api.rst` 8 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | push: 4 | branches: 5 | - "main" 6 | pull_request: 7 | branches: 8 | - "*" 9 | schedule: 10 | - cron: "0 13 * * 1" 11 | 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.ref }} 14 | cancel-in-progress: true 15 | 16 | jobs: 17 | detect-ci-trigger: 18 | name: detect ci trigger 19 | runs-on: ubuntu-latest 20 | if: github.event_name == 'push' || github.event_name == 'pull_request' 21 | outputs: 22 | triggered: ${{ steps.detect-trigger.outputs.trigger-found }} 23 | steps: 24 | - uses: actions/checkout@v4 25 | with: 26 | fetch-depth: 2 27 | - uses: xarray-contrib/ci-trigger@v1 28 | id: detect-trigger 29 | with: 30 | keyword: "[full-cloud-ci]" 31 | 32 | build: 33 | name: Build (${{ matrix.python-version }} | ${{ matrix.os }}) 34 | if: github.repository == 'jbusecke/xmip' 35 | runs-on: ${{ matrix.os }} 36 | timeout-minutes: 45 37 | defaults: 38 | run: 39 | shell: bash -l {0} 40 | strategy: 41 | fail-fast: false 42 | matrix: 43 | os: ["ubuntu-latest"] 44 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 45 | steps: 46 | - uses: actions/checkout@v4 47 
| - name: Create conda environment 48 | uses: mamba-org/setup-micromamba@v1 49 | with: 50 | cache-environment: true 51 | cache-downloads: true 52 | micromamba-version: 'latest' 53 | environment-file: ci/environment.yml 54 | create-args: >- 55 | python=${{ matrix.python-version }} 56 | - name: Install xMIP 57 | run: | 58 | python -m pip install -e . --no-deps 59 | conda list 60 | - name: Run Tests 61 | run: | 62 | pytest -n auto --cov=./ --cov-report=xml --ignore=tests/test_preprocessing_cloud.py 63 | - name: Upload code coverage to Codecov 64 | uses: codecov/codecov-action@v4.5.0 65 | with: 66 | file: ./coverage.xml 67 | flags: unittests 68 | env_vars: OS,PYTHON 69 | name: codecov-umbrella 70 | fail_ci_if_error: false 71 | - name: Check Machine Config 72 | run: lscpu 73 | 74 | upstream-dev: 75 | name: Build (upstream-dev) 76 | runs-on: ubuntu-latest 77 | defaults: 78 | run: 79 | shell: bash -l {0} 80 | steps: 81 | - uses: actions/checkout@v4 82 | - name: Create conda environment 83 | uses: mamba-org/setup-micromamba@v1 84 | with: 85 | cache-environment: true 86 | cache-downloads: true 87 | micromamba-version: 'latest' 88 | environment-file: ci/environment-upstream-dev.yml 89 | create-args: >- 90 | python=3.11 91 | - name: Install xMIP 92 | run: | 93 | python -m pip install -e . --no-deps 94 | conda list 95 | - name: Run Tests 96 | run: | 97 | pytest -n auto --cov=./ --cov-report=xml --ignore=tests/test_preprocessing_cloud.py 98 | - name: Upload code coverage to Codecov 99 | uses: codecov/codecov-action@v4.5.0 100 | with: 101 | file: ./coverage.xml 102 | flags: unittests 103 | env_vars: OS,PYTHON 104 | name: codecov-umbrella 105 | fail_ci_if_error: false 106 | - name: Run Tests 107 | run: | 108 | pytest -n auto --ignore=tests/test_preprocessing_cloud.py 109 | 110 | cloud-tests: 111 | needs: detect-ci-trigger 112 | if: needs.detect-ci-trigger.outputs.triggered == 'true' 113 | name: Build (cloud-data-tests | ${{ matrix.variable_id }} | ${{ matrix.experiment_id }} | ${{ matrix.grid_label }}) 114 | strategy: 115 | fail-fast: false 116 | matrix: 117 | variable_id: ["thetao", "o2", "so", "uo"] 118 | experiment_id: ["historical", "ssp585"] 119 | grid_label: ["gn", "gr"] 120 | runs-on: ubuntu-latest 121 | defaults: 122 | run: 123 | shell: bash -l {0} 124 | steps: 125 | - uses: actions/checkout@v4 126 | - name: Create conda environment 127 | uses: mamba-org/setup-micromamba@v1 128 | with: 129 | cache-environment: true 130 | cache-downloads: true 131 | micromamba-version: 'latest' 132 | environment-file: ci/environment.yml 133 | create-args: >- 134 | python=3.11 135 | - name: Install xMIP 136 | run: | 137 | python -m pip install -e . 
--no-deps 138 | conda list 139 | - name: Check Machine Config 140 | run: lscpu 141 | - name: Run Tests 142 | run: | 143 | pwd 144 | echo $PYTHONPATH 145 | pytest -n auto --reruns 1 --reruns-delay 5 tests/test_preprocessing_cloud.py --gl ${{ matrix.grid_label }} --ei ${{ matrix.experiment_id }} --vi ${{ matrix.variable_id }} 146 | -------------------------------------------------------------------------------- /.github/workflows/full_archive_ci.yaml: -------------------------------------------------------------------------------- 1 | name: Full Archive CI 2 | on: 3 | workflow_dispatch: # enable a manual trigger 4 | inputs: 5 | name: 6 | description: 'Testing a manual trigger' 7 | schedule: 8 | - cron: "0 13 * * 1" # run every monday 9 | 10 | jobs: 11 | cloud-tests: 12 | name: cloud-tests (${{ matrix.catalog}} catalog) | ${{ matrix.variable_id }} | ${{ matrix.experiment_id }} | ${{ matrix.grid_label }} 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | variable_id: ['zos','so', 'thetao', 'uo','o2'] 17 | experiment_id: ['historical','piControl', 'esm-hist', 'esm-piControl', 18 | 'ssp245', 'ssp370','ssp585', 'ssp119'] 19 | grid_label: ['gn', 'gr'] 20 | catalog: ['main'] 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v4 24 | - name: Cache conda 25 | uses: actions/cache@v4 26 | env: 27 | # Increase this value to reset cache if ci/environment-upstream-dev.yml has not changed 28 | CACHE_NUMBER: 0 29 | with: 30 | path: ~/conda_pkgs_dir 31 | key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/environment-upstream-dev.yml') }} 32 | - uses: conda-incubator/setup-miniconda@v3.0.3 33 | with: 34 | channels: conda-forge 35 | mamba-version: '*' 36 | channel-priority: strict 37 | activate-environment: test_env_xmip # Defined in ci/environment-upstream-dev.yml 38 | auto-update-conda: false 39 | python-version: 3.8 40 | environment-file: ci/environment-cloud-test.yml 41 | use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly! 42 | - name: Set up conda environment 43 | shell: bash -l {0} 44 | run: python -m pip install -e . 
--no-deps --force-reinstall 45 | - name: Conda List 46 | shell: bash -l {0} 47 | run: conda list 48 | - name: Check Machine Config 49 | shell: bash -l {0} 50 | run: lscpu 51 | - name: Run Tests 52 | shell: bash -l {0} 53 | run: | 54 | pwd 55 | echo $PYTHONPATH 56 | pytest --reruns 1 --reruns-delay 10 --maxfail 20 tests/test_preprocessing_cloud.py --gl ${{ matrix.grid_label }} --ei ${{ matrix.experiment_id }} --vi ${{ matrix.variable_id }} --cat ${{ matrix.catalog }} 57 | -------------------------------------------------------------------------------- /.github/workflows/pythonpublish.yaml: -------------------------------------------------------------------------------- 1 | name: Build and Upload xmip to PyPI 2 | on: 3 | push: 4 | branches: 5 | - "main" 6 | pull_request: 7 | branches: 8 | - "*" 9 | release: 10 | types: 11 | - published 12 | 13 | jobs: 14 | build-artifacts: 15 | runs-on: ubuntu-latest 16 | if: github.repository == 'jbusecke/xmip' 17 | steps: 18 | - uses: actions/checkout@v4 19 | with: 20 | fetch-depth: 0 21 | - name: Set up Python 22 | uses: actions/setup-python@v5 23 | with: 24 | python-version: "3.x" 25 | - name: Install dependencies 26 | run: | 27 | python -m pip install --upgrade pip 28 | python -m pip install --upgrade setuptools setuptools-scm build twine 29 | - name: Build only 30 | if: github.event_name != 'release' 31 | run: | 32 | python -m build 33 | twine check dist/* 34 | - name: Build and publish 35 | if: github.event_name == 'release' 36 | env: 37 | TWINE_USERNAME: "__token__" 38 | TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} 39 | run: | 40 | python -m build 41 | twine check dist/* 42 | twine upload dist/* 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/_build 57 | make.bat 58 | 59 | # PyBuilder 60 | target/ 61 | 62 | # pyenv python configuration file 63 | .python-version 64 | 65 | .ipynb_checkpoints 66 | 67 | **/dask-worker-space/ 68 | mydask.png 69 | 70 | .vscode 71 | .mypy_cache 72 | readthedocs.yml 73 | xmip/_version.py 74 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | ci: 2 | autoupdate_schedule: quarterly 3 | autofix_prs: true 4 | 5 | repos: 6 | - repo: https://github.com/pre-commit/pre-commit-hooks 7 | rev: v4.6.0 8 | hooks: 9 | - id: check-added-large-files 10 | - id: check-toml 11 | - id: trailing-whitespace 12 | - id: end-of-file-fixer 13 | - id: check-yaml 14 | - id: debug-statements 15 | - repo: https://github.com/astral-sh/ruff-pre-commit 16 | rev: v0.5.0 17 | hooks: 18 | # Run the linter. 19 | - id: ruff 20 | args: [ --fix ] 21 | # Run the formatter. 22 | - id: ruff-format 23 | -------------------------------------------------------------------------------- /BLM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jbusecke/xMIP/108266e72d01c173e15cdf4ca00612a0d4f0a8ca/BLM.png -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 3 | authors: 4 | - family-names: "Busecke" 5 | given-names: "Julius J. M." 6 | orcid: "https://orcid.org/0000-0001-8571-865X" 7 | - family-names: "Spring" 8 | given-names: "Aaron" 9 | orcid: "https://orcid.org/0000-0003-0216-2241" 10 | - family-names: "Maroon" 11 | given-names: "Elizabeth" 12 | orcid: "https://orcid.org/0000-0002-1660-7822" 13 | - family-names: "Nicholas" 14 | given-names: "Thomas" 15 | orcid: "https://orcid.org/0000-0002-2176-0530" 16 | - family-names: "Magin" 17 | given-names: "Justus" 18 | orcid: "https://orcid.org/0000-0002-4254-8002" 19 | affiliation: "IFREMER" 20 | - family-names: "Ritschel" 21 | given-names: "Markus" 22 | orcid: "https://orcid.org/0000-0001-7464-7075" 23 | affiliation: "Universität Hamburg, Germany" 24 | - family-names: "Angevaare" 25 | given-names: "Joran J. R." 26 | orcid: "https://orcid.org/0000-0003-3392-8123" 27 | affiliation: "KNMI" 28 | 29 | title: "xMIP" 30 | url: "https://github.com/jbusecke/xMIP" 31 | # version and doi are completed by Zenodo automatically, do not provide here. 32 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | Copyright 2022 xMIP Contributors 179 | 180 | Licensed under the Apache License, Version 2.0 (the "License"); 181 | you may not use this file except in compliance with the License. 182 | You may obtain a copy of the License at 183 | 184 | http://www.apache.org/licenses/LICENSE-2.0 185 | 186 | Unless required by applicable law or agreed to in writing, software 187 | distributed under the License is distributed on an "AS IS" BASIS, 188 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 189 | See the License for the specific language governing permissions and 190 | limitations under the License. 
191 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include xmip * 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Documentation Status](https://readthedocs.org/projects/cmip6-preprocessing/badge/?version=latest)](https://cmip6-preprocessing.readthedocs.io/en/latest/?badge=latest) 2 | [![Anaconda Cloud](https://anaconda.org/conda-forge/xmip/badges/version.svg)](https://anaconda.org/conda-forge/xmip) 3 | [![conda-forge](https://img.shields.io/conda/dn/conda-forge/xmip?label=conda-forge)](https://anaconda.org/conda-forge/xmip) 4 | [![Pypi](https://img.shields.io/pypi/v/xmip.svg)](https://pypi.org/project/xmip) 5 | [![Build Status](https://img.shields.io/github/workflow/status/jbusecke/xmip/CI?logo=github)](https://github.com/jbusecke/xmip/actions) 6 | [![Full Archive CI](https://github.com/jbusecke/xmip/workflows/Full%20Archive%20CI/badge.svg)](https://github.com/jbusecke/xmip/actions/workflows/full_archive_ci.yaml) 7 | [![codecov](https://codecov.io/gh/jbusecke/xmip/branch/main/graph/badge.svg)](https://codecov.io/gh/jbusecke/xmip) 8 | [![License:MIT](https://img.shields.io/badge/License-MIT-lightgray.svg?style=flt-square)](https://opensource.org/licenses/MIT) 9 | [![DOI](https://zenodo.org/badge/215606850.svg)](https://zenodo.org/badge/latestdoi/215606850) 10 | 11 | ![BLM](BLM.png) 12 | 13 | Science is not immune to racism. Academia is an elitist system with numerous gatekeepers that has mostly allowed a very limited spectrum of people to pursue a career. I believe we need to change that. 14 | 15 | Open source development and reproducible science are a great way to democratize the means for scientific analysis. **But you can't git clone software if you are being murdered by the police for being Black!** 16 | 17 | Free access to software and hollow diversity statements are hardly enough to crush the systemic and institutionalized racism in our society and academia. 18 | 19 | If you are using this package, I ask you to go beyond just speaking out and donate [here](https://secure.actblue.com/donate/cmip6_preprocessing) to [Data for Black Lives](http://d4bl.org/) and [Black Lives Matter Action](https://blacklivesmatter.com/global-actions/). 20 | 21 | I explicitly welcome suggestions regarding the wording of this statement and for additional organizations to support. Please raise an [issue](https://github.com/jbusecke/xmip/issues) for suggestions. 22 | 23 | 24 | 25 | # xmip (formerly cmip6_preprocessing) 26 | 27 | This package facilitates the cleaning, organization and interactive analysis of Model Intercomparison Projects (MIPs) within the [Pangeo](https://pangeo.io) software stack. 28 | 29 | Are you interested in CMIP6 data, but find that is is not quite `analysis ready`? Do you just want to run a simple (or complicated) analysis on various models and end up having to write logic for each seperate case, because various datasets still require fixes to names, coordinates, etc.? Then this package is for you. 30 | 31 | Developed during the [cmip6-hackathon](https://cmip6hack.github.io/#/) this package provides utility functions that play nicely with [intake-esm](https://github.com/NCAR/intake-esm). 32 | 33 | We currently support the following functions 34 | 35 | 1. 
Preprocessing CMIP6 data (Please check out the [tutorial](docs/tutorial.ipynb) for some examples using the [pangeo cloud](ocean.pangeo.io)). The preprocessing includes:
36 |     a. Fix inconsistent naming of dimensions and coordinates
37 |     b. Fix inconsistent values, shape and dataset location of coordinates
38 |     c. Homogenize longitude conventions
39 |     d. Fix inconsistent units
40 | 2. [Creating large scale ocean basin masks for arbitrary model output](docs/regionmask.ipynb)
41 | 
42 | The following issues are under development:
43 | 1. Reconstruct/find grid metrics
44 | 2. Arrange different variables on their respective staggered grid, so they can work seamlessly with [xgcm](https://xgcm.readthedocs.io/en/latest/)
45 | 
46 | Check out this recent Earthcube [notebook](https://github.com/earthcube2020/ec20_busecke_etal) (cite via doi: [10.1002/essoar.10504241.1](https://www.essoar.org/doi/10.1002/essoar.10504241.1)) for a high level demo of `xmip` and [xgcm](https://github.com/xgcm/xgcm).
47 | 
48 | 
49 | ## Installation
50 | 
51 | Install `xmip` via pip:
52 | 
53 | `pip install xmip`
54 | 
55 | or conda:
56 | 
57 | `conda install -c conda-forge xmip`
58 | 
59 | To install the newest main branch from GitHub you can use pip as well:
60 | 
61 | `pip install git+https://github.com/jbusecke/xmip.git`
62 | 
--------------------------------------------------------------------------------
/ci/environment-cloud-test.yml:
--------------------------------------------------------------------------------
1 | name: test_env_xmip
2 | channels:
3 |   - conda-forge
4 | dependencies:
5 |   - xarray>=0.17.0
6 |   - xgcm < 0.7.0 # temporary pin since we need 'extrapolate' option for padding
7 |   # Dependencies for the pangeo cloud data
8 |   - intake-esm
9 |   - gcsfs
10 |   - zarr
11 |   - pint
12 |   - cf_xarray>=0.6.0
13 |   - pint-xarray
14 |   # Dependencies for the testing suite
15 |   - pytest-cov
16 |   - pytest-xdist
17 |   - pytest-rerunfailures
18 |   - codecov
19 | 
--------------------------------------------------------------------------------
/ci/environment-upstream-dev.yml:
--------------------------------------------------------------------------------
1 | name: test_env_xmip
2 | channels:
3 |   - conda-forge
4 | dependencies:
5 |   - cftime
6 |   - dask
7 |   - xgcm <0.7.0 # temporary pin since we need 'extrapolate' option for padding
8 |   - pip
9 |   - cartopy #installing this without conda is a nightmare, so ill leave it here
10 |   - xesmf # same here
11 |   - rasterio # Trying to get around an apparent bug with py 3.10 + pip + rasterio (https://github.com/jbusecke/cmip6_preprocessing/pull/231#issuecomment-1132190649)
12 |   - pip:
13 |     - codecov
14 |     - pytest-cov
15 |     - pytest-xdist
16 |     - git+https://github.com/regionmask/regionmask.git
17 |     - git+https://github.com/pydata/xarray.git
18 |     #- git+https://github.com/xgcm/xgcm.git
19 |     - git+https://github.com/jbusecke/xarrayutils.git
20 |     - git+https://github.com/xarray-contrib/cf-xarray.git
21 |     - git+https://github.com/hgrecco/pint.git
22 |     - git+https://github.com/xarray-contrib/pint-xarray.git
23 | 
--------------------------------------------------------------------------------
/ci/environment.yml:
--------------------------------------------------------------------------------
1 | name: test_env_xmip
2 | channels:
3 |   - conda-forge
4 | dependencies:
5 |   - xarray>=0.17.0
6 |   - pandas
7 |   - netcdf4
8 |   - scipy
9 |   - xgcm<0.7.0 #Revert this after fixing the extrapolate option
10 |   - cftime
11 |   - regionmask
12 |   - cartopy
13 |   - xesmf
14 |   - xarrayutils>=2.0.0 #TODO remove when the new
xarray polyfit is implemented 15 | - pint 16 | - cf_xarray>=0.6.0 17 | - pint-xarray>=0.2.1 18 | - pytest-cov 19 | - pytest-xdist 20 | - codecov 21 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | require_ci_to_pass: no 3 | max_report_age: off 4 | 5 | comment: false 6 | 7 | coverage: 8 | precision: 2 9 | round: down 10 | status: 11 | project: 12 | default: 13 | target: 95 14 | informational: true 15 | patch: off 16 | changes: off 17 | 18 | ignore: 19 | - "setup.py" 20 | - "tests/*" 21 | - "xmip/__init__.py" 22 | - "xmip/_version.py" 23 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | :mod:`API` 2 | ---------------------------- 3 | 4 | preprocessing 5 | ============= 6 | .. automodule:: xmip.preprocessing 7 | :members: 8 | :undoc-members: 9 | :show-inheritance: 10 | 11 | postprocessing 12 | ============== 13 | .. automodule:: xmip.postprocessing 14 | :members: 15 | :undoc-members: 16 | :show-inheritance: 17 | 18 | 19 | grids 20 | ===== 21 | .. automodule:: xmip.grids 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | 26 | regionmask 27 | ========== 28 | .. automodule:: xmip.regionmask 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | 33 | utils 34 | ===== 35 | .. automodule:: xmip.utils 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | 10 | # If extensions (or modules to document with autodoc) are in another directory, 11 | # add these directories to sys.path here. If the directory is relative to the 12 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
13 | # 14 | import os 15 | import pathlib 16 | import sys 17 | 18 | 19 | print("python exec:", sys.executable) 20 | print("sys.path:", sys.path) 21 | root = pathlib.Path(__file__).parent.parent.absolute() 22 | os.environ["PYTHONPATH"] = str(root) 23 | sys.path.insert(0, str(root)) 24 | 25 | import xmip # noqa 26 | from importlib.metadata import version # noqa 27 | 28 | release = version("xmip") 29 | # for example take major/minor/patch 30 | version = ".".join(release.split(".")[:3]) 31 | 32 | # From https://github.com/pypa/setuptools_scm/#usage-from-sphinx 33 | 34 | # -- Project information ----------------------------------------------------- 35 | 36 | project = "xmip" 37 | copyright = "2021, xmip maintainers" 38 | author = "xmip maintainers" 39 | 40 | 41 | # -- General configuration --------------------------------------------------- 42 | 43 | # Add any Sphinx extension module names here, as strings. They can be 44 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 45 | # ones. 46 | extensions = [ 47 | "sphinx.ext.autodoc", 48 | "sphinx.ext.viewcode", 49 | "sphinx.ext.napoleon", 50 | "nbsphinx", 51 | "recommonmark", 52 | "sphinx.ext.mathjax", 53 | "sphinx.ext.autosummary", 54 | "sphinx.ext.extlinks", 55 | "sphinx.ext.intersphinx", 56 | "numpydoc", 57 | "nbsphinx", 58 | "IPython.sphinxext.ipython_directive", 59 | "IPython.sphinxext.ipython_console_highlighting", 60 | "sphinxcontrib.srclinks", 61 | ] 62 | 63 | # Add any paths that contain templates here, relative to this directory. 64 | templates_path = ["_templates"] 65 | 66 | # List of patterns, relative to source directory, that match files and 67 | # directories to ignore when looking for source files. 68 | # This pattern also affects html_static_path and html_extra_path. 69 | exclude_patterns = ["_build", "**.ipynb_checkpoints", "Thumbs.db", ".DS_Store"] 70 | 71 | # link to github issues 72 | extlinks = { 73 | "issue": ("https://github.com/jbusecke/xmip/issues/%s", "GH#%s"), 74 | "pull": ("https://github.com/jbusecke/xmip/pull/%s", "GH#%s"), 75 | } 76 | 77 | # -- Options for HTML output ------------------------------------------------- 78 | 79 | # The theme to use for HTML and HTML Help pages. See the documentation for 80 | # a list of builtin themes. 81 | # 82 | html_theme = "pangeo" 83 | 84 | # Add any paths that contain custom static files (such as style sheets) here, 85 | # relative to this directory. They are copied after the builtin static files, 86 | # so a file named "default.css" will overwrite the builtin "default.css". 87 | html_static_path = ["_static"] 88 | -------------------------------------------------------------------------------- /docs/contributor-guide.rst: -------------------------------------------------------------------------------- 1 | .. _contributor_guide: 2 | 3 | Contributor Guide 4 | ----------------- 5 | 6 | **xmip** is meant to be a community driven package and we welcome feedback and 7 | contributions. 8 | 9 | Did you notice a bug? Are you missing a feature? A good first starting place is to 10 | open an issue in the `github issues page `_. 11 | 12 | 13 | In order to contribute to xmip, please fork the repository and submit a pull request. 14 | A good step by step tutorial for this can be found in the 15 | `xarray contributor guide `_. 
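A typical way to get a local development copy might look like the following sketch (the clone URL is a placeholder for your own fork; the editable install mirrors the command used in the CI workflows)::

    git clone https://github.com/<your-username>/xmip.git
    cd xmip
    python -m pip install -e . --no-deps

The conda environments described below provide the actual dependencies.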
16 | 
17 | 
18 | Environments
19 | ^^^^^^^^^^^^
20 | The easiest way to start developing xmip pull requests
21 | is to install one of the conda environments provided in the `ci folder `_::
22 | 
23 |   conda env create -f ci/environment.yml
24 | 
25 | Activate the environment with::
26 | 
27 |   conda activate test_env_xmip
28 | 
29 | We use `black `_ as a code formatter and pull requests will
30 | fail in the CI if not properly formatted.
31 | 
32 | All conda environments contain black and you can reformat code using::
33 | 
34 |   black xmip
35 | 
36 | `pre-commit `_ provides an automated way to reformat your code
37 | prior to each commit. Simply install pre-commit::
38 | 
39 |   pip install pre-commit
40 | 
41 | and install it in the xmip root directory with::
42 | 
43 |   pre-commit install
44 | 
45 | and your code will be properly formatted before each commit.
46 | 
47 | Change and build docs
48 | ^^^^^^^^^^^^^^^^^^^^^
49 | 
50 | To make additions or changes to the documentation, please install/activate the docs environment `docs/environment.yml`.
51 | 
52 | You can then make changes and build the html locally by running `make html` in the `docs` folder.
53 | 
54 | Check the generated html locally with `open _build/html/index.html`.
55 | 
56 | .. note::
57 |    Some of the CI can take a long time to build and when making changes to the docs only, you can deactivate it by adding `[skip-ci]` to your commit message.
58 | 
59 |    For example::
60 | 
61 |      git commit -m '[skip-ci] Just a typo in the docs'
62 | 
63 |    will skip the expensive cloud CI for intermediate pushes.
64 | 
65 | 
66 | How to release a new version of xmip (for maintainers only)
67 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
68 | The process of releasing at this point is very easy.
69 | 
70 | We need only two things: a PR to update the documentation and a release on GitHub.
71 | 
72 | 1. Make sure that all the new features/bugfixes etc. are appropriately documented in `doc/whats-new.rst`, add the date to the current release and make an empty (unreleased) entry for the next minor release as a PR.
73 | 2. Navigate to the 'tags' symbol on the repo's main page, click on 'Releases' and on 'Draft new release' on the right. Add the version number and a short description and save the release.
74 | 
75 | From here the GitHub actions take over and package things for `PyPI `_.
76 | The conda-forge package will be triggered by the PyPI release and you will have to approve a PR in `xmip-feedstock `_. This takes a while, usually a few hours to a day.
77 | 
78 | That's it!
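
Running the test suite
^^^^^^^^^^^^^^^^^^^^^^

To check your changes locally, you can run the same non-cloud test suite that the CI uses. Assuming one of the conda environments above is activated and xmip is installed in editable mode, this is::

  pytest -n auto --ignore=tests/test_preprocessing_cloud.py

The tests in `tests/test_preprocessing_cloud.py` require access to the Pangeo cloud data and are normally exercised by the dedicated cloud CI (triggered with `[full-cloud-ci]` in a commit message) rather than locally.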
79 | 
--------------------------------------------------------------------------------
/docs/environment.yml:
--------------------------------------------------------------------------------
1 | name: xmip_docs
2 | channels:
3 |   - conda-forge
4 | dependencies:
5 |   - cartopy
6 |   # Insert your dependencies here
7 |   - numpydoc
8 |   - sphinx
9 |   - sphinx_rtd_theme
10 |   - ipython
11 |   - ipykernel # not strictly necessary but this is nice to run notebooks in this env to test
12 |   - pandoc
13 |   - recommonmark
14 |   - pip
15 |   - nc-time-axis
16 |   - pint
17 |   - pip:
18 |     - docrep<=0.2.7
19 |     - nbsphinx
20 |     - jupyter_client
21 |     - sphinx_pangeo_theme
22 |     - sphinx-copybutton
23 |     - sphinxcontrib-srclinks
24 | 
--------------------------------------------------------------------------------
/docs/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbusecke/xMIP/108266e72d01c173e15cdf4ca00612a0d4f0a8ca/docs/images/logo.png
--------------------------------------------------------------------------------
/docs/images/workflow_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbusecke/xMIP/108266e72d01c173e15cdf4ca00612a0d4f0a8ca/docs/images/workflow_diagram.png
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. xmip documentation master file, created by
2 |    sphinx-quickstart on Thu Feb 25 16:11:36 2021.
3 |    You can adapt this file completely to your liking, but it should at least
4 |    contain the root `toctree` directive.
5 | 
6 | 
7 | .. image:: images/logo.png
8 | 
9 | Analysis ready CMIP6 data with Pangeo
10 | =====================================
11 | 
12 | Modern climate science efforts like the IPCC rely heavily on model intercomparison projects (MIPs). These projects essentially pool together model results from various climate modeling centers around the world that were run according to specific protocols, in order to compare, for instance, the response of the coupled climate system to changes in forcing.
13 | 
14 | The vast amount of work that has been put into the standardization of these experiments enables climate scientists to use a wealth of data to answer their specific questions, thus refining future models and increasing our understanding of the complex system that is our home planet.
15 | 
16 | However, from the viewpoint of analyzing these data, the output is still quite 'dirty', making the quintessential workflow of:
17 | 
18 | 1. Develop a metric/analysis to apply to one model.
19 | 2. Run that analysis across all the models and interpret results.
20 | 
21 | inherently difficult.
22 | 
23 | Most of the problems arise from differences in the conventions the model output is provided in. This includes, but is not limited to, different naming conventions for coordinate variables, units, and grid variables.
24 | `xmip` aims to provide lightweight tools that let you get right to the science, without spending hours on cleaning up the data.
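
To get a feel for the intended workflow, here is a minimal sketch that loads several CMIP6 stores from the Pangeo cloud catalog and cleans them on the fly (the catalog URL and the query values are only examples and `intake-esm` is assumed to be installed; see the tutorial below for a full walk-through)::

    import intake

    from xmip.preprocessing import combined_preprocessing

    # open the Pangeo CMIP6 catalog (swap in your own catalog or files as needed)
    col = intake.open_esm_datastore(
        "https://storage.googleapis.com/cmip6/pangeo-cmip6.json"
    )

    # select a single variable across many models
    cat = col.search(
        experiment_id="historical",
        table_id="Omon",
        variable_id="thetao",
        grid_label="gn",
    )

    # `combined_preprocessing` fixes names, coordinates and units while loading
    ddict = cat.to_dataset_dict(
        zarr_kwargs={"consolidated": True},
        preprocess=combined_preprocessing,
    )

Every dataset in `ddict` then shares the same naming and coordinate conventions, so the same analysis code can be mapped over all models.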
25 | 26 | 27 | 28 | Installation 29 | ------------ 30 | 31 | Installation from Conda Forge 32 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 33 | 34 | The easiest way to install xMIP along with its dependencies is via conda 35 | forge:: 36 | 37 | conda install -c conda-forge xmip 38 | 39 | Installation from Pip 40 | ^^^^^^^^^^^^^^^^^^^^^ 41 | 42 | An alternative is to use pip:: 43 | 44 | pip install xmip 45 | 46 | Installation from GitHub 47 | ^^^^^^^^^^^^^^^^^^^^^^^^ 48 | 49 | You can get the newest version by installing directly from GitHub:: 50 | 51 | pip install git+https://github.com/jbusecke/xmip.git 52 | 53 | 54 | Getting Started 55 | --------------- 56 | 57 | The most basic functionality is provided by the `combined_preprocessing` function. Check out the `tutorial `_ for a brief introduction of the basic functionality. 58 | 59 | 60 | Suggested Workflow 61 | ------------------ 62 | 63 | We aim to provide a flexible solution for many scientific workflows which might need combination of datasets at different 'levels'. 64 | 65 | .. image:: images/workflow_diagram.png 66 | 67 | The `preprocessing` module deals with 'cleaning' single variable datasets (e.g. from a single zarr store in the `pangeo CMIP6 cloud data `_ or a dataset loaded from mulitple netcdf files on a local server/HPC). 68 | 69 | It is often desired to remove control run drift from the data before running analyses. Check out the `drift_removal` module for utilities based on aligning/detrending branched runs. 70 | 71 | Depending on your science goal, you might need to combine several datasets into members (multi variable datasets) or even further. These combination tasks are facilitated by the `postprocessing` module. This provides the ability to 'match and combine' datasets based on their attributes. For more detailed examples please check out the `Postprocessing` section. 72 | 73 | The `regionmask` module enables you to create basin masks for each model (and any other data with longitude/latitude values) 74 | 75 | 76 | .. I need to check out how to link the API sections and from within notebooks properly. Look into https://myst-nb.readthedocs.io/en/latest/ 77 | 78 | 79 | Contents 80 | -------- 81 | 82 | .. toctree:: 83 | :maxdepth: 1 84 | 85 | tutorial 86 | postprocessing 87 | drift_removal 88 | regionmask 89 | contributor-guide 90 | api 91 | whats-new 92 | 93 | .. toctree:: 94 | :maxdepth: 2 95 | :caption: Contents: 96 | 97 | 98 | 99 | Indices and tables 100 | ================== 101 | 102 | * :ref:`genindex` 103 | * :ref:`modindex` 104 | * :ref:`search` 105 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/whats-new.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: xmip 2 | 3 | What's New 4 | =========== 5 | .. _whats-new.0.8.0: 6 | 7 | v0.8.0 (unreleased) 8 | ------------------- 9 | 10 | Internal Changes 11 | ~~~~~~~~~~~~~~~~ 12 | - Add `longitude_bnds` and `latitude_bnds` to `cmip_renaming_dict` (:pull:`300`). By `Joran Angevaare `_ 13 | - Updated pre-commit linting to use ruff (:pull:`359`). By `Julius Busecke `_ 14 | - Modernized packaging workflow, that runs on each PR (:pull:`361`). By `Julius Busecke `_ 15 | - Added 'nvertices' -> 'vertex' to renaming preprocessor (:pull:`357`). By `Julius Busecke `_ 16 | - Updated mamba CI + testing py311/py312 (:issue:`360`, :pull:`362`). By `Julius Busecke `_ 17 | 18 | Bugfixes 19 | ~~~~~~~~ 20 | - Fixed cyclic interpolation in `_interp_nominal_lon` (:issue:`295`, :pull:`296`). By `Joran Angevaare `_ 21 | - Fix formatting `whats-new.rst` for doc-building (:pull:`366`). By `Joran Angevaare `_ 22 | 23 | .. _whats-new.0.7.2: 24 | 25 | v0.7.3 (unreleased) 26 | ------------------- 27 | 28 | Internal Changes 29 | ~~~~~~~~~~~~~~~~ 30 | - Added PR template (:pull:`304`). By `Julius Busecke `_ 31 | - Add `longitude_bnds` and `latitude_bnds` to `cmip_renaming_dict` (:pull:`300`). By `Joran Angevaare `_ 32 | 33 | .. _whats-new.0.7.0: 34 | 35 | v0.7.0 (2023/01/03) 36 | ------------------- 37 | 38 | New Features 39 | ~~~~~~~~~~~~ 40 | - :py:func:`~xmip.postprocessing.match_metrics` Now allows more flexible metric matching (accepting e.g. already merged members) + better error for missing match_attrs (:pull:`275`). By `Julius Busecke `_ 41 | - Postprocessing functions can now easily be nested on top of each other (:pull:`187`). By `Julius Busecke `_ 42 | 43 | 44 | Breaking Changes 45 | ~~~~~~~~~~~~~~~~ 46 | - Requires xarray>=0.17.0 and drops support for python 3.6 (:pull:`170`, :pull:`173`). By `Julius Busecke `_ 47 | - :py:func:`~xmip.utils.cmip6_dataset_id` not includes the attribute `variable_id` (:pull:`166`) By `Julius Busecke `_ 48 | - Dropped support for python 3.7 (:pull:`268`, :issue:`267`). By `Julius Busecke `_ 49 | 50 | Internal Changes 51 | ~~~~~~~~~~~~~~~~ 52 | 53 | - Unit correction logic now uses pint-xarray under the hood (:pull:`160`, :issue:`31`). 54 | By `Tom Nicholas `_ and `Julius Busecke `_ 55 | 56 | - License changed to Apache-2.0 (:pull:`272`, :issue:`256`). By `Julius Busecke `_ 57 | 58 | Bugfixes 59 | ~~~~~~~~ 60 | - :py:func:`~xmip.postprocessing.concat_members` now accepts datasets which already have 'member_id' as a dimension (maintain compatibility with recent intake-esm changes) (:pull:`277`). By `Julius Busecke `_ 61 | 62 | - :py:func:`~xmip.postprocessing.match_metrics` now accepts single variables as str input (:issue:`229`, :pull:`245`). By `Julius Busecke `_ 63 | 64 | - :py:func:`~xmip.postprocessing.concat_members` now returns a dataset with labelled `member_id` dimension (:issue:`196` , :pull:`197`). By `Julius Busecke `_ 65 | 66 | - Fixes incompatibility with upstream changes in xarray>=0.19.0 (:issue:`173`, :pull:`174`). 
By `Julius Busecke `_ 67 | 68 | - :py:func:`~xmip.drift_removal.match_and_remove_drift` does now work with chunked (dask powered) datasets (:pull:`164`).By `Julius Busecke `_ 69 | 70 | Internal Changes 71 | ~~~~~~~~~~~~~~~~ 72 | 73 | - Unit correction logic now uses pint-xarray under the hood (:pull:`160`, :issue:`31`). 74 | By `Tom Nicholas `_ and `Julius Busecke `_ 75 | 76 | 77 | .. _whats-new.0.5.0: 78 | 79 | v0.5.0 (2021/7/9) 80 | ------------------- 81 | 82 | New Features 83 | ~~~~~~~~~~~~ 84 | - :py:func:`~xmip.postprocessing.interpolate_grid_labels` enables batch combination of different grid_labels 85 | (e.g. from native to regridded and vice versa) using xesmf (:pull:`161`). By `Julius Busecke `_ 86 | 87 | - :py:func:`~xmip.drift_removal.match_and_remove_drift` enables batch detrending/drift-drift_removal 88 | from a dictionary of datasets (:pull:`155`). By `Julius Busecke `_ 89 | 90 | .. _whats-new.0.4.0: 91 | 92 | v0.4.0 (2021/6/9) 93 | ------------------- 94 | 95 | New Features 96 | ~~~~~~~~~~~~ 97 | 98 | - Started implementing metadata fixes in `combined_preprocessing` (:pull:`147`). By `Julius Busecke `_ 99 | 100 | - Added `drift_removal` which adds ability to align time of branched runs and remove drift from the parent (e.g. control) run (:pull:`126`, :pull:`148`). By `Julius Busecke `_ 101 | 102 | .. _whats-new.0.3.0: 103 | 104 | v0.3.0 (2021/6/9) 105 | ------------------- 106 | 107 | New Features 108 | ~~~~~~~~~~~~ 109 | - Added `postprocessing` module and added ability to parse metrics to multiple datasets in a dictionary (:pull:`110`, :pull:`117`). By `Julius Busecke `_ 110 | 111 | 112 | Internal Changes 113 | ~~~~~~~~~~~~~~~~ 114 | 115 | - Refactored CI internals, added dependabot, and some updated failcases (:pull:`121`, :pull:`128`, :pull:`129`, :pull:`133`, :pull:`134`, :pull:`135`). By `Julius Busecke `_ 116 | 117 | .. _whats-new.0.2.0: 118 | 119 | v0.2.0 (2021/4/9) 120 | ----------------- 121 | 122 | Breaking changes 123 | ~~~~~~~~~~~~~~~~ 124 | - Removed `replace_x_y_nominal_lat_lon` from `combined_preprocessing` due to ongoing performance issues with dask (:issue:`75`, :issue:`85`, :issue:`94`) (:pull:`104`). By `Julius Busecke `_ 125 | - Further refactor of `replace_x_y_nominal_lat_lon`, which avoids missing values in the dimension coordinates (:issue:`66`) (:pull:`79`). By `Julius Busecke `_ 126 | 127 | - Consistent treatment of cf-style bounds. The combination of `parse_lon_lat_bounds`,`maybe_convert_bounds_to_vertex`, `maybe_convert_vertex_to_bounds`, and `sort_vertex_order` applied on the dataset, assures that all datasets have both conventions available and the vertex order is the same. By `Julius Busecke `_ 128 | 129 | - New implementation of `replace_x_y_nominal_lat_lon`, which avoids duplicate values in the derived dimensions (:issue:`34`) (:pull:`35`). By `Julius Busecke `_ 130 | 131 | New Features 132 | ~~~~~~~~~~~~ 133 | - Create merged region masks with :py:func:`merged_mask` (:pull:`18`). By `Julius Busecke `_ 134 | 135 | 136 | Bug fixes 137 | ~~~~~~~~~ 138 | - Updated cmip6 catalog location for the pangeo gc archive (:issue:`80`) (:pull:`81`). By `Julius Busecke `_ 139 | 140 | 141 | Documentation 142 | ~~~~~~~~~~~~~ 143 | - Sphinx/RTD documentation, including contributor guide and new logo 🤗. (:issue:`27`) (:pull:`99`). 144 | 145 | Internal Changes 146 | ~~~~~~~~~~~~~~~~ 147 | - Adds options to skip extensive cloud ci by using [skip-ci] in commit message. Adds the ability to cancel previous GHA jobs to prevent long wait times for rapid pushes. 
(:pull:`99`) By `Julius Busecke `_. 148 | 149 | - Add `ni` and `nj` to the `rename_dict` dictionary in _preprocessing.py_ as dimensions to be corrected (:pull:`54`). By `Markus Ritschel `_ 150 | 151 | 152 | .. _whats-new.0.1.2: 153 | 154 | v0.1.2 155 | ------ 156 | 157 | 158 | New Features 159 | ~~~~~~~~~~~~ 160 | - Added more models, now supporting both ocean and atmospheric output for :py:func:`combined_preprocessing` (:pull:`14`). By `Julius Busecke `_ 161 | 162 | 163 | 164 | .. _whats-new.0.1.0: 165 | 166 | v0.1.0 (2/21/2020) 167 | ---------------------- 168 | 169 | Initial release. 170 | -------------------------------------------------------------------------------- /notebooks/.ipynb_checkpoints/parse_area_gn-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Extract areas from all available parameters and parse based on `source_id`" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stderr", 17 | "output_type": "stream", 18 | "text": [ 19 | "/srv/conda/envs/notebook/lib/python3.7/site-packages/intake/source/discovery.py:136: FutureWarning: The drivers ['stac-catalog', 'stac-collection', 'stac-item'] do not specify entry_points and were only discovered via a package scan. This may break in a future release of intake. The packages should be updated.\n", 20 | " FutureWarning)\n" 21 | ] 22 | } 23 | ], 24 | "source": [ 25 | "import intake\n", 26 | "import numpy as np" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 2, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "from cmip6_preprocessing.parse_static_metrics import parse_metrics" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "# define collection\n", 45 | "col = intake.open_esm_datastore(\"../../cmip6hack-ocean-bgc/catalogs/pangeo-cmip6.json\")" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "name": "stdout", 55 | "output_type": "stream", 56 | "text": [ 57 | "--> The keys in the returned dictionary of datasets are constructed as follows:\n", 58 | "\t'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'\n", 59 | "\n", 60 | "--> There will be 22 group(s)\n" 61 | ] 62 | } 63 | ], 64 | "source": [ 65 | "# # load a bunch of dataset with intake_esm\n", 66 | "# import warnings\n", 67 | "# with warnings.catch_warnings():\n", 68 | "# warnings.simplefilter(\"ignore\")\n", 69 | "# query = dict(experiment_id='piControl',\n", 70 | "# variable_id=['thetao'], grid_label='gn')\n", 71 | "# cat = col.search(**query)\n", 72 | "# cat.df\n", 73 | "# raw_data_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True}, cdf_kwargs={'chunks': {}})\n", 74 | "# raw_data_dict.keys()" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 5, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "data": { 84 | "text/plain": [ 85 | "\n", 86 | "X Axis (periodic):\n", 87 | " * center x" 88 | ] 89 | }, 90 | "execution_count": 5, 91 | "metadata": {}, 92 | "output_type": "execute_result" 93 | } 94 | ], 95 | "source": [ 96 | "# from xgcm import Grid\n", 97 | "# ds_test = raw_data_dict['CMIP.CNRM-CERFACS.CNRM-CM6-1.piControl.Omon.gn']\n", 98 | "# grid = Grid(ds_test, coords={'X':{'center':'x'}})\n", 99 | "# 
grid" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 5, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "# raw_data_dict['CMIP.CNRM-CERFACS.CNRM-CM6-1.piControl.Omon.gn']" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 6, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "# # now parse all areas...\n", 118 | "# with warnings.catch_warnings():\n", 119 | "# warnings.simplefilter(\"ignore\")\n", 120 | "# data_dict = parse_metrics(raw_data_dict, col)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "You can see there are 22 models with temp data!" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "But not each one of them has an area.... It turns out that the areas are spread all over the catalogue with no discenible system. BUT THEY ARE SOMEWHERE for pretty much every model...and they should be the same for a given `source_id` and `grid_spec`. So with `parse_metrics` we can parse the area into the datasets as coordinates." 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 7, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "# new_dict = {k:ds for k, ds in data_dict.items() if 'areacello' in ds.coords}" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 8, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "# len(new_dict)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "So we didnt get all of the models, but at least we got 17 to have an area for further calculations" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 9, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "# new_dict['CMIP.MIROC.MIROC6.piControl.Omon.gn'].thetao" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 10, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "query = dict(experiment_id='piControl',\n", 178 | " variable_id=['thetao', 'uo', 'vo'],table_id='Omon', grid_label='gn')" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 11, 184 | "metadata": {}, 185 | "outputs": [ 186 | { 187 | "name": "stdout", 188 | "output_type": "stream", 189 | "text": [ 190 | "--> The keys in the returned dictionary of datasets are constructed as follows:\n", 191 | "\t'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'\n", 192 | "\n", 193 | "--> There will be 22 group(s)\n", 194 | "--> The keys in the returned dictionary of datasets are constructed as follows:\n", 195 | "\t'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'\n", 196 | "\n", 197 | "--> There will be 21 group(s)\n", 198 | "--> The keys in the returned dictionary of datasets are constructed as follows:\n", 199 | "\t'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'\n", 200 | "\n", 201 | "--> There will be 21 group(s)\n", 202 | "BCC-CSM2-MR\n", 203 | "Grid Type: B detected\n", 204 | "BCC-ESM1\n", 205 | "Grid Type: B detected\n", 206 | "CAMS-CSM1-0\n", 207 | "Grid Type: B detected\n", 208 | "CanESM5\n", 209 | "Grid Type: C detected\n", 210 | "CNRM-CM6-1\n", 211 | "Grid Type: C detected\n", 212 | "\n", 213 | "Dimensions: (axis_nbounds: 2, lev: 75, member_id: 1, nvertex: 4, time: 6000, x: 362, x_left: 362, y: 294, y_left: 294)\n", 214 | "Coordinates:\n", 215 | " * x (x) 
int64 0 1 2 3 4 5 6 7 8 ... 354 355 356 357 358 359 360 361\n", 216 | " * y (y) int64 0 1 2 3 4 5 6 7 8 ... 286 287 288 289 290 291 292 293\n", 217 | " * lev (lev) float64 0.5058 1.556 2.668 ... 5.698e+03 5.902e+03\n", 218 | " * time (time) object 1850-01-16 12:00:00 ... 2349-12-16 12:00:00\n", 219 | " * member_id (member_id) \n", 225 | " bounds_lat (y, x, nvertex) float64 dask.array\n", 226 | " bounds_lon (y, x, nvertex) float64 dask.array\n", 227 | " lon (y, x) float64 dask.array\n", 228 | " lev_bounds (lev, axis_nbounds) float64 dask.array\n", 229 | " time_bounds (time, axis_nbounds) object dask.array\n", 230 | " thetao (member_id, time, lev, y, x) float32 dask.array\n", 231 | " lat_e (y, x) float64 dask.array\n", 232 | " lon_e (y, x) float64 dask.array\n", 233 | " uo (member_id, time, lev, y, x) float32 dask.array\n", 234 | " lat_n (y, x) float64 dask.array\n", 235 | " lon_n (y, x) float64 dask.array\n", 236 | " vo (member_id, time, lev, y, x) float32 dask.array\n", 237 | "Attributes:\n", 238 | " CMIP6_CV_version: cv=6.2.3.0-7-g2019642\n", 239 | " Conventions: CF-1.7 CMIP-6.2\n", 240 | " EXPID: CNRM-CM6-1_piControl_r1i1p1f2\n", 241 | " activity_id: CMIP\n", 242 | " arpege_minor_version: 6.3.1\n", 243 | " branch_method: standard\n", 244 | " branch_time_in_child: 0.0\n", 245 | " branch_time_in_parent: 273932.0\n", 246 | " contact: contact.cmip@meteo.fr\n", 247 | " creation_date: 2018-03-21T09:34:26Z\n", 248 | " data_specs_version: 01.00.21\n", 249 | " description: DECK: control\n", 250 | " dr2xml_md5sum: f996a989d4bc796959fe96cfda3db969\n", 251 | " dr2xml_version: 1.0\n", 252 | " experiment: pre-industrial control\n", 253 | " experiment_id: piControl\n", 254 | " external_variables: areacello volcello\n", 255 | " forcing_index: 2\n", 256 | " frequency: mon\n", 257 | " further_info_url: https://furtherinfo.es-doc.org/CMIP6.CNRM-CERFACS...\n", 258 | " grid: native ocean tri-polar grid with 105 k ocean cells\n", 259 | " grid_label: gn\n", 260 | " history: none\n", 261 | " initialization_index: 1\n", 262 | " institution: CNRM (Centre National de Recherches Meteorologiqu...\n", 263 | " institution_id: CNRM-CERFACS\n", 264 | " license: CMIP6 model data produced by CNRM-CERFACS is lice...\n", 265 | " mip_era: CMIP6\n", 266 | " name: /scratch/utmp/ftdir/voldoire/eclis/transfers/CNRM...\n", 267 | " nemo_gelato_commit: 49095b3accd5d4c_6524fe19b00467a\n", 268 | " nominal_resolution: 100 km\n", 269 | " parent_activity_id: CMIP\n", 270 | " parent_experiment_id: piControl-spinup\n", 271 | " parent_mip_era: CMIP6\n", 272 | " parent_source_id: CNRM-CM6-1\n", 273 | " parent_time_units: days since 1850-01-01 00:00:00\n", 274 | " parent_variant_label: r1i1p1f2\n", 275 | " physics_index: 1\n", 276 | " product: model-output\n", 277 | " realization_index: 1\n", 278 | " realm: ocean\n", 279 | " references: http://www.umr-cnrm.fr/cmip6/references\n", 280 | " source: CNRM-CM6-1 (2017): aerosol: prescribed monthly f...\n", 281 | " source_id: CNRM-CM6-1\n", 282 | " source_type: AOGCM\n", 283 | " sub_experiment: none\n", 284 | " sub_experiment_id: none\n", 285 | " table_id: Omon\n", 286 | " title: CNRM-CM6-1 model output prepared for CMIP6 / CMIP...\n", 287 | " tracking_id: hdl:21.14100/191fcb31-b7db-4857-9779-0ef8288da7bd...\n", 288 | " variable_id: thetao\n", 289 | " variant_info: . 
Information provided by this attribute may in s...\n", 290 | " variant_label: r1i1p1f2\n", 291 | " xios_commit: 1442-shuffle\n" 292 | ] 293 | }, 294 | { 295 | "ename": "ValueError", 296 | "evalue": "Couldn't find a center coordinate for axis Y", 297 | "output_type": "error", 298 | "traceback": [ 299 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 300 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 301 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mwarnings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcatch_warnings\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# these lines just make sure that the warnings dont clutter your notebook\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mwarnings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msimplefilter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"ignore\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mdata_dict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mread_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mquery\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 302 | "\u001b[0;32m~/cmip6_preprocessing/cmip6_preprocessing/preprocessing.py\u001b[0m in \u001b[0;36mread_data\u001b[0;34m(col, preview, required_variable_id, **kwargs)\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mmodelname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv_dict\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mv\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdata_dict\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mk\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'AWI-CM-1-1-MR'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodelname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 21\u001b[0;31m \u001b[0mdata_final\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmodelname\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfull_preprocessing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodelname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mplot\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 22\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdata_final\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 303 | "\u001b[0;32m~/cmip6_preprocessing/cmip6_preprocessing/preprocessing.py\u001b[0m in \u001b[0;36mfull_preprocessing\u001b[0;34m(dat_dict, modelname, tracer_ref, u_ref, v_ref, plot, verbose)\u001b[0m\n\u001b[1;32m 103\u001b[0m 
\u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 104\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 105\u001b[0;31m \u001b[0mgrid_temp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mGrid\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 106\u001b[0m \u001b[0mds\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrecreate_metrics\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrid_temp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 107\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mds\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 304 | "\u001b[0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/xgcm/grid.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, ds, check_dims, periodic, default_shifts, face_connections, coords, metrics)\u001b[0m\n\u001b[1;32m 830\u001b[0m \u001b[0mis_periodic\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 831\u001b[0m \u001b[0mdefault_shifts\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis_default_shifts\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 832\u001b[0;31m \u001b[0mcoords\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcoords\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 833\u001b[0m )\n\u001b[1;32m 834\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 305 | "\u001b[0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/xgcm/grid.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, ds, axis_name, periodic, default_shifts, coords)\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 92\u001b[0m \u001b[0;31m# fall back on comodo conventions\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 93\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcoords\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcomodo\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_axis_positions_and_coords\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[0;31m# self.coords is a dictionary with the following structure\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 306 | "\u001b[0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/xgcm/comodo.py\u001b[0m in \u001b[0;36mget_axis_positions_and_coords\u001b[0;34m(ds, axis_name)\u001b[0m\n\u001b[1;32m 83\u001b[0m }\n\u001b[1;32m 84\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcoords_without_axis_shift\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 85\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Couldn't find a center coordinate for axis %s\"\u001b[0m 
\u001b[0;34m%\u001b[0m \u001b[0maxis_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 86\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcoords_without_axis_shift\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 87\u001b[0m raise ValueError(\n", 307 | "\u001b[0;31mValueError\u001b[0m: Couldn't find a center coordinate for axis Y" 308 | ] 309 | } 310 | ], 311 | "source": [ 312 | "# load the same thing with preprocessing\n", 313 | "from cmip6_preprocessing.preprocessing import read_data\n", 314 | "with warnings.catch_warnings(): # these lines just make sure that the warnings dont clutter your notebook\n", 315 | " warnings.simplefilter(\"ignore\")\n", 316 | " data_dict = read_data(col, **query)" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [ 325 | "data_dict" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": null, 331 | "metadata": {}, 332 | "outputs": [], 333 | "source": [ 334 | "parse_metrics(data_dict, col, rename=True) #rename is important to get the consistent naming!" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": null, 340 | "metadata": {}, 341 | "outputs": [], 342 | "source": [] 343 | } 344 | ], 345 | "metadata": { 346 | "kernelspec": { 347 | "display_name": "Python 3", 348 | "language": "python", 349 | "name": "python3" 350 | }, 351 | "language_info": { 352 | "codemirror_mode": { 353 | "name": "ipython", 354 | "version": 3 355 | }, 356 | "file_extension": ".py", 357 | "mimetype": "text/x-python", 358 | "name": "python", 359 | "nbconvert_exporter": "python", 360 | "pygments_lexer": "ipython3", 361 | "version": "3.7.3" 362 | } 363 | }, 364 | "nbformat": 4, 365 | "nbformat_minor": 4 366 | } 367 | -------------------------------------------------------------------------------- /notebooks/parse_area_gn.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Extract areas from all available parameters and parse based on `source_id`" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stderr", 17 | "output_type": "stream", 18 | "text": [ 19 | "/srv/conda/envs/notebook/lib/python3.7/site-packages/intake/source/discovery.py:136: FutureWarning: The drivers ['stac-catalog', 'stac-collection', 'stac-item'] do not specify entry_points and were only discovered via a package scan. This may break in a future release of intake. 
The packages should be updated.\n", 20 | " FutureWarning)\n" 21 | ] 22 | } 23 | ], 24 | "source": [ 25 | "import intake\n", 26 | "import numpy as np" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 2, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "from xmip.parse_static_metrics import parse_metrics" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "# define collection\n", 45 | "col = intake.open_esm_datastore(\"../../cmip6hack-ocean-bgc/catalogs/pangeo-cmip6.json\")" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "name": "stdout", 55 | "output_type": "stream", 56 | "text": [ 57 | "--> The keys in the returned dictionary of datasets are constructed as follows:\n", 58 | "\t'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'\n", 59 | "\n", 60 | "--> There will be 22 group(s)\n" 61 | ] 62 | } 63 | ], 64 | "source": [ 65 | "# # load a bunch of dataset with intake_esm\n", 66 | "# import warnings\n", 67 | "# with warnings.catch_warnings():\n", 68 | "# warnings.simplefilter(\"ignore\")\n", 69 | "# query = dict(experiment_id='piControl',\n", 70 | "# variable_id=['thetao'], grid_label='gn')\n", 71 | "# cat = col.search(**query)\n", 72 | "# cat.df\n", 73 | "# raw_data_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True}, cdf_kwargs={'chunks': {}})\n", 74 | "# raw_data_dict.keys()" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 5, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "data": { 84 | "text/plain": [ 85 | "\n", 86 | "X Axis (periodic):\n", 87 | " * center x" 88 | ] 89 | }, 90 | "execution_count": 5, 91 | "metadata": {}, 92 | "output_type": "execute_result" 93 | } 94 | ], 95 | "source": [ 96 | "# from xgcm import Grid\n", 97 | "# ds_test = raw_data_dict['CMIP.CNRM-CERFACS.CNRM-CM6-1.piControl.Omon.gn']\n", 98 | "# grid = Grid(ds_test, coords={'X':{'center':'x'}})\n", 99 | "# grid" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 5, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "# raw_data_dict['CMIP.CNRM-CERFACS.CNRM-CM6-1.piControl.Omon.gn']" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 6, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "# # now parse all areas...\n", 118 | "# with warnings.catch_warnings():\n", 119 | "# warnings.simplefilter(\"ignore\")\n", 120 | "# data_dict = parse_metrics(raw_data_dict, col)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "You can see there are 22 models with temp data!" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "But not each one of them has an area.... It turns out that the areas are spread all over the catalogue with no discenible system. BUT THEY ARE SOMEWHERE for pretty much every model...and they should be the same for a given `source_id` and `grid_spec`. So with `parse_metrics` we can parse the area into the datasets as coordinates." 
135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 7, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "# new_dict = {k:ds for k, ds in data_dict.items() if 'areacello' in ds.coords}" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 8, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "# len(new_dict)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "So we didnt get all of the models, but at least we got 17 to have an area for further calculations" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 9, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "# new_dict['CMIP.MIROC.MIROC6.piControl.Omon.gn'].thetao" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 10, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "query = dict(experiment_id='piControl',\n", 178 | " variable_id=['thetao', 'uo', 'vo'],table_id='Omon', grid_label='gn')" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 11, 184 | "metadata": {}, 185 | "outputs": [ 186 | { 187 | "name": "stdout", 188 | "output_type": "stream", 189 | "text": [ 190 | "--> The keys in the returned dictionary of datasets are constructed as follows:\n", 191 | "\t'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'\n", 192 | "\n", 193 | "--> There will be 22 group(s)\n", 194 | "--> The keys in the returned dictionary of datasets are constructed as follows:\n", 195 | "\t'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'\n", 196 | "\n", 197 | "--> There will be 21 group(s)\n", 198 | "--> The keys in the returned dictionary of datasets are constructed as follows:\n", 199 | "\t'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'\n", 200 | "\n", 201 | "--> There will be 21 group(s)\n", 202 | "BCC-CSM2-MR\n", 203 | "Grid Type: B detected\n", 204 | "BCC-ESM1\n", 205 | "Grid Type: B detected\n", 206 | "CAMS-CSM1-0\n", 207 | "Grid Type: B detected\n", 208 | "CanESM5\n", 209 | "Grid Type: C detected\n", 210 | "CNRM-CM6-1\n", 211 | "Grid Type: C detected\n", 212 | "\n", 213 | "Dimensions: (axis_nbounds: 2, lev: 75, member_id: 1, nvertex: 4, time: 6000, x: 362, x_left: 362, y: 294, y_left: 294)\n", 214 | "Coordinates:\n", 215 | " * x (x) int64 0 1 2 3 4 5 6 7 8 ... 354 355 356 357 358 359 360 361\n", 216 | " * y (y) int64 0 1 2 3 4 5 6 7 8 ... 286 287 288 289 290 291 292 293\n", 217 | " * lev (lev) float64 0.5058 1.556 2.668 ... 5.698e+03 5.902e+03\n", 218 | " * time (time) object 1850-01-16 12:00:00 ... 
2349-12-16 12:00:00\n", 219 | " * member_id (member_id) \n", 225 | " bounds_lat (y, x, nvertex) float64 dask.array\n", 226 | " bounds_lon (y, x, nvertex) float64 dask.array\n", 227 | " lon (y, x) float64 dask.array\n", 228 | " lev_bounds (lev, axis_nbounds) float64 dask.array\n", 229 | " time_bounds (time, axis_nbounds) object dask.array\n", 230 | " thetao (member_id, time, lev, y, x) float32 dask.array\n", 231 | " lat_e (y, x) float64 dask.array\n", 232 | " lon_e (y, x) float64 dask.array\n", 233 | " uo (member_id, time, lev, y, x) float32 dask.array\n", 234 | " lat_n (y, x) float64 dask.array\n", 235 | " lon_n (y, x) float64 dask.array\n", 236 | " vo (member_id, time, lev, y, x) float32 dask.array\n", 237 | "Attributes:\n", 238 | " CMIP6_CV_version: cv=6.2.3.0-7-g2019642\n", 239 | " Conventions: CF-1.7 CMIP-6.2\n", 240 | " EXPID: CNRM-CM6-1_piControl_r1i1p1f2\n", 241 | " activity_id: CMIP\n", 242 | " arpege_minor_version: 6.3.1\n", 243 | " branch_method: standard\n", 244 | " branch_time_in_child: 0.0\n", 245 | " branch_time_in_parent: 273932.0\n", 246 | " contact: contact.cmip@meteo.fr\n", 247 | " creation_date: 2018-03-21T09:34:26Z\n", 248 | " data_specs_version: 01.00.21\n", 249 | " description: DECK: control\n", 250 | " dr2xml_md5sum: f996a989d4bc796959fe96cfda3db969\n", 251 | " dr2xml_version: 1.0\n", 252 | " experiment: pre-industrial control\n", 253 | " experiment_id: piControl\n", 254 | " external_variables: areacello volcello\n", 255 | " forcing_index: 2\n", 256 | " frequency: mon\n", 257 | " further_info_url: https://furtherinfo.es-doc.org/CMIP6.CNRM-CERFACS...\n", 258 | " grid: native ocean tri-polar grid with 105 k ocean cells\n", 259 | " grid_label: gn\n", 260 | " history: none\n", 261 | " initialization_index: 1\n", 262 | " institution: CNRM (Centre National de Recherches Meteorologiqu...\n", 263 | " institution_id: CNRM-CERFACS\n", 264 | " license: CMIP6 model data produced by CNRM-CERFACS is lice...\n", 265 | " mip_era: CMIP6\n", 266 | " name: /scratch/utmp/ftdir/voldoire/eclis/transfers/CNRM...\n", 267 | " nemo_gelato_commit: 49095b3accd5d4c_6524fe19b00467a\n", 268 | " nominal_resolution: 100 km\n", 269 | " parent_activity_id: CMIP\n", 270 | " parent_experiment_id: piControl-spinup\n", 271 | " parent_mip_era: CMIP6\n", 272 | " parent_source_id: CNRM-CM6-1\n", 273 | " parent_time_units: days since 1850-01-01 00:00:00\n", 274 | " parent_variant_label: r1i1p1f2\n", 275 | " physics_index: 1\n", 276 | " product: model-output\n", 277 | " realization_index: 1\n", 278 | " realm: ocean\n", 279 | " references: http://www.umr-cnrm.fr/cmip6/references\n", 280 | " source: CNRM-CM6-1 (2017): aerosol: prescribed monthly f...\n", 281 | " source_id: CNRM-CM6-1\n", 282 | " source_type: AOGCM\n", 283 | " sub_experiment: none\n", 284 | " sub_experiment_id: none\n", 285 | " table_id: Omon\n", 286 | " title: CNRM-CM6-1 model output prepared for CMIP6 / CMIP...\n", 287 | " tracking_id: hdl:21.14100/191fcb31-b7db-4857-9779-0ef8288da7bd...\n", 288 | " variable_id: thetao\n", 289 | " variant_info: . 
Information provided by this attribute may in s...\n", 290 | " variant_label: r1i1p1f2\n", 291 | " xios_commit: 1442-shuffle\n" 292 | ] 293 | }, 294 | { 295 | "ename": "ValueError", 296 | "evalue": "Couldn't find a center coordinate for axis Y", 297 | "output_type": "error", 298 | "traceback": [ 299 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 300 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 301 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mwarnings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcatch_warnings\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# these lines just make sure that the warnings dont clutter your notebook\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mwarnings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msimplefilter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"ignore\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mdata_dict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mread_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mquery\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 302 | "\u001b[0;32m~/xmip/xmip/preprocessing.py\u001b[0m in \u001b[0;36mread_data\u001b[0;34m(col, preview, required_variable_id, **kwargs)\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mmodelname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv_dict\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mv\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdata_dict\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mk\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'AWI-CM-1-1-MR'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodelname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 21\u001b[0;31m \u001b[0mdata_final\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmodelname\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfull_preprocessing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodelname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mplot\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 22\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdata_final\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 303 | "\u001b[0;32m~/xmip/xmip/preprocessing.py\u001b[0m in \u001b[0;36mfull_preprocessing\u001b[0;34m(dat_dict, modelname, tracer_ref, u_ref, v_ref, plot, verbose)\u001b[0m\n\u001b[1;32m 103\u001b[0m 
\u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 104\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 105\u001b[0;31m \u001b[0mgrid_temp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mGrid\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 106\u001b[0m \u001b[0mds\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrecreate_metrics\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrid_temp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 107\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mds\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 304 | "\u001b[0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/xgcm/grid.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, ds, check_dims, periodic, default_shifts, face_connections, coords, metrics)\u001b[0m\n\u001b[1;32m 830\u001b[0m \u001b[0mis_periodic\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 831\u001b[0m \u001b[0mdefault_shifts\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis_default_shifts\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 832\u001b[0;31m \u001b[0mcoords\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcoords\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 833\u001b[0m )\n\u001b[1;32m 834\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 305 | "\u001b[0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/xgcm/grid.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, ds, axis_name, periodic, default_shifts, coords)\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 92\u001b[0m \u001b[0;31m# fall back on comodo conventions\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 93\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcoords\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcomodo\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_axis_positions_and_coords\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[0;31m# self.coords is a dictionary with the following structure\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 306 | "\u001b[0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/xgcm/comodo.py\u001b[0m in \u001b[0;36mget_axis_positions_and_coords\u001b[0;34m(ds, axis_name)\u001b[0m\n\u001b[1;32m 83\u001b[0m }\n\u001b[1;32m 84\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcoords_without_axis_shift\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 85\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Couldn't find a center coordinate for axis %s\"\u001b[0m 
\u001b[0;34m%\u001b[0m \u001b[0maxis_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 86\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcoords_without_axis_shift\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 87\u001b[0m raise ValueError(\n", 307 | "\u001b[0;31mValueError\u001b[0m: Couldn't find a center coordinate for axis Y" 308 | ] 309 | } 310 | ], 311 | "source": [ 312 | "# load the same thing with preprocessing\n", 313 | "from xmip.preprocessing import read_data\n", 314 | "with warnings.catch_warnings(): # these lines just make sure that the warnings dont clutter your notebook\n", 315 | " warnings.simplefilter(\"ignore\")\n", 316 | " data_dict = read_data(col, **query)" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [ 325 | "data_dict" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": null, 331 | "metadata": {}, 332 | "outputs": [], 333 | "source": [ 334 | "parse_metrics(data_dict, col, rename=True) #rename is important to get the consistent naming!" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": null, 340 | "metadata": {}, 341 | "outputs": [], 342 | "source": [] 343 | } 344 | ], 345 | "metadata": { 346 | "kernelspec": { 347 | "display_name": "Python 3", 348 | "language": "python", 349 | "name": "python3" 350 | }, 351 | "language_info": { 352 | "codemirror_mode": { 353 | "name": "ipython", 354 | "version": 3 355 | }, 356 | "file_extension": ".py", 357 | "mimetype": "text/x-python", 358 | "name": "python", 359 | "nbconvert_exporter": "python", 360 | "pygments_lexer": "ipython3", 361 | "version": "3.7.6" 362 | } 363 | }, 364 | "nbformat": 4, 365 | "nbformat_minor": 4 366 | } 367 | -------------------------------------------------------------------------------- /notebooks/test.yaml: -------------------------------------------------------------------------------- 1 | ACCESS-CM2: 2 | gn: 3 | axis_shift: 4 | X: right 5 | Y: right 6 | ACCESS-ESM1-5: 7 | gn: 8 | axis_shift: 9 | X: right 10 | Y: right 11 | BCC-CSM2-MR: 12 | gn: 13 | axis_shift: 14 | X: right 15 | Y: right 16 | BCC-ESM1: 17 | gn: 18 | axis_shift: 19 | X: right 20 | Y: right 21 | CAMS-CSM1-0: 22 | gn: 23 | axis_shift: 24 | X: right 25 | Y: right 26 | CAS-ESM2-0: 27 | gn: 28 | axis_shift: 29 | X: left 30 | Y: left 31 | CESM1-1-CAM5-CMIP5: 32 | gn: 33 | axis_shift: 34 | X: left 35 | Y: left 36 | gr: 37 | axis_shift: 38 | X: left 39 | Y: left 40 | CESM2: 41 | gn: 42 | axis_shift: 43 | X: left 44 | Y: right 45 | gr: 46 | axis_shift: 47 | X: left 48 | Y: left 49 | CESM2-FV2: 50 | gn: 51 | axis_shift: 52 | X: left 53 | Y: right 54 | gr: 55 | axis_shift: 56 | X: left 57 | Y: left 58 | CESM2-WACCM: 59 | gn: 60 | axis_shift: 61 | X: left 62 | Y: right 63 | gr: 64 | axis_shift: 65 | X: left 66 | Y: left 67 | CESM2-WACCM-FV2: 68 | gn: 69 | axis_shift: 70 | X: left 71 | Y: right 72 | gr: 73 | axis_shift: 74 | X: left 75 | Y: left 76 | CIESM: 77 | gn: 78 | axis_shift: 79 | X: left 80 | Y: left 81 | CMCC-CM2-HR4: 82 | gn: 83 | axis_shift: 84 | X: left 85 | Y: left 86 | CMCC-ESM2: 87 | gn: 88 | axis_shift: 89 | X: left 90 | Y: left 91 | CNRM-CM6-1: 92 | gn: 93 | axis_shift: 94 | X: right 95 | Y: right 96 | gr1: 97 | axis_shift: 98 | X: left 99 | Y: left 100 | CNRM-CM6-1-HR: 101 | 
gn: 102 | axis_shift: 103 | X: left 104 | Y: right 105 | CNRM-ESM2-1: 106 | gn: 107 | axis_shift: 108 | X: right 109 | Y: right 110 | gr1: 111 | axis_shift: 112 | X: left 113 | Y: left 114 | CanESM5: 115 | gn: 116 | axis_shift: 117 | X: right 118 | Y: right 119 | CanESM5-CanOE: 120 | gn: 121 | axis_shift: 122 | X: right 123 | Y: right 124 | E3SM-1-0: 125 | gr: 126 | axis_shift: 127 | X: left 128 | Y: left 129 | E3SM-1-1: 130 | gr: 131 | axis_shift: 132 | X: left 133 | Y: left 134 | E3SM-1-1-ECA: 135 | gr: 136 | axis_shift: 137 | X: left 138 | Y: left 139 | EC-Earth3: 140 | gn: 141 | axis_shift: 142 | X: right 143 | Y: right 144 | gr: 145 | axis_shift: 146 | X: left 147 | Y: left 148 | EC-Earth3-AerChem: 149 | gn: 150 | axis_shift: 151 | X: left 152 | Y: left 153 | EC-Earth3-LR: 154 | gn: 155 | axis_shift: 156 | X: right 157 | Y: right 158 | EC-Earth3-Veg: 159 | gn: 160 | axis_shift: 161 | X: right 162 | Y: right 163 | gr: 164 | axis_shift: 165 | X: left 166 | Y: left 167 | EC-Earth3-Veg-LR: 168 | gn: 169 | axis_shift: 170 | X: left 171 | Y: left 172 | FGOALS-f3-L: 173 | gn: 174 | axis_shift: 175 | X: left 176 | Y: left 177 | FGOALS-g3: 178 | gn: 179 | axis_shift: 180 | X: left 181 | Y: left 182 | FIO-ESM-2-0: 183 | gn: 184 | axis_shift: 185 | X: left 186 | Y: right 187 | GFDL-CM4: 188 | gn: 189 | axis_shift: 190 | X: left 191 | Y: left 192 | gr: 193 | axis_shift: 194 | X: left 195 | Y: left 196 | GFDL-ESM2M: 197 | gn: 198 | axis_shift: 199 | X: left 200 | Y: left 201 | GFDL-ESM4: 202 | gn: 203 | axis_shift: 204 | X: left 205 | Y: left 206 | gr: 207 | axis_shift: 208 | X: left 209 | Y: left 210 | GFDL-OM4p5B: 211 | gn: 212 | axis_shift: 213 | X: left 214 | Y: left 215 | gr: 216 | axis_shift: 217 | X: left 218 | Y: left 219 | GISS-E2-1-G: 220 | gn: 221 | axis_shift: 222 | X: left 223 | Y: left 224 | GISS-E2-1-G-CC: 225 | gn: 226 | axis_shift: 227 | X: right 228 | Y: left 229 | GISS-E2-1-H: 230 | gn: 231 | axis_shift: 232 | X: left 233 | Y: left 234 | gr: 235 | axis_shift: 236 | X: left 237 | Y: left 238 | GISS-E2-2-G: 239 | gn: 240 | axis_shift: 241 | X: right 242 | Y: left 243 | HadGEM3-GC31-LL: 244 | gn: 245 | axis_shift: 246 | X: right 247 | Y: right 248 | HadGEM3-GC31-MM: 249 | gn: 250 | axis_shift: 251 | X: left 252 | Y: right 253 | IITM-ESM: 254 | gn: 255 | axis_shift: 256 | X: left 257 | Y: left 258 | INM-CM4-8: 259 | gr1: 260 | axis_shift: 261 | X: left 262 | Y: left 263 | INM-CM5-0: 264 | gr1: 265 | axis_shift: 266 | X: left 267 | Y: left 268 | IPSL-CM6A-LR: 269 | gn: 270 | axis_shift: 271 | X: right 272 | Y: right 273 | KACE-1-0-G: 274 | gr: 275 | axis_shift: 276 | X: left 277 | Y: left 278 | KIOST-ESM: 279 | gr1: 280 | axis_shift: 281 | X: left 282 | Y: left 283 | MCM-UA-1-0: 284 | gn: 285 | axis_shift: 286 | X: right 287 | Y: right 288 | MIROC-ES2L: 289 | gn: 290 | axis_shift: 291 | X: right 292 | Y: right 293 | gr1: 294 | axis_shift: 295 | X: left 296 | Y: left 297 | MIROC6: 298 | gn: 299 | axis_shift: 300 | X: right 301 | Y: right 302 | MPI-ESM1-2-HR: 303 | gn: 304 | axis_shift: 305 | X: right 306 | Y: left 307 | MRI-ESM2-0: 308 | gn: 309 | axis_shift: 310 | X: left 311 | Y: left 312 | gr: 313 | axis_shift: 314 | X: left 315 | Y: left 316 | NESM3: 317 | gn: 318 | axis_shift: 319 | X: right 320 | Y: right 321 | NorCPM1: 322 | gn: 323 | axis_shift: 324 | X: left 325 | Y: left 326 | gr: 327 | axis_shift: 328 | X: left 329 | Y: left 330 | NorESM1-F: 331 | gn: 332 | axis_shift: 333 | X: left 334 | Y: left 335 | NorESM2-LM: 336 | gn: 337 | axis_shift: 338 | X: right 339 | Y: left 340 
| gr: 341 | axis_shift: 342 | X: right 343 | Y: left 344 | NorESM2-MM: 345 | gn: 346 | axis_shift: 347 | X: right 348 | Y: left 349 | gr: 350 | axis_shift: 351 | X: right 352 | Y: left 353 | SAM0-UNICON: 354 | gn: 355 | axis_shift: 356 | X: left 357 | Y: right 358 | TaiESM1: 359 | gn: 360 | axis_shift: 361 | X: left 362 | Y: right 363 | UKESM1-0-LL: 364 | gn: 365 | axis_shift: 366 | X: right 367 | Y: right 368 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=45", "wheel", "setuptools_scm[toml]>=6.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.setuptools_scm] 6 | 7 | [tool.interrogate] 8 | ignore-init-method = true 9 | ignore-init-module = false 10 | ignore-magic = false 11 | ignore-semiprivate = true 12 | ignore-private = true 13 | ignore-property-decorators = true 14 | ignore-module = false 15 | fail-under = 95 16 | # This somehow does not work...the excludes are defined in the pre-commit-config.yaml for now 17 | # exclude = ["setup.py", "docs", "tests/*", "xmip/_version.py"] 18 | verbose = 1 19 | quiet = false 20 | color = true 21 | 22 | [tool.isort] 23 | known_third_party = ["cf_xarray", "cftime", "dask", "fsspec", "numpy", "pint", "pint_xarray", "pkg_resources", "pytest", "setuptools", "xarray", "xarrayutils", "xesmf", "xgcm", "yaml"] 24 | 25 | 26 | [tool.pytest.ini_options] 27 | minversion = "6.0" 28 | addopts = " -vv -rXfE" 29 | # only test the root level, otherwise it picks up the tests of the project template 30 | testpaths = [ 31 | "tests", 32 | ] 33 | -------------------------------------------------------------------------------- /readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | build: 3 | os: ubuntu-20.04 4 | tools: 5 | python: mambaforge-4.10 6 | sphinx: 7 | configuration: docs/conf.py 8 | python: 9 | install: 10 | - method: setuptools 11 | path: . 12 | conda: 13 | environment: docs/environment.yml 14 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [sdist] 2 | formats = gztar 3 | 4 | [check-manifest] 5 | ignore = 6 | *.yml 7 | *.yaml 8 | .coveragerc 9 | docs 10 | docs/* 11 | *.enc 12 | notebooks 13 | notebooks/* 14 | tests 15 | tests/* 16 | 17 | [flake8] 18 | max-line-length = 105 19 | select = C,E,F,W,B,B950 20 | ignore = E203, E501, W503 21 | exclude = 22 | xmip/_version.py 23 | docs/* 24 | __init__.py 25 | 26 | 27 | [metadata] 28 | name = xmip 29 | description = Analysis ready CMIP6 data the easy way 30 | author = xmip developers 31 | url=https://github.com/jbusecke/xmip 32 | long_description = file: README.md 33 | long_description_content_type = text/markdown 34 | license = Apache 35 | license_file = LICENSE.txt 36 | 37 | ## These need to be filled in by the author! 
38 | # For details see: https://pypi.org/classifiers/ 39 | 40 | classifiers = 41 | Development Status :: 4 - Beta 42 | Topic :: Scientific/Engineering 43 | Intended Audience :: Science/Research 44 | Operating System :: OS Independent 45 | Programming Language :: Python 46 | Programming Language :: Python :: 3 47 | Programming Language :: Python :: 3.8 48 | Programming Language :: Python :: 3.9 49 | Programming Language :: Python :: 3.10 50 | License :: OSI Approved :: Apache Software License 51 | 52 | ## Add your email here 53 | author_email = jbusecke@princeton.edu 54 | 55 | 56 | ### make sure to fill in your dependencies! 57 | [options] 58 | install_requires = 59 | numpy 60 | pandas 61 | xarray>=0.17.0 62 | xgcm<0.7.0 63 | cftime 64 | xarrayutils 65 | pint 66 | cf_xarray >= 0.6.0 67 | pint-xarray 68 | setup_requires= 69 | setuptools 70 | setuptools-scm 71 | python_requires = >=3.8 72 | ################ Up until here 73 | 74 | include_package_data = True 75 | zip_safe = False 76 | packages = find: 77 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | 4 | setup( 5 | use_scm_version={ 6 | "write_to": "xmip/_version.py", 7 | "write_to_template": '__version__ = "{version}"', 8 | "tag_regex": r"^(?Pv)?(?P[^\+]+)(?P.*)?$", 9 | }, 10 | setup_requires=["setuptools>=45", "setuptools_scm[toml]>=6.0"], 11 | ) 12 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jbusecke/xMIP/108266e72d01c173e15cdf4ca00612a0d4f0a8ca/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_grids.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import xarray as xr 4 | 5 | from xgcm import Grid 6 | from xgcm.autogenerate import generate_grid_ds 7 | 8 | from xmip.grids import ( 9 | _interp_vertex_to_bounds, 10 | _parse_bounds_vertex, 11 | combine_staggered_grid, 12 | create_full_grid, 13 | detect_shift, 14 | distance, 15 | distance_deg, 16 | recreate_metrics, 17 | ) 18 | 19 | 20 | def _add_small_rand(da): 21 | return da + (np.random.rand(*da.shape) * 0.05) 22 | 23 | 24 | def _test_data(grid_label="gn", z_axis=True): 25 | xt = np.arange(4) + 1 26 | yt = np.arange(5) + 1 27 | zt = np.arange(6) + 1 28 | 29 | x = xr.DataArray(xt, coords=[("x", xt)]) 30 | y = xr.DataArray(yt, coords=[("y", yt)]) 31 | lev = xr.DataArray(zt, coords=[("lev", zt)]) 32 | 33 | # Need to add a tracer here to get the tracer dimsuffix 34 | coords = [("x", x.data), ("y", y.data)] 35 | data = np.random.rand(len(xt), len(yt)) 36 | dims = ["x", "y"] 37 | 38 | if z_axis: 39 | coords.append(("lev", lev.data)) 40 | data = np.random.rand(len(x), len(y), len(lev)) 41 | dims = ["x", "y", "lev"] 42 | 43 | tr = xr.DataArray( 44 | data, 45 | dims=dims, 46 | coords=coords, 47 | ) 48 | 49 | lon_raw = xr.DataArray(xt, coords=[("x", xt)]) 50 | lat_raw = xr.DataArray(yt, coords=[("y", yt)]) 51 | lon = lon_raw * xr.ones_like(lat_raw) 52 | lat = xr.ones_like(lon_raw) * lat_raw 53 | 54 | lon_bounds_e = lon + 0.5 55 | lon_bounds_w = lon - 0.5 + (np.random.rand(*lon.shape) * 0.05) 56 | lat_bounds_n = lat + 0.5 + (np.random.rand(*lon.shape) * 0.05) 57 | lat_bounds_s = lat - 0.5 + (np.random.rand(*lon.shape) * 0.05) 
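    # Below: build cell-edge bounds (stacked along a "bnds" dimension) and the four
    # cell-corner positions (stacked along a "vertex" dimension) around every tracer
    # point. Each edge is perturbed with a small random offset via _add_small_rand,
    # so the synthetic grid is close to, but not exactly, a regular 1-degree grid.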
58 | 59 | lon_bounds = xr.concat( 60 | [_add_small_rand(lon_bounds_w), _add_small_rand(lon_bounds_w)], dim="bnds" 61 | ) 62 | lat_bounds = xr.concat( 63 | [_add_small_rand(lat_bounds_s), _add_small_rand(lat_bounds_n)], dim="bnds" 64 | ) 65 | 66 | if z_axis: 67 | lev_bounds = xr.concat( 68 | [_add_small_rand(lev - 0.5), _add_small_rand(lev + 0.5)], dim="bnds" 69 | ) 70 | 71 | lon_verticies = xr.concat( 72 | [ 73 | _add_small_rand(lon_bounds_e), 74 | _add_small_rand(lon_bounds_e), 75 | _add_small_rand(lon_bounds_w), 76 | _add_small_rand(lon_bounds_w), 77 | ], 78 | dim="vertex", 79 | ) 80 | lat_verticies = xr.concat( 81 | [ 82 | _add_small_rand(lat_bounds_s), 83 | _add_small_rand(lat_bounds_n), 84 | _add_small_rand(lat_bounds_n), 85 | _add_small_rand(lat_bounds_s), 86 | ], 87 | dim="vertex", 88 | ) 89 | 90 | ds = xr.Dataset({"base": tr}) 91 | 92 | dataset_coords = dict( 93 | lon=lon, 94 | lat=lat, 95 | lon_bounds=lon_bounds, 96 | lat_bounds=lat_bounds, 97 | lon_verticies=lon_verticies, 98 | lat_verticies=lat_verticies, 99 | ) 100 | 101 | if z_axis: 102 | dataset_coords["lev_bounds"] = lev_bounds 103 | 104 | ds = ds.assign_coords(dataset_coords) 105 | ds.attrs["source_id"] = "test_model" 106 | ds.attrs["grid_label"] = grid_label 107 | ds.attrs["variable_id"] = "base" 108 | return ds 109 | 110 | 111 | def test_parse_bounds_vertex(): 112 | lon_b = xr.DataArray(np.array([0, 1, 2, 3]), dims=["vertex"]) 113 | lat_b = xr.DataArray(np.array([10, 11, 12, 13]), dims=["vertex"]) 114 | 115 | data = np.random.rand(4) 116 | 117 | da = xr.DataArray( 118 | data, dims=["vertex"], coords={"lon_verticies": lon_b, "lat_verticies": lat_b} 119 | ) 120 | test = _parse_bounds_vertex(da, "vertex", position=[0, 3]) 121 | print(test) 122 | expected = (da.isel(vertex=0).load().data, da.isel(vertex=3).load().data) 123 | print(expected) 124 | assert test == expected 125 | 126 | 127 | def test_interp_vertex_to_bounds(): 128 | da = xr.DataArray(np.arange(4), dims=["vertex"]) 129 | # test interp on the y axis 130 | expected = xr.DataArray(np.array([1.5, 1.5]), dims=["bnds"]) 131 | xr.testing.assert_equal(_interp_vertex_to_bounds(da, "y"), expected) 132 | # test interp on the x axis 133 | expected = xr.DataArray(np.array([0.5, 2.5]), dims=["bnds"]) 134 | xr.testing.assert_equal(_interp_vertex_to_bounds(da, "x"), expected) 135 | 136 | 137 | def test_distance_deg(): 138 | lon0, lat0, lon1, lat1 = 120, 30, 121, 31 139 | delta_lon, delta_lat = distance_deg(lon0, lat0, lon1, lat1) 140 | assert delta_lon == 1.0 141 | assert delta_lat == 1.0 142 | 143 | lon0, lat0, lon1, lat1 = 360, 30, 1, 31 144 | delta_lon, delta_lat = distance_deg(lon0, lat0, lon1, lat1) 145 | assert delta_lon == 1.0 146 | assert delta_lat == 1.0 147 | 148 | lon0, lat0, lon1, lat1 = 300, 30, 301, 30.09 149 | delta_lon, delta_lat = distance_deg(lon0, lat0, lon1, lat1) 150 | assert delta_lon == 1.0 151 | assert delta_lat == 0.0 152 | 153 | 154 | @pytest.mark.parametrize("lon", [0, 90, 120]) 155 | @pytest.mark.parametrize("lat", [0, 10, 45]) 156 | def test_distance(lon, lat): 157 | Re = 6.378e6 158 | # test straight lat line 159 | lon0, lat0, lon1, lat1 = lon, lat, lon, lat + 1 160 | dist = distance(lon0, lat0, lon1, lat1) 161 | np.testing.assert_allclose(dist, Re * (np.pi * 1.0 / 180)) 162 | 163 | # test straight lon line 164 | lon0, lat0, lon1, lat1 = lon, lat, lon + 1, lat 165 | dist = distance(lon0, lat0, lon1, lat1) 166 | np.testing.assert_allclose( 167 | dist, Re * (np.pi * 1.0 / 180) * np.cos(np.pi * lat0 / 180) 168 | ) 169 | 170 | 171 | # TODO: inner and 
outer (needs to be implemented in xgcm autogenerate first) 172 | @pytest.mark.parametrize("xshift", ["left", "right"]) 173 | @pytest.mark.parametrize("yshift", ["left", "right"]) 174 | @pytest.mark.parametrize("z_axis", [True, False]) 175 | def test_recreate_metrics(xshift, yshift, z_axis): 176 | # reconstruct all the metrics by hand and compare to inferred output 177 | 178 | # * For now this is a regular lon lat grid. Might need to add some tests for more complex grids. 179 | # Then again. This will not do a great job for those.... 180 | 181 | # create test dataset 182 | ds = _test_data(z_axis=z_axis) 183 | 184 | # TODO: generalize so this also works with e.g. zonal average sections (which dont have a X axis) 185 | coord_dict = {"X": "x", "Y": "y"} 186 | if z_axis: 187 | coord_dict["Z"] = "lev" 188 | 189 | ds_full = generate_grid_ds( 190 | ds, 191 | coord_dict, 192 | position={"X": ("center", xshift), "Y": ("center", yshift)}, 193 | ) 194 | 195 | grid = Grid(ds_full) 196 | 197 | ds_metrics, metrics_dict = recreate_metrics(ds_full, grid) 198 | 199 | if z_axis: 200 | # Check that the bound values are intact (previously those got alterd due to unexpected behaviour of .assign_coords()) 201 | assert "bnds" in ds_metrics.lev_bounds.dims 202 | 203 | # compute the more complex metrics (I could wrap this into a function I guess?) 204 | lon0, lon1 = grid.axes["X"]._get_neighbor_data_pairs(ds.lon.load(), xshift) 205 | lat0, lat1 = grid.axes["X"]._get_neighbor_data_pairs(ds.lat.load(), xshift) 206 | dx_gx_expected = distance(lon0, lat0, lon1, lat1) 207 | 208 | lon0, lon1 = grid.axes["Y"]._get_neighbor_data_pairs(ds.lon.load(), yshift) 209 | lat0, lat1 = grid.axes["Y"]._get_neighbor_data_pairs(ds.lat.load(), yshift) 210 | dy_gy_expected = distance(lon0, lat0, lon1, lat1) 211 | 212 | # corner metrics 213 | # dx 214 | if yshift == "left": 215 | # dx 216 | lon0, lon1 = grid.axes["X"]._get_neighbor_data_pairs( 217 | _interp_vertex_to_bounds(ds_metrics.lon_verticies, "y").isel(bnds=0), 218 | xshift, 219 | ) 220 | lat0, lat1 = grid.axes["X"]._get_neighbor_data_pairs( 221 | ds_metrics.lat_bounds.isel(bnds=0), xshift 222 | ) 223 | elif yshift == "right": 224 | lon0, lon1 = grid.axes["X"]._get_neighbor_data_pairs( 225 | _interp_vertex_to_bounds(ds_metrics.lon_verticies, "y").isel(bnds=1), 226 | xshift, 227 | ) 228 | lat0, lat1 = grid.axes["X"]._get_neighbor_data_pairs( 229 | ds_metrics.lat_bounds.isel(bnds=1), xshift 230 | ) 231 | dx_gxgy_expected = distance(lon0, lat0, lon1, lat1) 232 | 233 | # dy 234 | if xshift == "left": 235 | # dx 236 | lat0, lat1 = grid.axes["Y"]._get_neighbor_data_pairs( 237 | _interp_vertex_to_bounds(ds_metrics.lat_verticies, "x").isel(bnds=0), 238 | yshift, 239 | ) 240 | lon0, lon1 = grid.axes["Y"]._get_neighbor_data_pairs( 241 | ds_metrics.lon_bounds.isel(bnds=0), yshift 242 | ) 243 | elif xshift == "right": 244 | lat0, lat1 = grid.axes["Y"]._get_neighbor_data_pairs( 245 | _interp_vertex_to_bounds(ds_metrics.lat_verticies, "x").isel(bnds=1), 246 | yshift, 247 | ) 248 | lon0, lon1 = grid.axes["Y"]._get_neighbor_data_pairs( 249 | ds_metrics.lon_bounds.isel(bnds=1), yshift 250 | ) 251 | dy_gxgy_expected = distance(lon0, lat0, lon1, lat1) 252 | 253 | if xshift == "left": 254 | vertex_points = [0, 1] 255 | else: 256 | vertex_points = [2, 3] 257 | lon0, lon1 = ( 258 | ds_metrics.lon_verticies.isel(vertex=vertex_points[0]), 259 | ds_metrics.lon_verticies.isel(vertex=vertex_points[1]), 260 | ) 261 | lat0, lat1 = ( 262 | ds_metrics.lat_verticies.isel(vertex=vertex_points[0]), 263 | 
ds_metrics.lat_verticies.isel(vertex=vertex_points[1]), 264 | ) 265 | dy_gx_expected = distance(lon0, lat0, lon1, lat1) 266 | 267 | if yshift == "left": 268 | vertex_points = [0, 3] 269 | else: 270 | vertex_points = [1, 2] 271 | lon0, lon1 = ( 272 | ds_metrics.lon_verticies.isel(vertex=vertex_points[0]), 273 | ds_metrics.lon_verticies.isel(vertex=vertex_points[1]), 274 | ) 275 | lat0, lat1 = ( 276 | ds_metrics.lat_verticies.isel(vertex=vertex_points[0]), 277 | ds_metrics.lat_verticies.isel(vertex=vertex_points[1]), 278 | ) 279 | dx_gy_expected = distance(lon0, lat0, lon1, lat1) 280 | 281 | if z_axis: 282 | dz_t_expected = ds.lev_bounds.diff("bnds").squeeze().data 283 | else: 284 | dz_t_expected = None 285 | 286 | for var, expected in [ 287 | ("dz_t", dz_t_expected), 288 | ( 289 | "dx_t", 290 | distance( 291 | ds_metrics.lon_bounds.isel(bnds=0).data, 292 | ds_metrics.lat.data, 293 | ds_metrics.lon_bounds.isel(bnds=1).data, 294 | ds_metrics.lat.data, 295 | ), 296 | ), 297 | ( 298 | "dy_t", 299 | distance( 300 | ds_metrics.lon.data, 301 | ds_metrics.lat_bounds.isel(bnds=0).data, 302 | ds_metrics.lon.data, 303 | ds_metrics.lat_bounds.isel(bnds=1).data, 304 | ), 305 | ), 306 | ("dx_gx", dx_gx_expected), 307 | ("dy_gy", dy_gy_expected), 308 | ("dy_gx", dy_gx_expected), 309 | ("dx_gy", dx_gy_expected), 310 | ("dy_gxgy", dy_gxgy_expected), 311 | ("dx_gxgy", dx_gxgy_expected), 312 | ]: 313 | if expected is not None: 314 | print(var) 315 | control = ds_metrics[var].data 316 | if expected.shape != control.shape: 317 | control = control.T 318 | np.testing.assert_allclose(control, expected) 319 | 320 | if z_axis: 321 | assert set(["X", "Y", "Z"]).issubset(set(metrics_dict.keys())) 322 | else: 323 | assert set(["X", "Y"]).issubset(set(metrics_dict.keys())) 324 | assert "Z" not in list(metrics_dict.keys()) 325 | 326 | 327 | # TODO: inner and outer (needs to be implemented in xgcm autogenerate first) 328 | @pytest.mark.parametrize("xshift", ["left", "center", "right"]) 329 | @pytest.mark.parametrize("yshift", ["left", "center", "right"]) 330 | def test_detect_shift(xshift, yshift): 331 | # create base dataset (tracer) 332 | ds_base = _test_data() 333 | 334 | # create the maybe shifted dataset 335 | ds = ds_base.copy() 336 | if xshift == "left": 337 | ds["lon"] = ds["lon"] - 0.5 338 | elif xshift == "right": 339 | ds["lon"] = ds["lon"] + 0.5 340 | 341 | if yshift == "left": 342 | ds["lat"] = ds["lat"] - 0.5 343 | elif yshift == "right": 344 | ds["lat"] = ds["lat"] + 0.5 345 | assert detect_shift(ds_base, ds, "X") == xshift 346 | assert detect_shift(ds_base, ds, "Y") == yshift 347 | 348 | # repeat with very small shifts (these should not be detected) 349 | ds = ds_base.copy() 350 | if xshift == "left": 351 | ds["lon"] = ds["lon"] - 0.05 352 | elif xshift == "right": 353 | ds["lon"] = ds["lon"] + 0.05 354 | 355 | if yshift == "left": 356 | ds["lat"] = ds["lat"] - 0.05 357 | elif yshift == "right": 358 | ds["lat"] = ds["lat"] + 0.05 359 | assert detect_shift(ds_base, ds, "X") == "center" 360 | assert detect_shift(ds_base, ds, "Y") == "center" 361 | 362 | 363 | @pytest.mark.parametrize("xshift", ["left", "right"]) 364 | @pytest.mark.parametrize("yshift", ["left", "right"]) 365 | @pytest.mark.parametrize("grid_label", ["gr", "gn"]) 366 | def test_create_full_grid(xshift, yshift, grid_label): 367 | ds_base = _test_data(grid_label=grid_label) 368 | grid_dict = {"test_model": {grid_label: {"axis_shift": {"X": xshift, "Y": yshift}}}} 369 | # TODO: This should be specific to the grid_label: e.g grid_dict = 
{'model':{'gr':{'axis_shift':{'X':'left}}}} 370 | 371 | ds_full = create_full_grid(ds_base, grid_dict=grid_dict) 372 | 373 | shift_dict = {"left": -0.5, "right": 0.5} 374 | 375 | assert ds_full["x"].attrs["axis"] == "X" 376 | assert ds_full["x_" + xshift].attrs["axis"] == "X" 377 | assert ds_full["x_" + xshift].attrs["c_grid_axis_shift"] == shift_dict[xshift] 378 | assert ds_full["y"].attrs["axis"] == "Y" 379 | assert ds_full["y_" + yshift].attrs["axis"] == "Y" 380 | assert ds_full["y_" + yshift].attrs["c_grid_axis_shift"] == shift_dict[yshift] 381 | # TODO: integrate the vertical 382 | # assert ds_full["lev"].attrs["axis"] == "Z" 383 | 384 | # I might want to loosen this later and switch to a uniform naming 385 | # E.g. use x_g for the x dimension on the x gridface, no matter if its left or right... 386 | # TODO: Check upstream in xgcm 387 | # Once that is done I 388 | assert "x_" + xshift in ds_full.dims 389 | assert "y_" + yshift in ds_full.dims 390 | 391 | # test error handling 392 | with pytest.warns(UserWarning): 393 | ds_none = create_full_grid( 394 | ds_base, grid_dict=None 395 | ) # the synthetic dataset is not in the default dict. 396 | assert ds_none is None 397 | 398 | 399 | @pytest.mark.parametrize("recalculate_metrics", [True, False]) 400 | @pytest.mark.parametrize("xshift", ["left", "right"]) 401 | @pytest.mark.parametrize("yshift", ["left", "right"]) 402 | @pytest.mark.parametrize("grid_label", ["gr", "gn"]) 403 | def test_combine_staggered_grid(recalculate_metrics, xshift, yshift, grid_label): 404 | ds_base = _test_data(grid_label=grid_label) 405 | 406 | # create the maybe shifted dataset 407 | ds = ds_base.copy() 408 | ds = ds.rename({"base": "other"}) 409 | ds.attrs["variable_id"] = "other" 410 | if xshift == "left": 411 | ds["lon"] = ds["lon"] - 0.5 412 | elif xshift == "right": 413 | ds["lon"] = ds["lon"] + 0.5 414 | 415 | if yshift == "left": 416 | ds["lat"] = ds["lat"] - 0.5 417 | elif yshift == "right": 418 | ds["lat"] = ds["lat"] + 0.5 419 | grid_dict = {"test_model": {grid_label: {"axis_shift": {"X": xshift, "Y": yshift}}}} 420 | 421 | for other_ds in [ds, [ds]]: 422 | grid, ds_combined = combine_staggered_grid( 423 | ds_base, 424 | other_ds, 425 | grid_dict=grid_dict, 426 | recalculate_metrics=recalculate_metrics, 427 | ) 428 | 429 | for axis, shift in zip(["X", "Y"], [xshift, yshift]): 430 | # make sure the correct dim is in the added dataset 431 | assert grid.axes[axis].coords[shift] in ds_combined["other"].dims 432 | # and also that none of the other are in there 433 | assert all( 434 | [ 435 | di not in ds_combined["other"].dims 436 | for dd, di in grid.axes[axis].coords.items() 437 | if dd != shift 438 | ] 439 | ) 440 | # check if metrics are correctly parsed 441 | if recalculate_metrics: 442 | for axis in ["X", "Y"]: 443 | for metric in ["_t", "_gx", "_gy", "_gxgy"]: 444 | assert f"d{axis.lower()}{metric}" in list(ds_combined.coords) 445 | 446 | # Test error handling 447 | with pytest.warns(UserWarning): 448 | grid_none, ds_combined_none = combine_staggered_grid( 449 | ds_base, 450 | ds, 451 | grid_dict=None, 452 | recalculate_metrics=recalculate_metrics, 453 | ) 454 | assert ds_combined_none is None 455 | assert grid_none is None 456 | -------------------------------------------------------------------------------- /tests/test_preprocessing_cloud.py: -------------------------------------------------------------------------------- 1 | # This module tests data directly from the pangeo google cloud storage. 
2 | # Tests are meant to be more high level and also serve to document known problems (see skip statements). 3 | 4 | import fsspec 5 | import numpy as np 6 | import pytest 7 | import xarray as xr 8 | 9 | from xmip.grids import combine_staggered_grid 10 | from xmip.preprocessing import _desired_units, _drop_coords, combined_preprocessing 11 | from xmip.utils import google_cmip_col, model_id_match 12 | 13 | 14 | pytest.importorskip("gcsfs") 15 | 16 | 17 | def diagnose_duplicates(data): 18 | """displays non-unique entries in data""" 19 | _, idx = np.unique(data, return_index=True) 20 | missing = np.array([i for i in np.arange(len(data)) if i not in idx]) 21 | if len(missing) > 0: 22 | missing_values = data[missing] 23 | raise ValueError(f"Duplicate Values ({missing_values}) found") 24 | 25 | 26 | def data( 27 | source_id, variable_id, experiment_id, grid_label, use_intake_esm, catalog="main" 28 | ): 29 | zarr_kwargs = { 30 | "consolidated": True, 31 | # "decode_times": False, 32 | "decode_times": True, 33 | "use_cftime": True, 34 | } 35 | 36 | cat = google_cmip_col(catalog=catalog).search( 37 | source_id=source_id, 38 | experiment_id=experiment_id, 39 | variable_id=variable_id, 40 | # member_id="r1i1p1f1", 41 | table_id="Omon", 42 | grid_label=grid_label, 43 | ) 44 | 45 | if len(cat.df["zstore"]) > 0: 46 | if use_intake_esm: 47 | ddict = cat.to_dataset_dict( 48 | zarr_kwargs=zarr_kwargs, 49 | preprocess=combined_preprocessing, 50 | storage_options={"token": "anon"}, 51 | ) 52 | _, ds = ddict.popitem() 53 | else: 54 | # debugging options 55 | # @charlesbluca suggested this to make this work in GHA 56 | # https://github.com/jbusecke/xmip/pull/62#issuecomment-741928365 57 | mm = fsspec.get_mapper( 58 | cat.df["zstore"][0] 59 | ) # think you can pass in storage options here as well? 60 | ds_raw = xr.open_zarr(mm, **zarr_kwargs) 61 | ds = combined_preprocessing(ds_raw) 62 | else: 63 | ds = None 64 | 65 | return ds, cat 66 | 67 | 68 | def all_models(): 69 | df = google_cmip_col().df 70 | all_models = df["source_id"].unique() 71 | all_models = tuple(np.sort(all_models)) 72 | # all_models = tuple(["EC-Earth3"]) 73 | return all_models 74 | 75 | 76 | # test_models = ["CESM2-FV2", "GFDL-CM4"] 77 | test_models = all_models() 78 | 79 | 80 | def pytest_generate_tests(metafunc): 81 | # This is called for every test. Only get/set command line arguments 82 | # if the argument is specified in the list of test "fixturenames". 
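    # Illustrative note (the exact option names are an assumption; they would be
    # registered via `parser.addoption` in a conftest.py that is not shown here):
    # invoking e.g. `pytest tests/test_preprocessing_cloud.py --vi thetao --ei historical --gl gn`
    # would parametrize every test that requests the `vi`/`ei`/`gl` fixtures with those
    # values, while options left unset (None) are simply not parametrized below.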
83 | 84 | for name in ["vi", "gl", "ei", "cat"]: 85 | option_value = getattr(metafunc.config.option, name) 86 | 87 | if isinstance(option_value, str): 88 | option_value = [option_value] 89 | 90 | if name in metafunc.fixturenames and option_value is not None: 91 | metafunc.parametrize(name, option_value) 92 | 93 | 94 | # print(f"\n\n\n\n$$$$$$$ All available models: {all_models()}$$$$$$$\n\n\n\n") 95 | 96 | # Combine the input parameters according to command line input 97 | 98 | # --- Most basic test --- # 99 | 100 | # Try to combine some of the failures 101 | 102 | # We dont support these at all 103 | not_supported_failures = [ 104 | ("AWI-ESM-1-1-LR", "*", "*", "gn"), 105 | ("AWI-CM-1-1-MR", "*", "*", "gn"), 106 | ] 107 | 108 | # basic problems when trying to concat with intake-esm 109 | intake_concat_failures = [ 110 | ( 111 | "CanESM5", 112 | [ 113 | "uo", 114 | "so", 115 | "thetao", 116 | ], 117 | "ssp245", 118 | "gn", 119 | ), 120 | ( 121 | "CanESM5", 122 | ["zos"], 123 | [ 124 | "ssp245", 125 | "ssp585", 126 | ], 127 | "gn", 128 | ), 129 | ( 130 | "E3SM-1-0", 131 | ["so", "o2", "zos"], 132 | ["historical", "ssp585"], 133 | "gr", 134 | ), # issues with time concatenation 135 | ( 136 | "IPSL-CM6A-LR", 137 | ["thetao", "o2", "so"], 138 | "historical", 139 | "gn", 140 | ), # IPSL has an issue with `lev` dims concatting] 141 | ( 142 | "NorESM2-MM", 143 | ["uo", "so"], 144 | "historical", 145 | "gr", 146 | ), # time concatting 147 | ( 148 | "NorESM2-MM", 149 | ["so"], 150 | "historical", 151 | "gn", 152 | ), 153 | ] 154 | 155 | 156 | # this fixture has to be redifined every time to account for different fail cases for each test 157 | @pytest.fixture 158 | def spec_check_dim_coord_values_wo_intake(request, gl, vi, ei, cat): 159 | expected_failures = not_supported_failures + [ 160 | ("FGOALS-f3-L", ["thetao"], "piControl", "gn"), 161 | # ( 162 | # "GFDL-CM4", 163 | # "thetao", 164 | # "historical", 165 | # "gn", 166 | # ), # this should not fail and should trigger an xpass (I just use this for dev purposes to check 167 | # # the strict option) 168 | ] 169 | spec = (request.param, vi, ei, gl, cat) 170 | request.param = spec 171 | if model_id_match(expected_failures, request.param[0:-1]): 172 | request.node.add_marker(pytest.mark.xfail(strict=True)) 173 | return request 174 | 175 | 176 | @pytest.mark.parametrize( 177 | "spec_check_dim_coord_values_wo_intake", test_models, indirect=True 178 | ) 179 | def test_check_dim_coord_values_wo_intake( 180 | spec_check_dim_coord_values_wo_intake, 181 | ): 182 | ( 183 | source_id, 184 | variable_id, 185 | experiment_id, 186 | grid_label, 187 | catalog, 188 | ) = spec_check_dim_coord_values_wo_intake.param 189 | 190 | # there must be a better way to build this at the class level and then tear it down again 191 | # I can probably get this done with fixtures, but I dont know how atm 192 | ds, _ = data( 193 | source_id, variable_id, experiment_id, grid_label, False, catalog=catalog 194 | ) 195 | 196 | if ds is None: 197 | pytest.skip( 198 | f"No data found for {source_id}|{variable_id}|{experiment_id}|{grid_label}" 199 | ) 200 | 201 | # Check for dim duplicates 202 | # check all dims for duplicates 203 | # for di in ds.dims: 204 | # for now only test a subset of the dims. TODO: Add the bounds once they 205 | # are cleaned up. 
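    # The loop below asserts, for each of the (renamed) dimensions that is present:
    #   * no duplicate index values (via diagnose_duplicates and np.unique)
    #   * no NaNs and monotonically non-decreasing values (both skipped for decoded time)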
206 | for di in ["x", "y", "lev", "time"]: 207 | if di in ds.dims: 208 | diagnose_duplicates(ds[di].load().data) 209 | assert len(ds[di]) == len(np.unique(ds[di])) 210 | if di != "time": # these tests do not make sense for decoded time 211 | assert np.all(~np.isnan(ds[di])) 212 | assert np.all(ds[di].diff(di) >= 0) 213 | 214 | assert ds.lon.min().load() >= 0 215 | assert ds.lon.max().load() <= 360 216 | if "lon_bounds" in ds.variables: 217 | assert ds.lon_bounds.min().load() >= 0 218 | assert ds.lon_bounds.max().load() <= 361 219 | assert ds.lat.min().load() >= -90 220 | assert ds.lat.max().load() <= 90 221 | # make sure lon and lat are 2d 222 | assert len(ds.lon.shape) == 2 223 | assert len(ds.lat.shape) == 2 224 | for co in _drop_coords: 225 | if co in ds.dims: 226 | assert co not in ds.coords 227 | 228 | # Check unit conversion 229 | for var, expected_unit in _desired_units.items(): 230 | if var in ds.variables: 231 | unit = ds[var].attrs.get("units") 232 | if unit: 233 | assert unit == expected_unit 234 | 235 | 236 | # this fixture has to be redifined every time to account for different fail cases for each test 237 | @pytest.fixture 238 | def spec_check_dim_coord_values(request, gl, vi, ei, cat): 239 | expected_failures = ( 240 | not_supported_failures 241 | + intake_concat_failures 242 | + [ 243 | ("NorESM2-MM", ["uo", "zos"], "historical", "gn"), 244 | ("NorESM2-MM", "thetao", "historical", "gn"), 245 | ("NorESM2-MM", "thetao", "historical", "gr"), 246 | ("FGOALS-f3-L", ["thetao"], "piControl", "gn"), 247 | ] 248 | ) 249 | spec = (request.param, vi, ei, gl, cat) 250 | request.param = spec 251 | if model_id_match(expected_failures, request.param[0:-1]): 252 | request.node.add_marker(pytest.mark.xfail(strict=True)) 253 | return request 254 | 255 | 256 | @pytest.mark.parametrize("spec_check_dim_coord_values", test_models, indirect=True) 257 | def test_check_dim_coord_values( 258 | spec_check_dim_coord_values, 259 | ): 260 | ( 261 | source_id, 262 | variable_id, 263 | experiment_id, 264 | grid_label, 265 | catalog, 266 | ) = spec_check_dim_coord_values.param 267 | # there must be a better way to build this at the class level and then tear it down again 268 | # I can probably get this done with fixtures, but I dont know how atm 269 | ds, cat = data( 270 | source_id, variable_id, experiment_id, grid_label, True, catalog=catalog 271 | ) 272 | 273 | if ds is None: 274 | pytest.skip( 275 | f"No data found for {source_id}|{variable_id}|{experiment_id}|{grid_label}" 276 | ) 277 | 278 | # Check for dim duplicates 279 | # check all dims for duplicates 280 | # for di in ds.dims: 281 | # for now only test a subset of the dims. TODO: Add the bounds once they 282 | # are cleaned up. 
283 | for di in ["x", "y", "lev", "time"]: 284 | if di in ds.dims: 285 | diagnose_duplicates(ds[di].load().data) 286 | assert len(ds[di]) == len(np.unique(ds[di])) 287 | if di != "time": # these tests do not make sense for decoded time 288 | assert np.all(~np.isnan(ds[di])) 289 | assert np.all(ds[di].diff(di) >= 0) 290 | 291 | assert ds.lon.min().load() >= 0 292 | assert ds.lon.max().load() <= 360 293 | if "lon_bounds" in ds.variables: 294 | assert ds.lon_bounds.min().load() >= 0 295 | assert ds.lon_bounds.max().load() <= 361 296 | assert ds.lat.min().load() >= -90 297 | assert ds.lat.max().load() <= 90 298 | # make sure lon and lat are 2d 299 | assert len(ds.lon.shape) == 2 300 | assert len(ds.lat.shape) == 2 301 | for co in _drop_coords: 302 | if co in ds.dims: 303 | assert co not in ds.coords 304 | 305 | 306 | # --- Specific Bound Coords Test ----- 307 | 308 | 309 | # this fixture has to be redifined every time to account for different fail cases for each test 310 | @pytest.fixture 311 | def spec_check_bounds_verticies(request, gl, vi, ei, cat): 312 | expected_failures = ( 313 | not_supported_failures 314 | + intake_concat_failures 315 | + [ 316 | ("FGOALS-f3-L", ["thetao", "so", "uo", "zos"], "*", "gn"), 317 | ("FGOALS-g3", ["thetao", "so", "uo", "zos"], "*", "gn"), 318 | ("NorESM2-MM", ["thetao", "uo", "zos"], "historical", "gn"), 319 | ("NorESM2-MM", ["thetao", "so"], "historical", "gr"), 320 | ("IPSL-CM6A-LR", ["thetao", "o2"], "historical", "gn"), 321 | ("IITM-ESM", ["so", "uo", "thetao"], "piControl", "gn"), 322 | ("GFDL-CM4", "uo", "*", "gn"), 323 | ] 324 | ) 325 | spec = (request.param, vi, ei, gl, cat) 326 | request.param = spec 327 | if model_id_match(expected_failures, request.param[0:-1]): 328 | request.node.add_marker(pytest.mark.xfail(strict=True)) 329 | return request 330 | 331 | 332 | @pytest.mark.parametrize("spec_check_bounds_verticies", test_models, indirect=True) 333 | def test_check_bounds_verticies(spec_check_bounds_verticies): 334 | ( 335 | source_id, 336 | variable_id, 337 | experiment_id, 338 | grid_label, 339 | catalog, 340 | ) = spec_check_bounds_verticies.param 341 | ds, cat = data( 342 | source_id, variable_id, experiment_id, grid_label, True, catalog=catalog 343 | ) 344 | 345 | if ds is None: 346 | pytest.skip( 347 | f"No data found for {source_id}|{variable_id}|{experiment_id}|{grid_label}" 348 | ) 349 | 350 | if "vertex" in ds.dims: 351 | np.testing.assert_allclose(ds.vertex.data, np.arange(4)) 352 | 353 | # Check for existing bounds and verticies 354 | for co in ["lon_bounds", "lat_bounds", "lon_verticies", "lat_verticies"]: 355 | assert co in ds.coords 356 | # make sure that all other dims are eliminated from the bounds. 357 | assert (set(ds[co].dims) - set(["bnds", "vertex"])) == set(["x", "y"]) 358 | 359 | # Check the order of the vertex 360 | # Ill only check these south of the Arctic for now. Up there 361 | # things are still weird. 
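    # Note: the diff checks below encode the vertex ordering also used in
    # xmip.grids.recreate_metrics (roughly 0 = SW, 1 = NW, 2 = NE, 3 = SE), i.e.
    # longitude should (mostly) increase from vertex 0->3 and 1->2, and latitude
    # from 0->1 and 3->2, away from the distorted polar rows.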
362 | test_ds = ds.where(abs(ds.lat) <= 40, drop=True) 363 | 364 | vertex_lon_diff1 = test_ds.lon_verticies.isel( 365 | vertex=3 366 | ) - test_ds.lon_verticies.isel(vertex=0) 367 | vertex_lon_diff2 = test_ds.lon_verticies.isel( 368 | vertex=2 369 | ) - test_ds.lon_verticies.isel(vertex=1) 370 | vertex_lat_diff1 = test_ds.lat_verticies.isel( 371 | vertex=1 372 | ) - test_ds.lat_verticies.isel(vertex=0) 373 | vertex_lat_diff2 = test_ds.lat_verticies.isel( 374 | vertex=2 375 | ) - test_ds.lat_verticies.isel(vertex=3) 376 | for vertex_diff in [vertex_lon_diff1, vertex_lon_diff2]: 377 | assert (vertex_diff <= 0).sum() <= (3 * len(vertex_diff.y)) 378 | # allowing for a few rows to be negative 379 | 380 | for vertex_diff in [vertex_lat_diff1, vertex_lat_diff2]: 381 | assert (vertex_diff <= 0).sum() <= (5 * len(vertex_diff.x)) 382 | # allowing for a few rows to be negative 383 | # This is just to make sure that not the majority of values is negative or zero. 384 | 385 | # Same for the bounds: 386 | lon_diffs = test_ds.lon_bounds.diff("bnds") 387 | lat_diffs = test_ds.lat_bounds.diff("bnds") 388 | 389 | assert (lon_diffs <= 0).sum() <= (5 * len(lon_diffs.y)) 390 | assert (lat_diffs <= 0).sum() <= (5 * len(lat_diffs.y)) 391 | 392 | 393 | # --- xgcm grid specific tests --- # 394 | # this fixture has to be redifined every time to account for different fail cases for each test 395 | @pytest.fixture 396 | def spec_check_grid(request, gl, vi, ei, cat): 397 | expected_failures = ( 398 | not_supported_failures 399 | + intake_concat_failures 400 | + [ 401 | ("CMCC-ESM2", "*", "*", "gn"), 402 | ("CMCC-CM2-SR5", "*", "*", "gn"), 403 | ("CMCC-CM2-HR4", "*", "*", "gn"), 404 | ("FGOALS-f3-L", "*", "*", "gn"), 405 | ("FGOALS-g3", "*", "*", "gn"), 406 | ("E3SM-1-0", ["so", "thetao", "o2"], "*", "gn"), 407 | ( 408 | "E3SM-1-0", 409 | ["zos"], 410 | ["historical", "ssp585", "ssp245", "ssp370", "esm-hist"], 411 | "gr", 412 | ), 413 | ( 414 | "EC-Earth3-AerChem", 415 | ["so", "thetao", "zos"], 416 | ["historical", "piControl", "ssp370"], 417 | "gn", 418 | ), 419 | ("EC-Earth3-Veg", "*", "historical", "gr"), 420 | ("EC-Earth3-CC", "*", "*", "gn"), 421 | ("MPI-ESM-1-2-HAM", "*", "*", "gn"), 422 | ("NorESM2-MM", "*", "historical", "gn"), 423 | ("NorESM2-MM", ["thetao", "so", "uo"], "historical", "gr"), 424 | ("IITM-ESM", "*", "*", "gn"), 425 | ("GFDL-CM4", ["uo"], "*", "gn"), 426 | ("IPSL-CM5A2-INCA", "*", "*", "gn"), 427 | ("IPSL-CM6A-LR-INCA", "*", "*", "gn"), 428 | ] 429 | ) 430 | spec = (request.param, vi, ei, gl, cat) 431 | request.param = spec 432 | if model_id_match(expected_failures, request.param[0:-1]): 433 | request.node.add_marker(pytest.mark.xfail(strict=True, reason="")) 434 | return request 435 | 436 | 437 | @pytest.mark.parametrize("spec_check_grid", test_models, indirect=True) 438 | def test_check_grid( 439 | spec_check_grid, 440 | ): 441 | source_id, variable_id, experiment_id, grid_label, catalog = spec_check_grid.param 442 | 443 | ds, cat = data( 444 | source_id, variable_id, experiment_id, grid_label, True, catalog=catalog 445 | ) 446 | 447 | if ds is None: 448 | pytest.skip( 449 | f"No data found for {source_id}|{variable_id}|{experiment_id}|{grid_label}" 450 | ) 451 | 452 | # This is just a rudimentary test to see if the creation works 453 | staggered_grid, ds_staggered = combine_staggered_grid(ds, recalculate_metrics=True) 454 | 455 | assert ds_staggered is not None 456 | # 457 | if "lev" in ds_staggered.dims: 458 | assert "bnds" in ds_staggered.lev_bounds.dims 459 | 460 | for axis in ["X", "Y"]: 
461 | for metric in ["_t", "_gx", "_gy", "_gxgy"]: 462 | assert f"d{axis.lower()}{metric}" in list(ds_staggered.coords) 463 | # TODO: Include actual test to combine variables 464 | -------------------------------------------------------------------------------- /tests/test_regionmask.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import xarray as xr 4 | 5 | from xmip.regionmask import _default_merge_dict, merged_mask 6 | 7 | 8 | regionmask = pytest.importorskip( 9 | "regionmask", minversion="0.5.0+dev" 10 | ) # All tests get skipped if the version of regionmask is not > 0.5.0 11 | 12 | 13 | @pytest.mark.parametrize("verbose", [True, False]) 14 | def test_merge_mask(verbose): 15 | x = np.linspace(0, 360, 720) 16 | y = np.linspace(-90, 90, 360) 17 | data = np.random.rand(len(x), len(y)) 18 | ds = xr.DataArray(data, coords=[("x", x), ("y", y)]).to_dataset(name="data") 19 | ds["lon"] = ds["x"] * xr.ones_like(ds["y"]) 20 | ds["lat"] = xr.ones_like(ds["x"]) * ds["y"] 21 | 22 | basins = regionmask.defined_regions.natural_earth_v4_1_0.ocean_basins_50 23 | 24 | mask = merged_mask(basins, ds, verbose=verbose) 25 | 26 | # check if number of regions is correct 27 | mask_regions = np.unique(mask.data.flat) 28 | mask_regions = mask_regions[~np.isnan(mask_regions)] 29 | 30 | assert len(mask_regions) == len(_default_merge_dict().keys()) 31 | 32 | # now a brief range check to make sure the pacific is stamped out correctly 33 | pac = ds.where( 34 | np.logical_or(np.logical_or(mask == 2, mask == 3), mask == 4), drop=True 35 | ) 36 | assert pac.lon.min() > 95.0 37 | assert pac.lon.max() < 295.0 38 | 39 | # I shoud add a test for -180-180 40 | 41 | # How to use cloud data. 42 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import xarray as xr 3 | 4 | from xmip.utils import cmip6_dataset_id, google_cmip_col, model_id_match 5 | 6 | 7 | def test_google_cmip_col(): 8 | try: 9 | import intake 10 | except ImportError: 11 | intake = None 12 | if intake is None: 13 | with pytest.raises(ImportError): 14 | col = google_cmip_col(catalog="main") 15 | else: 16 | col = google_cmip_col(catalog="main") 17 | assert ( 18 | col.catalog_file == "https://storage.googleapis.com/cmip6/pangeo-cmip6.csv" 19 | ) 20 | 21 | with pytest.raises(ValueError): 22 | col = google_cmip_col(catalog="wrong") 23 | 24 | 25 | def test_model_id_match(): 26 | # wrong amount of elements 27 | with pytest.raises(ValueError): 28 | model_id_match([("A", "a", "aa"), ("A", "a", "aa", "aaa")], ("A", "a", "aa")) 29 | 30 | with pytest.raises(ValueError): 31 | model_id_match([("A", "a", "aa"), ("B", "a", "aa")], ("A", "a", "aa", "aaa")) 32 | 33 | assert model_id_match([("A", "a", "aa"), ("B", "a", "aa")], ("A", "a", "aa")) 34 | assert ~model_id_match([("A", ["b", "c"], "aa")], ("A", "a", "aa")) 35 | assert ~model_id_match([("A", ["b", "c"], "aa")], ("A", "a", "aa")) 36 | assert ~model_id_match( 37 | [("EC-Earth3-AerChem", ["so"], "historical", "gn")], 38 | ("EC-Earth3", ["so"], "historical", "gn"), 39 | ) 40 | assert ~model_id_match([("A", "a", "aa"), ("B", "a", "aa")], ("AA", "a", "aa")) 41 | assert ~model_id_match([("AA", "a", "aa"), ("B", "a", "aa")], ("A", "a", "aa")) 42 | assert ~model_id_match([(["AA"], "a", "aa"), ("B", "a", "aa")], ("A", "a", "aa")) 43 | assert ~model_id_match([(["AA"], ["a"], "aa"), ("B", "a", 
"aa")], ("A", "a", "aa")) 44 | assert model_id_match([("*", "a", "aa")], ("whatever", "a", "aa")) 45 | assert model_id_match([(["bb", "b"], "a", "aa")], ("b", "a", "aa")) 46 | assert model_id_match( 47 | [(["bb", "b"], "a", "aa"), (["bb", "b"], "c", "cc")], ("bb", "a", "aa") 48 | ) 49 | 50 | 51 | def test_cmip6_dataset_id(): 52 | ds = xr.Dataset({"data": 4}) 53 | 54 | ds.attrs = { 55 | "activity_id": "ai", 56 | "institution_id": "ii", 57 | "source_id": "si", 58 | "variant_label": "vl", 59 | "experiment_id": "ei", 60 | "table_id": "ti", 61 | "grid_label": "gl", 62 | "variable_id": "vi", 63 | } 64 | 65 | assert cmip6_dataset_id(ds) == "ai.ii.si.ei.vl.ti.gl.none.vi" 66 | assert cmip6_dataset_id(ds, sep="_") == "ai_ii_si_ei_vl_ti_gl_none_vi" 67 | assert ( 68 | cmip6_dataset_id(ds, id_attrs=["grid_label", "activity_id", "wrong_attrs"]) 69 | == "gl.ai.none" 70 | ) 71 | -------------------------------------------------------------------------------- /xmip/__init__.py: -------------------------------------------------------------------------------- 1 | from importlib.metadata import ( # only works for python 3.8 and upwards 2 | PackageNotFoundError, 3 | version, 4 | ) 5 | 6 | try: 7 | __version__ = version("xmip") 8 | except PackageNotFoundError: 9 | # package is not installed 10 | __version__ = "unknown" 11 | pass 12 | -------------------------------------------------------------------------------- /xmip/drift_removal.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import dask.array as dsa 4 | import numpy as np 5 | import xarray as xr 6 | import xarrayutils as xru 7 | 8 | from xarrayutils.utils import linear_trend 9 | 10 | from xmip.postprocessing import EXACT_ATTRS, _match_datasets 11 | from xmip.utils import cmip6_dataset_id 12 | 13 | 14 | def _maybe_unpack_date(date): 15 | """`Unpacks` cftime from xr.Dataarray if provided""" 16 | # I should probably not do this here but instead in the higher level functions... 17 | if isinstance(date, xr.DataArray): 18 | date = date.data.tolist() 19 | if isinstance(date, list): 20 | if len(date) != 1: 21 | raise RuntimeError( 22 | "The passed date has the wrong format. Got [{date}] after conversion to list." 23 | ) 24 | else: 25 | date = date[0] 26 | return date 27 | 28 | 29 | def _construct_cfdate(data, units, calendar): 30 | # This seems clunky. I feel there must be a more elegant way of doing this? 31 | date = xr.DataArray(data, attrs={"units": units, "calendar": calendar}) 32 | return xr.decode_cf(date.to_dataset(name="time"), use_cftime=True).time 33 | 34 | 35 | def _datestr_to_cftime(date_str, calendar): 36 | # Again I feel this should be more elegant? For now I guess it works 37 | return _construct_cfdate([0], f"days since {date_str}", calendar) 38 | 39 | 40 | def replace_time( 41 | ds, ref_date=None, ref_idx=0, freq="1MS", calendar=None, time_dim="time" 42 | ): 43 | """This function replaces the time encoding of a dataset acoording to `ref_date`. 44 | The ref date can be any index of ds.time (default is 0; meaning the first timestep of ds will be replaced with `ref_date`). 45 | """ 46 | # ! I might be able to achieve some of this with time.shift 47 | # ! 48 | 49 | if calendar is None: 50 | calendar = ds.time.encoding.get("calendar", "standard") 51 | 52 | if ref_date is None: 53 | ref_date = _maybe_unpack_date(ds.time[0]) 54 | 55 | if isinstance(ref_date, str): 56 | ref_date = _maybe_unpack_date(_datestr_to_cftime(ref_date, calendar)) 57 | 58 | # TODO: Check the frequency. 
Currently the logic only works on monthly intervals 59 | if freq != "1MS": 60 | raise ValueError("`replace_time` currently only works with monthly data.") 61 | 62 | # determine the start date 63 | # propagate the date back (this assumes stricly monthly data) 64 | 65 | year = _maybe_unpack_date(ref_date).year - (ref_idx // 12) 66 | month = _maybe_unpack_date(ref_date).month - (ref_idx % 12) 67 | 68 | if month <= 0: 69 | # move the year one more back 70 | year -= 1 71 | month = 12 + month 72 | 73 | attrs = ds.time.attrs 74 | 75 | start = f"{int(year):04d}-{int(month):02d}" 76 | 77 | ds = ds.assign_coords( 78 | time=xr.cftime_range(start, periods=len(ds.time), freq=freq, calendar=calendar) 79 | ) 80 | ds.time.attrs = attrs 81 | return ds 82 | 83 | 84 | def find_date_idx(time, date): 85 | """Finds the index of `date` within an array of cftime dates. This strictly requires monthly data. 86 | Might result in undesired behavior for other time frequencies. 87 | """ 88 | # ! seems like I can refactor this with http://xarray.pydata.org/en/stable/generated/xarray.CFTimeIndex.get_loc.html#xarray.CFTimeIndex.get_loc 89 | 90 | date = _maybe_unpack_date(date) 91 | 92 | # easier approach: Find the difference in years and months 93 | year_diff = date.year - _maybe_unpack_date(time[0]).year 94 | month_diff = date.month - _maybe_unpack_date(time[0]).month 95 | 96 | return (year_diff * 12) + month_diff 97 | 98 | 99 | def unify_time(parent, child, adjust_to="child"): 100 | """Uses the CMIP6 specific metadata (augmented by xmip....time_preprocessing!!!) to adjust parent time encoding to child experiment. 101 | Similar to `switch_to_child_time`, but sets the time parameters (e.g. calendar) explicitly to the child conventions 102 | """ 103 | branch_time_in_parent = child.attrs.get("branch_time_in_parent") 104 | 105 | # if branch time is not in attrs do nothing 106 | if branch_time_in_parent is None: 107 | child_source_id = child.attrs.get("source_id", "not found") 108 | parent_source_id = parent.attrs.get("source_id", "not found") 109 | msg = ( 110 | f"Could not unify time for [child:{child_source_id}|parent:{parent_source_id}]." 111 | "`branch_time_in_parent` not found in attributes." 112 | ) 113 | warnings.warn(msg, UserWarning) 114 | return parent, child 115 | 116 | else: 117 | parent_calendar = parent.time.to_index().calendar 118 | child_calendar = child.time.to_index().calendar 119 | branch_time_parent = _construct_cfdate( 120 | child.attrs.get("branch_time_in_parent"), 121 | child.attrs.get("parent_time_units"), 122 | parent_calendar, 123 | ) 124 | branch_time_child = _construct_cfdate( 125 | child.attrs.get("branch_time_in_child"), 126 | child.time.encoding.get("units"), 127 | child_calendar, 128 | ) 129 | 130 | if adjust_to == "child": 131 | branch_idx_parent = find_date_idx(parent.time, branch_time_parent) 132 | return ( 133 | replace_time( 134 | parent, 135 | branch_time_child, 136 | ref_idx=branch_idx_parent, 137 | calendar=child_calendar, 138 | ), 139 | child, 140 | ) 141 | elif adjust_to == "parent": 142 | branch_idx_child = find_date_idx(child.time, branch_time_child) 143 | return parent, replace_time( 144 | child, 145 | branch_time_parent, 146 | ref_idx=branch_idx_child, 147 | calendar=parent_calendar, 148 | ) 149 | else: 150 | raise ValueError( 151 | f"Input for `adjust_to` not valid. Got {adjust_to}. Expected either `child` or `parent`." 
152 | ) 153 | 154 | 155 | def calculate_drift( 156 | reference, ds, variable, trend_years=250, compute_short_trends=False 157 | ): 158 | """Calculate the linear trend at every grid position for the given time (`trend_years`) 159 | starting from the date when `ds` was branched of from `ds_parent`. 160 | CMIP6 metadata must be present. 161 | 162 | Parameters 163 | ---------- 164 | ds_parent : xr.Dataset 165 | The dataset from which the drift (trend) is calculated. Usually the preindustrial control run 166 | ds : xr.Dataset 167 | The dataset for which the drift is matched. This is usually the historical experiment. 168 | !For many models, each historical member is branched 169 | trend_years : int, optional 170 | The duration of the trend to compute in years, by default 250 (This is the lenght of 171 | historical+standard scenario, e.g. 1850-2100) 172 | """ 173 | 174 | for attr in [ 175 | "parent_variant_label", 176 | "parent_source_id", 177 | "branch_time_in_parent", 178 | "parent_time_units", 179 | "source_id", 180 | "variant_label", 181 | ]: 182 | if attr not in ds.attrs: 183 | raise ValueError(f"Could not find {attr} in attributes of `ds`.") 184 | 185 | # Check if the parent member id matches 186 | match_attrs = ["source_id", "variant_label"] 187 | for ma in match_attrs: 188 | if ds.attrs[f"parent_{ma}"] not in reference.attrs[ma]: 189 | raise ValueError( 190 | f'`ds_parent` {ma} ({reference.attrs[ma]}) not compatible with `ds` parent_{ma} ({ds.attrs[f"parent_{ma}"]})' 191 | ) 192 | 193 | # find the branch date in the control run 194 | branch_time_reference = _construct_cfdate( 195 | ds.attrs["branch_time_in_parent"], 196 | ds.attrs["parent_time_units"], 197 | reference.time.to_index().calendar, 198 | ) 199 | branch_idx_reference = find_date_idx(reference.time, branch_time_reference) 200 | # there might be some cases where this is not true. Figure out what to do when it happens. 201 | assert branch_idx_reference >= 0 202 | 203 | # cut the referenmce to the appropriate time frame 204 | reference_cut = reference.isel( 205 | time=slice(branch_idx_reference, branch_idx_reference + (12 * trend_years)) 206 | ) 207 | 208 | if len(reference_cut.time) == 0: 209 | raise RuntimeError( 210 | "Selecting from `reference` according to the branch time resulted in empty dataset. Check the metadata." 211 | ) 212 | return None 213 | else: 214 | if len(reference_cut.time) < trend_years * 12: 215 | if compute_short_trends: 216 | warnings.warn( 217 | f"reference dataset does not have the full {trend_years} years to calculate trend. Using {int(len(reference_cut.time) / 12)} years only" 218 | ) 219 | else: 220 | raise RuntimeError( 221 | f"Reference dataset does not have the full {trend_years} years to calculate trend. Set `calculate_short_trend=True` to compute from a shorter timeseries" 222 | ) 223 | 224 | time_range = xr.concat( 225 | [ 226 | reference_cut.time[0].squeeze().drop_vars("time"), 227 | reference_cut.time[-1].squeeze().drop_vars("time"), 228 | ], 229 | dim="bnds", 230 | ).reset_coords(drop=True) 231 | 232 | # there is some problem when encoding very large years. for now ill preserve these only as 233 | # strings 234 | time_range = time_range.astype(str) 235 | 236 | # The polyfit implementation actually respects the units. 237 | # For now my implementation requires the slope to be in units .../month 238 | # I might be able to change this later and accomodate other time frequencies? 
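    # In other words (sketching the downstream assumption, see `detrend_basic`):
    # the stored slope is treated as "per time step", i.e. per month for monthly
    # data, and the trend removed later is slope * (integer month index relative
    # to the reference date).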
239 | # get rid of all the additional coords, which resets the time to an integer index 240 | 241 | reference_cut = reference_cut[variable] 242 | 243 | # TODO: This has pretty poor performance...need to find out why. 244 | # Reset time dimension to integer index. 245 | # reference_cut = reference_cut.drop_vars("time") 246 | 247 | # linear regression slope is all we need here. 248 | # reg = reference_cut.polyfit("time", 1).sel(degree=1).polyfit_coefficients 249 | 250 | reg_raw = linear_trend( 251 | reference_cut, 252 | "time", 253 | ) 254 | 255 | # ! quite possibly the shittiest fix ever. 256 | # I changed the API over at xarrayutils and now I have to pay the price over here. 257 | # TODO: Might want to eliminate this ones the new xarrayutils version has matured. 258 | if xru.__version__ > "v0.1.3": 259 | reg = reg_raw.slope 260 | else: 261 | reg = reg_raw.sel(parameter="slope").drop_vars("parameter").squeeze() 262 | 263 | # again drop all the coordinates 264 | reg = reg.reset_coords(drop=True) 265 | 266 | reg = reg.to_dataset(name=variable) 267 | 268 | # add metadata about regression 269 | reg = reg.assign_coords(trend_time_range=time_range) 270 | reg.coords["trend_time_range"].attrs.update( 271 | { 272 | "standard_name": "regression_time_bounds", 273 | "long_name": "regression_time_in_reference_run", 274 | } 275 | ) 276 | # reg should carry the attributes of `ds` 277 | # ? Maybe I should convert to a dataset? 278 | reg.attrs.update(ds.attrs) 279 | return reg 280 | 281 | 282 | # TODO: I need a more generalized detrending? Based on indicies --> xarrayutils 283 | # Then refactor this one here just for cmip6 284 | 285 | 286 | def detrend_basic(da, da_slope, start_idx=0, dim="time", keep_attrs=True): 287 | """Basic detrending just based on time index, not date""" 288 | # now create a trend timeseries at each point 289 | # and the time indicies by the ref index. This way the trend is correctly calculated from the reference year. 290 | # this adapts the chunk structure from the input if its a dask array 291 | attrs = {k: v for k, v in da.attrs.items()} 292 | idx_start = -start_idx 293 | idx_stop = len(da.time) - start_idx 294 | if isinstance(da.data, dsa.Array): 295 | ref_time = da.isel({di: 0 for di in da.dims if di != dim}) 296 | chunks = ref_time.chunks 297 | trend_time_idx_data = dsa.arange( 298 | idx_start, idx_stop, chunks=chunks, dtype=da.dtype 299 | ) 300 | else: 301 | trend_time_idx_data = np.arange(idx_start, idx_stop, dtype=da.dtype) 302 | 303 | trend_time_idx = xr.DataArray( 304 | trend_time_idx_data, 305 | dims=[dim], 306 | ) 307 | 308 | # chunk like the time dimension 309 | slope = da_slope.squeeze() 310 | 311 | trend = trend_time_idx * slope 312 | 313 | detrended = da - trend 314 | if keep_attrs: 315 | detrended.attrs.update(attrs) 316 | return detrended 317 | 318 | 319 | def remove_trend(ds, ds_slope, variable, ref_date, check_mask=True): 320 | """Detrending method for cmip6 data. Only works with monthly data! 321 | This does not correct the time convention. Be careful with experiements that have 322 | a non compatible time convention (often control runs.) 
323 | """ 324 | 325 | if not isinstance(ds, xr.Dataset): 326 | raise ValueError("`ds` input needs to be a dataset") 327 | 328 | if not isinstance(ds_slope, xr.Dataset): 329 | raise ValueError("`ds_slope` input needs to be a dataset") 330 | 331 | da = ds[variable] 332 | da_slope = ds_slope[variable] 333 | 334 | da, da_slope = xr.align(da, da_slope, join="override") 335 | 336 | if check_mask: 337 | nanmask_data = np.isnan(da.isel(time=[0, len(da.time) // 2, -1])).all("time") 338 | nanmask_slope = np.isnan(da_slope) 339 | # perform a quick test to see if the land is aligned properly 340 | if np.logical_xor(nanmask_data, nanmask_slope).any(): 341 | raise ValueError( 342 | "Nanmask between data and slope array not identical. Check input and disable `check_mask` to skip this test" 343 | ) 344 | 345 | ref_calendar = da.time.to_index().calendar 346 | ref_date = xr.cftime_range(ref_date, periods=1, calendar=ref_calendar) 347 | 348 | # Find the index corresponding to the ref date (this can be outside the range of the actual data) 349 | ref_idx = find_date_idx(da.time, ref_date) 350 | 351 | detrended = detrend_basic( 352 | da, da_slope, start_idx=ref_idx, dim="time", keep_attrs=True 353 | ) 354 | 355 | # add information to track which data was used to remove trend 356 | if "trend_time_range" in ds_slope.coords: 357 | trend_start = ds_slope.trend_time_range.isel(bnds=0).load().data.tolist() 358 | trend_stop = ds_slope.trend_time_range.isel(bnds=1).load().data.tolist() 359 | 360 | else: 361 | trend_start = "not-available" 362 | trend_stop = "not-available" 363 | warnings.warn( 364 | "`ds_slope` did not have information about the time over which the slope was calculated. Check the input." 365 | ) 366 | 367 | detrended.attrs["drift_removed"] = ( 368 | f"linear_trend_{cmip6_dataset_id(ds_slope)}_{trend_start}_{trend_stop}" 369 | ) 370 | 371 | return detrended 372 | 373 | 374 | def match_and_remove_trend( 375 | ddict, trend_ddict, ref_date="1850", nomatch="warn", **detrend_kwargs 376 | ): 377 | """Find and remove trend files from a dictonary of datasets 378 | 379 | Parameters 380 | ---------- 381 | ddict : dict 382 | dictionary with xr.Datasets which should get a trend/drift removed 383 | trend_ddict : dict 384 | dictionary with results of linear regressions. These should be removed from the datasets in `ddict` 385 | ref_date : str, optional 386 | Start date of the trend, by default "1850" 387 | nomatch : str, optional 388 | Define the behavior when for a given dataset in `ddict` there is no matching trend dataset in `trend_ddict`. 389 | Can be `warn`, `raise`, or `ignore`, by default 'warn' 390 | 391 | Returns 392 | ------- 393 | dict 394 | Dictionary of detrended dataasets. Only contains values of `ddict` that actually had a trend removed. 395 | 396 | """ 397 | ddict_detrended = {} 398 | match_attrs = [ma for ma in EXACT_ATTRS if ma not in ["experiment_id"]] + [ 399 | "variable_id" 400 | ] 401 | 402 | for k, ds in ddict.items(): 403 | trend_ds = _match_datasets( 404 | ds, trend_ddict, match_attrs, pop=False, unique=True, nomatch=nomatch 405 | ) 406 | if len(trend_ds) == 2: 407 | trend_ds = trend_ds[ 408 | 1 409 | ] # this is a bit clunky. _match_datasest does return the input ds, so we have to grab the second one? 
410 | # I guess I could pass *trend_ds, but that is not very readable 411 | variable = ds.attrs["variable_id"] 412 | da_detrended = ds.assign( 413 | { 414 | variable: remove_trend( 415 | ds, trend_ds, variable, ref_date=ref_date, **detrend_kwargs 416 | ) 417 | } 418 | ) 419 | # should this just return a dataset instead? 420 | ddict_detrended[k] = da_detrended 421 | 422 | return ddict_detrended 423 | -------------------------------------------------------------------------------- /xmip/grids.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import numpy as np 4 | import pkg_resources 5 | import xarray as xr 6 | import yaml 7 | 8 | from xgcm import Grid 9 | from xgcm.autogenerate import generate_grid_ds 10 | 11 | 12 | path = "specs/staggered_grid_config.yaml" # always use slash 13 | grid_spec = pkg_resources.resource_filename(__name__, path) 14 | 15 | 16 | def _parse_bounds_vertex(da, dim="bnds", position=[0, 1]): 17 | """Convenience function to extract positions from bounds/verticies""" 18 | return tuple([da.isel({dim: i}).load().data for i in position]) 19 | 20 | 21 | def _interp_vertex_to_bounds(da, orientation): 22 | """ 23 | Convenience function to average 4 vertex points into two bound points. 24 | Helpful to recreate e.g. the latitude at the `lon_bounds` points. 25 | """ 26 | if orientation == "x": 27 | datasets = [ 28 | da.isel(vertex=[0, 1]).mean("vertex"), 29 | da.isel(vertex=[3, 2]).mean("vertex"), 30 | ] 31 | elif orientation == "y": 32 | datasets = [ 33 | da.isel(vertex=[0, 3]).mean("vertex"), 34 | da.isel(vertex=[1, 2]).mean("vertex"), 35 | ] 36 | 37 | return xr.concat(datasets, dim="bnds") 38 | 39 | 40 | def distance_deg(lon0, lat0, lon1, lat1): 41 | """Calculate the distance in degress longitude and latitude between two points 42 | 43 | Parameters 44 | ---------- 45 | lon0 : np.array 46 | Longitude of first point 47 | lat0 : np.array 48 | Latitude of first point 49 | lon1 : np.array 50 | Longitude of second point 51 | lat1 : np.array 52 | Latitude of second point 53 | """ 54 | delta_lon = lon1 - lon0 55 | delta_lat = lat1 - lat0 56 | # very small differences can end up negative, so zero them out based on a simple 57 | # criterion 58 | # this should work for CMIP6 (no 1/1 deg models) but should be based on actual grid 59 | # info in the future 60 | small_crit = 1 / 10 61 | delta_lon = np.where( 62 | abs(delta_lon) < small_crit, 0.0, delta_lon 63 | ) # , np.nan, delta_lon) 64 | delta_lat = np.where( 65 | abs(delta_lat) < small_crit, 0.0, delta_lat 66 | ) # , np.nan, delta_lat) 67 | 68 | # # some bounds are wrapped aroud the lon discontinuty. 
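    # Worked example of the wrap handled below (values chosen for illustration only):
    # lon0 = 359.5, lon1 = 0.5 gives delta_lon = -359.0, which is mapped to
    # 360 + (-359.0) = 1.0 degree; values larger than ~360 are wrapped back by the
    # second np.where.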
69 | delta_lon = np.where(delta_lon < (-small_crit * 2), 360 + delta_lon, delta_lon) # 70 | delta_lon = np.where( 71 | delta_lon > (360 + small_crit * 2), -360 + delta_lon, delta_lon 72 | ) 73 | 74 | return delta_lon, delta_lat 75 | 76 | 77 | def distance(lon0, lat0, lon1, lat1): 78 | """Calculate the distance in m between two points on a spherical globe 79 | 80 | Parameters 81 | ---------- 82 | lon0 : np.array 83 | Longitude of first point 84 | lat0 : np.array 85 | Latitude of first point 86 | lon1 : np.array 87 | Longitude of second point 88 | lat1 : np.array 89 | Latitude of second point 90 | """ 91 | Re = 6.378e6 92 | delta_lon, delta_lat = distance_deg(lon0, lat0, lon1, lat1) 93 | dy = Re * (np.pi * delta_lat / 180) 94 | dx = Re * (np.pi * delta_lon / 180) * np.cos(np.pi * lat0 / 180) 95 | return np.sqrt(dx**2 + dy**2) 96 | 97 | 98 | def recreate_metrics(ds, grid): 99 | """Recreate a full set of horizontal distance metrics. 100 | 101 | Calculates distances between points in lon/lat coordinates 102 | 103 | 104 | The naming of the metrics is as follows: 105 | [metric_axis]_t : metric centered at tracer point 106 | [metric_axis]_gx : metric at the cell face on the x-axis. 107 | For instance `dx_gx` is the x distance centered on the eastern cell face if the shift is `right` 108 | [metric_axis]_gy : As above but along the y-axis 109 | [metric_axis]_gxgy : The metric located at the corner point. 110 | For example `dy_dxdy` is the y distance on the south-west corner if both axes as shifted left. 111 | 112 | Parameters 113 | ---------- 114 | ds : xr.Dataset 115 | Input dataset. 116 | grid : xgcm.Grid 117 | xgcm Grid object matching `ds` 118 | 119 | Returns 120 | ------- 121 | xr.Dataset, dict 122 | Dataset with added metrics as coordinates and dictionary that can be passed to xgcm.Grid to recognize new metrics 123 | """ 124 | ds = ds.copy() 125 | 126 | # Since this puts out numpy arrays, the arrays need to be transposed correctly 127 | transpose_dims = ["y", "x"] 128 | dims = [di for di in ds.dims if di not in transpose_dims] 129 | 130 | ds = ds.transpose(*tuple(transpose_dims + dims)) 131 | 132 | # is the vel point on left or right? 
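    # For each horizontal axis this picks the single non-"center" position registered
    # with xgcm, e.g. an X axis with coords {"center": "x", "right": "x_right"}
    # yields axis_vel_pos == {"X": "right", ...} (coordinate names here are illustrative).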
133 | axis_vel_pos = { 134 | axis: list(set(grid.axes[axis].coords.keys()) - set(["center"]))[0] 135 | for axis in ["X", "Y"] 136 | } 137 | # determine the appropriate vertex position for the north/south and east/west edge, 138 | # based on the grid config 139 | if axis_vel_pos["Y"] in ["left"]: 140 | ns_vertex_idx = [0, 3] 141 | ns_bound_idx = [0] 142 | elif axis_vel_pos["Y"] in ["right"]: 143 | ns_vertex_idx = [1, 2] 144 | ns_bound_idx = [1] 145 | 146 | if axis_vel_pos["X"] in ["left"]: 147 | ew_vertex_idx = [0, 1] 148 | ew_bound_idx = [0] 149 | elif axis_vel_pos["X"] in ["right"]: 150 | ew_vertex_idx = [3, 2] 151 | ew_bound_idx = [1] 152 | 153 | # infer dx at tracer points 154 | if "lon_bounds" in ds.coords and "lat_verticies" in ds.coords: 155 | lon0, lon1 = _parse_bounds_vertex(ds["lon_bounds"]) 156 | lat0, lat1 = _parse_bounds_vertex( 157 | _interp_vertex_to_bounds(ds["lat_verticies"], "x") 158 | ) 159 | dist = distance(lon0, lat0, lon1, lat1) 160 | ds.coords["dx_t"] = xr.DataArray(dist, coords=ds.lon.coords) 161 | 162 | # infer dy at tracer points 163 | if "lat_bounds" in ds.coords and "lon_verticies" in ds.coords: 164 | lat0, lat1 = _parse_bounds_vertex(ds["lat_bounds"]) 165 | lon0, lon1 = _parse_bounds_vertex( 166 | _interp_vertex_to_bounds(ds["lon_verticies"], "y") 167 | ) 168 | dist = distance(lon0, lat0, lon1, lat1) 169 | ds.coords["dy_t"] = xr.DataArray(dist, coords=ds.lon.coords) 170 | 171 | if "lon_verticies" in ds.coords and "lat_verticies" in ds.coords: 172 | # infer dx at the north/south face 173 | lon0, lon1 = _parse_bounds_vertex( 174 | ds["lon_verticies"], dim="vertex", position=ns_vertex_idx 175 | ) 176 | lat0, lat1 = _parse_bounds_vertex( 177 | ds["lat_verticies"], dim="vertex", position=ns_vertex_idx 178 | ) 179 | dist = distance(lon0, lat0, lon1, lat1) 180 | ds.coords["dx_gy"] = xr.DataArray( 181 | dist, coords=grid.interp(ds.lon, "Y", boundary="extrapolate").coords 182 | ) 183 | 184 | # infer dy at the east/west face 185 | lon0, lon1 = _parse_bounds_vertex( 186 | ds["lon_verticies"], dim="vertex", position=ew_vertex_idx 187 | ) 188 | lat0, lat1 = _parse_bounds_vertex( 189 | ds["lat_verticies"], dim="vertex", position=ew_vertex_idx 190 | ) 191 | dist = distance(lon0, lat0, lon1, lat1) 192 | ds.coords["dy_gx"] = xr.DataArray( 193 | dist, coords=grid.interp(ds.lon, "X", boundary="extrapolate").coords 194 | ) 195 | 196 | # for the distances that dont line up with the cell boundaries we need some different logic 197 | boundary = "extend" 198 | # TODO: This should be removed once we have the default boundary merged in xgcm 199 | 200 | # infer dx at eastern/western bound from tracer points 201 | lon0, lon1 = grid.axes["X"]._get_neighbor_data_pairs( 202 | ds.lon.load(), axis_vel_pos["X"] 203 | ) 204 | lat0, lat1 = grid.axes["X"]._get_neighbor_data_pairs( 205 | ds.lat.load(), axis_vel_pos["X"] 206 | ) 207 | dx = distance(lon0, lat0, lon1, lat1) 208 | ds.coords["dx_gx"] = xr.DataArray( 209 | dx, coords=grid.interp(ds.lon, "X", boundary=boundary).coords 210 | ) 211 | 212 | # infer dy at northern bound from tracer points 213 | lat0, lat1 = grid.axes["Y"]._get_neighbor_data_pairs( 214 | ds.lat.load(), axis_vel_pos["Y"], boundary=boundary 215 | ) 216 | lon0, lon1 = grid.axes["Y"]._get_neighbor_data_pairs( 217 | ds.lon.load(), axis_vel_pos["Y"], boundary=boundary 218 | ) 219 | dy = distance(lon0, lat0, lon1, lat1) 220 | ds.coords["dy_gy"] = xr.DataArray( 221 | dy, coords=grid.interp(ds.lat, "Y", boundary=boundary).coords 222 | ) 223 | 224 | # infer dx at the corner point 225 | 
lon0, lon1 = grid.axes["X"]._get_neighbor_data_pairs( 226 | _interp_vertex_to_bounds(ds.lon_verticies.load(), "y") 227 | .isel(bnds=ns_bound_idx) 228 | .squeeze(), 229 | axis_vel_pos["X"], 230 | ) 231 | lat0, lat1 = grid.axes["X"]._get_neighbor_data_pairs( 232 | ds.lat_bounds.isel(bnds=ns_bound_idx).squeeze().load(), axis_vel_pos["X"] 233 | ) 234 | dx = distance(lon0, lat0, lon1, lat1) 235 | ds.coords["dx_gxgy"] = xr.DataArray( 236 | dx, 237 | coords=grid.interp( 238 | grid.interp(ds.lon, "X", boundary=boundary), "Y", boundary=boundary 239 | ).coords, 240 | ) 241 | 242 | # infer dy at the corner point 243 | lat0, lat1 = grid.axes["Y"]._get_neighbor_data_pairs( 244 | _interp_vertex_to_bounds(ds.lat_verticies.load(), "x") 245 | .isel(bnds=ew_bound_idx) 246 | .squeeze(), 247 | axis_vel_pos["Y"], 248 | ) 249 | lon0, lon1 = grid.axes["Y"]._get_neighbor_data_pairs( 250 | ds.lon_bounds.isel(bnds=ew_bound_idx).squeeze().load(), axis_vel_pos["Y"] 251 | ) 252 | dy = distance(lon0, lat0, lon1, lat1) 253 | ds.coords["dy_gxgy"] = xr.DataArray( 254 | dy, 255 | coords=grid.interp( 256 | grid.interp(ds.lon, "X", boundary=boundary), "Y", boundary=boundary 257 | ).coords, 258 | ) 259 | 260 | # infer dz at tracer point 261 | if "lev_bounds" in ds.coords: 262 | ds = ds.assign_coords( 263 | dz_t=("lev", ds["lev_bounds"].diff("bnds").squeeze(drop=True).data) 264 | ) 265 | 266 | metrics_dict = { 267 | "X": [co for co in ["dx_t", "dx_gy", "dx_gx"] if co in ds.coords], 268 | "Y": [co for co in ["dy_t", "dy_gy", "dy_gx"] if co in ds.coords], 269 | "Z": [co for co in ["dz_t"] if co in ds.coords], 270 | } 271 | # # only put out axes that have entries 272 | metrics_dict = {k: v for k, v in metrics_dict.items() if len(v) > 0} 273 | 274 | return ds, metrics_dict 275 | 276 | 277 | def detect_shift(ds_base, ds, axis): 278 | """Detects the shift of `ds` relative to `ds` on logical grid axes, using 279 | lon and lat positions. 280 | 281 | Parameters 282 | ---------- 283 | ds_base : xr.Dataset 284 | Reference ('base') dataset to compare to. Assumed that this is located at the 'center' coordinate. 285 | ds : xr.Dataset 286 | Comparison dataset. The resulting shift will be computed as this dataset relative to `ds_base` 287 | axis : str 288 | xgcm logical axis on which to detect the shift 289 | 290 | Returns 291 | ------- 292 | str 293 | Shift string output, in xgcm conventions. 294 | """ 295 | ds_base = ds_base.copy() 296 | ds = ds.copy() 297 | axis = axis.lower() 298 | axis_coords = {"x": "lon", "y": "lat"} 299 | 300 | # check the shift only for one point, somewhat in the center to avoid the 301 | # distorted polar regions 302 | check_point = {"x": len(ds_base.x) // 2, "y": len(ds_base.y) // 2} 303 | check_point_diff = {k: [v, v + 1] for k, v in check_point.items()} 304 | 305 | shift = ( 306 | ds.isel(**check_point)[axis_coords[axis]].load().data 307 | - ds_base.isel(**check_point)[axis_coords[axis]].load().data 308 | ) 309 | diff = ds[axis].isel({axis: check_point_diff[axis]}).diff(axis).data.tolist()[0] 310 | threshold = 0.1 311 | # the fraction of full cell distance, that a point has to be shifted in order to 312 | # be recognized. 313 | # This avoids detection of shifts for very small differences that sometimes happen 314 | # if the coordinates were written e.g. 
by different modulel of a model 315 | 316 | axis_shift = "center" 317 | 318 | if shift > (diff * threshold): 319 | axis_shift = "right" 320 | elif shift < -(diff * threshold): 321 | axis_shift = "left" 322 | return axis_shift 323 | 324 | 325 | def create_full_grid(base_ds, grid_dict=None): 326 | """Generate a full xgcm-compatible dataset from a reference datasets `base_ds`. 327 | This dataset should be representing a tracer fields, e.g. the cell center. 328 | 329 | Parameters 330 | ---------- 331 | base_ds : xr.Dataset 332 | The reference ('base') datasets, assumed to be at the tracer position/cell center 333 | grid_dict : dict, optional 334 | Dictionary with info about the grid staggering. 335 | Must be encoded using the base_ds attrs (e.g. {'model_name':{'axis_shift':{'X':'left',...}}}). 336 | If deactivated (default), will load from the internal database for CMIP6 models, by default None 337 | 338 | Returns 339 | ------- 340 | xr.Dataset 341 | xgcm compatible dataset 342 | """ 343 | 344 | # load dict with grid shift info for each axis 345 | if grid_dict is None: 346 | ff = open(grid_spec, "r") 347 | grid_dict = yaml.safe_load(ff) 348 | ff.close() 349 | 350 | source_id = base_ds.attrs["source_id"] 351 | grid_label = base_ds.attrs["grid_label"] 352 | 353 | # if source_id not in dict, and grid label is gn, warn and ask to submit an issue 354 | try: 355 | axis_shift = grid_dict[source_id][grid_label]["axis_shift"] 356 | except KeyError: 357 | warnings.warn( 358 | f"Could not find the source_id/grid_label ({source_id}/{grid_label}) combo in `grid_dict`, returning `None`. Please submit an issue to github: https://github.com/jbusecke/xmip/issues" 359 | ) 360 | return None 361 | 362 | position = {k: ("center", axis_shift[k]) for k in axis_shift.keys()} 363 | 364 | axis_dict = {"X": "x", "Y": "y"} 365 | 366 | ds_grid = generate_grid_ds( 367 | base_ds, axis_dict, position=position, boundary_discontinuity={"X": 360} 368 | ) 369 | 370 | # TODO: man parse lev and lev_bounds as center and outer dims. 371 | # I should also be able to do this with `generate_grid_ds`, but here we 372 | # have the `lev_bounds` with most models, so that is probably more reliable. 373 | # cheapest solution right now 374 | if "lev" in ds_grid.dims: 375 | ds_grid["lev"].attrs["axis"] = "Z" 376 | 377 | return ds_grid 378 | 379 | 380 | def combine_staggered_grid( 381 | ds_base, other_ds=None, recalculate_metrics=False, grid_dict=None, **kwargs 382 | ): 383 | """Combine a reference datasets with a list of other datasets to a full xgcm-compatible staggered grid datasets. 384 | 385 | 386 | Parameters 387 | ---------- 388 | ds_base : xr.Dataset 389 | The reference ('base') datasets, assumed to be at the tracer position/cell center 390 | other_ds : list,xr.Dataset, optional 391 | List of datasets representing different variables. Their grid position will be 392 | automatically detected relative to `ds_base`. Coordinates and attrs of these added datasets will be lost 393 | , by default None 394 | recalculate_metrics : bool, optional 395 | nables the reconstruction of grid metrics usign simple 396 | spherical geometry, by default False 397 | 398 | !!! Check your results carefully when using reconstructed values, 399 | these might differe substantially if the grid geometry is complicated. 400 | grid_dict : dict, optional 401 | Dictionary for staggered grid setup. 
See `create_full_grid` for detauls 402 | If None (default), will load staggered grid info from internal database, by default None 403 | 404 | Returns 405 | ------- 406 | xr.Dataset 407 | Single xgcm-compatible dataset, containing all variables on their respective staggered grid position. 408 | """ 409 | ds_base = ds_base.copy() 410 | if isinstance(other_ds, xr.Dataset): 411 | other_ds = [other_ds] 412 | 413 | ds_g = create_full_grid(ds_base, grid_dict=grid_dict) 414 | 415 | if ds_g is None: 416 | warnings.warn("Staggered Grid creation failed. Returning `None`") 417 | return None, None 418 | 419 | # save attrs out for later (something during alignment destroys them) 420 | dim_attrs_dict = {} 421 | for di in ds_g.dims: 422 | dim_attrs_dict[di] = ds_g[di].attrs 423 | 424 | # TODO: metrics and interpolation of metrics if they are parsed 425 | 426 | # parse other variables 427 | if other_ds is not None: 428 | for ds_new in other_ds: 429 | ds_new = ds_new.copy() 430 | # strip everything but the variable_id (perhaps I would want to 431 | # loosen this in the future) 432 | ds_new = ds_new[ds_new.attrs["variable_id"]] 433 | 434 | if not all( 435 | [ 436 | len(ds_new[di]) == len(ds_g[di]) 437 | for di in ds_new.dims 438 | if di not in ["member_id", "time"] 439 | ] 440 | ): 441 | warnings.warn( 442 | f"Could not parse `{ds_new.name}`, due to a size mismatch. If this is the MRI model, the grid convention is currently not supported." 443 | ) 444 | else: 445 | # detect shift and rename accordingly 446 | rename_dict = {} 447 | for axis in ["X", "Y"]: 448 | shift = detect_shift(ds_base, ds_new, axis) 449 | 450 | if shift != "center": 451 | rename_dict[axis.lower()] = axis.lower() + "_" + shift 452 | ds_new = ds_new.rename(rename_dict) 453 | ds_new = ds_new.reset_coords(drop=True) 454 | # TODO: This needs to be coded more generally, for now hardcode x and y 455 | force_align_dims = [di for di in ds_new.dims if "x" in di or "y" in di] 456 | _, ds_new = xr.align( 457 | ds_g.copy(), 458 | ds_new, 459 | join="override", 460 | exclude=[di for di in ds_new.dims if di not in force_align_dims], 461 | ) 462 | additional_dims = [di for di in ds_new.dims if di not in ds_g.dims] 463 | if len(additional_dims) > 0: 464 | raise RuntimeError( 465 | f"While trying to parse `{ds_new.name}`, detected dims that are not in the base dataset:[{additional_dims}]" 466 | ) 467 | ds_g[ds_new.name] = ds_new 468 | 469 | # Restore dims attrs from the beginning 470 | for di in ds_g.dims: 471 | ds_g.coords[di].attrs.update(dim_attrs_dict[di]) 472 | 473 | grid_kwargs = {"periodic": ["X"]} 474 | grid_kwargs.update(kwargs) 475 | grid = Grid(ds_g, grid_kwargs) 476 | 477 | # if activated calculate metrics 478 | if recalculate_metrics: 479 | grid_kwargs.pop( 480 | "metrics", None 481 | ) # remove any passed metrics when recalculating them 482 | # I might be able to refine this more to e.g. allow axes that are not recreated. 
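        # recreate_metrics attaches the reconstructed distance coordinates (dx_t,
        # dy_gx, etc.) to the dataset and returns a dict like
        # {"X": [...], "Y": [...], "Z": [...]} that is passed back to xgcm.Grid as
        # `metrics`, so grid operations that need cell spacings can use the
        # recalculated values.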
483 | 484 | ds_g, metrics_dict = recreate_metrics(ds_g, grid) 485 | # this might fail in circumstances, where the 486 | grid_kwargs["metrics"] = metrics_dict 487 | grid = Grid(ds_g, **grid_kwargs) 488 | return grid, ds_g 489 | -------------------------------------------------------------------------------- /xmip/preprocessing.py: -------------------------------------------------------------------------------- 1 | # Preprocessing for CMIP6 models 2 | import warnings 3 | 4 | import cf_xarray.units # noqa: F401 5 | import numpy as np 6 | import pint # noqa: F401 7 | import pint_xarray # noqa: F401 8 | import xarray as xr 9 | 10 | from xmip.utils import cmip6_dataset_id 11 | 12 | 13 | # global object for units 14 | _desired_units = {"lev": "m"} 15 | _unit_overrides = {name: None for name in ["so"]} 16 | 17 | 18 | _drop_coords = ["bnds", "vertex"] 19 | 20 | 21 | def cmip6_renaming_dict(): 22 | """a universal renaming dict. Keys correspond to source id (model name) 23 | and valuse are a dict of target name (key) and a list of variables that 24 | should be renamed into the target.""" 25 | rename_dict = { 26 | # dim labels (order represents the priority when checking for the dim labels) 27 | "x": ["i", "ni", "xh", "nlon"], 28 | "y": ["j", "nj", "yh", "nlat"], 29 | "lev": ["deptht", "olevel", "zlev", "olev", "depth"], 30 | "bnds": ["bnds", "axis_nbounds", "d2"], 31 | "vertex": ["vertex", "nvertex", "vertices", "nvertices"], 32 | # coordinate labels 33 | "lon": ["longitude", "nav_lon"], 34 | "lat": ["latitude", "nav_lat"], 35 | "lev_bounds": [ 36 | "deptht_bounds", 37 | "lev_bnds", 38 | "olevel_bounds", 39 | "zlev_bnds", 40 | ], 41 | "lon_bounds": [ 42 | "bounds_lon", 43 | "bounds_nav_lon", 44 | "lon_bnds", 45 | "x_bnds", 46 | "vertices_longitude", 47 | "longitude_bnds", 48 | ], 49 | "lat_bounds": [ 50 | "bounds_lat", 51 | "bounds_nav_lat", 52 | "lat_bnds", 53 | "y_bnds", 54 | "vertices_latitude", 55 | "latitude_bnds", 56 | ], 57 | "time_bounds": ["time_bnds"], 58 | } 59 | return rename_dict 60 | 61 | 62 | def rename_cmip6(ds, rename_dict=None): 63 | """Homogenizes cmip6 dataasets to common naming""" 64 | attrs = {k: v for k, v in ds.attrs.items()} 65 | ds_id = cmip6_dataset_id(ds) 66 | 67 | if rename_dict is None: 68 | rename_dict = cmip6_renaming_dict() 69 | 70 | # TODO: Be even stricter here and reset every variable except the one given in the attr 71 | # as variable_id 72 | # ds_reset = ds.reset_coords() 73 | 74 | def _maybe_rename_dims(da, rdict): 75 | for di in da.dims: 76 | for target, candidates in rdict.items(): 77 | if di in candidates: 78 | da = da.swap_dims({di: target}) 79 | if di in da.coords: 80 | if not di == target: 81 | da = da.rename({di: target}).set_xindex(target) 82 | return da 83 | 84 | # first take care of the dims and reconstruct a clean ds 85 | ds = xr.Dataset( 86 | { 87 | k: _maybe_rename_dims(ds[k], rename_dict) 88 | for k in list(ds.data_vars) + list(set(ds.coords) - set(ds.dims)) 89 | } 90 | ) 91 | 92 | rename_vars = list(set(ds.variables) - set(ds.dims)) 93 | 94 | for target, candidates in rename_dict.items(): 95 | if target not in ds: 96 | matching_candidates = [ca for ca in candidates if ca in rename_vars] 97 | if len(matching_candidates) > 0: 98 | if len(matching_candidates) > 1: 99 | warnings.warn( 100 | f"{ds_id}:While renaming to target `{target}`, more than one candidate was found {matching_candidates}. Renaming {matching_candidates[0]} to {target}. Please double check results." 
101 | ) 102 | ds = ds.rename({matching_candidates[0]: target}) 103 | 104 | # special treatment for 'lon'/'lat' if there is no 'x'/'y' after renaming process 105 | for di, co in [("x", "lon"), ("y", "lat")]: 106 | if di not in ds.dims and co in ds.dims: 107 | ds = ds.rename({co: di}) 108 | 109 | # restore attributes 110 | ds.attrs = attrs 111 | return ds 112 | 113 | 114 | def promote_empty_dims(ds): 115 | """Convert empty dimensions to actual coordinates""" 116 | ds = ds.copy() 117 | for di in ds.dims: 118 | if di not in ds.coords: 119 | ds = ds.assign_coords({di: ds[di]}) 120 | return ds 121 | 122 | 123 | # some of the models do not have 2d lon lats, correct that. 124 | def broadcast_lonlat(ds, verbose=True): 125 | """Some models (all `gr` grid_labels) have 1D lon lat arrays 126 | This functions broadcasts those so lon/lat are always 2d arrays.""" 127 | if "lon" not in ds.variables: 128 | ds.coords["lon"] = ds["x"] 129 | if "lat" not in ds.variables: 130 | ds.coords["lat"] = ds["y"] 131 | 132 | if len(ds["lon"].dims) < 2: 133 | ds.coords["lon"] = ds["lon"] * xr.ones_like(ds["lat"]) 134 | if len(ds["lat"].dims) < 2: 135 | ds.coords["lat"] = xr.ones_like(ds["lon"]) * ds["lat"] 136 | 137 | return ds 138 | 139 | 140 | def _interp_nominal_lon(lon_1d: np.ndarray) -> np.ndarray: 141 | x = np.arange(len(lon_1d)) 142 | idx = np.isnan(lon_1d) 143 | # Assume that longitudes are cyclic (i.e. that the period equals the length of lon) 144 | return np.interp(x, x[~idx], lon_1d[~idx], period=len(lon_1d)) 145 | 146 | 147 | def replace_x_y_nominal_lat_lon(ds): 148 | """Approximate the dimensional values of x and y with mean lat and lon at the equator""" 149 | ds = ds.copy() 150 | 151 | def maybe_fix_non_unique(data, pad=False): 152 | """remove duplicate values by linear interpolation 153 | if values are non-unique. `pad` if the last two points are the same 154 | pad with -90 or 90. This is only applicable to lat values""" 155 | if len(data) == len(np.unique(data)): 156 | return data 157 | else: 158 | # pad each end with the other end. 159 | if pad: 160 | if len(np.unique([data[0:2]])) < 2: 161 | data[0] = -90 162 | if len(np.unique([data[-2:]])) < 2: 163 | data[-1] = 90 164 | 165 | ii_range = np.arange(len(data)) 166 | _, indicies = np.unique(data, return_index=True) 167 | double_idx = np.array([ii not in indicies for ii in ii_range]) 168 | # print(f"non-unique values found at:{ii_range[double_idx]})") 169 | data[double_idx] = np.interp( 170 | ii_range[double_idx], ii_range[~double_idx], data[~double_idx] 171 | ) 172 | return data 173 | 174 | if "x" in ds.dims and "y" in ds.dims: 175 | # define 'nominal' longitude/latitude values 176 | # latitude is defined as the max value of `lat` in the zonal direction 177 | # longitude is taken from the `middle` of the meridonal direction, to 178 | # get values close to the equator 179 | 180 | # pick the nominal lon/lat values from the eastern 181 | # and southern edge, and 182 | eq_idx = len(ds.y) // 2 183 | 184 | nominal_x = ds.isel(y=eq_idx).lon.load() 185 | nominal_y = ds.lat.max("x").load() 186 | 187 | # interpolate nans 188 | # Special treatment for gaps in longitude 189 | nominal_x = _interp_nominal_lon(nominal_x.data) 190 | nominal_y = nominal_y.interpolate_na("y").data 191 | 192 | # eliminate non unique values 193 | # these occour e.g. 
in "MPI-ESM1-2-HR" 194 | nominal_y = maybe_fix_non_unique(nominal_y) 195 | nominal_x = maybe_fix_non_unique(nominal_x) 196 | 197 | ds = ds.assign_coords(x=nominal_x, y=nominal_y) 198 | ds = ds.sortby("x") 199 | ds = ds.sortby("y") 200 | 201 | # do one more interpolation for the x values, in case the boundary values were 202 | # affected 203 | ds = ds.assign_coords( 204 | x=maybe_fix_non_unique(ds.x.load().data), 205 | y=maybe_fix_non_unique(ds.y.load().data, pad=True), 206 | ) 207 | 208 | else: 209 | warnings.warn( 210 | "No x and y found in dimensions for source_id:%s. This likely means that you forgot to rename the dataset or this is the German unstructured model" 211 | % ds.attrs["source_id"] 212 | ) 213 | return ds 214 | 215 | 216 | def correct_units(ds): 217 | "Converts coordinates into SI units using pint-xarray" 218 | # codify units with pint 219 | # Perhaps this should be kept separately from the fixing? 220 | # See https://github.com/jbusecke/xmip/pull/160#discussion_r667041858 221 | try: 222 | # exclude salinity from the quantification (see https://github.com/jbusecke/xmip/pull/160#issuecomment-878627027 for details) 223 | quantified = ds.pint.quantify(_unit_overrides) 224 | target_units = { 225 | var: target_unit 226 | for var, target_unit in _desired_units.items() 227 | if var in quantified 228 | } 229 | 230 | converted = quantified.pint.to(target_units) 231 | ds = converted.pint.dequantify(format="~P") 232 | except ValueError as e: 233 | warnings.warn( 234 | f"{cmip6_dataset_id(ds)}: Unit correction failed with: {e}", UserWarning 235 | ) 236 | return ds 237 | 238 | 239 | def correct_coordinates(ds, verbose=False): 240 | """converts wrongly assigned data_vars to coordinates""" 241 | ds = ds.copy() 242 | for co in [ 243 | "x", 244 | "y", 245 | "lon", 246 | "lat", 247 | "lev", 248 | "bnds", 249 | "lev_bounds", 250 | "lon_bounds", 251 | "lat_bounds", 252 | "time_bounds", 253 | "lat_verticies", 254 | "lon_verticies", 255 | ]: 256 | if co in ds.variables: 257 | if verbose: 258 | print("setting %s as coord" % (co)) 259 | ds = ds.set_coords(co) 260 | return ds 261 | 262 | 263 | def correct_lon(ds): 264 | """Wraps negative x and lon values around to have 0-360 lons. 265 | longitude names expected to be corrected with `rename_cmip6`""" 266 | ds = ds.copy() 267 | 268 | # remove out of bounds values found in some 269 | # models as missing values 270 | ds["lon"] = ds["lon"].where(abs(ds["lon"]) <= 1000) 271 | ds["lat"] = ds["lat"].where(abs(ds["lat"]) <= 1000) 272 | 273 | # adjust lon convention 274 | lon = ds["lon"].where(ds["lon"] > 0, 360 + ds["lon"]) 275 | ds = ds.assign_coords(lon=lon) 276 | 277 | if "lon_bounds" in ds.variables: 278 | lon_b = ds["lon_bounds"].where(ds["lon_bounds"] > 0, 360 + ds["lon_bounds"]) 279 | ds = ds.assign_coords(lon_bounds=lon_b) 280 | 281 | return ds 282 | 283 | 284 | def parse_lon_lat_bounds(ds): 285 | """both `regular` 2d bounds and vertex bounds are parsed as `*_bounds`. 286 | This function renames them to `*_verticies` if the vertex dimension is found. 287 | Also removes time dimension from static bounds as found in e.g. `SAM0-UNICON` model. 
288 | """ 289 | if "source_id" in ds.attrs.keys(): 290 | if ds.attrs["source_id"] == "FGOALS-f3-L": 291 | warnings.warn("`FGOALS-f3-L` does not provide lon or lat bounds.") 292 | 293 | ds = ds.copy() 294 | 295 | if "lat_bounds" in ds.variables: 296 | if "x" not in ds.lat_bounds.dims: 297 | ds.coords["lat_bounds"] = ds.coords["lat_bounds"] * xr.ones_like(ds.x) 298 | 299 | if "lon_bounds" in ds.variables: 300 | if "y" not in ds.lon_bounds.dims: 301 | ds.coords["lon_bounds"] = ds.coords["lon_bounds"] * xr.ones_like(ds.y) 302 | 303 | # I am assuming that all bound fields with time were broadcasted in error (except time bounds obviously), 304 | # and will drop the time dimension. 305 | error_dims = ["time"] 306 | for ed in error_dims: 307 | for co in ["lon_bounds", "lat_bounds", "lev_bounds"]: 308 | if co in ds.variables: 309 | if ed in ds[co].dims: 310 | warnings.warn( 311 | f"Found {ed} as dimension in `{co}`. Assuming this is an error and just picking the first step along that dimension." 312 | ) 313 | stripped_coord = ds[co].isel({ed: 0}).squeeze() 314 | # make sure that dimension is actually dropped 315 | if ed in stripped_coord.coords: 316 | stripped_coord = stripped_coord.drop(ed) 317 | 318 | ds = ds.assign_coords({co: stripped_coord}) 319 | 320 | # Finally rename the bounds that are given in vertex convention 321 | for va in ["lon", "lat"]: 322 | va_name = va + "_bounds" 323 | if va_name in ds.variables and "vertex" in ds[va_name].dims: 324 | ds = ds.rename({va_name: va + "_verticies"}) 325 | 326 | return ds 327 | 328 | 329 | def maybe_convert_bounds_to_vertex(ds): 330 | """Converts renamed lon and lat bounds into verticies, by copying 331 | the values into the corners. Assumes a rectangular cell.""" 332 | ds = ds.copy() 333 | if "bnds" in ds.dims: 334 | if "lon_bounds" in ds.variables and "lat_bounds" in ds.variables: 335 | if ( 336 | "lon_verticies" not in ds.variables 337 | and "lat_verticies" not in ds.variables 338 | ): 339 | lon_b = xr.ones_like(ds.lat) * ds.coords["lon_bounds"] 340 | lat_b = xr.ones_like(ds.lon) * ds.coords["lat_bounds"] 341 | 342 | lon_bb = xr.concat( 343 | [lon_b.isel(bnds=ii).squeeze(drop=True) for ii in [0, 0, 1, 1]], 344 | dim="vertex", 345 | ) 346 | lon_bb = lon_bb.reset_coords(drop=True) 347 | 348 | lat_bb = xr.concat( 349 | [lat_b.isel(bnds=ii).squeeze(drop=True) for ii in [0, 1, 1, 0]], 350 | dim="vertex", 351 | ) 352 | lat_bb = lat_bb.reset_coords(drop=True) 353 | 354 | ds = ds.assign_coords(lon_verticies=lon_bb, lat_verticies=lat_bb) 355 | 356 | return ds 357 | 358 | 359 | def maybe_convert_vertex_to_bounds(ds): 360 | """Converts lon and lat verticies to bounds by averaging corner points 361 | on the appropriate cell face center.""" 362 | 363 | ds = ds.copy() 364 | if "vertex" in ds.dims: 365 | if "lon_verticies" in ds.variables and "lat_verticies" in ds.variables: 366 | if "lon_bounds" not in ds.variables and "lat_bounds" not in ds.variables: 367 | lon_b = xr.concat( 368 | [ 369 | ds["lon_verticies"].isel(vertex=[0, 1]).mean("vertex"), 370 | ds["lon_verticies"].isel(vertex=[2, 3]).mean("vertex"), 371 | ], 372 | dim="bnds", 373 | ) 374 | lat_b = xr.concat( 375 | [ 376 | ds["lat_verticies"].isel(vertex=[0, 3]).mean("vertex"), 377 | ds["lat_verticies"].isel(vertex=[1, 2]).mean("vertex"), 378 | ], 379 | dim="bnds", 380 | ) 381 | 382 | ds = ds.assign_coords(lon_bounds=lon_b, lat_bounds=lat_b) 383 | ds = promote_empty_dims(ds) 384 | return ds 385 | 386 | 387 | def sort_vertex_order(ds): 388 | """sorts the vertex dimension in a coherent order: 389 | 0: lower 
left 390 | 1: upper left 391 | 2: upper right 392 | 3: lower right 393 | """ 394 | ds = ds.copy() 395 | if ( 396 | "vertex" in ds.dims 397 | and "lon_verticies" in ds.variables 398 | and "lat_verticies" in ds.variables 399 | ): 400 | # pick a vertex in the middle of the domain, to avoid the pole areas 401 | x_idx = len(ds.x) // 2 402 | y_idx = len(ds.y) // 2 403 | 404 | lon_b = ds.lon_verticies.isel(x=x_idx, y=y_idx).load().data 405 | lat_b = ds.lat_verticies.isel(x=x_idx, y=y_idx).load().data 406 | vert = ds.vertex.load().data 407 | 408 | points = np.vstack((lon_b, lat_b, vert)).T 409 | 410 | # split into left and right 411 | lon_sorted = points[np.argsort(points[:, 0]), :] 412 | right = lon_sorted[:2, :] 413 | left = lon_sorted[2:, :] 414 | # sort again on each side to get top and bottom 415 | bl, tl = left[np.argsort(left[:, 1]), :] 416 | br, tr = right[np.argsort(right[:, 1]), :] 417 | 418 | points_sorted = np.vstack((bl, tl, tr, br)) 419 | 420 | idx_sorted = (points_sorted.shape[0] - 1) - np.argsort(points_sorted[:, 2]) 421 | ds = ds.assign_coords(vertex=idx_sorted) 422 | ds = ds.sortby("vertex") 423 | 424 | return ds 425 | 426 | 427 | # TODO: Implement this in a sleeker way with daops 428 | def fix_metadata(ds): 429 | """ 430 | Fix known issues (from errata) with the metadata. 431 | """ 432 | 433 | # https://errata.es-doc.org/static/view.html?uid=2f6b5963-f87e-b2df-a5b0-2f12b6b68d32 434 | if ds.attrs["source_id"] == "GFDL-CM4" and ds.attrs["experiment_id"] in [ 435 | "1pctCO2", 436 | "abrupt-4xCO2", 437 | "historical", 438 | ]: 439 | ds.attrs["branch_time_in_parent"] = 91250 440 | # https://errata.es-doc.org/static/view.html?uid=61fb170e-91bb-4c64-8f1d-6f5e342ee421 441 | if ds.attrs["source_id"] == "GFDL-CM4" and ds.attrs["experiment_id"] in [ 442 | "ssp245", 443 | "ssp585", 444 | ]: 445 | ds.attrs["branch_time_in_child"] = 60225 446 | return ds 447 | 448 | 449 | def combined_preprocessing(ds): 450 | # fix naming 451 | ds = rename_cmip6(ds) 452 | # promote empty dims to actual coordinates 453 | ds = promote_empty_dims(ds) 454 | # demote coordinates from data_variables 455 | ds = correct_coordinates(ds) 456 | # broadcast lon/lat 457 | ds = broadcast_lonlat(ds) 458 | # shift all lons to consistent 0-360 459 | ds = correct_lon(ds) 460 | # fix the units 461 | ds = correct_units(ds) 462 | # rename the `bounds` according to their style (bound or vertex) 463 | ds = parse_lon_lat_bounds(ds) 464 | # sort verticies in a consistent manner 465 | ds = sort_vertex_order(ds) 466 | # convert vertex into bounds and vice versa, so both are available 467 | ds = maybe_convert_bounds_to_vertex(ds) 468 | ds = maybe_convert_vertex_to_bounds(ds) 469 | ds = fix_metadata(ds) 470 | ds = ds.drop_vars(_drop_coords, errors="ignore") 471 | return ds 472 | -------------------------------------------------------------------------------- /xmip/regionmask.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import xarray as xr 3 | 4 | 5 | def _default_merge_dict(): 6 | return { 7 | "North Atlantic Ocean": [ 8 | "Caribbean Sea", 9 | "Gulf of Mexico", 10 | "Labrador Sea", 11 | "Hudson Bay", 12 | "Baffin Bay", 13 | "Norwegian Sea", 14 | "Greenland Sea", 15 | "Bay of Biscay", 16 | "Norwegian Sea", 17 | "Greenland Sea", 18 | "Gulf of Guinea", 19 | "Irish Sea", 20 | "North Sea", 21 | "Bahía de Campeche", 22 | "Davis Strait", 23 | "Sargasso Sea", 24 | "Hudson Strait", 25 | "English Channel", 26 | "Gulf of Honduras", 27 | "Bristol Channel", 28 | "Inner Seas", 29 | 
"Straits of Florida", 30 | "Gulf of Saint Lawrence", 31 | "Bay of Fundy", 32 | "Melville Bay", 33 | "Gulf of Maine", 34 | "Chesapeake Bay", 35 | "Amazon River", 36 | "James Bay", 37 | "Ungava Bay", 38 | ], 39 | "South Atlantic Ocean": ["Río de la Plata", "Golfo San Jorge"], 40 | "North Pacific Ocean": [ 41 | "Philippine Sea", 42 | "Gulf of Alaska", 43 | "Sea of Okhotsk", 44 | "East China Sea", 45 | "Yellow Sea", 46 | "Bering Sea", 47 | "Golfo de California", 48 | "Korea Strait", 49 | "Cook Inlet", 50 | "Bristol Bay", 51 | "Shelikhova Gulf", 52 | "Bo Hai", 53 | "Golfo de Panamá", 54 | "Yangtze River", 55 | "Columbia River", 56 | "Sea of Japan", 57 | "Inner Sea", 58 | ], 59 | "South Pacific Ocean": [ 60 | "Coral Sea", 61 | "Tasman Sea", 62 | "Bay of Plenty", 63 | "Bismarck Sea", 64 | "Solomon Sea", 65 | "Great Barrier Reef", 66 | ], 67 | "Maritime Continent": [ 68 | "Celebes Sea", 69 | "Sulu Sea", 70 | "Banda Sea", 71 | "Luzon Strait", 72 | "Java Sea", 73 | "Arafura Sea", 74 | "Timor Sea", 75 | "Gulf of Thailand", 76 | "Gulf of Carpentaria", 77 | "Molucca Sea", 78 | "Gulf of Tonkin", 79 | "Strait of Malacca", 80 | "Strait of Singapore", 81 | "Makassar Strait", 82 | "Ceram Sea", 83 | "Taiwan Strait", 84 | "South China Sea", 85 | ], 86 | "INDIAN OCEAN": [ 87 | "Mozambique Channel", 88 | "Bay of Bengal", 89 | "Arabian Sea", 90 | "Persian Gulf", 91 | "Andaman Sea", 92 | "Laccadive Sea", 93 | "Gulf of Aden", 94 | "Gulf of Oman", 95 | "Gulf of Mannar", 96 | "Gulf of Kutch", 97 | "Great Australian Bight", 98 | ], 99 | "Arctic Ocean": [ 100 | "Beaufort Sea", 101 | "Chukchi Sea", 102 | "Barents Sea", 103 | "Kara Sea", 104 | "Laptev Sea", 105 | "White Sea", 106 | "The North Western Passages", 107 | "Amundsen Gulf", 108 | "Viscount Melville Sound", 109 | ], 110 | "SOUTHERN OCEAN": [ 111 | "Ross Sea Eastern Basin", 112 | "Ross Sea Western Basin", 113 | "Weddell Sea", 114 | "Bellingshausen Sea", 115 | "Amundsen Sea", 116 | "Scotia Sea", 117 | "Drake Passage", 118 | ], 119 | "Black Sea": None, 120 | "Mediterranean Sea": [ 121 | "Mediterranean Sea Eastern Basin", 122 | "Mediterranean Sea Western Basin", 123 | "Tyrrhenian Sea", 124 | "Adriatic Sea", 125 | "Golfe du Lion", 126 | "Ionian Sea", 127 | "Strait of Gibraltar", 128 | "Balearic Sea", 129 | "Aegean Sea", 130 | ], 131 | "Red Sea": None, 132 | "Caspian Sea": None, 133 | "Baltic Sea": ["Gulf of Bothnia", "Gulf of Finland"], 134 | } 135 | 136 | 137 | def merged_mask( 138 | basins, ds, lon_name="lon", lat_name="lat", merge_dict=None, verbose=False 139 | ): 140 | """Combine geographical basins (from regionmask) to larger ocean basins. 141 | 142 | Parameters 143 | ---------- 144 | basins : regionmask.core.regions.Regions object 145 | Loaded basin data from regionmask, e.g. `import regionmask;basins = regionmask.defined_regions.natural_earth.ocean_basins_50` 146 | ds : xr.Dataset 147 | Input dataset on which to construct the mask 148 | lon_name : str, optional 149 | Name of the longitude coordinate in `ds`, defaults to `lon` 150 | lat_name : str, optional 151 | Name of the latitude coordinate in `ds`, defaults to `lat` 152 | merge_dict : dict, optional 153 | dictionary defining new aggregated regions (as keys) and the regions to be merge into that region as as values (list of names). 154 | Defaults to large scale ocean basins defined by `xmip.regionmask.default_merge_dict` 155 | verbose : bool, optional 156 | Prints more output, e.g. the regions in `basins` that were not used in the merging step. Defaults to False. 
157 | 158 | Returns 159 | ------- 160 | mask : xr.DataArray 161 | The mask contains ascending numeric value for each key ( merged region) in `merge_dict`. 162 | When the default is used the numeric values correspond to the following regions: 163 | * 0: North Atlantic 164 | 165 | * 1: South Atlantic 166 | 167 | * 2: North Pacific 168 | 169 | * 3: South Pacific 170 | 171 | * 4: Maritime Continent 172 | 173 | * 5: Indian Ocean 174 | 175 | * 6: Arctic Ocean 176 | 177 | * 7: Southern Ocean 178 | 179 | * 8: Black Sea 180 | 181 | * 9: Mediterranean Sea 182 | 183 | *10: Red Sea 184 | 185 | *11: Caspian Sea 186 | 187 | """ 188 | mask = basins.mask(ds, lon_name=lon_name, lat_name=lat_name) 189 | 190 | if merge_dict is None: 191 | merge_dict = _default_merge_dict() 192 | 193 | dict_keys = list(merge_dict.keys()) 194 | number_dict = {k: None for k in dict_keys} 195 | merged_basins = [] 196 | for ocean, small_basins in merge_dict.items(): 197 | try: 198 | ocean_idx = basins.map_keys(ocean) 199 | except KeyError: 200 | # The ocean key is new and cant be found in the previous keys (e.g. for Atlantic full or maritime continent) 201 | ocean_idx = mask.max().data + 1 202 | number_dict[ocean] = ocean_idx 203 | if small_basins: 204 | for sb in small_basins: 205 | sb_idx = basins.map_keys(sb) 206 | # set the index of each small basin to the ocean value 207 | mask = mask.where(mask != sb_idx, ocean_idx) 208 | merged_basins.append(sb) 209 | 210 | if verbose: 211 | remaining_basins = [ 212 | str(basins.regions[ri].name) 213 | for ri in range(len(basins.regions)) 214 | if (basins.regions[ri].name not in merged_basins) 215 | and (basins.regions[ri].name not in list(merge_dict.keys())) 216 | ] 217 | print(remaining_basins) 218 | 219 | # reset the mask indicies to the order of the passed dictionary keys 220 | mask_reordered = xr.ones_like(mask.copy()) * np.nan 221 | for new_idx, k in enumerate(dict_keys): 222 | old_idx = number_dict[k] 223 | mask_reordered = mask_reordered.where(mask != old_idx, new_idx) 224 | 225 | return mask_reordered 226 | -------------------------------------------------------------------------------- /xmip/specs/staggered_grid_config.yaml: -------------------------------------------------------------------------------- 1 | ACCESS-CM2: 2 | gn: 3 | axis_shift: 4 | X: right 5 | Y: right 6 | ACCESS-ESM1-5: 7 | gn: 8 | axis_shift: 9 | X: right 10 | Y: right 11 | BCC-CSM2-MR: 12 | gn: 13 | axis_shift: 14 | X: right 15 | Y: right 16 | BCC-ESM1: 17 | gn: 18 | axis_shift: 19 | X: right 20 | Y: right 21 | CAMS-CSM1-0: 22 | gn: 23 | axis_shift: 24 | X: right 25 | Y: right 26 | CAS-ESM2-0: 27 | gn: 28 | axis_shift: 29 | X: left 30 | Y: left 31 | CESM1-1-CAM5-CMIP5: 32 | gn: 33 | axis_shift: 34 | X: right 35 | Y: right 36 | gr: 37 | axis_shift: 38 | X: left 39 | Y: left 40 | CESM2: 41 | gn: 42 | axis_shift: 43 | X: right 44 | Y: right 45 | gr: 46 | axis_shift: 47 | X: left 48 | Y: left 49 | CESM2-FV2: 50 | gn: 51 | axis_shift: 52 | X: right 53 | Y: right 54 | gr: 55 | axis_shift: 56 | X: left 57 | Y: left 58 | CESM2-WACCM: 59 | gn: 60 | axis_shift: 61 | X: right 62 | Y: right 63 | gr: 64 | axis_shift: 65 | X: left 66 | Y: left 67 | CESM2-WACCM-FV2: 68 | gn: 69 | axis_shift: 70 | X: right 71 | Y: right 72 | gr: 73 | axis_shift: 74 | X: left 75 | Y: left 76 | CIESM: 77 | gn: 78 | axis_shift: 79 | X: left 80 | Y: left 81 | CNRM-CM6-1: 82 | gn: 83 | axis_shift: 84 | X: right 85 | Y: right 86 | gr1: 87 | axis_shift: 88 | X: left 89 | Y: left 90 | CNRM-CM6-1-HR: 91 | gn: 92 | axis_shift: 93 | X: left 94 | Y: 
right 95 | CNRM-ESM2-1: 96 | gn: 97 | axis_shift: 98 | X: right 99 | Y: right 100 | gr1: 101 | axis_shift: 102 | X: left 103 | Y: left 104 | CanESM5: 105 | gn: 106 | axis_shift: 107 | X: right 108 | Y: right 109 | CanESM5-CanOE: 110 | gn: 111 | axis_shift: 112 | X: right 113 | Y: right 114 | E3SM-1-0: 115 | gr: 116 | axis_shift: 117 | X: left 118 | Y: left 119 | E3SM-1-1: 120 | gr: 121 | axis_shift: 122 | X: left 123 | Y: left 124 | E3SM-1-1-ECA: 125 | gr: 126 | axis_shift: 127 | X: left 128 | Y: left 129 | EC-Earth3: 130 | gn: 131 | axis_shift: 132 | X: right 133 | Y: right 134 | gr: 135 | axis_shift: 136 | X: left 137 | Y: left 138 | EC-Earth3-LR: 139 | gn: 140 | axis_shift: 141 | X: right 142 | Y: right 143 | EC-Earth3-Veg: 144 | gn: 145 | axis_shift: 146 | X: right 147 | Y: right 148 | EC-Earth3-Veg-LR: 149 | gn: 150 | axis_shift: 151 | X: left 152 | Y: left 153 | FGOALS-f3-L: 154 | gn: 155 | axis_shift: 156 | X: left 157 | Y: left 158 | FGOALS-g3: 159 | gn: 160 | axis_shift: 161 | X: left 162 | Y: left 163 | FIO-ESM-2-0: 164 | gn: 165 | axis_shift: 166 | X: left 167 | Y: right 168 | GFDL-CM4: 169 | gn: 170 | axis_shift: 171 | X: left 172 | Y: left 173 | gr: 174 | axis_shift: 175 | X: left 176 | Y: left 177 | GFDL-ESM4: 178 | gn: 179 | axis_shift: 180 | X: left 181 | Y: left 182 | gr: 183 | axis_shift: 184 | X: left 185 | Y: left 186 | GFDL-OM4p5B: 187 | gn: 188 | axis_shift: 189 | X: left 190 | Y: left 191 | gr: 192 | axis_shift: 193 | X: left 194 | Y: left 195 | GISS-E2-1-G: 196 | gn: 197 | axis_shift: 198 | X: left 199 | Y: left 200 | GISS-E2-1-G-CC: 201 | gn: 202 | axis_shift: 203 | X: right 204 | Y: left 205 | GISS-E2-1-H: 206 | gn: 207 | axis_shift: 208 | X: left 209 | Y: left 210 | gr: 211 | axis_shift: 212 | X: left 213 | Y: left 214 | GISS-E2-2-G: 215 | gn: 216 | axis_shift: 217 | X: right 218 | Y: left 219 | HadGEM3-GC31-LL: 220 | gn: 221 | axis_shift: 222 | X: right 223 | Y: right 224 | HadGEM3-GC31-MM: 225 | gn: 226 | axis_shift: 227 | X: left 228 | Y: right 229 | IITM-ESM: 230 | gn: 231 | axis_shift: 232 | X: left 233 | Y: left 234 | INM-CM4-8: 235 | gr1: 236 | axis_shift: 237 | X: left 238 | Y: left 239 | INM-CM5-0: 240 | gr1: 241 | axis_shift: 242 | X: left 243 | Y: left 244 | IPSL-CM6A-LR: 245 | gn: 246 | axis_shift: 247 | X: right 248 | Y: right 249 | KACE-1-0-G: 250 | gr: 251 | axis_shift: 252 | X: left 253 | Y: left 254 | MCM-UA-1-0: 255 | gn: 256 | axis_shift: 257 | X: right 258 | Y: right 259 | MIROC-ES2L: 260 | gn: 261 | axis_shift: 262 | X: right 263 | Y: right 264 | MIROC6: 265 | gn: 266 | axis_shift: 267 | X: right 268 | Y: right 269 | MPI-ESM1-2-HR: 270 | gn: 271 | axis_shift: 272 | X: right 273 | Y: left 274 | MRI-ESM2-0: 275 | gn: 276 | axis_shift: 277 | X: left 278 | Y: right 279 | gr: 280 | axis_shift: 281 | X: left 282 | Y: left 283 | NESM3: 284 | gn: 285 | axis_shift: 286 | X: right 287 | Y: right 288 | NorCPM1: 289 | gn: 290 | axis_shift: 291 | X: left 292 | Y: left 293 | gr: 294 | axis_shift: 295 | X: left 296 | Y: left 297 | NorESM1-F: 298 | gn: 299 | axis_shift: 300 | X: left 301 | Y: left 302 | NorESM2-LM: 303 | gn: 304 | axis_shift: 305 | X: right 306 | Y: left 307 | gr: 308 | axis_shift: 309 | X: right 310 | Y: left 311 | NorESM2-MM: 312 | gn: 313 | axis_shift: 314 | X: right 315 | Y: left 316 | gr: 317 | axis_shift: 318 | X: right 319 | Y: left 320 | SAM0-UNICON: 321 | gn: 322 | axis_shift: 323 | X: left 324 | Y: right 325 | TaiESM1: 326 | gn: 327 | axis_shift: 328 | X: left 329 | Y: right 330 | UKESM1-0-LL: 331 | gn: 332 | axis_shift: 333 | X: 
right 334 | Y: right 335 | # This is manually added (due to missing velocity data in the cloud). Might have to adjust if it causes issues. 336 | MPI-ESM1-2-LR: 337 | gn: 338 | axis_shift: 339 | X: left 340 | Y: left 341 | -------------------------------------------------------------------------------- /xmip/utils.py: -------------------------------------------------------------------------------- 1 | try: 2 | import intake 3 | except ImportError: 4 | intake = None 5 | 6 | 7 | def google_cmip_col(catalog="main"): 8 | """A tiny utility function to point to the 'official' pangeo cmip6 cloud files.""" 9 | if intake is None: 10 | raise ImportError( 11 | "This functionality requires intake-esm. Install with `conda install -c conda-forge intake-esm" 12 | ) 13 | if catalog == "main": 14 | return intake.open_esm_datastore( 15 | "https://storage.googleapis.com/cmip6/pangeo-cmip6.json" 16 | ) 17 | # this doesnt work anymore, but ill leave it here as an example for the future 18 | # elif catalog == "testing": 19 | # return intake.open_esm_datastore( 20 | # "https://storage.googleapis.com/cmip6/pangeo-cmip6-testing.json" 21 | # ) 22 | else: 23 | raise ValueError("Catalog not recognized. Should be `main` or `testing`") 24 | 25 | 26 | def model_id_match(match_list, id_tuple): 27 | """Matches `id_tuple` to the list of tuples `exception_list`, which can contain 28 | wildcards (match any entry) and lists (match any entry that is in the list). 29 | 30 | Parameters 31 | ---------- 32 | match_list : list 33 | list of tuples with id strings corresponding to e.g. `source_id`, `grid_label`... 34 | id_tuple : tuple 35 | single tuple with id strings. 36 | """ 37 | # Check the size of tuples 38 | if any([len(t) != len(id_tuple) for t in match_list]): 39 | raise ValueError( 40 | "Each tuple in `match_list` must have the same number of elements as `match_id`" 41 | ) 42 | 43 | match_list_checked = [] 44 | for ml in match_list: 45 | ml_processed = [] 46 | for i in range(len(ml)): 47 | match_element = ml[i] 48 | if isinstance(match_element, str) and match_element != "*": 49 | match_element = [match_element] 50 | if id_tuple[i] in match_element or match_element == "*": 51 | ml_processed.append(True) 52 | else: 53 | ml_processed.append(False) 54 | match_list_checked.append(all(ml_processed)) 55 | return any(match_list_checked) 56 | 57 | 58 | def _key_from_attrs(ds, attrs, sep="."): 59 | return sep.join([ds.attrs[i] if i in ds.attrs.keys() else "none" for i in attrs]) 60 | 61 | 62 | def cmip6_dataset_id( 63 | ds, 64 | sep=".", 65 | id_attrs=[ 66 | "activity_id", 67 | "institution_id", 68 | "source_id", 69 | "experiment_id", 70 | "variant_label", 71 | "table_id", 72 | "grid_label", 73 | "version", 74 | "variable_id", 75 | ], 76 | ): 77 | """Creates a unique string id for e.g. saving files to disk from CMIP6 output 78 | 79 | Parameters 80 | ---------- 81 | ds : xr.Dataset 82 | Input dataset 83 | sep : str, optional 84 | String/Symbol to seperate fields in resulting string, by default "." 85 | 86 | Returns 87 | ------- 88 | str 89 | Concatenated 90 | """ 91 | return _key_from_attrs(ds, id_attrs, sep=sep) 92 | 93 | 94 | def _maybe_make_list(item): 95 | "utility function to make sure output is a list" 96 | if isinstance(item, str): 97 | return [item] 98 | elif isinstance(item, list): 99 | return item 100 | else: 101 | return list(item) 102 | --------------------------------------------------------------------------------
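The pieces above are typically chained together. The following sketch is not part of the repository; the search filters, the example model, and the chosen variable are illustrative assumptions, and it additionally requires `intake-esm`, `regionmask`, and access to the Pangeo CMIP6 cloud catalog. It shows one plausible way to combine `google_cmip_col`, `combined_preprocessing`, and `merged_mask`:

import regionmask

from xmip.preprocessing import combined_preprocessing
from xmip.regionmask import merged_mask
from xmip.utils import google_cmip_col

# Open the Pangeo CMIP6 catalog and narrow it down to a single (hypothetical) dataset.
col = google_cmip_col()
cat = col.search(
    source_id="CanESM5",  # example model; any ocean model in the catalog should work
    variable_id="thetao",
    table_id="Omon",
    experiment_id="historical",
    member_id="r1i1p1f1",
    grid_label="gn",
)

# `preprocess` runs combined_preprocessing on every dataset while it is loaded,
# so names, coordinates, units and bounds come out homogenized.
ddict = cat.to_dataset_dict(preprocess=combined_preprocessing)
_, ds = ddict.popitem()

# Build a large-scale basin mask on the homogenized lon/lat coordinates
# (the regionmask accessor may differ between regionmask versions).
basins = regionmask.defined_regions.natural_earth.ocean_basins_50
mask = merged_mask(basins, ds)

# With the default merge_dict, index 0 corresponds to the North Atlantic.
na_mean = ds["thetao"].where(mask == 0).mean(["x", "y"])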