├── .gitattributes ├── .github ├── dependabot.yml ├── pull_request_template.md └── workflows │ ├── ci.yaml │ ├── full_archive_ci.yaml │ └── pythonpublish.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── BLM.png ├── CITATION.cff ├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── ci ├── environment-cloud-test.yml ├── environment-upstream-dev.yml └── environment.yml ├── codecov.yml ├── docs ├── Makefile ├── api.rst ├── conf.py ├── contributor-guide.rst ├── drift_removal.ipynb ├── environment.yml ├── images │ ├── logo.png │ └── workflow_diagram.png ├── index.rst ├── make.bat ├── postprocessing.ipynb ├── regionmask.ipynb ├── tutorial.ipynb └── whats-new.rst ├── notebooks ├── .ipynb_checkpoints │ └── parse_area_gn-checkpoint.ipynb ├── add_more_models.ipynb ├── maintenance_grids.ipynb ├── metric_parse_improvement.ipynb ├── parse_area_gn.ipynb ├── test.yaml └── testing_various_issues.ipynb ├── pyproject.toml ├── readthedocs.yml ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── test_drift_removal.py ├── test_grids.py ├── test_postprocessing.py ├── test_preprocessing.py ├── test_preprocessing_cloud.py ├── test_regionmask.py └── test_utils.py └── xmip ├── __init__.py ├── drift_removal.py ├── grids.py ├── postprocessing.py ├── preprocessing.py ├── regionmask.py ├── specs └── staggered_grid_config.yaml └── utils.py /.gitattributes: -------------------------------------------------------------------------------- 1 | xmip/_version.py export-subst 2 | * text=auto eol=lf 3 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: 'github-actions' 4 | directory: '/' 5 | schedule: 6 | interval: 'monthly' 7 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | - [ ] Closes #xxxx 4 | - [ ] Tests added 5 | - [ ] Passes `pre-commit run --all-files` 6 | - [ ] User visible changes (including notable bug fixes) are documented in `whats-new.rst` 7 | - [ ] New functions/methods are listed in `api.rst` 8 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | push: 4 | branches: 5 | - "main" 6 | pull_request: 7 | branches: 8 | - "*" 9 | schedule: 10 | - cron: "0 13 * * 1" 11 | 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.ref }} 14 | cancel-in-progress: true 15 | 16 | jobs: 17 | detect-ci-trigger: 18 | name: detect ci trigger 19 | runs-on: ubuntu-latest 20 | if: github.event_name == 'push' || github.event_name == 'pull_request' 21 | outputs: 22 | triggered: ${{ steps.detect-trigger.outputs.trigger-found }} 23 | steps: 24 | - uses: actions/checkout@v4 25 | with: 26 | fetch-depth: 2 27 | - uses: xarray-contrib/ci-trigger@v1 28 | id: detect-trigger 29 | with: 30 | keyword: "[full-cloud-ci]" 31 | 32 | build: 33 | name: Build (${{ matrix.python-version }} | ${{ matrix.os }}) 34 | if: github.repository == 'jbusecke/xmip' 35 | runs-on: ${{ matrix.os }} 36 | timeout-minutes: 45 37 | defaults: 38 | run: 39 | shell: bash -l {0} 40 | strategy: 41 | fail-fast: false 42 | matrix: 43 | os: ["ubuntu-latest"] 44 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 45 | steps: 46 | - uses: actions/checkout@v4 47 
| - name: Create conda environment 48 | uses: mamba-org/setup-micromamba@v1 49 | with: 50 | cache-environment: true 51 | cache-downloads: true 52 | micromamba-version: 'latest' 53 | environment-file: ci/environment.yml 54 | create-args: >- 55 | python=${{ matrix.python-version }} 56 | - name: Install xMIP 57 | run: | 58 | python -m pip install -e . --no-deps 59 | conda list 60 | - name: Run Tests 61 | run: | 62 | pytest -n auto --cov=./ --cov-report=xml --ignore=tests/test_preprocessing_cloud.py 63 | - name: Upload code coverage to Codecov 64 | uses: codecov/codecov-action@v4.5.0 65 | with: 66 | file: ./coverage.xml 67 | flags: unittests 68 | env_vars: OS,PYTHON 69 | name: codecov-umbrella 70 | fail_ci_if_error: false 71 | - name: Check Machine Config 72 | run: lscpu 73 | 74 | upstream-dev: 75 | name: Build (upstream-dev) 76 | runs-on: ubuntu-latest 77 | defaults: 78 | run: 79 | shell: bash -l {0} 80 | steps: 81 | - uses: actions/checkout@v4 82 | - name: Create conda environment 83 | uses: mamba-org/setup-micromamba@v1 84 | with: 85 | cache-environment: true 86 | cache-downloads: true 87 | micromamba-version: 'latest' 88 | environment-file: ci/environment-upstream-dev.yml 89 | create-args: >- 90 | python=3.11 91 | - name: Install xMIP 92 | run: | 93 | python -m pip install -e . --no-deps 94 | conda list 95 | - name: Run Tests 96 | run: | 97 | pytest -n auto --cov=./ --cov-report=xml --ignore=tests/test_preprocessing_cloud.py 98 | - name: Upload code coverage to Codecov 99 | uses: codecov/codecov-action@v4.5.0 100 | with: 101 | file: ./coverage.xml 102 | flags: unittests 103 | env_vars: OS,PYTHON 104 | name: codecov-umbrella 105 | fail_ci_if_error: false 106 | - name: Run Tests 107 | run: | 108 | pytest -n auto --ignore=tests/test_preprocessing_cloud.py 109 | 110 | cloud-tests: 111 | needs: detect-ci-trigger 112 | if: needs.detect-ci-trigger.outputs.triggered == 'true' 113 | name: Build (cloud-data-tests | ${{ matrix.variable_id }} | ${{ matrix.experiment_id }} | ${{ matrix.grid_label }}) 114 | strategy: 115 | fail-fast: false 116 | matrix: 117 | variable_id: ["thetao", "o2", "so", "uo"] 118 | experiment_id: ["historical", "ssp585"] 119 | grid_label: ["gn", "gr"] 120 | runs-on: ubuntu-latest 121 | defaults: 122 | run: 123 | shell: bash -l {0} 124 | steps: 125 | - uses: actions/checkout@v4 126 | - name: Create conda environment 127 | uses: mamba-org/setup-micromamba@v1 128 | with: 129 | cache-environment: true 130 | cache-downloads: true 131 | micromamba-version: 'latest' 132 | environment-file: ci/environment.yml 133 | create-args: >- 134 | python=3.11 135 | - name: Install xMIP 136 | run: | 137 | python -m pip install -e . 
--no-deps 138 | conda list 139 | - name: Check Machine Config 140 | run: lscpu 141 | - name: Run Tests 142 | run: | 143 | pwd 144 | echo $PYTHONPATH 145 | pytest -n auto --reruns 1 --reruns-delay 5 tests/test_preprocessing_cloud.py --gl ${{ matrix.grid_label }} --ei ${{ matrix.experiment_id }} --vi ${{ matrix.variable_id }} 146 | -------------------------------------------------------------------------------- /.github/workflows/full_archive_ci.yaml: -------------------------------------------------------------------------------- 1 | name: Full Archive CI 2 | on: 3 | workflow_dispatch: # enable a manual trigger 4 | inputs: 5 | name: 6 | description: 'Testing a manual trigger' 7 | schedule: 8 | - cron: "0 13 * * 1" # run every monday 9 | 10 | jobs: 11 | cloud-tests: 12 | name: cloud-tests (${{ matrix.catalog}} catalog) | ${{ matrix.variable_id }} | ${{ matrix.experiment_id }} | ${{ matrix.grid_label }} 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | variable_id: ['zos','so', 'thetao', 'uo','o2'] 17 | experiment_id: ['historical','piControl', 'esm-hist', 'esm-piControl', 18 | 'ssp245', 'ssp370','ssp585', 'ssp119'] 19 | grid_label: ['gn', 'gr'] 20 | catalog: ['main'] 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v4 24 | - name: Cache conda 25 | uses: actions/cache@v4 26 | env: 27 | # Increase this value to reset cache if ci/environment-upstream-dev.yml has not changed 28 | CACHE_NUMBER: 0 29 | with: 30 | path: ~/conda_pkgs_dir 31 | key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/environment-upstream-dev.yml') }} 32 | - uses: conda-incubator/setup-miniconda@v3.0.3 33 | with: 34 | channels: conda-forge 35 | mamba-version: '*' 36 | channel-priority: strict 37 | activate-environment: test_env_xmip # Defined in ci/environment-upstream-dev.yml 38 | auto-update-conda: false 39 | python-version: 3.8 40 | environment-file: ci/environment-cloud-test.yml 41 | use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly! 42 | - name: Set up conda environment 43 | shell: bash -l {0} 44 | run: python -m pip install -e . 
--no-deps --force-reinstall 45 | - name: Conda List 46 | shell: bash -l {0} 47 | run: conda list 48 | - name: Check Machine Config 49 | shell: bash -l {0} 50 | run: lscpu 51 | - name: Run Tests 52 | shell: bash -l {0} 53 | run: | 54 | pwd 55 | echo $PYTHONPATH 56 | pytest --reruns 1 --reruns-delay 10 --maxfail 20 tests/test_preprocessing_cloud.py --gl ${{ matrix.grid_label }} --ei ${{ matrix.experiment_id }} --vi ${{ matrix.variable_id }} --cat ${{ matrix.catalog }} 57 | -------------------------------------------------------------------------------- /.github/workflows/pythonpublish.yaml: -------------------------------------------------------------------------------- 1 | name: Build and Upload xmip to PyPI 2 | on: 3 | push: 4 | branches: 5 | - "main" 6 | pull_request: 7 | branches: 8 | - "*" 9 | release: 10 | types: 11 | - published 12 | 13 | jobs: 14 | build-artifacts: 15 | runs-on: ubuntu-latest 16 | if: github.repository == 'jbusecke/xmip' 17 | steps: 18 | - uses: actions/checkout@v4 19 | with: 20 | fetch-depth: 0 21 | - name: Set up Python 22 | uses: actions/setup-python@v5 23 | with: 24 | python-version: "3.x" 25 | - name: Install dependencies 26 | run: | 27 | python -m pip install --upgrade pip 28 | python -m pip install --upgrade setuptools setuptools-scm build twine 29 | - name: Build only 30 | if: github.event_name != 'release' 31 | run: | 32 | python -m build 33 | twine check dist/* 34 | - name: Build and publish 35 | if: github.event_name == 'release' 36 | env: 37 | TWINE_USERNAME: "__token__" 38 | TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} 39 | run: | 40 | python -m build 41 | twine check dist/* 42 | twine upload dist/* 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/_build 57 | make.bat 58 | 59 | # PyBuilder 60 | target/ 61 | 62 | # pyenv python configuration file 63 | .python-version 64 | 65 | .ipynb_checkpoints 66 | 67 | **/dask-worker-space/ 68 | mydask.png 69 | 70 | .vscode 71 | .mypy_cache 72 | readthedocs.yml 73 | xmip/_version.py 74 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | ci: 2 | autoupdate_schedule: quarterly 3 | autofix_prs: true 4 | 5 | repos: 6 | - repo: https://github.com/pre-commit/pre-commit-hooks 7 | rev: v4.6.0 8 | hooks: 9 | - id: check-added-large-files 10 | - id: check-toml 11 | - id: trailing-whitespace 12 | - id: end-of-file-fixer 13 | - id: check-yaml 14 | - id: debug-statements 15 | - repo: https://github.com/astral-sh/ruff-pre-commit 16 | rev: v0.5.0 17 | hooks: 18 | # Run the linter. 19 | - id: ruff 20 | args: [ --fix ] 21 | # Run the formatter. 22 | - id: ruff-format 23 | -------------------------------------------------------------------------------- /BLM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jbusecke/xMIP/108266e72d01c173e15cdf4ca00612a0d4f0a8ca/BLM.png -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 3 | authors: 4 | - family-names: "Busecke" 5 | given-names: "Julius J. M." 6 | orcid: "https://orcid.org/0000-0001-8571-865X" 7 | - family-names: "Spring" 8 | given-names: "Aaron" 9 | orcid: "https://orcid.org/0000-0003-0216-2241" 10 | - family-names: "Maroon" 11 | given-names: "Elizabeth" 12 | orcid: "https://orcid.org/0000-0002-1660-7822" 13 | - family-names: "Nicholas" 14 | given-names: "Thomas" 15 | orcid: "https://orcid.org/0000-0002-2176-0530" 16 | - family-names: "Magin" 17 | given-names: "Justus" 18 | orcid: "https://orcid.org/0000-0002-4254-8002" 19 | affiliation: "IFREMER" 20 | - family-names: "Ritschel" 21 | given-names: "Markus" 22 | orcid: "https://orcid.org/0000-0001-7464-7075" 23 | affiliation: "Universität Hamburg, Germany" 24 | - family-names: "Angevaare" 25 | given-names: "Joran J. R." 26 | orcid: "https://orcid.org/0000-0003-3392-8123" 27 | affiliation: "KNMI" 28 | 29 | title: "xMIP" 30 | url: "https://github.com/jbusecke/xMIP" 31 | # version and doi are completed by Zenodo automatically, do not provide here. 32 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | Copyright 2022 xMIP Contributors 179 | 180 | Licensed under the Apache License, Version 2.0 (the "License"); 181 | you may not use this file except in compliance with the License. 182 | You may obtain a copy of the License at 183 | 184 | http://www.apache.org/licenses/LICENSE-2.0 185 | 186 | Unless required by applicable law or agreed to in writing, software 187 | distributed under the License is distributed on an "AS IS" BASIS, 188 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 189 | See the License for the specific language governing permissions and 190 | limitations under the License. 
191 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include xmip * 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Documentation Status](https://readthedocs.org/projects/cmip6-preprocessing/badge/?version=latest)](https://cmip6-preprocessing.readthedocs.io/en/latest/?badge=latest) 2 | [![Anaconda Cloud](https://anaconda.org/conda-forge/xmip/badges/version.svg)](https://anaconda.org/conda-forge/xmip) 3 | [![conda-forge](https://img.shields.io/conda/dn/conda-forge/xmip?label=conda-forge)](https://anaconda.org/conda-forge/xmip) 4 | [![Pypi](https://img.shields.io/pypi/v/xmip.svg)](https://pypi.org/project/xmip) 5 | [![Build Status](https://img.shields.io/github/workflow/status/jbusecke/xmip/CI?logo=github)](https://github.com/jbusecke/xmip/actions) 6 | [![Full Archive CI](https://github.com/jbusecke/xmip/workflows/Full%20Archive%20CI/badge.svg)](https://github.com/jbusecke/xmip/actions/workflows/full_archive_ci.yaml) 7 | [![codecov](https://codecov.io/gh/jbusecke/xmip/branch/main/graph/badge.svg)](https://codecov.io/gh/jbusecke/xmip) 8 | [![License:MIT](https://img.shields.io/badge/License-MIT-lightgray.svg?style=flt-square)](https://opensource.org/licenses/MIT) 9 | [![DOI](https://zenodo.org/badge/215606850.svg)](https://zenodo.org/badge/latestdoi/215606850) 10 | 11 | ![BLM](BLM.png) 12 | 13 | Science is not immune to racism. Academia is an elitist system with numerous gatekeepers that has mostly allowed a very limited spectrum of people to pursue a career. I believe we need to change that. 14 | 15 | Open source development and reproducible science are a great way to democratize the means for scientific analysis. **But you can't git clone software if you are being murdered by the police for being Black!** 16 | 17 | Free access to software and hollow diversity statements are hardly enough to crush the systemic and institutionalized racism in our society and academia. 18 | 19 | If you are using this package, I ask you to go beyond just speaking out and donate [here](https://secure.actblue.com/donate/cmip6_preprocessing) to [Data for Black Lives](http://d4bl.org/) and [Black Lives Matter Action](https://blacklivesmatter.com/global-actions/). 20 | 21 | I explicitly welcome suggestions regarding the wording of this statement and for additional organizations to support. Please raise an [issue](https://github.com/jbusecke/xmip/issues) for suggestions. 22 | 23 | 24 | 25 | # xmip (formerly cmip6_preprocessing) 26 | 27 | This package facilitates the cleaning, organization and interactive analysis of Model Intercomparison Projects (MIPs) within the [Pangeo](https://pangeo.io) software stack. 28 | 29 | Are you interested in CMIP6 data, but find that is is not quite `analysis ready`? Do you just want to run a simple (or complicated) analysis on various models and end up having to write logic for each seperate case, because various datasets still require fixes to names, coordinates, etc.? Then this package is for you. 30 | 31 | Developed during the [cmip6-hackathon](https://cmip6hack.github.io/#/) this package provides utility functions that play nicely with [intake-esm](https://github.com/NCAR/intake-esm). 32 | 33 | We currently support the following functions 34 | 35 | 1. 
Preprocessing CMIP6 data (Please check out the [tutorial](docs/tutorial.ipynb) for some examples using the [pangeo cloud](ocean.pangeo.io)). The preprocessing includes:
36 |     a. Fix inconsistent naming of dimensions and coordinates
37 |     b. Fix inconsistent values, shape and dataset location of coordinates
38 |     c. Homogenize longitude conventions
39 |     d. Fix inconsistent units
40 | 2. [Creating large scale ocean basin masks for arbitrary model output](docs/regionmask.ipynb)
41 | 
42 | The following issues are under development:
43 | 1. Reconstruct/find grid metrics
44 | 2. Arrange different variables on their respective staggered grid, so they can work seamlessly with [xgcm](https://xgcm.readthedocs.io/en/latest/)
45 | 
46 | Check out this recent Earthcube [notebook](https://github.com/earthcube2020/ec20_busecke_etal) (cite via doi: [10.1002/essoar.10504241.1](https://www.essoar.org/doi/10.1002/essoar.10504241.1)) for a high level demo of `xmip` and [xgcm](https://github.com/xgcm/xgcm).
47 | 
48 | 
49 | ## Installation
50 | 
51 | Install `xmip` via pip:
52 | 
53 | `pip install xmip`
54 | 
55 | or conda:
56 | 
57 | `conda install -c conda-forge xmip`
58 | 
59 | To install the newest main branch from GitHub you can use pip as well:
60 | 
61 | `pip install git+https://github.com/jbusecke/xmip.git`
62 | 
--------------------------------------------------------------------------------
/ci/environment-cloud-test.yml:
--------------------------------------------------------------------------------
1 | name: test_env_xmip
2 | channels:
3 |   - conda-forge
4 | dependencies:
5 |   - xarray>=0.17.0
6 |   - xgcm < 0.7.0 # temporary pin since we need 'extrapolate' option for padding
7 |   # Dependencies for the pangeo cloud data
8 |   - intake-esm
9 |   - gcsfs
10 |   - zarr
11 |   - pint
12 |   - cf_xarray>=0.6.0
13 |   - pint-xarray
14 |   # Dependencies for the testing suite
15 |   - pytest-cov
16 |   - pytest-xdist
17 |   - pytest-rerunfailures
18 |   - codecov
19 | 
--------------------------------------------------------------------------------
/ci/environment-upstream-dev.yml:
--------------------------------------------------------------------------------
1 | name: test_env_xmip
2 | channels:
3 |   - conda-forge
4 | dependencies:
5 |   - cftime
6 |   - dask
7 |   - xgcm <0.7.0 # temporary pin since we need 'extrapolate' option for padding
8 |   - pip
9 |   - cartopy #installing this without conda is a nightmare, so ill leave it here
10 |   - xesmf # same here
11 |   - rasterio # Trying to get around an apparent bug with py 3.10 + pip + rasterio (https://github.com/jbusecke/cmip6_preprocessing/pull/231#issuecomment-1132190649)
12 |   - pip:
13 |     - codecov
14 |     - pytest-cov
15 |     - pytest-xdist
16 |     - git+https://github.com/regionmask/regionmask.git
17 |     - git+https://github.com/pydata/xarray.git
18 |     #- git+https://github.com/xgcm/xgcm.git
19 |     - git+https://github.com/jbusecke/xarrayutils.git
20 |     - git+https://github.com/xarray-contrib/cf-xarray.git
21 |     - git+https://github.com/hgrecco/pint.git
22 |     - git+https://github.com/xarray-contrib/pint-xarray.git
23 | 
--------------------------------------------------------------------------------
/ci/environment.yml:
--------------------------------------------------------------------------------
1 | name: test_env_xmip
2 | channels:
3 |   - conda-forge
4 | dependencies:
5 |   - xarray>=0.17.0
6 |   - pandas
7 |   - netcdf4
8 |   - scipy
9 |   - xgcm<0.7.0 #Revert this after fixing the extrapolate option
10 |   - cftime
11 |   - regionmask
12 |   - cartopy
13 |   - xesmf
14 |   - xarrayutils>=2.0.0 #TODO remove when the new
xarray polyfit is implemented 15 | - pint 16 | - cf_xarray>=0.6.0 17 | - pint-xarray>=0.2.1 18 | - pytest-cov 19 | - pytest-xdist 20 | - codecov 21 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | require_ci_to_pass: no 3 | max_report_age: off 4 | 5 | comment: false 6 | 7 | coverage: 8 | precision: 2 9 | round: down 10 | status: 11 | project: 12 | default: 13 | target: 95 14 | informational: true 15 | patch: off 16 | changes: off 17 | 18 | ignore: 19 | - "setup.py" 20 | - "tests/*" 21 | - "xmip/__init__.py" 22 | - "xmip/_version.py" 23 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | :mod:`API` 2 | ---------------------------- 3 | 4 | preprocessing 5 | ============= 6 | .. automodule:: xmip.preprocessing 7 | :members: 8 | :undoc-members: 9 | :show-inheritance: 10 | 11 | postprocessing 12 | ============== 13 | .. automodule:: xmip.postprocessing 14 | :members: 15 | :undoc-members: 16 | :show-inheritance: 17 | 18 | 19 | grids 20 | ===== 21 | .. automodule:: xmip.grids 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | 26 | regionmask 27 | ========== 28 | .. automodule:: xmip.regionmask 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | 33 | utils 34 | ===== 35 | .. automodule:: xmip.utils 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | 10 | # If extensions (or modules to document with autodoc) are in another directory, 11 | # add these directories to sys.path here. If the directory is relative to the 12 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
13 | # 14 | import os 15 | import pathlib 16 | import sys 17 | 18 | 19 | print("python exec:", sys.executable) 20 | print("sys.path:", sys.path) 21 | root = pathlib.Path(__file__).parent.parent.absolute() 22 | os.environ["PYTHONPATH"] = str(root) 23 | sys.path.insert(0, str(root)) 24 | 25 | import xmip # noqa 26 | from importlib.metadata import version # noqa 27 | 28 | release = version("xmip") 29 | # for example take major/minor/patch 30 | version = ".".join(release.split(".")[:3]) 31 | 32 | # From https://github.com/pypa/setuptools_scm/#usage-from-sphinx 33 | 34 | # -- Project information ----------------------------------------------------- 35 | 36 | project = "xmip" 37 | copyright = "2021, xmip maintainers" 38 | author = "xmip maintainers" 39 | 40 | 41 | # -- General configuration --------------------------------------------------- 42 | 43 | # Add any Sphinx extension module names here, as strings. They can be 44 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 45 | # ones. 46 | extensions = [ 47 | "sphinx.ext.autodoc", 48 | "sphinx.ext.viewcode", 49 | "sphinx.ext.napoleon", 50 | "nbsphinx", 51 | "recommonmark", 52 | "sphinx.ext.mathjax", 53 | "sphinx.ext.autosummary", 54 | "sphinx.ext.extlinks", 55 | "sphinx.ext.intersphinx", 56 | "numpydoc", 57 | "nbsphinx", 58 | "IPython.sphinxext.ipython_directive", 59 | "IPython.sphinxext.ipython_console_highlighting", 60 | "sphinxcontrib.srclinks", 61 | ] 62 | 63 | # Add any paths that contain templates here, relative to this directory. 64 | templates_path = ["_templates"] 65 | 66 | # List of patterns, relative to source directory, that match files and 67 | # directories to ignore when looking for source files. 68 | # This pattern also affects html_static_path and html_extra_path. 69 | exclude_patterns = ["_build", "**.ipynb_checkpoints", "Thumbs.db", ".DS_Store"] 70 | 71 | # link to github issues 72 | extlinks = { 73 | "issue": ("https://github.com/jbusecke/xmip/issues/%s", "GH#%s"), 74 | "pull": ("https://github.com/jbusecke/xmip/pull/%s", "GH#%s"), 75 | } 76 | 77 | # -- Options for HTML output ------------------------------------------------- 78 | 79 | # The theme to use for HTML and HTML Help pages. See the documentation for 80 | # a list of builtin themes. 81 | # 82 | html_theme = "pangeo" 83 | 84 | # Add any paths that contain custom static files (such as style sheets) here, 85 | # relative to this directory. They are copied after the builtin static files, 86 | # so a file named "default.css" will overwrite the builtin "default.css". 87 | html_static_path = ["_static"] 88 | -------------------------------------------------------------------------------- /docs/contributor-guide.rst: -------------------------------------------------------------------------------- 1 | .. _contributor_guide: 2 | 3 | Contributor Guide 4 | ----------------- 5 | 6 | **xmip** is meant to be a community driven package and we welcome feedback and 7 | contributions. 8 | 9 | Did you notice a bug? Are you missing a feature? A good first starting place is to 10 | open an issue in the `github issues page `_. 11 | 12 | 13 | In order to contribute to xmip, please fork the repository and submit a pull request. 14 | A good step by step tutorial for this can be found in the 15 | `xarray contributor guide `_. 
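A typical way to get a local development copy might look like the following sketch (the clone URL is a placeholder for your own fork; the editable install mirrors the command used in the CI workflows)::

    git clone https://github.com/<your-username>/xmip.git
    cd xmip
    python -m pip install -e . --no-deps

The conda environments described below provide the actual dependencies.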
16 | 
17 | 
18 | Environments
19 | ^^^^^^^^^^^^
20 | The easiest way to start developing xmip pull requests
21 | is to install one of the conda environments provided in the `ci folder `_::
22 | 
23 |   conda env create -f ci/environment.yml
24 | 
25 | Activate the environment with::
26 | 
27 |   conda activate test_env_xmip
28 | 
29 | We use `black `_ as a code formatter and pull requests will
30 | fail in the CI if not properly formatted.
31 | 
32 | All conda environments contain black and you can reformat code using::
33 | 
34 |   black xmip
35 | 
36 | `pre-commit `_ provides an automated way to reformat your code
37 | prior to each commit. Simply install pre-commit::
38 | 
39 |   pip install pre-commit
40 | 
41 | and install it in the xmip root directory with::
42 | 
43 |   pre-commit install
44 | 
45 | and your code will be properly formatted before each commit.
46 | 
47 | Change and build docs
48 | ^^^^^^^^^^^^^^^^^^^^^
49 | 
50 | To make additions or changes to the documentation, please install/activate the docs environment `docs/environment.yml`.
51 | 
52 | You can then make changes and build the html locally by running `make html` in the `docs` folder.
53 | 
54 | Check the generated html locally with `open _build/html/index.html`.
55 | 
56 | .. note::
57 |    Some of the CI can take a long time to build and when making changes to the docs only, you can deactivate it by adding `[skip-ci]` to your commit message.
58 | 
59 |    For example::
60 | 
61 |      git commit -m '[skip-ci] Just a typo in the docs'
62 | 
63 |    will skip the expensive cloud CI for intermediate pushes.
64 | 
65 | 
66 | How to release a new version of xmip (for maintainers only)
67 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
68 | The process of releasing at this point is very easy.
69 | 
70 | We need only two things: a PR to update the documentation and a release on GitHub.
71 | 
72 | 1. Make sure that all the new features/bugfixes etc. are appropriately documented in `doc/whats-new.rst`, add the date to the current release and make an empty (unreleased) entry for the next minor release as a PR.
73 | 2. Navigate to the 'tags' symbol on the repo's main page, click on 'Releases' and on 'Draft new release' on the right. Add the version number and a short description and save the release.
74 | 
75 | From here the GitHub actions take over and package things for `PyPI `_.
76 | The conda-forge package will be triggered by the PyPI release and you will have to approve a PR in `xmip-feedstock `_. This takes a while, usually a few hours to a day.
77 | 
78 | That's it!
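
Running the test suite
^^^^^^^^^^^^^^^^^^^^^^

To check your changes locally, you can run the same non-cloud test suite that the CI uses. Assuming one of the conda environments above is activated and xmip is installed in editable mode, this is::

  pytest -n auto --ignore=tests/test_preprocessing_cloud.py

The tests in `tests/test_preprocessing_cloud.py` require access to the Pangeo cloud data and are normally exercised by the dedicated cloud CI (triggered with `[full-cloud-ci]` in a commit message) rather than locally.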
79 | 
--------------------------------------------------------------------------------
/docs/environment.yml:
--------------------------------------------------------------------------------
1 | name: xmip_docs
2 | channels:
3 |   - conda-forge
4 | dependencies:
5 |   - cartopy
6 |   # Insert your dependencies here
7 |   - numpydoc
8 |   - sphinx
9 |   - sphinx_rtd_theme
10 |   - ipython
11 |   - ipykernel # not strictly necessary but this is nice to run notebooks in this env to test
12 |   - pandoc
13 |   - recommonmark
14 |   - pip
15 |   - nc-time-axis
16 |   - pint
17 |   - pip:
18 |     - docrep<=0.2.7
19 |     - nbsphinx
20 |     - jupyter_client
21 |     - sphinx_pangeo_theme
22 |     - sphinx-copybutton
23 |     - sphinxcontrib-srclinks
24 | 
--------------------------------------------------------------------------------
/docs/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbusecke/xMIP/108266e72d01c173e15cdf4ca00612a0d4f0a8ca/docs/images/logo.png
--------------------------------------------------------------------------------
/docs/images/workflow_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbusecke/xMIP/108266e72d01c173e15cdf4ca00612a0d4f0a8ca/docs/images/workflow_diagram.png
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. xmip documentation master file, created by
2 |    sphinx-quickstart on Thu Feb 25 16:11:36 2021.
3 |    You can adapt this file completely to your liking, but it should at least
4 |    contain the root `toctree` directive.
5 | 
6 | 
7 | .. image:: images/logo.png
8 | 
9 | Analysis ready CMIP6 data with Pangeo
10 | =====================================
11 | 
12 | Modern climate science efforts like the IPCC rely heavily on model intercomparison projects (MIPs). These projects essentially pool together model results from various climate modeling centers around the world that were run according to specific protocols, in order to compare, for instance, the response of the coupled climate system to changes in forcing.
13 | 
14 | The vast amount of work that has been put into the standardization of these experiments enables climate scientists to use a wealth of data to answer their specific questions, thus refining future models and increasing our understanding of the complex system that is our home planet.
15 | 
16 | However, from the viewpoint of analyzing these data, the output is still quite 'dirty', making the quintessential workflow of:
17 | 
18 | 1. Develop a metric/analysis to apply to one model.
19 | 2. Run that analysis across all the models and interpret results.
20 | 
21 | inherently difficult.
22 | 
23 | Most of the problems arise from differences in the conventions the model output is provided in. This includes, but is not limited to, different naming conventions for coordinate variables, units, and grid variables.
24 | `xmip` aims to provide lightweight tools that let you get right to the science, without spending hours on cleaning up the data.
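
To get a feel for the intended workflow, here is a minimal sketch that loads several CMIP6 stores from the Pangeo cloud catalog and cleans them on the fly (the catalog URL and the query values are only examples and `intake-esm` is assumed to be installed; see the tutorial below for a full walk-through)::

    import intake

    from xmip.preprocessing import combined_preprocessing

    # open the Pangeo CMIP6 catalog (swap in your own catalog or files as needed)
    col = intake.open_esm_datastore(
        "https://storage.googleapis.com/cmip6/pangeo-cmip6.json"
    )

    # select a single variable across many models
    cat = col.search(
        experiment_id="historical",
        table_id="Omon",
        variable_id="thetao",
        grid_label="gn",
    )

    # `combined_preprocessing` fixes names, coordinates and units while loading
    ddict = cat.to_dataset_dict(
        zarr_kwargs={"consolidated": True},
        preprocess=combined_preprocessing,
    )

Every dataset in `ddict` then shares the same naming and coordinate conventions, so the same analysis code can be mapped over all models.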
25 | 26 | 27 | 28 | Installation 29 | ------------ 30 | 31 | Installation from Conda Forge 32 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 33 | 34 | The easiest way to install xMIP along with its dependencies is via conda 35 | forge:: 36 | 37 | conda install -c conda-forge xmip 38 | 39 | Installation from Pip 40 | ^^^^^^^^^^^^^^^^^^^^^ 41 | 42 | An alternative is to use pip:: 43 | 44 | pip install xmip 45 | 46 | Installation from GitHub 47 | ^^^^^^^^^^^^^^^^^^^^^^^^ 48 | 49 | You can get the newest version by installing directly from GitHub:: 50 | 51 | pip install git+https://github.com/jbusecke/xmip.git 52 | 53 | 54 | Getting Started 55 | --------------- 56 | 57 | The most basic functionality is provided by the `combined_preprocessing` function. Check out the `tutorial `_ for a brief introduction of the basic functionality. 58 | 59 | 60 | Suggested Workflow 61 | ------------------ 62 | 63 | We aim to provide a flexible solution for many scientific workflows which might need combination of datasets at different 'levels'. 64 | 65 | .. image:: images/workflow_diagram.png 66 | 67 | The `preprocessing` module deals with 'cleaning' single variable datasets (e.g. from a single zarr store in the `pangeo CMIP6 cloud data `_ or a dataset loaded from mulitple netcdf files on a local server/HPC). 68 | 69 | It is often desired to remove control run drift from the data before running analyses. Check out the `drift_removal` module for utilities based on aligning/detrending branched runs. 70 | 71 | Depending on your science goal, you might need to combine several datasets into members (multi variable datasets) or even further. These combination tasks are facilitated by the `postprocessing` module. This provides the ability to 'match and combine' datasets based on their attributes. For more detailed examples please check out the `Postprocessing` section. 72 | 73 | The `regionmask` module enables you to create basin masks for each model (and any other data with longitude/latitude values) 74 | 75 | 76 | .. I need to check out how to link the API sections and from within notebooks properly. Look into https://myst-nb.readthedocs.io/en/latest/ 77 | 78 | 79 | Contents 80 | -------- 81 | 82 | .. toctree:: 83 | :maxdepth: 1 84 | 85 | tutorial 86 | postprocessing 87 | drift_removal 88 | regionmask 89 | contributor-guide 90 | api 91 | whats-new 92 | 93 | .. toctree:: 94 | :maxdepth: 2 95 | :caption: Contents: 96 | 97 | 98 | 99 | Indices and tables 100 | ================== 101 | 102 | * :ref:`genindex` 103 | * :ref:`modindex` 104 | * :ref:`search` 105 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/whats-new.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: xmip 2 | 3 | What's New 4 | =========== 5 | .. _whats-new.0.8.0: 6 | 7 | v0.8.0 (unreleased) 8 | ------------------- 9 | 10 | Internal Changes 11 | ~~~~~~~~~~~~~~~~ 12 | - Add `longitude_bnds` and `latitude_bnds` to `cmip_renaming_dict` (:pull:`300`). By `Joran Angevaare `_ 13 | - Updated pre-commit linting to use ruff (:pull:`359`). By `Julius Busecke `_ 14 | - Modernized packaging workflow, that runs on each PR (:pull:`361`). By `Julius Busecke `_ 15 | - Added 'nvertices' -> 'vertex' to renaming preprocessor (:pull:`357`). By `Julius Busecke `_ 16 | - Updated mamba CI + testing py311/py312 (:issue:`360`, :pull:`362`). By `Julius Busecke `_ 17 | 18 | Bugfixes 19 | ~~~~~~~~ 20 | - Fixed cyclic interpolation in `_interp_nominal_lon` (:issue:`295`, :pull:`296`). By `Joran Angevaare `_ 21 | - Fix formatting `whats-new.rst` for doc-building (:pull:`366`). By `Joran Angevaare `_ 22 | 23 | .. _whats-new.0.7.2: 24 | 25 | v0.7.3 (unreleased) 26 | ------------------- 27 | 28 | Internal Changes 29 | ~~~~~~~~~~~~~~~~ 30 | - Added PR template (:pull:`304`). By `Julius Busecke `_ 31 | - Add `longitude_bnds` and `latitude_bnds` to `cmip_renaming_dict` (:pull:`300`). By `Joran Angevaare `_ 32 | 33 | .. _whats-new.0.7.0: 34 | 35 | v0.7.0 (2023/01/03) 36 | ------------------- 37 | 38 | New Features 39 | ~~~~~~~~~~~~ 40 | - :py:func:`~xmip.postprocessing.match_metrics` Now allows more flexible metric matching (accepting e.g. already merged members) + better error for missing match_attrs (:pull:`275`). By `Julius Busecke `_ 41 | - Postprocessing functions can now easily be nested on top of each other (:pull:`187`). By `Julius Busecke `_ 42 | 43 | 44 | Breaking Changes 45 | ~~~~~~~~~~~~~~~~ 46 | - Requires xarray>=0.17.0 and drops support for python 3.6 (:pull:`170`, :pull:`173`). By `Julius Busecke `_ 47 | - :py:func:`~xmip.utils.cmip6_dataset_id` not includes the attribute `variable_id` (:pull:`166`) By `Julius Busecke `_ 48 | - Dropped support for python 3.7 (:pull:`268`, :issue:`267`). By `Julius Busecke `_ 49 | 50 | Internal Changes 51 | ~~~~~~~~~~~~~~~~ 52 | 53 | - Unit correction logic now uses pint-xarray under the hood (:pull:`160`, :issue:`31`). 54 | By `Tom Nicholas `_ and `Julius Busecke `_ 55 | 56 | - License changed to Apache-2.0 (:pull:`272`, :issue:`256`). By `Julius Busecke `_ 57 | 58 | Bugfixes 59 | ~~~~~~~~ 60 | - :py:func:`~xmip.postprocessing.concat_members` now accepts datasets which already have 'member_id' as a dimension (maintain compatibility with recent intake-esm changes) (:pull:`277`). By `Julius Busecke `_ 61 | 62 | - :py:func:`~xmip.postprocessing.match_metrics` now accepts single variables as str input (:issue:`229`, :pull:`245`). By `Julius Busecke `_ 63 | 64 | - :py:func:`~xmip.postprocessing.concat_members` now returns a dataset with labelled `member_id` dimension (:issue:`196` , :pull:`197`). By `Julius Busecke `_ 65 | 66 | - Fixes incompatibility with upstream changes in xarray>=0.19.0 (:issue:`173`, :pull:`174`). 
By `Julius Busecke `_ 67 | 68 | - :py:func:`~xmip.drift_removal.match_and_remove_drift` does now work with chunked (dask powered) datasets (:pull:`164`).By `Julius Busecke `_ 69 | 70 | Internal Changes 71 | ~~~~~~~~~~~~~~~~ 72 | 73 | - Unit correction logic now uses pint-xarray under the hood (:pull:`160`, :issue:`31`). 74 | By `Tom Nicholas `_ and `Julius Busecke `_ 75 | 76 | 77 | .. _whats-new.0.5.0: 78 | 79 | v0.5.0 (2021/7/9) 80 | ------------------- 81 | 82 | New Features 83 | ~~~~~~~~~~~~ 84 | - :py:func:`~xmip.postprocessing.interpolate_grid_labels` enables batch combination of different grid_labels 85 | (e.g. from native to regridded and vice versa) using xesmf (:pull:`161`). By `Julius Busecke `_ 86 | 87 | - :py:func:`~xmip.drift_removal.match_and_remove_drift` enables batch detrending/drift-drift_removal 88 | from a dictionary of datasets (:pull:`155`). By `Julius Busecke `_ 89 | 90 | .. _whats-new.0.4.0: 91 | 92 | v0.4.0 (2021/6/9) 93 | ------------------- 94 | 95 | New Features 96 | ~~~~~~~~~~~~ 97 | 98 | - Started implementing metadata fixes in `combined_preprocessing` (:pull:`147`). By `Julius Busecke `_ 99 | 100 | - Added `drift_removal` which adds ability to align time of branched runs and remove drift from the parent (e.g. control) run (:pull:`126`, :pull:`148`). By `Julius Busecke `_ 101 | 102 | .. _whats-new.0.3.0: 103 | 104 | v0.3.0 (2021/6/9) 105 | ------------------- 106 | 107 | New Features 108 | ~~~~~~~~~~~~ 109 | - Added `postprocessing` module and added ability to parse metrics to multiple datasets in a dictionary (:pull:`110`, :pull:`117`). By `Julius Busecke `_ 110 | 111 | 112 | Internal Changes 113 | ~~~~~~~~~~~~~~~~ 114 | 115 | - Refactored CI internals, added dependabot, and some updated failcases (:pull:`121`, :pull:`128`, :pull:`129`, :pull:`133`, :pull:`134`, :pull:`135`). By `Julius Busecke `_ 116 | 117 | .. _whats-new.0.2.0: 118 | 119 | v0.2.0 (2021/4/9) 120 | ----------------- 121 | 122 | Breaking changes 123 | ~~~~~~~~~~~~~~~~ 124 | - Removed `replace_x_y_nominal_lat_lon` from `combined_preprocessing` due to ongoing performance issues with dask (:issue:`75`, :issue:`85`, :issue:`94`) (:pull:`104`). By `Julius Busecke `_ 125 | - Further refactor of `replace_x_y_nominal_lat_lon`, which avoids missing values in the dimension coordinates (:issue:`66`) (:pull:`79`). By `Julius Busecke `_ 126 | 127 | - Consistent treatment of cf-style bounds. The combination of `parse_lon_lat_bounds`,`maybe_convert_bounds_to_vertex`, `maybe_convert_vertex_to_bounds`, and `sort_vertex_order` applied on the dataset, assures that all datasets have both conventions available and the vertex order is the same. By `Julius Busecke `_ 128 | 129 | - New implementation of `replace_x_y_nominal_lat_lon`, which avoids duplicate values in the derived dimensions (:issue:`34`) (:pull:`35`). By `Julius Busecke `_ 130 | 131 | New Features 132 | ~~~~~~~~~~~~ 133 | - Create merged region masks with :py:func:`merged_mask` (:pull:`18`). By `Julius Busecke `_ 134 | 135 | 136 | Bug fixes 137 | ~~~~~~~~~ 138 | - Updated cmip6 catalog location for the pangeo gc archive (:issue:`80`) (:pull:`81`). By `Julius Busecke `_ 139 | 140 | 141 | Documentation 142 | ~~~~~~~~~~~~~ 143 | - Sphinx/RTD documentation, including contributor guide and new logo 🤗. (:issue:`27`) (:pull:`99`). 144 | 145 | Internal Changes 146 | ~~~~~~~~~~~~~~~~ 147 | - Adds options to skip extensive cloud ci by using [skip-ci] in commit message. Adds the ability to cancel previous GHA jobs to prevent long wait times for rapid pushes. 
(:pull:`99`) By `Julius Busecke `_. 148 | 149 | - Add `ni` and `nj` to the `rename_dict` dictionary in _preprocessing.py_ as dimensions to be corrected (:pull:`54`). By `Markus Ritschel `_ 150 | 151 | 152 | .. _whats-new.0.1.2: 153 | 154 | v0.1.2 155 | ------ 156 | 157 | 158 | New Features 159 | ~~~~~~~~~~~~ 160 | - Added more models, now supporting both ocean and atmospheric output for :py:func:`combined_preprocessing` (:pull:`14`). By `Julius Busecke `_ 161 | 162 | 163 | 164 | .. _whats-new.0.1.0: 165 | 166 | v0.1.0 (2/21/2020) 167 | ---------------------- 168 | 169 | Initial release. 170 | -------------------------------------------------------------------------------- /notebooks/.ipynb_checkpoints/parse_area_gn-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Extract areas from all available parameters and parse based on `source_id`" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stderr", 17 | "output_type": "stream", 18 | "text": [ 19 | "/srv/conda/envs/notebook/lib/python3.7/site-packages/intake/source/discovery.py:136: FutureWarning: The drivers ['stac-catalog', 'stac-collection', 'stac-item'] do not specify entry_points and were only discovered via a package scan. This may break in a future release of intake. The packages should be updated.\n", 20 | " FutureWarning)\n" 21 | ] 22 | } 23 | ], 24 | "source": [ 25 | "import intake\n", 26 | "import numpy as np" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 2, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "from cmip6_preprocessing.parse_static_metrics import parse_metrics" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "# define collection\n", 45 | "col = intake.open_esm_datastore(\"../../cmip6hack-ocean-bgc/catalogs/pangeo-cmip6.json\")" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "name": "stdout", 55 | "output_type": "stream", 56 | "text": [ 57 | "--> The keys in the returned dictionary of datasets are constructed as follows:\n", 58 | "\t'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'\n", 59 | "\n", 60 | "--> There will be 22 group(s)\n" 61 | ] 62 | } 63 | ], 64 | "source": [ 65 | "# # load a bunch of dataset with intake_esm\n", 66 | "# import warnings\n", 67 | "# with warnings.catch_warnings():\n", 68 | "# warnings.simplefilter(\"ignore\")\n", 69 | "# query = dict(experiment_id='piControl',\n", 70 | "# variable_id=['thetao'], grid_label='gn')\n", 71 | "# cat = col.search(**query)\n", 72 | "# cat.df\n", 73 | "# raw_data_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True}, cdf_kwargs={'chunks': {}})\n", 74 | "# raw_data_dict.keys()" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 5, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "data": { 84 | "text/plain": [ 85 | "\n", 86 | "X Axis (periodic):\n", 87 | " * center x" 88 | ] 89 | }, 90 | "execution_count": 5, 91 | "metadata": {}, 92 | "output_type": "execute_result" 93 | } 94 | ], 95 | "source": [ 96 | "# from xgcm import Grid\n", 97 | "# ds_test = raw_data_dict['CMIP.CNRM-CERFACS.CNRM-CM6-1.piControl.Omon.gn']\n", 98 | "# grid = Grid(ds_test, coords={'X':{'center':'x'}})\n", 99 | "# 
grid" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 5, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "# raw_data_dict['CMIP.CNRM-CERFACS.CNRM-CM6-1.piControl.Omon.gn']" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 6, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "# # now parse all areas...\n", 118 | "# with warnings.catch_warnings():\n", 119 | "# warnings.simplefilter(\"ignore\")\n", 120 | "# data_dict = parse_metrics(raw_data_dict, col)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "You can see there are 22 models with temp data!" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "But not each one of them has an area.... It turns out that the areas are spread all over the catalogue with no discenible system. BUT THEY ARE SOMEWHERE for pretty much every model...and they should be the same for a given `source_id` and `grid_spec`. So with `parse_metrics` we can parse the area into the datasets as coordinates." 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 7, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "# new_dict = {k:ds for k, ds in data_dict.items() if 'areacello' in ds.coords}" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 8, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "# len(new_dict)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "So we didnt get all of the models, but at least we got 17 to have an area for further calculations" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 9, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "# new_dict['CMIP.MIROC.MIROC6.piControl.Omon.gn'].thetao" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 10, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "query = dict(experiment_id='piControl',\n", 178 | " variable_id=['thetao', 'uo', 'vo'],table_id='Omon', grid_label='gn')" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 11, 184 | "metadata": {}, 185 | "outputs": [ 186 | { 187 | "name": "stdout", 188 | "output_type": "stream", 189 | "text": [ 190 | "--> The keys in the returned dictionary of datasets are constructed as follows:\n", 191 | "\t'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'\n", 192 | "\n", 193 | "--> There will be 22 group(s)\n", 194 | "--> The keys in the returned dictionary of datasets are constructed as follows:\n", 195 | "\t'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'\n", 196 | "\n", 197 | "--> There will be 21 group(s)\n", 198 | "--> The keys in the returned dictionary of datasets are constructed as follows:\n", 199 | "\t'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'\n", 200 | "\n", 201 | "--> There will be 21 group(s)\n", 202 | "BCC-CSM2-MR\n", 203 | "Grid Type: B detected\n", 204 | "BCC-ESM1\n", 205 | "Grid Type: B detected\n", 206 | "CAMS-CSM1-0\n", 207 | "Grid Type: B detected\n", 208 | "CanESM5\n", 209 | "Grid Type: C detected\n", 210 | "CNRM-CM6-1\n", 211 | "Grid Type: C detected\n", 212 | "\n", 213 | "Dimensions: (axis_nbounds: 2, lev: 75, member_id: 1, nvertex: 4, time: 6000, x: 362, x_left: 362, y: 294, y_left: 294)\n", 214 | "Coordinates:\n", 215 | " * x (x) 
int64 0 1 2 3 4 5 6 7 8 ... 354 355 356 357 358 359 360 361\n", 216 | " * y (y) int64 0 1 2 3 4 5 6 7 8 ... 286 287 288 289 290 291 292 293\n", 217 | " * lev (lev) float64 0.5058 1.556 2.668 ... 5.698e+03 5.902e+03\n", 218 | " * time (time) object 1850-01-16 12:00:00 ... 2349-12-16 12:00:00\n", 219 | " * member_id (member_id) \n", 225 | " bounds_lat (y, x, nvertex) float64 dask.array\n", 226 | " bounds_lon (y, x, nvertex) float64 dask.array\n", 227 | " lon (y, x) float64 dask.array\n", 228 | " lev_bounds (lev, axis_nbounds) float64 dask.array\n", 229 | " time_bounds (time, axis_nbounds) object dask.array\n", 230 | " thetao (member_id, time, lev, y, x) float32 dask.array\n", 231 | " lat_e (y, x) float64 dask.array\n", 232 | " lon_e (y, x) float64 dask.array\n", 233 | " uo (member_id, time, lev, y, x) float32 dask.array\n", 234 | " lat_n (y, x) float64 dask.array\n", 235 | " lon_n (y, x) float64 dask.array\n", 236 | " vo (member_id, time, lev, y, x) float32 dask.array\n", 237 | "Attributes:\n", 238 | " CMIP6_CV_version: cv=6.2.3.0-7-g2019642\n", 239 | " Conventions: CF-1.7 CMIP-6.2\n", 240 | " EXPID: CNRM-CM6-1_piControl_r1i1p1f2\n", 241 | " activity_id: CMIP\n", 242 | " arpege_minor_version: 6.3.1\n", 243 | " branch_method: standard\n", 244 | " branch_time_in_child: 0.0\n", 245 | " branch_time_in_parent: 273932.0\n", 246 | " contact: contact.cmip@meteo.fr\n", 247 | " creation_date: 2018-03-21T09:34:26Z\n", 248 | " data_specs_version: 01.00.21\n", 249 | " description: DECK: control\n", 250 | " dr2xml_md5sum: f996a989d4bc796959fe96cfda3db969\n", 251 | " dr2xml_version: 1.0\n", 252 | " experiment: pre-industrial control\n", 253 | " experiment_id: piControl\n", 254 | " external_variables: areacello volcello\n", 255 | " forcing_index: 2\n", 256 | " frequency: mon\n", 257 | " further_info_url: https://furtherinfo.es-doc.org/CMIP6.CNRM-CERFACS...\n", 258 | " grid: native ocean tri-polar grid with 105 k ocean cells\n", 259 | " grid_label: gn\n", 260 | " history: none\n", 261 | " initialization_index: 1\n", 262 | " institution: CNRM (Centre National de Recherches Meteorologiqu...\n", 263 | " institution_id: CNRM-CERFACS\n", 264 | " license: CMIP6 model data produced by CNRM-CERFACS is lice...\n", 265 | " mip_era: CMIP6\n", 266 | " name: /scratch/utmp/ftdir/voldoire/eclis/transfers/CNRM...\n", 267 | " nemo_gelato_commit: 49095b3accd5d4c_6524fe19b00467a\n", 268 | " nominal_resolution: 100 km\n", 269 | " parent_activity_id: CMIP\n", 270 | " parent_experiment_id: piControl-spinup\n", 271 | " parent_mip_era: CMIP6\n", 272 | " parent_source_id: CNRM-CM6-1\n", 273 | " parent_time_units: days since 1850-01-01 00:00:00\n", 274 | " parent_variant_label: r1i1p1f2\n", 275 | " physics_index: 1\n", 276 | " product: model-output\n", 277 | " realization_index: 1\n", 278 | " realm: ocean\n", 279 | " references: http://www.umr-cnrm.fr/cmip6/references\n", 280 | " source: CNRM-CM6-1 (2017): aerosol: prescribed monthly f...\n", 281 | " source_id: CNRM-CM6-1\n", 282 | " source_type: AOGCM\n", 283 | " sub_experiment: none\n", 284 | " sub_experiment_id: none\n", 285 | " table_id: Omon\n", 286 | " title: CNRM-CM6-1 model output prepared for CMIP6 / CMIP...\n", 287 | " tracking_id: hdl:21.14100/191fcb31-b7db-4857-9779-0ef8288da7bd...\n", 288 | " variable_id: thetao\n", 289 | " variant_info: . 
Information provided by this attribute may in s...\n", 290 | " variant_label: r1i1p1f2\n", 291 | " xios_commit: 1442-shuffle\n" 292 | ] 293 | }, 294 | { 295 | "ename": "ValueError", 296 | "evalue": "Couldn't find a center coordinate for axis Y", 297 | "output_type": "error", 298 | "traceback": [ 299 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 300 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 301 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mwarnings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcatch_warnings\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# these lines just make sure that the warnings dont clutter your notebook\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mwarnings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msimplefilter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"ignore\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mdata_dict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mread_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mquery\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 302 | "\u001b[0;32m~/cmip6_preprocessing/cmip6_preprocessing/preprocessing.py\u001b[0m in \u001b[0;36mread_data\u001b[0;34m(col, preview, required_variable_id, **kwargs)\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mmodelname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv_dict\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mv\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdata_dict\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mk\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'AWI-CM-1-1-MR'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodelname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 21\u001b[0;31m \u001b[0mdata_final\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmodelname\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfull_preprocessing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodelname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mplot\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 22\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdata_final\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 303 | "\u001b[0;32m~/cmip6_preprocessing/cmip6_preprocessing/preprocessing.py\u001b[0m in \u001b[0;36mfull_preprocessing\u001b[0;34m(dat_dict, modelname, tracer_ref, u_ref, v_ref, plot, verbose)\u001b[0m\n\u001b[1;32m 103\u001b[0m 
\u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 104\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 105\u001b[0;31m \u001b[0mgrid_temp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mGrid\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 106\u001b[0m \u001b[0mds\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrecreate_metrics\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrid_temp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 107\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mds\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 304 | "\u001b[0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/xgcm/grid.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, ds, check_dims, periodic, default_shifts, face_connections, coords, metrics)\u001b[0m\n\u001b[1;32m 830\u001b[0m \u001b[0mis_periodic\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 831\u001b[0m \u001b[0mdefault_shifts\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis_default_shifts\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 832\u001b[0;31m \u001b[0mcoords\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcoords\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 833\u001b[0m )\n\u001b[1;32m 834\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 305 | "\u001b[0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/xgcm/grid.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, ds, axis_name, periodic, default_shifts, coords)\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 92\u001b[0m \u001b[0;31m# fall back on comodo conventions\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 93\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcoords\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcomodo\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_axis_positions_and_coords\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[0;31m# self.coords is a dictionary with the following structure\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 306 | "\u001b[0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/xgcm/comodo.py\u001b[0m in \u001b[0;36mget_axis_positions_and_coords\u001b[0;34m(ds, axis_name)\u001b[0m\n\u001b[1;32m 83\u001b[0m }\n\u001b[1;32m 84\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcoords_without_axis_shift\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 85\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Couldn't find a center coordinate for axis %s\"\u001b[0m 
\u001b[0;34m%\u001b[0m \u001b[0maxis_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 86\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcoords_without_axis_shift\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 87\u001b[0m raise ValueError(\n", 307 | "\u001b[0;31mValueError\u001b[0m: Couldn't find a center coordinate for axis Y" 308 | ] 309 | } 310 | ], 311 | "source": [ 312 | "# load the same thing with preprocessing\n", 313 | "from cmip6_preprocessing.preprocessing import read_data\n", 314 | "with warnings.catch_warnings(): # these lines just make sure that the warnings dont clutter your notebook\n", 315 | " warnings.simplefilter(\"ignore\")\n", 316 | " data_dict = read_data(col, **query)" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [ 325 | "data_dict" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": null, 331 | "metadata": {}, 332 | "outputs": [], 333 | "source": [ 334 | "parse_metrics(data_dict, col, rename=True) #rename is important to get the consistent naming!" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": null, 340 | "metadata": {}, 341 | "outputs": [], 342 | "source": [] 343 | } 344 | ], 345 | "metadata": { 346 | "kernelspec": { 347 | "display_name": "Python 3", 348 | "language": "python", 349 | "name": "python3" 350 | }, 351 | "language_info": { 352 | "codemirror_mode": { 353 | "name": "ipython", 354 | "version": 3 355 | }, 356 | "file_extension": ".py", 357 | "mimetype": "text/x-python", 358 | "name": "python", 359 | "nbconvert_exporter": "python", 360 | "pygments_lexer": "ipython3", 361 | "version": "3.7.3" 362 | } 363 | }, 364 | "nbformat": 4, 365 | "nbformat_minor": 4 366 | } 367 | -------------------------------------------------------------------------------- /notebooks/parse_area_gn.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Extract areas from all available parameters and parse based on `source_id`" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stderr", 17 | "output_type": "stream", 18 | "text": [ 19 | "/srv/conda/envs/notebook/lib/python3.7/site-packages/intake/source/discovery.py:136: FutureWarning: The drivers ['stac-catalog', 'stac-collection', 'stac-item'] do not specify entry_points and were only discovered via a package scan. This may break in a future release of intake. 
The packages should be updated.\n", 20 | " FutureWarning)\n" 21 | ] 22 | } 23 | ], 24 | "source": [ 25 | "import intake\n", 26 | "import numpy as np" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 2, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "from xmip.parse_static_metrics import parse_metrics" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "# define collection\n", 45 | "col = intake.open_esm_datastore(\"../../cmip6hack-ocean-bgc/catalogs/pangeo-cmip6.json\")" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "name": "stdout", 55 | "output_type": "stream", 56 | "text": [ 57 | "--> The keys in the returned dictionary of datasets are constructed as follows:\n", 58 | "\t'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'\n", 59 | "\n", 60 | "--> There will be 22 group(s)\n" 61 | ] 62 | } 63 | ], 64 | "source": [ 65 | "# # load a bunch of dataset with intake_esm\n", 66 | "# import warnings\n", 67 | "# with warnings.catch_warnings():\n", 68 | "# warnings.simplefilter(\"ignore\")\n", 69 | "# query = dict(experiment_id='piControl',\n", 70 | "# variable_id=['thetao'], grid_label='gn')\n", 71 | "# cat = col.search(**query)\n", 72 | "# cat.df\n", 73 | "# raw_data_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True}, cdf_kwargs={'chunks': {}})\n", 74 | "# raw_data_dict.keys()" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 5, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "data": { 84 | "text/plain": [ 85 | "\n", 86 | "X Axis (periodic):\n", 87 | " * center x" 88 | ] 89 | }, 90 | "execution_count": 5, 91 | "metadata": {}, 92 | "output_type": "execute_result" 93 | } 94 | ], 95 | "source": [ 96 | "# from xgcm import Grid\n", 97 | "# ds_test = raw_data_dict['CMIP.CNRM-CERFACS.CNRM-CM6-1.piControl.Omon.gn']\n", 98 | "# grid = Grid(ds_test, coords={'X':{'center':'x'}})\n", 99 | "# grid" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 5, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "# raw_data_dict['CMIP.CNRM-CERFACS.CNRM-CM6-1.piControl.Omon.gn']" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 6, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "# # now parse all areas...\n", 118 | "# with warnings.catch_warnings():\n", 119 | "# warnings.simplefilter(\"ignore\")\n", 120 | "# data_dict = parse_metrics(raw_data_dict, col)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "You can see there are 22 models with temp data!" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "But not each one of them has an area.... It turns out that the areas are spread all over the catalogue with no discenible system. BUT THEY ARE SOMEWHERE for pretty much every model...and they should be the same for a given `source_id` and `grid_spec`. So with `parse_metrics` we can parse the area into the datasets as coordinates." 
135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 7, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "# new_dict = {k:ds for k, ds in data_dict.items() if 'areacello' in ds.coords}" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 8, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "# len(new_dict)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "So we didnt get all of the models, but at least we got 17 to have an area for further calculations" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 9, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "# new_dict['CMIP.MIROC.MIROC6.piControl.Omon.gn'].thetao" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 10, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "query = dict(experiment_id='piControl',\n", 178 | " variable_id=['thetao', 'uo', 'vo'],table_id='Omon', grid_label='gn')" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 11, 184 | "metadata": {}, 185 | "outputs": [ 186 | { 187 | "name": "stdout", 188 | "output_type": "stream", 189 | "text": [ 190 | "--> The keys in the returned dictionary of datasets are constructed as follows:\n", 191 | "\t'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'\n", 192 | "\n", 193 | "--> There will be 22 group(s)\n", 194 | "--> The keys in the returned dictionary of datasets are constructed as follows:\n", 195 | "\t'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'\n", 196 | "\n", 197 | "--> There will be 21 group(s)\n", 198 | "--> The keys in the returned dictionary of datasets are constructed as follows:\n", 199 | "\t'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'\n", 200 | "\n", 201 | "--> There will be 21 group(s)\n", 202 | "BCC-CSM2-MR\n", 203 | "Grid Type: B detected\n", 204 | "BCC-ESM1\n", 205 | "Grid Type: B detected\n", 206 | "CAMS-CSM1-0\n", 207 | "Grid Type: B detected\n", 208 | "CanESM5\n", 209 | "Grid Type: C detected\n", 210 | "CNRM-CM6-1\n", 211 | "Grid Type: C detected\n", 212 | "\n", 213 | "Dimensions: (axis_nbounds: 2, lev: 75, member_id: 1, nvertex: 4, time: 6000, x: 362, x_left: 362, y: 294, y_left: 294)\n", 214 | "Coordinates:\n", 215 | " * x (x) int64 0 1 2 3 4 5 6 7 8 ... 354 355 356 357 358 359 360 361\n", 216 | " * y (y) int64 0 1 2 3 4 5 6 7 8 ... 286 287 288 289 290 291 292 293\n", 217 | " * lev (lev) float64 0.5058 1.556 2.668 ... 5.698e+03 5.902e+03\n", 218 | " * time (time) object 1850-01-16 12:00:00 ... 
2349-12-16 12:00:00\n", 219 | " * member_id (member_id) \n", 225 | " bounds_lat (y, x, nvertex) float64 dask.array\n", 226 | " bounds_lon (y, x, nvertex) float64 dask.array\n", 227 | " lon (y, x) float64 dask.array\n", 228 | " lev_bounds (lev, axis_nbounds) float64 dask.array\n", 229 | " time_bounds (time, axis_nbounds) object dask.array\n", 230 | " thetao (member_id, time, lev, y, x) float32 dask.array\n", 231 | " lat_e (y, x) float64 dask.array\n", 232 | " lon_e (y, x) float64 dask.array\n", 233 | " uo (member_id, time, lev, y, x) float32 dask.array\n", 234 | " lat_n (y, x) float64 dask.array\n", 235 | " lon_n (y, x) float64 dask.array\n", 236 | " vo (member_id, time, lev, y, x) float32 dask.array\n", 237 | "Attributes:\n", 238 | " CMIP6_CV_version: cv=6.2.3.0-7-g2019642\n", 239 | " Conventions: CF-1.7 CMIP-6.2\n", 240 | " EXPID: CNRM-CM6-1_piControl_r1i1p1f2\n", 241 | " activity_id: CMIP\n", 242 | " arpege_minor_version: 6.3.1\n", 243 | " branch_method: standard\n", 244 | " branch_time_in_child: 0.0\n", 245 | " branch_time_in_parent: 273932.0\n", 246 | " contact: contact.cmip@meteo.fr\n", 247 | " creation_date: 2018-03-21T09:34:26Z\n", 248 | " data_specs_version: 01.00.21\n", 249 | " description: DECK: control\n", 250 | " dr2xml_md5sum: f996a989d4bc796959fe96cfda3db969\n", 251 | " dr2xml_version: 1.0\n", 252 | " experiment: pre-industrial control\n", 253 | " experiment_id: piControl\n", 254 | " external_variables: areacello volcello\n", 255 | " forcing_index: 2\n", 256 | " frequency: mon\n", 257 | " further_info_url: https://furtherinfo.es-doc.org/CMIP6.CNRM-CERFACS...\n", 258 | " grid: native ocean tri-polar grid with 105 k ocean cells\n", 259 | " grid_label: gn\n", 260 | " history: none\n", 261 | " initialization_index: 1\n", 262 | " institution: CNRM (Centre National de Recherches Meteorologiqu...\n", 263 | " institution_id: CNRM-CERFACS\n", 264 | " license: CMIP6 model data produced by CNRM-CERFACS is lice...\n", 265 | " mip_era: CMIP6\n", 266 | " name: /scratch/utmp/ftdir/voldoire/eclis/transfers/CNRM...\n", 267 | " nemo_gelato_commit: 49095b3accd5d4c_6524fe19b00467a\n", 268 | " nominal_resolution: 100 km\n", 269 | " parent_activity_id: CMIP\n", 270 | " parent_experiment_id: piControl-spinup\n", 271 | " parent_mip_era: CMIP6\n", 272 | " parent_source_id: CNRM-CM6-1\n", 273 | " parent_time_units: days since 1850-01-01 00:00:00\n", 274 | " parent_variant_label: r1i1p1f2\n", 275 | " physics_index: 1\n", 276 | " product: model-output\n", 277 | " realization_index: 1\n", 278 | " realm: ocean\n", 279 | " references: http://www.umr-cnrm.fr/cmip6/references\n", 280 | " source: CNRM-CM6-1 (2017): aerosol: prescribed monthly f...\n", 281 | " source_id: CNRM-CM6-1\n", 282 | " source_type: AOGCM\n", 283 | " sub_experiment: none\n", 284 | " sub_experiment_id: none\n", 285 | " table_id: Omon\n", 286 | " title: CNRM-CM6-1 model output prepared for CMIP6 / CMIP...\n", 287 | " tracking_id: hdl:21.14100/191fcb31-b7db-4857-9779-0ef8288da7bd...\n", 288 | " variable_id: thetao\n", 289 | " variant_info: . 
Information provided by this attribute may in s...\n", 290 | " variant_label: r1i1p1f2\n", 291 | " xios_commit: 1442-shuffle\n" 292 | ] 293 | }, 294 | { 295 | "ename": "ValueError", 296 | "evalue": "Couldn't find a center coordinate for axis Y", 297 | "output_type": "error", 298 | "traceback": [ 299 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 300 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 301 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mwarnings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcatch_warnings\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# these lines just make sure that the warnings dont clutter your notebook\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mwarnings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msimplefilter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"ignore\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mdata_dict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mread_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mquery\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 302 | "\u001b[0;32m~/xmip/xmip/preprocessing.py\u001b[0m in \u001b[0;36mread_data\u001b[0;34m(col, preview, required_variable_id, **kwargs)\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mmodelname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv_dict\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mv\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdata_dict\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mk\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'AWI-CM-1-1-MR'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodelname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 21\u001b[0;31m \u001b[0mdata_final\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmodelname\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfull_preprocessing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodelname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mplot\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 22\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdata_final\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 303 | "\u001b[0;32m~/xmip/xmip/preprocessing.py\u001b[0m in \u001b[0;36mfull_preprocessing\u001b[0;34m(dat_dict, modelname, tracer_ref, u_ref, v_ref, plot, verbose)\u001b[0m\n\u001b[1;32m 103\u001b[0m 
\u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 104\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 105\u001b[0;31m \u001b[0mgrid_temp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mGrid\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 106\u001b[0m \u001b[0mds\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrecreate_metrics\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrid_temp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 107\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mds\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 304 | "\u001b[0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/xgcm/grid.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, ds, check_dims, periodic, default_shifts, face_connections, coords, metrics)\u001b[0m\n\u001b[1;32m 830\u001b[0m \u001b[0mis_periodic\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 831\u001b[0m \u001b[0mdefault_shifts\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis_default_shifts\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 832\u001b[0;31m \u001b[0mcoords\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcoords\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 833\u001b[0m )\n\u001b[1;32m 834\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 305 | "\u001b[0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/xgcm/grid.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, ds, axis_name, periodic, default_shifts, coords)\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 92\u001b[0m \u001b[0;31m# fall back on comodo conventions\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 93\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcoords\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcomodo\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_axis_positions_and_coords\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[0;31m# self.coords is a dictionary with the following structure\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 306 | "\u001b[0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/xgcm/comodo.py\u001b[0m in \u001b[0;36mget_axis_positions_and_coords\u001b[0;34m(ds, axis_name)\u001b[0m\n\u001b[1;32m 83\u001b[0m }\n\u001b[1;32m 84\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcoords_without_axis_shift\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 85\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Couldn't find a center coordinate for axis %s\"\u001b[0m 
\u001b[0;34m%\u001b[0m \u001b[0maxis_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 86\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcoords_without_axis_shift\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 87\u001b[0m raise ValueError(\n", 307 | "\u001b[0;31mValueError\u001b[0m: Couldn't find a center coordinate for axis Y" 308 | ] 309 | } 310 | ], 311 | "source": [ 312 | "# load the same thing with preprocessing\n", 313 | "from xmip.preprocessing import read_data\n", 314 | "with warnings.catch_warnings(): # these lines just make sure that the warnings dont clutter your notebook\n", 315 | " warnings.simplefilter(\"ignore\")\n", 316 | " data_dict = read_data(col, **query)" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [ 325 | "data_dict" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": null, 331 | "metadata": {}, 332 | "outputs": [], 333 | "source": [ 334 | "parse_metrics(data_dict, col, rename=True) #rename is important to get the consistent naming!" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": null, 340 | "metadata": {}, 341 | "outputs": [], 342 | "source": [] 343 | } 344 | ], 345 | "metadata": { 346 | "kernelspec": { 347 | "display_name": "Python 3", 348 | "language": "python", 349 | "name": "python3" 350 | }, 351 | "language_info": { 352 | "codemirror_mode": { 353 | "name": "ipython", 354 | "version": 3 355 | }, 356 | "file_extension": ".py", 357 | "mimetype": "text/x-python", 358 | "name": "python", 359 | "nbconvert_exporter": "python", 360 | "pygments_lexer": "ipython3", 361 | "version": "3.7.6" 362 | } 363 | }, 364 | "nbformat": 4, 365 | "nbformat_minor": 4 366 | } 367 | -------------------------------------------------------------------------------- /notebooks/test.yaml: -------------------------------------------------------------------------------- 1 | ACCESS-CM2: 2 | gn: 3 | axis_shift: 4 | X: right 5 | Y: right 6 | ACCESS-ESM1-5: 7 | gn: 8 | axis_shift: 9 | X: right 10 | Y: right 11 | BCC-CSM2-MR: 12 | gn: 13 | axis_shift: 14 | X: right 15 | Y: right 16 | BCC-ESM1: 17 | gn: 18 | axis_shift: 19 | X: right 20 | Y: right 21 | CAMS-CSM1-0: 22 | gn: 23 | axis_shift: 24 | X: right 25 | Y: right 26 | CAS-ESM2-0: 27 | gn: 28 | axis_shift: 29 | X: left 30 | Y: left 31 | CESM1-1-CAM5-CMIP5: 32 | gn: 33 | axis_shift: 34 | X: left 35 | Y: left 36 | gr: 37 | axis_shift: 38 | X: left 39 | Y: left 40 | CESM2: 41 | gn: 42 | axis_shift: 43 | X: left 44 | Y: right 45 | gr: 46 | axis_shift: 47 | X: left 48 | Y: left 49 | CESM2-FV2: 50 | gn: 51 | axis_shift: 52 | X: left 53 | Y: right 54 | gr: 55 | axis_shift: 56 | X: left 57 | Y: left 58 | CESM2-WACCM: 59 | gn: 60 | axis_shift: 61 | X: left 62 | Y: right 63 | gr: 64 | axis_shift: 65 | X: left 66 | Y: left 67 | CESM2-WACCM-FV2: 68 | gn: 69 | axis_shift: 70 | X: left 71 | Y: right 72 | gr: 73 | axis_shift: 74 | X: left 75 | Y: left 76 | CIESM: 77 | gn: 78 | axis_shift: 79 | X: left 80 | Y: left 81 | CMCC-CM2-HR4: 82 | gn: 83 | axis_shift: 84 | X: left 85 | Y: left 86 | CMCC-ESM2: 87 | gn: 88 | axis_shift: 89 | X: left 90 | Y: left 91 | CNRM-CM6-1: 92 | gn: 93 | axis_shift: 94 | X: right 95 | Y: right 96 | gr1: 97 | axis_shift: 98 | X: left 99 | Y: left 100 | CNRM-CM6-1-HR: 101 | 
gn: 102 | axis_shift: 103 | X: left 104 | Y: right 105 | CNRM-ESM2-1: 106 | gn: 107 | axis_shift: 108 | X: right 109 | Y: right 110 | gr1: 111 | axis_shift: 112 | X: left 113 | Y: left 114 | CanESM5: 115 | gn: 116 | axis_shift: 117 | X: right 118 | Y: right 119 | CanESM5-CanOE: 120 | gn: 121 | axis_shift: 122 | X: right 123 | Y: right 124 | E3SM-1-0: 125 | gr: 126 | axis_shift: 127 | X: left 128 | Y: left 129 | E3SM-1-1: 130 | gr: 131 | axis_shift: 132 | X: left 133 | Y: left 134 | E3SM-1-1-ECA: 135 | gr: 136 | axis_shift: 137 | X: left 138 | Y: left 139 | EC-Earth3: 140 | gn: 141 | axis_shift: 142 | X: right 143 | Y: right 144 | gr: 145 | axis_shift: 146 | X: left 147 | Y: left 148 | EC-Earth3-AerChem: 149 | gn: 150 | axis_shift: 151 | X: left 152 | Y: left 153 | EC-Earth3-LR: 154 | gn: 155 | axis_shift: 156 | X: right 157 | Y: right 158 | EC-Earth3-Veg: 159 | gn: 160 | axis_shift: 161 | X: right 162 | Y: right 163 | gr: 164 | axis_shift: 165 | X: left 166 | Y: left 167 | EC-Earth3-Veg-LR: 168 | gn: 169 | axis_shift: 170 | X: left 171 | Y: left 172 | FGOALS-f3-L: 173 | gn: 174 | axis_shift: 175 | X: left 176 | Y: left 177 | FGOALS-g3: 178 | gn: 179 | axis_shift: 180 | X: left 181 | Y: left 182 | FIO-ESM-2-0: 183 | gn: 184 | axis_shift: 185 | X: left 186 | Y: right 187 | GFDL-CM4: 188 | gn: 189 | axis_shift: 190 | X: left 191 | Y: left 192 | gr: 193 | axis_shift: 194 | X: left 195 | Y: left 196 | GFDL-ESM2M: 197 | gn: 198 | axis_shift: 199 | X: left 200 | Y: left 201 | GFDL-ESM4: 202 | gn: 203 | axis_shift: 204 | X: left 205 | Y: left 206 | gr: 207 | axis_shift: 208 | X: left 209 | Y: left 210 | GFDL-OM4p5B: 211 | gn: 212 | axis_shift: 213 | X: left 214 | Y: left 215 | gr: 216 | axis_shift: 217 | X: left 218 | Y: left 219 | GISS-E2-1-G: 220 | gn: 221 | axis_shift: 222 | X: left 223 | Y: left 224 | GISS-E2-1-G-CC: 225 | gn: 226 | axis_shift: 227 | X: right 228 | Y: left 229 | GISS-E2-1-H: 230 | gn: 231 | axis_shift: 232 | X: left 233 | Y: left 234 | gr: 235 | axis_shift: 236 | X: left 237 | Y: left 238 | GISS-E2-2-G: 239 | gn: 240 | axis_shift: 241 | X: right 242 | Y: left 243 | HadGEM3-GC31-LL: 244 | gn: 245 | axis_shift: 246 | X: right 247 | Y: right 248 | HadGEM3-GC31-MM: 249 | gn: 250 | axis_shift: 251 | X: left 252 | Y: right 253 | IITM-ESM: 254 | gn: 255 | axis_shift: 256 | X: left 257 | Y: left 258 | INM-CM4-8: 259 | gr1: 260 | axis_shift: 261 | X: left 262 | Y: left 263 | INM-CM5-0: 264 | gr1: 265 | axis_shift: 266 | X: left 267 | Y: left 268 | IPSL-CM6A-LR: 269 | gn: 270 | axis_shift: 271 | X: right 272 | Y: right 273 | KACE-1-0-G: 274 | gr: 275 | axis_shift: 276 | X: left 277 | Y: left 278 | KIOST-ESM: 279 | gr1: 280 | axis_shift: 281 | X: left 282 | Y: left 283 | MCM-UA-1-0: 284 | gn: 285 | axis_shift: 286 | X: right 287 | Y: right 288 | MIROC-ES2L: 289 | gn: 290 | axis_shift: 291 | X: right 292 | Y: right 293 | gr1: 294 | axis_shift: 295 | X: left 296 | Y: left 297 | MIROC6: 298 | gn: 299 | axis_shift: 300 | X: right 301 | Y: right 302 | MPI-ESM1-2-HR: 303 | gn: 304 | axis_shift: 305 | X: right 306 | Y: left 307 | MRI-ESM2-0: 308 | gn: 309 | axis_shift: 310 | X: left 311 | Y: left 312 | gr: 313 | axis_shift: 314 | X: left 315 | Y: left 316 | NESM3: 317 | gn: 318 | axis_shift: 319 | X: right 320 | Y: right 321 | NorCPM1: 322 | gn: 323 | axis_shift: 324 | X: left 325 | Y: left 326 | gr: 327 | axis_shift: 328 | X: left 329 | Y: left 330 | NorESM1-F: 331 | gn: 332 | axis_shift: 333 | X: left 334 | Y: left 335 | NorESM2-LM: 336 | gn: 337 | axis_shift: 338 | X: right 339 | Y: left 340 
| gr: 341 | axis_shift: 342 | X: right 343 | Y: left 344 | NorESM2-MM: 345 | gn: 346 | axis_shift: 347 | X: right 348 | Y: left 349 | gr: 350 | axis_shift: 351 | X: right 352 | Y: left 353 | SAM0-UNICON: 354 | gn: 355 | axis_shift: 356 | X: left 357 | Y: right 358 | TaiESM1: 359 | gn: 360 | axis_shift: 361 | X: left 362 | Y: right 363 | UKESM1-0-LL: 364 | gn: 365 | axis_shift: 366 | X: right 367 | Y: right 368 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=45", "wheel", "setuptools_scm[toml]>=6.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.setuptools_scm] 6 | 7 | [tool.interrogate] 8 | ignore-init-method = true 9 | ignore-init-module = false 10 | ignore-magic = false 11 | ignore-semiprivate = true 12 | ignore-private = true 13 | ignore-property-decorators = true 14 | ignore-module = false 15 | fail-under = 95 16 | # This somehow does not work...the excludes are defined in the pre-commit-config.yaml for now 17 | # exclude = ["setup.py", "docs", "tests/*", "xmip/_version.py"] 18 | verbose = 1 19 | quiet = false 20 | color = true 21 | 22 | [tool.isort] 23 | known_third_party = ["cf_xarray", "cftime", "dask", "fsspec", "numpy", "pint", "pint_xarray", "pkg_resources", "pytest", "setuptools", "xarray", "xarrayutils", "xesmf", "xgcm", "yaml"] 24 | 25 | 26 | [tool.pytest.ini_options] 27 | minversion = "6.0" 28 | addopts = " -vv -rXfE" 29 | # only test the root level, otherwise it picks up the tests of the project template 30 | testpaths = [ 31 | "tests", 32 | ] 33 | -------------------------------------------------------------------------------- /readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | build: 3 | os: ubuntu-20.04 4 | tools: 5 | python: mambaforge-4.10 6 | sphinx: 7 | configuration: docs/conf.py 8 | python: 9 | install: 10 | - method: setuptools 11 | path: . 12 | conda: 13 | environment: docs/environment.yml 14 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [sdist] 2 | formats = gztar 3 | 4 | [check-manifest] 5 | ignore = 6 | *.yml 7 | *.yaml 8 | .coveragerc 9 | docs 10 | docs/* 11 | *.enc 12 | notebooks 13 | notebooks/* 14 | tests 15 | tests/* 16 | 17 | [flake8] 18 | max-line-length = 105 19 | select = C,E,F,W,B,B950 20 | ignore = E203, E501, W503 21 | exclude = 22 | xmip/_version.py 23 | docs/* 24 | __init__.py 25 | 26 | 27 | [metadata] 28 | name = xmip 29 | description = Analysis ready CMIP6 data the easy way 30 | author = xmip developers 31 | url=https://github.com/jbusecke/xmip 32 | long_description = file: README.md 33 | long_description_content_type = text/markdown 34 | license = Apache 35 | license_file = LICENSE.txt 36 | 37 | ## These need to be filled in by the author! 
38 | # For details see: https://pypi.org/classifiers/ 39 | 40 | classifiers = 41 | Development Status :: 4 - Beta 42 | Topic :: Scientific/Engineering 43 | Intended Audience :: Science/Research 44 | Operating System :: OS Independent 45 | Programming Language :: Python 46 | Programming Language :: Python :: 3 47 | Programming Language :: Python :: 3.8 48 | Programming Language :: Python :: 3.9 49 | Programming Language :: Python :: 3.10 50 | License :: OSI Approved :: Apache Software License 51 | 52 | ## Add your email here 53 | author_email = jbusecke@princeton.edu 54 | 55 | 56 | ### make sure to fill in your dependencies! 57 | [options] 58 | install_requires = 59 | numpy 60 | pandas 61 | xarray>=0.17.0 62 | xgcm<0.7.0 63 | cftime 64 | xarrayutils 65 | pint 66 | cf_xarray >= 0.6.0 67 | pint-xarray 68 | setup_requires= 69 | setuptools 70 | setuptools-scm 71 | python_requires = >=3.8 72 | ################ Up until here 73 | 74 | include_package_data = True 75 | zip_safe = False 76 | packages = find: 77 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | 4 | setup( 5 | use_scm_version={ 6 | "write_to": "xmip/_version.py", 7 | "write_to_template": '__version__ = "{version}"', 8 | "tag_regex": r"^(?Pv)?(?P[^\+]+)(?P.*)?$", 9 | }, 10 | setup_requires=["setuptools>=45", "setuptools_scm[toml]>=6.0"], 11 | ) 12 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jbusecke/xMIP/108266e72d01c173e15cdf4ca00612a0d4f0a8ca/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_grids.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import xarray as xr 4 | 5 | from xgcm import Grid 6 | from xgcm.autogenerate import generate_grid_ds 7 | 8 | from xmip.grids import ( 9 | _interp_vertex_to_bounds, 10 | _parse_bounds_vertex, 11 | combine_staggered_grid, 12 | create_full_grid, 13 | detect_shift, 14 | distance, 15 | distance_deg, 16 | recreate_metrics, 17 | ) 18 | 19 | 20 | def _add_small_rand(da): 21 | return da + (np.random.rand(*da.shape) * 0.05) 22 | 23 | 24 | def _test_data(grid_label="gn", z_axis=True): 25 | xt = np.arange(4) + 1 26 | yt = np.arange(5) + 1 27 | zt = np.arange(6) + 1 28 | 29 | x = xr.DataArray(xt, coords=[("x", xt)]) 30 | y = xr.DataArray(yt, coords=[("y", yt)]) 31 | lev = xr.DataArray(zt, coords=[("lev", zt)]) 32 | 33 | # Need to add a tracer here to get the tracer dimsuffix 34 | coords = [("x", x.data), ("y", y.data)] 35 | data = np.random.rand(len(xt), len(yt)) 36 | dims = ["x", "y"] 37 | 38 | if z_axis: 39 | coords.append(("lev", lev.data)) 40 | data = np.random.rand(len(x), len(y), len(lev)) 41 | dims = ["x", "y", "lev"] 42 | 43 | tr = xr.DataArray( 44 | data, 45 | dims=dims, 46 | coords=coords, 47 | ) 48 | 49 | lon_raw = xr.DataArray(xt, coords=[("x", xt)]) 50 | lat_raw = xr.DataArray(yt, coords=[("y", yt)]) 51 | lon = lon_raw * xr.ones_like(lat_raw) 52 | lat = xr.ones_like(lon_raw) * lat_raw 53 | 54 | lon_bounds_e = lon + 0.5 55 | lon_bounds_w = lon - 0.5 + (np.random.rand(*lon.shape) * 0.05) 56 | lat_bounds_n = lat + 0.5 + (np.random.rand(*lon.shape) * 0.05) 57 | lat_bounds_s = lat - 0.5 + (np.random.rand(*lon.shape) * 0.05) 
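    # Below: build cell-edge bounds (stacked along a "bnds" dimension) and the four
    # cell-corner positions (stacked along a "vertex" dimension) around every tracer
    # point. Each edge is perturbed with a small random offset via _add_small_rand,
    # so the synthetic grid is close to, but not exactly, a regular 1-degree grid.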
58 | 59 | lon_bounds = xr.concat( 60 | [_add_small_rand(lon_bounds_w), _add_small_rand(lon_bounds_w)], dim="bnds" 61 | ) 62 | lat_bounds = xr.concat( 63 | [_add_small_rand(lat_bounds_s), _add_small_rand(lat_bounds_n)], dim="bnds" 64 | ) 65 | 66 | if z_axis: 67 | lev_bounds = xr.concat( 68 | [_add_small_rand(lev - 0.5), _add_small_rand(lev + 0.5)], dim="bnds" 69 | ) 70 | 71 | lon_verticies = xr.concat( 72 | [ 73 | _add_small_rand(lon_bounds_e), 74 | _add_small_rand(lon_bounds_e), 75 | _add_small_rand(lon_bounds_w), 76 | _add_small_rand(lon_bounds_w), 77 | ], 78 | dim="vertex", 79 | ) 80 | lat_verticies = xr.concat( 81 | [ 82 | _add_small_rand(lat_bounds_s), 83 | _add_small_rand(lat_bounds_n), 84 | _add_small_rand(lat_bounds_n), 85 | _add_small_rand(lat_bounds_s), 86 | ], 87 | dim="vertex", 88 | ) 89 | 90 | ds = xr.Dataset({"base": tr}) 91 | 92 | dataset_coords = dict( 93 | lon=lon, 94 | lat=lat, 95 | lon_bounds=lon_bounds, 96 | lat_bounds=lat_bounds, 97 | lon_verticies=lon_verticies, 98 | lat_verticies=lat_verticies, 99 | ) 100 | 101 | if z_axis: 102 | dataset_coords["lev_bounds"] = lev_bounds 103 | 104 | ds = ds.assign_coords(dataset_coords) 105 | ds.attrs["source_id"] = "test_model" 106 | ds.attrs["grid_label"] = grid_label 107 | ds.attrs["variable_id"] = "base" 108 | return ds 109 | 110 | 111 | def test_parse_bounds_vertex(): 112 | lon_b = xr.DataArray(np.array([0, 1, 2, 3]), dims=["vertex"]) 113 | lat_b = xr.DataArray(np.array([10, 11, 12, 13]), dims=["vertex"]) 114 | 115 | data = np.random.rand(4) 116 | 117 | da = xr.DataArray( 118 | data, dims=["vertex"], coords={"lon_verticies": lon_b, "lat_verticies": lat_b} 119 | ) 120 | test = _parse_bounds_vertex(da, "vertex", position=[0, 3]) 121 | print(test) 122 | expected = (da.isel(vertex=0).load().data, da.isel(vertex=3).load().data) 123 | print(expected) 124 | assert test == expected 125 | 126 | 127 | def test_interp_vertex_to_bounds(): 128 | da = xr.DataArray(np.arange(4), dims=["vertex"]) 129 | # test interp on the y axis 130 | expected = xr.DataArray(np.array([1.5, 1.5]), dims=["bnds"]) 131 | xr.testing.assert_equal(_interp_vertex_to_bounds(da, "y"), expected) 132 | # test interp on the x axis 133 | expected = xr.DataArray(np.array([0.5, 2.5]), dims=["bnds"]) 134 | xr.testing.assert_equal(_interp_vertex_to_bounds(da, "x"), expected) 135 | 136 | 137 | def test_distance_deg(): 138 | lon0, lat0, lon1, lat1 = 120, 30, 121, 31 139 | delta_lon, delta_lat = distance_deg(lon0, lat0, lon1, lat1) 140 | assert delta_lon == 1.0 141 | assert delta_lat == 1.0 142 | 143 | lon0, lat0, lon1, lat1 = 360, 30, 1, 31 144 | delta_lon, delta_lat = distance_deg(lon0, lat0, lon1, lat1) 145 | assert delta_lon == 1.0 146 | assert delta_lat == 1.0 147 | 148 | lon0, lat0, lon1, lat1 = 300, 30, 301, 30.09 149 | delta_lon, delta_lat = distance_deg(lon0, lat0, lon1, lat1) 150 | assert delta_lon == 1.0 151 | assert delta_lat == 0.0 152 | 153 | 154 | @pytest.mark.parametrize("lon", [0, 90, 120]) 155 | @pytest.mark.parametrize("lat", [0, 10, 45]) 156 | def test_distance(lon, lat): 157 | Re = 6.378e6 158 | # test straight lat line 159 | lon0, lat0, lon1, lat1 = lon, lat, lon, lat + 1 160 | dist = distance(lon0, lat0, lon1, lat1) 161 | np.testing.assert_allclose(dist, Re * (np.pi * 1.0 / 180)) 162 | 163 | # test straight lon line 164 | lon0, lat0, lon1, lat1 = lon, lat, lon + 1, lat 165 | dist = distance(lon0, lat0, lon1, lat1) 166 | np.testing.assert_allclose( 167 | dist, Re * (np.pi * 1.0 / 180) * np.cos(np.pi * lat0 / 180) 168 | ) 169 | 170 | 171 | # TODO: inner and 
outer (needs to be implemented in xgcm autogenerate first) 172 | @pytest.mark.parametrize("xshift", ["left", "right"]) 173 | @pytest.mark.parametrize("yshift", ["left", "right"]) 174 | @pytest.mark.parametrize("z_axis", [True, False]) 175 | def test_recreate_metrics(xshift, yshift, z_axis): 176 | # reconstruct all the metrics by hand and compare to inferred output 177 | 178 | # * For now this is a regular lon lat grid. Might need to add some tests for more complex grids. 179 | # Then again. This will not do a great job for those.... 180 | 181 | # create test dataset 182 | ds = _test_data(z_axis=z_axis) 183 | 184 | # TODO: generalize so this also works with e.g. zonal average sections (which dont have a X axis) 185 | coord_dict = {"X": "x", "Y": "y"} 186 | if z_axis: 187 | coord_dict["Z"] = "lev" 188 | 189 | ds_full = generate_grid_ds( 190 | ds, 191 | coord_dict, 192 | position={"X": ("center", xshift), "Y": ("center", yshift)}, 193 | ) 194 | 195 | grid = Grid(ds_full) 196 | 197 | ds_metrics, metrics_dict = recreate_metrics(ds_full, grid) 198 | 199 | if z_axis: 200 | # Check that the bound values are intact (previously those got alterd due to unexpected behaviour of .assign_coords()) 201 | assert "bnds" in ds_metrics.lev_bounds.dims 202 | 203 | # compute the more complex metrics (I could wrap this into a function I guess?) 204 | lon0, lon1 = grid.axes["X"]._get_neighbor_data_pairs(ds.lon.load(), xshift) 205 | lat0, lat1 = grid.axes["X"]._get_neighbor_data_pairs(ds.lat.load(), xshift) 206 | dx_gx_expected = distance(lon0, lat0, lon1, lat1) 207 | 208 | lon0, lon1 = grid.axes["Y"]._get_neighbor_data_pairs(ds.lon.load(), yshift) 209 | lat0, lat1 = grid.axes["Y"]._get_neighbor_data_pairs(ds.lat.load(), yshift) 210 | dy_gy_expected = distance(lon0, lat0, lon1, lat1) 211 | 212 | # corner metrics 213 | # dx 214 | if yshift == "left": 215 | # dx 216 | lon0, lon1 = grid.axes["X"]._get_neighbor_data_pairs( 217 | _interp_vertex_to_bounds(ds_metrics.lon_verticies, "y").isel(bnds=0), 218 | xshift, 219 | ) 220 | lat0, lat1 = grid.axes["X"]._get_neighbor_data_pairs( 221 | ds_metrics.lat_bounds.isel(bnds=0), xshift 222 | ) 223 | elif yshift == "right": 224 | lon0, lon1 = grid.axes["X"]._get_neighbor_data_pairs( 225 | _interp_vertex_to_bounds(ds_metrics.lon_verticies, "y").isel(bnds=1), 226 | xshift, 227 | ) 228 | lat0, lat1 = grid.axes["X"]._get_neighbor_data_pairs( 229 | ds_metrics.lat_bounds.isel(bnds=1), xshift 230 | ) 231 | dx_gxgy_expected = distance(lon0, lat0, lon1, lat1) 232 | 233 | # dy 234 | if xshift == "left": 235 | # dx 236 | lat0, lat1 = grid.axes["Y"]._get_neighbor_data_pairs( 237 | _interp_vertex_to_bounds(ds_metrics.lat_verticies, "x").isel(bnds=0), 238 | yshift, 239 | ) 240 | lon0, lon1 = grid.axes["Y"]._get_neighbor_data_pairs( 241 | ds_metrics.lon_bounds.isel(bnds=0), yshift 242 | ) 243 | elif xshift == "right": 244 | lat0, lat1 = grid.axes["Y"]._get_neighbor_data_pairs( 245 | _interp_vertex_to_bounds(ds_metrics.lat_verticies, "x").isel(bnds=1), 246 | yshift, 247 | ) 248 | lon0, lon1 = grid.axes["Y"]._get_neighbor_data_pairs( 249 | ds_metrics.lon_bounds.isel(bnds=1), yshift 250 | ) 251 | dy_gxgy_expected = distance(lon0, lat0, lon1, lat1) 252 | 253 | if xshift == "left": 254 | vertex_points = [0, 1] 255 | else: 256 | vertex_points = [2, 3] 257 | lon0, lon1 = ( 258 | ds_metrics.lon_verticies.isel(vertex=vertex_points[0]), 259 | ds_metrics.lon_verticies.isel(vertex=vertex_points[1]), 260 | ) 261 | lat0, lat1 = ( 262 | ds_metrics.lat_verticies.isel(vertex=vertex_points[0]), 263 | 
ds_metrics.lat_verticies.isel(vertex=vertex_points[1]), 264 | ) 265 | dy_gx_expected = distance(lon0, lat0, lon1, lat1) 266 | 267 | if yshift == "left": 268 | vertex_points = [0, 3] 269 | else: 270 | vertex_points = [1, 2] 271 | lon0, lon1 = ( 272 | ds_metrics.lon_verticies.isel(vertex=vertex_points[0]), 273 | ds_metrics.lon_verticies.isel(vertex=vertex_points[1]), 274 | ) 275 | lat0, lat1 = ( 276 | ds_metrics.lat_verticies.isel(vertex=vertex_points[0]), 277 | ds_metrics.lat_verticies.isel(vertex=vertex_points[1]), 278 | ) 279 | dx_gy_expected = distance(lon0, lat0, lon1, lat1) 280 | 281 | if z_axis: 282 | dz_t_expected = ds.lev_bounds.diff("bnds").squeeze().data 283 | else: 284 | dz_t_expected = None 285 | 286 | for var, expected in [ 287 | ("dz_t", dz_t_expected), 288 | ( 289 | "dx_t", 290 | distance( 291 | ds_metrics.lon_bounds.isel(bnds=0).data, 292 | ds_metrics.lat.data, 293 | ds_metrics.lon_bounds.isel(bnds=1).data, 294 | ds_metrics.lat.data, 295 | ), 296 | ), 297 | ( 298 | "dy_t", 299 | distance( 300 | ds_metrics.lon.data, 301 | ds_metrics.lat_bounds.isel(bnds=0).data, 302 | ds_metrics.lon.data, 303 | ds_metrics.lat_bounds.isel(bnds=1).data, 304 | ), 305 | ), 306 | ("dx_gx", dx_gx_expected), 307 | ("dy_gy", dy_gy_expected), 308 | ("dy_gx", dy_gx_expected), 309 | ("dx_gy", dx_gy_expected), 310 | ("dy_gxgy", dy_gxgy_expected), 311 | ("dx_gxgy", dx_gxgy_expected), 312 | ]: 313 | if expected is not None: 314 | print(var) 315 | control = ds_metrics[var].data 316 | if expected.shape != control.shape: 317 | control = control.T 318 | np.testing.assert_allclose(control, expected) 319 | 320 | if z_axis: 321 | assert set(["X", "Y", "Z"]).issubset(set(metrics_dict.keys())) 322 | else: 323 | assert set(["X", "Y"]).issubset(set(metrics_dict.keys())) 324 | assert "Z" not in list(metrics_dict.keys()) 325 | 326 | 327 | # TODO: inner and outer (needs to be implemented in xgcm autogenerate first) 328 | @pytest.mark.parametrize("xshift", ["left", "center", "right"]) 329 | @pytest.mark.parametrize("yshift", ["left", "center", "right"]) 330 | def test_detect_shift(xshift, yshift): 331 | # create base dataset (tracer) 332 | ds_base = _test_data() 333 | 334 | # create the maybe shifted dataset 335 | ds = ds_base.copy() 336 | if xshift == "left": 337 | ds["lon"] = ds["lon"] - 0.5 338 | elif xshift == "right": 339 | ds["lon"] = ds["lon"] + 0.5 340 | 341 | if yshift == "left": 342 | ds["lat"] = ds["lat"] - 0.5 343 | elif yshift == "right": 344 | ds["lat"] = ds["lat"] + 0.5 345 | assert detect_shift(ds_base, ds, "X") == xshift 346 | assert detect_shift(ds_base, ds, "Y") == yshift 347 | 348 | # repeat with very small shifts (these should not be detected) 349 | ds = ds_base.copy() 350 | if xshift == "left": 351 | ds["lon"] = ds["lon"] - 0.05 352 | elif xshift == "right": 353 | ds["lon"] = ds["lon"] + 0.05 354 | 355 | if yshift == "left": 356 | ds["lat"] = ds["lat"] - 0.05 357 | elif yshift == "right": 358 | ds["lat"] = ds["lat"] + 0.05 359 | assert detect_shift(ds_base, ds, "X") == "center" 360 | assert detect_shift(ds_base, ds, "Y") == "center" 361 | 362 | 363 | @pytest.mark.parametrize("xshift", ["left", "right"]) 364 | @pytest.mark.parametrize("yshift", ["left", "right"]) 365 | @pytest.mark.parametrize("grid_label", ["gr", "gn"]) 366 | def test_create_full_grid(xshift, yshift, grid_label): 367 | ds_base = _test_data(grid_label=grid_label) 368 | grid_dict = {"test_model": {grid_label: {"axis_shift": {"X": xshift, "Y": yshift}}}} 369 | # TODO: This should be specific to the grid_label: e.g grid_dict = 
{'model':{'gr':{'axis_shift':{'X':'left}}}} 370 | 371 | ds_full = create_full_grid(ds_base, grid_dict=grid_dict) 372 | 373 | shift_dict = {"left": -0.5, "right": 0.5} 374 | 375 | assert ds_full["x"].attrs["axis"] == "X" 376 | assert ds_full["x_" + xshift].attrs["axis"] == "X" 377 | assert ds_full["x_" + xshift].attrs["c_grid_axis_shift"] == shift_dict[xshift] 378 | assert ds_full["y"].attrs["axis"] == "Y" 379 | assert ds_full["y_" + yshift].attrs["axis"] == "Y" 380 | assert ds_full["y_" + yshift].attrs["c_grid_axis_shift"] == shift_dict[yshift] 381 | # TODO: integrate the vertical 382 | # assert ds_full["lev"].attrs["axis"] == "Z" 383 | 384 | # I might want to loosen this later and switch to a uniform naming 385 | # E.g. use x_g for the x dimension on the x gridface, no matter if its left or right... 386 | # TODO: Check upstream in xgcm 387 | # Once that is done I 388 | assert "x_" + xshift in ds_full.dims 389 | assert "y_" + yshift in ds_full.dims 390 | 391 | # test error handling 392 | with pytest.warns(UserWarning): 393 | ds_none = create_full_grid( 394 | ds_base, grid_dict=None 395 | ) # the synthetic dataset is not in the default dict. 396 | assert ds_none is None 397 | 398 | 399 | @pytest.mark.parametrize("recalculate_metrics", [True, False]) 400 | @pytest.mark.parametrize("xshift", ["left", "right"]) 401 | @pytest.mark.parametrize("yshift", ["left", "right"]) 402 | @pytest.mark.parametrize("grid_label", ["gr", "gn"]) 403 | def test_combine_staggered_grid(recalculate_metrics, xshift, yshift, grid_label): 404 | ds_base = _test_data(grid_label=grid_label) 405 | 406 | # create the maybe shifted dataset 407 | ds = ds_base.copy() 408 | ds = ds.rename({"base": "other"}) 409 | ds.attrs["variable_id"] = "other" 410 | if xshift == "left": 411 | ds["lon"] = ds["lon"] - 0.5 412 | elif xshift == "right": 413 | ds["lon"] = ds["lon"] + 0.5 414 | 415 | if yshift == "left": 416 | ds["lat"] = ds["lat"] - 0.5 417 | elif yshift == "right": 418 | ds["lat"] = ds["lat"] + 0.5 419 | grid_dict = {"test_model": {grid_label: {"axis_shift": {"X": xshift, "Y": yshift}}}} 420 | 421 | for other_ds in [ds, [ds]]: 422 | grid, ds_combined = combine_staggered_grid( 423 | ds_base, 424 | other_ds, 425 | grid_dict=grid_dict, 426 | recalculate_metrics=recalculate_metrics, 427 | ) 428 | 429 | for axis, shift in zip(["X", "Y"], [xshift, yshift]): 430 | # make sure the correct dim is in the added dataset 431 | assert grid.axes[axis].coords[shift] in ds_combined["other"].dims 432 | # and also that none of the other are in there 433 | assert all( 434 | [ 435 | di not in ds_combined["other"].dims 436 | for dd, di in grid.axes[axis].coords.items() 437 | if dd != shift 438 | ] 439 | ) 440 | # check if metrics are correctly parsed 441 | if recalculate_metrics: 442 | for axis in ["X", "Y"]: 443 | for metric in ["_t", "_gx", "_gy", "_gxgy"]: 444 | assert f"d{axis.lower()}{metric}" in list(ds_combined.coords) 445 | 446 | # Test error handling 447 | with pytest.warns(UserWarning): 448 | grid_none, ds_combined_none = combine_staggered_grid( 449 | ds_base, 450 | ds, 451 | grid_dict=None, 452 | recalculate_metrics=recalculate_metrics, 453 | ) 454 | assert ds_combined_none is None 455 | assert grid_none is None 456 | -------------------------------------------------------------------------------- /tests/test_preprocessing_cloud.py: -------------------------------------------------------------------------------- 1 | # This module tests data directly from the pangeo google cloud storage. 
2 | # Tests are meant to be more high level and also serve to document known problems (see skip statements). 3 | 4 | import fsspec 5 | import numpy as np 6 | import pytest 7 | import xarray as xr 8 | 9 | from xmip.grids import combine_staggered_grid 10 | from xmip.preprocessing import _desired_units, _drop_coords, combined_preprocessing 11 | from xmip.utils import google_cmip_col, model_id_match 12 | 13 | 14 | pytest.importorskip("gcsfs") 15 | 16 | 17 | def diagnose_duplicates(data): 18 | """displays non-unique entries in data""" 19 | _, idx = np.unique(data, return_index=True) 20 | missing = np.array([i for i in np.arange(len(data)) if i not in idx]) 21 | if len(missing) > 0: 22 | missing_values = data[missing] 23 | raise ValueError(f"Duplicate Values ({missing_values}) found") 24 | 25 | 26 | def data( 27 | source_id, variable_id, experiment_id, grid_label, use_intake_esm, catalog="main" 28 | ): 29 | zarr_kwargs = { 30 | "consolidated": True, 31 | # "decode_times": False, 32 | "decode_times": True, 33 | "use_cftime": True, 34 | } 35 | 36 | cat = google_cmip_col(catalog=catalog).search( 37 | source_id=source_id, 38 | experiment_id=experiment_id, 39 | variable_id=variable_id, 40 | # member_id="r1i1p1f1", 41 | table_id="Omon", 42 | grid_label=grid_label, 43 | ) 44 | 45 | if len(cat.df["zstore"]) > 0: 46 | if use_intake_esm: 47 | ddict = cat.to_dataset_dict( 48 | zarr_kwargs=zarr_kwargs, 49 | preprocess=combined_preprocessing, 50 | storage_options={"token": "anon"}, 51 | ) 52 | _, ds = ddict.popitem() 53 | else: 54 | # debugging options 55 | # @charlesbluca suggested this to make this work in GHA 56 | # https://github.com/jbusecke/xmip/pull/62#issuecomment-741928365 57 | mm = fsspec.get_mapper( 58 | cat.df["zstore"][0] 59 | ) # think you can pass in storage options here as well? 60 | ds_raw = xr.open_zarr(mm, **zarr_kwargs) 61 | ds = combined_preprocessing(ds_raw) 62 | else: 63 | ds = None 64 | 65 | return ds, cat 66 | 67 | 68 | def all_models(): 69 | df = google_cmip_col().df 70 | all_models = df["source_id"].unique() 71 | all_models = tuple(np.sort(all_models)) 72 | # all_models = tuple(["EC-Earth3"]) 73 | return all_models 74 | 75 | 76 | # test_models = ["CESM2-FV2", "GFDL-CM4"] 77 | test_models = all_models() 78 | 79 | 80 | def pytest_generate_tests(metafunc): 81 | # This is called for every test. Only get/set command line arguments 82 | # if the argument is specified in the list of test "fixturenames". 
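    # Illustrative note (the exact option names are an assumption; they would be
    # registered via `parser.addoption` in a conftest.py that is not shown here):
    # invoking e.g. `pytest tests/test_preprocessing_cloud.py --vi thetao --ei historical --gl gn`
    # would parametrize every test that requests the `vi`/`ei`/`gl` fixtures with those
    # values, while options left unset (None) are simply not parametrized below.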
83 | 84 | for name in ["vi", "gl", "ei", "cat"]: 85 | option_value = getattr(metafunc.config.option, name) 86 | 87 | if isinstance(option_value, str): 88 | option_value = [option_value] 89 | 90 | if name in metafunc.fixturenames and option_value is not None: 91 | metafunc.parametrize(name, option_value) 92 | 93 | 94 | # print(f"\n\n\n\n$$$$$$$ All available models: {all_models()}$$$$$$$\n\n\n\n") 95 | 96 | # Combine the input parameters according to command line input 97 | 98 | # --- Most basic test --- # 99 | 100 | # Try to combine some of the failures 101 | 102 | # We dont support these at all 103 | not_supported_failures = [ 104 | ("AWI-ESM-1-1-LR", "*", "*", "gn"), 105 | ("AWI-CM-1-1-MR", "*", "*", "gn"), 106 | ] 107 | 108 | # basic problems when trying to concat with intake-esm 109 | intake_concat_failures = [ 110 | ( 111 | "CanESM5", 112 | [ 113 | "uo", 114 | "so", 115 | "thetao", 116 | ], 117 | "ssp245", 118 | "gn", 119 | ), 120 | ( 121 | "CanESM5", 122 | ["zos"], 123 | [ 124 | "ssp245", 125 | "ssp585", 126 | ], 127 | "gn", 128 | ), 129 | ( 130 | "E3SM-1-0", 131 | ["so", "o2", "zos"], 132 | ["historical", "ssp585"], 133 | "gr", 134 | ), # issues with time concatenation 135 | ( 136 | "IPSL-CM6A-LR", 137 | ["thetao", "o2", "so"], 138 | "historical", 139 | "gn", 140 | ), # IPSL has an issue with `lev` dims concatting] 141 | ( 142 | "NorESM2-MM", 143 | ["uo", "so"], 144 | "historical", 145 | "gr", 146 | ), # time concatting 147 | ( 148 | "NorESM2-MM", 149 | ["so"], 150 | "historical", 151 | "gn", 152 | ), 153 | ] 154 | 155 | 156 | # this fixture has to be redifined every time to account for different fail cases for each test 157 | @pytest.fixture 158 | def spec_check_dim_coord_values_wo_intake(request, gl, vi, ei, cat): 159 | expected_failures = not_supported_failures + [ 160 | ("FGOALS-f3-L", ["thetao"], "piControl", "gn"), 161 | # ( 162 | # "GFDL-CM4", 163 | # "thetao", 164 | # "historical", 165 | # "gn", 166 | # ), # this should not fail and should trigger an xpass (I just use this for dev purposes to check 167 | # # the strict option) 168 | ] 169 | spec = (request.param, vi, ei, gl, cat) 170 | request.param = spec 171 | if model_id_match(expected_failures, request.param[0:-1]): 172 | request.node.add_marker(pytest.mark.xfail(strict=True)) 173 | return request 174 | 175 | 176 | @pytest.mark.parametrize( 177 | "spec_check_dim_coord_values_wo_intake", test_models, indirect=True 178 | ) 179 | def test_check_dim_coord_values_wo_intake( 180 | spec_check_dim_coord_values_wo_intake, 181 | ): 182 | ( 183 | source_id, 184 | variable_id, 185 | experiment_id, 186 | grid_label, 187 | catalog, 188 | ) = spec_check_dim_coord_values_wo_intake.param 189 | 190 | # there must be a better way to build this at the class level and then tear it down again 191 | # I can probably get this done with fixtures, but I dont know how atm 192 | ds, _ = data( 193 | source_id, variable_id, experiment_id, grid_label, False, catalog=catalog 194 | ) 195 | 196 | if ds is None: 197 | pytest.skip( 198 | f"No data found for {source_id}|{variable_id}|{experiment_id}|{grid_label}" 199 | ) 200 | 201 | # Check for dim duplicates 202 | # check all dims for duplicates 203 | # for di in ds.dims: 204 | # for now only test a subset of the dims. TODO: Add the bounds once they 205 | # are cleaned up. 
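    # The loop below asserts, for each of the (renamed) dimensions that is present:
    #   * no duplicate index values (via diagnose_duplicates and np.unique)
    #   * no NaNs and monotonically non-decreasing values (both skipped for decoded time)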
206 | for di in ["x", "y", "lev", "time"]: 207 | if di in ds.dims: 208 | diagnose_duplicates(ds[di].load().data) 209 | assert len(ds[di]) == len(np.unique(ds[di])) 210 | if di != "time": # these tests do not make sense for decoded time 211 | assert np.all(~np.isnan(ds[di])) 212 | assert np.all(ds[di].diff(di) >= 0) 213 | 214 | assert ds.lon.min().load() >= 0 215 | assert ds.lon.max().load() <= 360 216 | if "lon_bounds" in ds.variables: 217 | assert ds.lon_bounds.min().load() >= 0 218 | assert ds.lon_bounds.max().load() <= 361 219 | assert ds.lat.min().load() >= -90 220 | assert ds.lat.max().load() <= 90 221 | # make sure lon and lat are 2d 222 | assert len(ds.lon.shape) == 2 223 | assert len(ds.lat.shape) == 2 224 | for co in _drop_coords: 225 | if co in ds.dims: 226 | assert co not in ds.coords 227 | 228 | # Check unit conversion 229 | for var, expected_unit in _desired_units.items(): 230 | if var in ds.variables: 231 | unit = ds[var].attrs.get("units") 232 | if unit: 233 | assert unit == expected_unit 234 | 235 | 236 | # this fixture has to be redifined every time to account for different fail cases for each test 237 | @pytest.fixture 238 | def spec_check_dim_coord_values(request, gl, vi, ei, cat): 239 | expected_failures = ( 240 | not_supported_failures 241 | + intake_concat_failures 242 | + [ 243 | ("NorESM2-MM", ["uo", "zos"], "historical", "gn"), 244 | ("NorESM2-MM", "thetao", "historical", "gn"), 245 | ("NorESM2-MM", "thetao", "historical", "gr"), 246 | ("FGOALS-f3-L", ["thetao"], "piControl", "gn"), 247 | ] 248 | ) 249 | spec = (request.param, vi, ei, gl, cat) 250 | request.param = spec 251 | if model_id_match(expected_failures, request.param[0:-1]): 252 | request.node.add_marker(pytest.mark.xfail(strict=True)) 253 | return request 254 | 255 | 256 | @pytest.mark.parametrize("spec_check_dim_coord_values", test_models, indirect=True) 257 | def test_check_dim_coord_values( 258 | spec_check_dim_coord_values, 259 | ): 260 | ( 261 | source_id, 262 | variable_id, 263 | experiment_id, 264 | grid_label, 265 | catalog, 266 | ) = spec_check_dim_coord_values.param 267 | # there must be a better way to build this at the class level and then tear it down again 268 | # I can probably get this done with fixtures, but I dont know how atm 269 | ds, cat = data( 270 | source_id, variable_id, experiment_id, grid_label, True, catalog=catalog 271 | ) 272 | 273 | if ds is None: 274 | pytest.skip( 275 | f"No data found for {source_id}|{variable_id}|{experiment_id}|{grid_label}" 276 | ) 277 | 278 | # Check for dim duplicates 279 | # check all dims for duplicates 280 | # for di in ds.dims: 281 | # for now only test a subset of the dims. TODO: Add the bounds once they 282 | # are cleaned up. 
283 | for di in ["x", "y", "lev", "time"]: 284 | if di in ds.dims: 285 | diagnose_duplicates(ds[di].load().data) 286 | assert len(ds[di]) == len(np.unique(ds[di])) 287 | if di != "time": # these tests do not make sense for decoded time 288 | assert np.all(~np.isnan(ds[di])) 289 | assert np.all(ds[di].diff(di) >= 0) 290 | 291 | assert ds.lon.min().load() >= 0 292 | assert ds.lon.max().load() <= 360 293 | if "lon_bounds" in ds.variables: 294 | assert ds.lon_bounds.min().load() >= 0 295 | assert ds.lon_bounds.max().load() <= 361 296 | assert ds.lat.min().load() >= -90 297 | assert ds.lat.max().load() <= 90 298 | # make sure lon and lat are 2d 299 | assert len(ds.lon.shape) == 2 300 | assert len(ds.lat.shape) == 2 301 | for co in _drop_coords: 302 | if co in ds.dims: 303 | assert co not in ds.coords 304 | 305 | 306 | # --- Specific Bound Coords Test ----- 307 | 308 | 309 | # this fixture has to be redifined every time to account for different fail cases for each test 310 | @pytest.fixture 311 | def spec_check_bounds_verticies(request, gl, vi, ei, cat): 312 | expected_failures = ( 313 | not_supported_failures 314 | + intake_concat_failures 315 | + [ 316 | ("FGOALS-f3-L", ["thetao", "so", "uo", "zos"], "*", "gn"), 317 | ("FGOALS-g3", ["thetao", "so", "uo", "zos"], "*", "gn"), 318 | ("NorESM2-MM", ["thetao", "uo", "zos"], "historical", "gn"), 319 | ("NorESM2-MM", ["thetao", "so"], "historical", "gr"), 320 | ("IPSL-CM6A-LR", ["thetao", "o2"], "historical", "gn"), 321 | ("IITM-ESM", ["so", "uo", "thetao"], "piControl", "gn"), 322 | ("GFDL-CM4", "uo", "*", "gn"), 323 | ] 324 | ) 325 | spec = (request.param, vi, ei, gl, cat) 326 | request.param = spec 327 | if model_id_match(expected_failures, request.param[0:-1]): 328 | request.node.add_marker(pytest.mark.xfail(strict=True)) 329 | return request 330 | 331 | 332 | @pytest.mark.parametrize("spec_check_bounds_verticies", test_models, indirect=True) 333 | def test_check_bounds_verticies(spec_check_bounds_verticies): 334 | ( 335 | source_id, 336 | variable_id, 337 | experiment_id, 338 | grid_label, 339 | catalog, 340 | ) = spec_check_bounds_verticies.param 341 | ds, cat = data( 342 | source_id, variable_id, experiment_id, grid_label, True, catalog=catalog 343 | ) 344 | 345 | if ds is None: 346 | pytest.skip( 347 | f"No data found for {source_id}|{variable_id}|{experiment_id}|{grid_label}" 348 | ) 349 | 350 | if "vertex" in ds.dims: 351 | np.testing.assert_allclose(ds.vertex.data, np.arange(4)) 352 | 353 | # Check for existing bounds and verticies 354 | for co in ["lon_bounds", "lat_bounds", "lon_verticies", "lat_verticies"]: 355 | assert co in ds.coords 356 | # make sure that all other dims are eliminated from the bounds. 357 | assert (set(ds[co].dims) - set(["bnds", "vertex"])) == set(["x", "y"]) 358 | 359 | # Check the order of the vertex 360 | # Ill only check these south of the Arctic for now. Up there 361 | # things are still weird. 
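    # Note: the diff checks below encode the vertex ordering also used in
    # xmip.grids.recreate_metrics (roughly 0 = SW, 1 = NW, 2 = NE, 3 = SE), i.e.
    # longitude should (mostly) increase from vertex 0->3 and 1->2, and latitude
    # from 0->1 and 3->2, away from the distorted polar rows.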
362 | test_ds = ds.where(abs(ds.lat) <= 40, drop=True) 363 | 364 | vertex_lon_diff1 = test_ds.lon_verticies.isel( 365 | vertex=3 366 | ) - test_ds.lon_verticies.isel(vertex=0) 367 | vertex_lon_diff2 = test_ds.lon_verticies.isel( 368 | vertex=2 369 | ) - test_ds.lon_verticies.isel(vertex=1) 370 | vertex_lat_diff1 = test_ds.lat_verticies.isel( 371 | vertex=1 372 | ) - test_ds.lat_verticies.isel(vertex=0) 373 | vertex_lat_diff2 = test_ds.lat_verticies.isel( 374 | vertex=2 375 | ) - test_ds.lat_verticies.isel(vertex=3) 376 | for vertex_diff in [vertex_lon_diff1, vertex_lon_diff2]: 377 | assert (vertex_diff <= 0).sum() <= (3 * len(vertex_diff.y)) 378 | # allowing for a few rows to be negative 379 | 380 | for vertex_diff in [vertex_lat_diff1, vertex_lat_diff2]: 381 | assert (vertex_diff <= 0).sum() <= (5 * len(vertex_diff.x)) 382 | # allowing for a few rows to be negative 383 | # This is just to make sure that not the majority of values is negative or zero. 384 | 385 | # Same for the bounds: 386 | lon_diffs = test_ds.lon_bounds.diff("bnds") 387 | lat_diffs = test_ds.lat_bounds.diff("bnds") 388 | 389 | assert (lon_diffs <= 0).sum() <= (5 * len(lon_diffs.y)) 390 | assert (lat_diffs <= 0).sum() <= (5 * len(lat_diffs.y)) 391 | 392 | 393 | # --- xgcm grid specific tests --- # 394 | # this fixture has to be redifined every time to account for different fail cases for each test 395 | @pytest.fixture 396 | def spec_check_grid(request, gl, vi, ei, cat): 397 | expected_failures = ( 398 | not_supported_failures 399 | + intake_concat_failures 400 | + [ 401 | ("CMCC-ESM2", "*", "*", "gn"), 402 | ("CMCC-CM2-SR5", "*", "*", "gn"), 403 | ("CMCC-CM2-HR4", "*", "*", "gn"), 404 | ("FGOALS-f3-L", "*", "*", "gn"), 405 | ("FGOALS-g3", "*", "*", "gn"), 406 | ("E3SM-1-0", ["so", "thetao", "o2"], "*", "gn"), 407 | ( 408 | "E3SM-1-0", 409 | ["zos"], 410 | ["historical", "ssp585", "ssp245", "ssp370", "esm-hist"], 411 | "gr", 412 | ), 413 | ( 414 | "EC-Earth3-AerChem", 415 | ["so", "thetao", "zos"], 416 | ["historical", "piControl", "ssp370"], 417 | "gn", 418 | ), 419 | ("EC-Earth3-Veg", "*", "historical", "gr"), 420 | ("EC-Earth3-CC", "*", "*", "gn"), 421 | ("MPI-ESM-1-2-HAM", "*", "*", "gn"), 422 | ("NorESM2-MM", "*", "historical", "gn"), 423 | ("NorESM2-MM", ["thetao", "so", "uo"], "historical", "gr"), 424 | ("IITM-ESM", "*", "*", "gn"), 425 | ("GFDL-CM4", ["uo"], "*", "gn"), 426 | ("IPSL-CM5A2-INCA", "*", "*", "gn"), 427 | ("IPSL-CM6A-LR-INCA", "*", "*", "gn"), 428 | ] 429 | ) 430 | spec = (request.param, vi, ei, gl, cat) 431 | request.param = spec 432 | if model_id_match(expected_failures, request.param[0:-1]): 433 | request.node.add_marker(pytest.mark.xfail(strict=True, reason="")) 434 | return request 435 | 436 | 437 | @pytest.mark.parametrize("spec_check_grid", test_models, indirect=True) 438 | def test_check_grid( 439 | spec_check_grid, 440 | ): 441 | source_id, variable_id, experiment_id, grid_label, catalog = spec_check_grid.param 442 | 443 | ds, cat = data( 444 | source_id, variable_id, experiment_id, grid_label, True, catalog=catalog 445 | ) 446 | 447 | if ds is None: 448 | pytest.skip( 449 | f"No data found for {source_id}|{variable_id}|{experiment_id}|{grid_label}" 450 | ) 451 | 452 | # This is just a rudimentary test to see if the creation works 453 | staggered_grid, ds_staggered = combine_staggered_grid(ds, recalculate_metrics=True) 454 | 455 | assert ds_staggered is not None 456 | # 457 | if "lev" in ds_staggered.dims: 458 | assert "bnds" in ds_staggered.lev_bounds.dims 459 | 460 | for axis in ["X", "Y"]: 
461 | for metric in ["_t", "_gx", "_gy", "_gxgy"]: 462 | assert f"d{axis.lower()}{metric}" in list(ds_staggered.coords) 463 | # TODO: Include actual test to combine variables 464 | -------------------------------------------------------------------------------- /tests/test_regionmask.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import xarray as xr 4 | 5 | from xmip.regionmask import _default_merge_dict, merged_mask 6 | 7 | 8 | regionmask = pytest.importorskip( 9 | "regionmask", minversion="0.5.0+dev" 10 | ) # All tests get skipped if the version of regionmask is not > 0.5.0 11 | 12 | 13 | @pytest.mark.parametrize("verbose", [True, False]) 14 | def test_merge_mask(verbose): 15 | x = np.linspace(0, 360, 720) 16 | y = np.linspace(-90, 90, 360) 17 | data = np.random.rand(len(x), len(y)) 18 | ds = xr.DataArray(data, coords=[("x", x), ("y", y)]).to_dataset(name="data") 19 | ds["lon"] = ds["x"] * xr.ones_like(ds["y"]) 20 | ds["lat"] = xr.ones_like(ds["x"]) * ds["y"] 21 | 22 | basins = regionmask.defined_regions.natural_earth_v4_1_0.ocean_basins_50 23 | 24 | mask = merged_mask(basins, ds, verbose=verbose) 25 | 26 | # check if number of regions is correct 27 | mask_regions = np.unique(mask.data.flat) 28 | mask_regions = mask_regions[~np.isnan(mask_regions)] 29 | 30 | assert len(mask_regions) == len(_default_merge_dict().keys()) 31 | 32 | # now a brief range check to make sure the pacific is stamped out correctly 33 | pac = ds.where( 34 | np.logical_or(np.logical_or(mask == 2, mask == 3), mask == 4), drop=True 35 | ) 36 | assert pac.lon.min() > 95.0 37 | assert pac.lon.max() < 295.0 38 | 39 | # I shoud add a test for -180-180 40 | 41 | # How to use cloud data. 42 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import xarray as xr 3 | 4 | from xmip.utils import cmip6_dataset_id, google_cmip_col, model_id_match 5 | 6 | 7 | def test_google_cmip_col(): 8 | try: 9 | import intake 10 | except ImportError: 11 | intake = None 12 | if intake is None: 13 | with pytest.raises(ImportError): 14 | col = google_cmip_col(catalog="main") 15 | else: 16 | col = google_cmip_col(catalog="main") 17 | assert ( 18 | col.catalog_file == "https://storage.googleapis.com/cmip6/pangeo-cmip6.csv" 19 | ) 20 | 21 | with pytest.raises(ValueError): 22 | col = google_cmip_col(catalog="wrong") 23 | 24 | 25 | def test_model_id_match(): 26 | # wrong amount of elements 27 | with pytest.raises(ValueError): 28 | model_id_match([("A", "a", "aa"), ("A", "a", "aa", "aaa")], ("A", "a", "aa")) 29 | 30 | with pytest.raises(ValueError): 31 | model_id_match([("A", "a", "aa"), ("B", "a", "aa")], ("A", "a", "aa", "aaa")) 32 | 33 | assert model_id_match([("A", "a", "aa"), ("B", "a", "aa")], ("A", "a", "aa")) 34 | assert ~model_id_match([("A", ["b", "c"], "aa")], ("A", "a", "aa")) 35 | assert ~model_id_match([("A", ["b", "c"], "aa")], ("A", "a", "aa")) 36 | assert ~model_id_match( 37 | [("EC-Earth3-AerChem", ["so"], "historical", "gn")], 38 | ("EC-Earth3", ["so"], "historical", "gn"), 39 | ) 40 | assert ~model_id_match([("A", "a", "aa"), ("B", "a", "aa")], ("AA", "a", "aa")) 41 | assert ~model_id_match([("AA", "a", "aa"), ("B", "a", "aa")], ("A", "a", "aa")) 42 | assert ~model_id_match([(["AA"], "a", "aa"), ("B", "a", "aa")], ("A", "a", "aa")) 43 | assert ~model_id_match([(["AA"], ["a"], "aa"), ("B", "a", 
"aa")], ("A", "a", "aa")) 44 | assert model_id_match([("*", "a", "aa")], ("whatever", "a", "aa")) 45 | assert model_id_match([(["bb", "b"], "a", "aa")], ("b", "a", "aa")) 46 | assert model_id_match( 47 | [(["bb", "b"], "a", "aa"), (["bb", "b"], "c", "cc")], ("bb", "a", "aa") 48 | ) 49 | 50 | 51 | def test_cmip6_dataset_id(): 52 | ds = xr.Dataset({"data": 4}) 53 | 54 | ds.attrs = { 55 | "activity_id": "ai", 56 | "institution_id": "ii", 57 | "source_id": "si", 58 | "variant_label": "vl", 59 | "experiment_id": "ei", 60 | "table_id": "ti", 61 | "grid_label": "gl", 62 | "variable_id": "vi", 63 | } 64 | 65 | assert cmip6_dataset_id(ds) == "ai.ii.si.ei.vl.ti.gl.none.vi" 66 | assert cmip6_dataset_id(ds, sep="_") == "ai_ii_si_ei_vl_ti_gl_none_vi" 67 | assert ( 68 | cmip6_dataset_id(ds, id_attrs=["grid_label", "activity_id", "wrong_attrs"]) 69 | == "gl.ai.none" 70 | ) 71 | -------------------------------------------------------------------------------- /xmip/__init__.py: -------------------------------------------------------------------------------- 1 | from importlib.metadata import ( # only works for python 3.8 and upwards 2 | PackageNotFoundError, 3 | version, 4 | ) 5 | 6 | try: 7 | __version__ = version("xmip") 8 | except PackageNotFoundError: 9 | # package is not installed 10 | __version__ = "unknown" 11 | pass 12 | -------------------------------------------------------------------------------- /xmip/drift_removal.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import dask.array as dsa 4 | import numpy as np 5 | import xarray as xr 6 | import xarrayutils as xru 7 | 8 | from xarrayutils.utils import linear_trend 9 | 10 | from xmip.postprocessing import EXACT_ATTRS, _match_datasets 11 | from xmip.utils import cmip6_dataset_id 12 | 13 | 14 | def _maybe_unpack_date(date): 15 | """`Unpacks` cftime from xr.Dataarray if provided""" 16 | # I should probably not do this here but instead in the higher level functions... 17 | if isinstance(date, xr.DataArray): 18 | date = date.data.tolist() 19 | if isinstance(date, list): 20 | if len(date) != 1: 21 | raise RuntimeError( 22 | "The passed date has the wrong format. Got [{date}] after conversion to list." 23 | ) 24 | else: 25 | date = date[0] 26 | return date 27 | 28 | 29 | def _construct_cfdate(data, units, calendar): 30 | # This seems clunky. I feel there must be a more elegant way of doing this? 31 | date = xr.DataArray(data, attrs={"units": units, "calendar": calendar}) 32 | return xr.decode_cf(date.to_dataset(name="time"), use_cftime=True).time 33 | 34 | 35 | def _datestr_to_cftime(date_str, calendar): 36 | # Again I feel this should be more elegant? For now I guess it works 37 | return _construct_cfdate([0], f"days since {date_str}", calendar) 38 | 39 | 40 | def replace_time( 41 | ds, ref_date=None, ref_idx=0, freq="1MS", calendar=None, time_dim="time" 42 | ): 43 | """This function replaces the time encoding of a dataset acoording to `ref_date`. 44 | The ref date can be any index of ds.time (default is 0; meaning the first timestep of ds will be replaced with `ref_date`). 45 | """ 46 | # ! I might be able to achieve some of this with time.shift 47 | # ! 48 | 49 | if calendar is None: 50 | calendar = ds.time.encoding.get("calendar", "standard") 51 | 52 | if ref_date is None: 53 | ref_date = _maybe_unpack_date(ds.time[0]) 54 | 55 | if isinstance(ref_date, str): 56 | ref_date = _maybe_unpack_date(_datestr_to_cftime(ref_date, calendar)) 57 | 58 | # TODO: Check the frequency. 
Currently the logic only works on monthly intervals 59 | if freq != "1MS": 60 | raise ValueError("`replace_time` currently only works with monthly data.") 61 | 62 | # determine the start date 63 | # propagate the date back (this assumes stricly monthly data) 64 | 65 | year = _maybe_unpack_date(ref_date).year - (ref_idx // 12) 66 | month = _maybe_unpack_date(ref_date).month - (ref_idx % 12) 67 | 68 | if month <= 0: 69 | # move the year one more back 70 | year -= 1 71 | month = 12 + month 72 | 73 | attrs = ds.time.attrs 74 | 75 | start = f"{int(year):04d}-{int(month):02d}" 76 | 77 | ds = ds.assign_coords( 78 | time=xr.cftime_range(start, periods=len(ds.time), freq=freq, calendar=calendar) 79 | ) 80 | ds.time.attrs = attrs 81 | return ds 82 | 83 | 84 | def find_date_idx(time, date): 85 | """Finds the index of `date` within an array of cftime dates. This strictly requires monthly data. 86 | Might result in undesired behavior for other time frequencies. 87 | """ 88 | # ! seems like I can refactor this with http://xarray.pydata.org/en/stable/generated/xarray.CFTimeIndex.get_loc.html#xarray.CFTimeIndex.get_loc 89 | 90 | date = _maybe_unpack_date(date) 91 | 92 | # easier approach: Find the difference in years and months 93 | year_diff = date.year - _maybe_unpack_date(time[0]).year 94 | month_diff = date.month - _maybe_unpack_date(time[0]).month 95 | 96 | return (year_diff * 12) + month_diff 97 | 98 | 99 | def unify_time(parent, child, adjust_to="child"): 100 | """Uses the CMIP6 specific metadata (augmented by xmip....time_preprocessing!!!) to adjust parent time encoding to child experiment. 101 | Similar to `switch_to_child_time`, but sets the time parameters (e.g. calendar) explicitly to the child conventions 102 | """ 103 | branch_time_in_parent = child.attrs.get("branch_time_in_parent") 104 | 105 | # if branch time is not in attrs do nothing 106 | if branch_time_in_parent is None: 107 | child_source_id = child.attrs.get("source_id", "not found") 108 | parent_source_id = parent.attrs.get("source_id", "not found") 109 | msg = ( 110 | f"Could not unify time for [child:{child_source_id}|parent:{parent_source_id}]." 111 | "`branch_time_in_parent` not found in attributes." 112 | ) 113 | warnings.warn(msg, UserWarning) 114 | return parent, child 115 | 116 | else: 117 | parent_calendar = parent.time.to_index().calendar 118 | child_calendar = child.time.to_index().calendar 119 | branch_time_parent = _construct_cfdate( 120 | child.attrs.get("branch_time_in_parent"), 121 | child.attrs.get("parent_time_units"), 122 | parent_calendar, 123 | ) 124 | branch_time_child = _construct_cfdate( 125 | child.attrs.get("branch_time_in_child"), 126 | child.time.encoding.get("units"), 127 | child_calendar, 128 | ) 129 | 130 | if adjust_to == "child": 131 | branch_idx_parent = find_date_idx(parent.time, branch_time_parent) 132 | return ( 133 | replace_time( 134 | parent, 135 | branch_time_child, 136 | ref_idx=branch_idx_parent, 137 | calendar=child_calendar, 138 | ), 139 | child, 140 | ) 141 | elif adjust_to == "parent": 142 | branch_idx_child = find_date_idx(child.time, branch_time_child) 143 | return parent, replace_time( 144 | child, 145 | branch_time_parent, 146 | ref_idx=branch_idx_child, 147 | calendar=parent_calendar, 148 | ) 149 | else: 150 | raise ValueError( 151 | f"Input for `adjust_to` not valid. Got {adjust_to}. Expected either `child` or `parent`." 
152 | ) 153 | 154 | 155 | def calculate_drift( 156 | reference, ds, variable, trend_years=250, compute_short_trends=False 157 | ): 158 | """Calculate the linear trend at every grid position for the given time (`trend_years`) 159 | starting from the date when `ds` was branched of from `ds_parent`. 160 | CMIP6 metadata must be present. 161 | 162 | Parameters 163 | ---------- 164 | ds_parent : xr.Dataset 165 | The dataset from which the drift (trend) is calculated. Usually the preindustrial control run 166 | ds : xr.Dataset 167 | The dataset for which the drift is matched. This is usually the historical experiment. 168 | !For many models, each historical member is branched 169 | trend_years : int, optional 170 | The duration of the trend to compute in years, by default 250 (This is the lenght of 171 | historical+standard scenario, e.g. 1850-2100) 172 | """ 173 | 174 | for attr in [ 175 | "parent_variant_label", 176 | "parent_source_id", 177 | "branch_time_in_parent", 178 | "parent_time_units", 179 | "source_id", 180 | "variant_label", 181 | ]: 182 | if attr not in ds.attrs: 183 | raise ValueError(f"Could not find {attr} in attributes of `ds`.") 184 | 185 | # Check if the parent member id matches 186 | match_attrs = ["source_id", "variant_label"] 187 | for ma in match_attrs: 188 | if ds.attrs[f"parent_{ma}"] not in reference.attrs[ma]: 189 | raise ValueError( 190 | f'`ds_parent` {ma} ({reference.attrs[ma]}) not compatible with `ds` parent_{ma} ({ds.attrs[f"parent_{ma}"]})' 191 | ) 192 | 193 | # find the branch date in the control run 194 | branch_time_reference = _construct_cfdate( 195 | ds.attrs["branch_time_in_parent"], 196 | ds.attrs["parent_time_units"], 197 | reference.time.to_index().calendar, 198 | ) 199 | branch_idx_reference = find_date_idx(reference.time, branch_time_reference) 200 | # there might be some cases where this is not true. Figure out what to do when it happens. 201 | assert branch_idx_reference >= 0 202 | 203 | # cut the referenmce to the appropriate time frame 204 | reference_cut = reference.isel( 205 | time=slice(branch_idx_reference, branch_idx_reference + (12 * trend_years)) 206 | ) 207 | 208 | if len(reference_cut.time) == 0: 209 | raise RuntimeError( 210 | "Selecting from `reference` according to the branch time resulted in empty dataset. Check the metadata." 211 | ) 212 | return None 213 | else: 214 | if len(reference_cut.time) < trend_years * 12: 215 | if compute_short_trends: 216 | warnings.warn( 217 | f"reference dataset does not have the full {trend_years} years to calculate trend. Using {int(len(reference_cut.time) / 12)} years only" 218 | ) 219 | else: 220 | raise RuntimeError( 221 | f"Reference dataset does not have the full {trend_years} years to calculate trend. Set `calculate_short_trend=True` to compute from a shorter timeseries" 222 | ) 223 | 224 | time_range = xr.concat( 225 | [ 226 | reference_cut.time[0].squeeze().drop_vars("time"), 227 | reference_cut.time[-1].squeeze().drop_vars("time"), 228 | ], 229 | dim="bnds", 230 | ).reset_coords(drop=True) 231 | 232 | # there is some problem when encoding very large years. for now ill preserve these only as 233 | # strings 234 | time_range = time_range.astype(str) 235 | 236 | # The polyfit implementation actually respects the units. 237 | # For now my implementation requires the slope to be in units .../month 238 | # I might be able to change this later and accomodate other time frequencies? 
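    # In other words (sketching the downstream assumption, see `detrend_basic`):
    # the stored slope is treated as "per time step", i.e. per month for monthly
    # data, and the trend removed later is slope * (integer month index relative
    # to the reference date).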
239 | # get rid of all the additional coords, which resets the time to an integer index 240 | 241 | reference_cut = reference_cut[variable] 242 | 243 | # TODO: This has pretty poor performance...need to find out why. 244 | # Reset time dimension to integer index. 245 | # reference_cut = reference_cut.drop_vars("time") 246 | 247 | # linear regression slope is all we need here. 248 | # reg = reference_cut.polyfit("time", 1).sel(degree=1).polyfit_coefficients 249 | 250 | reg_raw = linear_trend( 251 | reference_cut, 252 | "time", 253 | ) 254 | 255 | # ! quite possibly the shittiest fix ever. 256 | # I changed the API over at xarrayutils and now I have to pay the price over here. 257 | # TODO: Might want to eliminate this ones the new xarrayutils version has matured. 258 | if xru.__version__ > "v0.1.3": 259 | reg = reg_raw.slope 260 | else: 261 | reg = reg_raw.sel(parameter="slope").drop_vars("parameter").squeeze() 262 | 263 | # again drop all the coordinates 264 | reg = reg.reset_coords(drop=True) 265 | 266 | reg = reg.to_dataset(name=variable) 267 | 268 | # add metadata about regression 269 | reg = reg.assign_coords(trend_time_range=time_range) 270 | reg.coords["trend_time_range"].attrs.update( 271 | { 272 | "standard_name": "regression_time_bounds", 273 | "long_name": "regression_time_in_reference_run", 274 | } 275 | ) 276 | # reg should carry the attributes of `ds` 277 | # ? Maybe I should convert to a dataset? 278 | reg.attrs.update(ds.attrs) 279 | return reg 280 | 281 | 282 | # TODO: I need a more generalized detrending? Based on indicies --> xarrayutils 283 | # Then refactor this one here just for cmip6 284 | 285 | 286 | def detrend_basic(da, da_slope, start_idx=0, dim="time", keep_attrs=True): 287 | """Basic detrending just based on time index, not date""" 288 | # now create a trend timeseries at each point 289 | # and the time indicies by the ref index. This way the trend is correctly calculated from the reference year. 290 | # this adapts the chunk structure from the input if its a dask array 291 | attrs = {k: v for k, v in da.attrs.items()} 292 | idx_start = -start_idx 293 | idx_stop = len(da.time) - start_idx 294 | if isinstance(da.data, dsa.Array): 295 | ref_time = da.isel({di: 0 for di in da.dims if di != dim}) 296 | chunks = ref_time.chunks 297 | trend_time_idx_data = dsa.arange( 298 | idx_start, idx_stop, chunks=chunks, dtype=da.dtype 299 | ) 300 | else: 301 | trend_time_idx_data = np.arange(idx_start, idx_stop, dtype=da.dtype) 302 | 303 | trend_time_idx = xr.DataArray( 304 | trend_time_idx_data, 305 | dims=[dim], 306 | ) 307 | 308 | # chunk like the time dimension 309 | slope = da_slope.squeeze() 310 | 311 | trend = trend_time_idx * slope 312 | 313 | detrended = da - trend 314 | if keep_attrs: 315 | detrended.attrs.update(attrs) 316 | return detrended 317 | 318 | 319 | def remove_trend(ds, ds_slope, variable, ref_date, check_mask=True): 320 | """Detrending method for cmip6 data. Only works with monthly data! 321 | This does not correct the time convention. Be careful with experiements that have 322 | a non compatible time convention (often control runs.) 
323 | """ 324 | 325 | if not isinstance(ds, xr.Dataset): 326 | raise ValueError("`ds` input needs to be a dataset") 327 | 328 | if not isinstance(ds_slope, xr.Dataset): 329 | raise ValueError("`ds_slope` input needs to be a dataset") 330 | 331 | da = ds[variable] 332 | da_slope = ds_slope[variable] 333 | 334 | da, da_slope = xr.align(da, da_slope, join="override") 335 | 336 | if check_mask: 337 | nanmask_data = np.isnan(da.isel(time=[0, len(da.time) // 2, -1])).all("time") 338 | nanmask_slope = np.isnan(da_slope) 339 | # perform a quick test to see if the land is aligned properly 340 | if np.logical_xor(nanmask_data, nanmask_slope).any(): 341 | raise ValueError( 342 | "Nanmask between data and slope array not identical. Check input and disable `check_mask` to skip this test" 343 | ) 344 | 345 | ref_calendar = da.time.to_index().calendar 346 | ref_date = xr.cftime_range(ref_date, periods=1, calendar=ref_calendar) 347 | 348 | # Find the index corresponding to the ref date (this can be outside the range of the actual data) 349 | ref_idx = find_date_idx(da.time, ref_date) 350 | 351 | detrended = detrend_basic( 352 | da, da_slope, start_idx=ref_idx, dim="time", keep_attrs=True 353 | ) 354 | 355 | # add information to track which data was used to remove trend 356 | if "trend_time_range" in ds_slope.coords: 357 | trend_start = ds_slope.trend_time_range.isel(bnds=0).load().data.tolist() 358 | trend_stop = ds_slope.trend_time_range.isel(bnds=1).load().data.tolist() 359 | 360 | else: 361 | trend_start = "not-available" 362 | trend_stop = "not-available" 363 | warnings.warn( 364 | "`ds_slope` did not have information about the time over which the slope was calculated. Check the input." 365 | ) 366 | 367 | detrended.attrs["drift_removed"] = ( 368 | f"linear_trend_{cmip6_dataset_id(ds_slope)}_{trend_start}_{trend_stop}" 369 | ) 370 | 371 | return detrended 372 | 373 | 374 | def match_and_remove_trend( 375 | ddict, trend_ddict, ref_date="1850", nomatch="warn", **detrend_kwargs 376 | ): 377 | """Find and remove trend files from a dictonary of datasets 378 | 379 | Parameters 380 | ---------- 381 | ddict : dict 382 | dictionary with xr.Datasets which should get a trend/drift removed 383 | trend_ddict : dict 384 | dictionary with results of linear regressions. These should be removed from the datasets in `ddict` 385 | ref_date : str, optional 386 | Start date of the trend, by default "1850" 387 | nomatch : str, optional 388 | Define the behavior when for a given dataset in `ddict` there is no matching trend dataset in `trend_ddict`. 389 | Can be `warn`, `raise`, or `ignore`, by default 'warn' 390 | 391 | Returns 392 | ------- 393 | dict 394 | Dictionary of detrended dataasets. Only contains values of `ddict` that actually had a trend removed. 395 | 396 | """ 397 | ddict_detrended = {} 398 | match_attrs = [ma for ma in EXACT_ATTRS if ma not in ["experiment_id"]] + [ 399 | "variable_id" 400 | ] 401 | 402 | for k, ds in ddict.items(): 403 | trend_ds = _match_datasets( 404 | ds, trend_ddict, match_attrs, pop=False, unique=True, nomatch=nomatch 405 | ) 406 | if len(trend_ds) == 2: 407 | trend_ds = trend_ds[ 408 | 1 409 | ] # this is a bit clunky. _match_datasest does return the input ds, so we have to grab the second one? 
410 | # I guess I could pass *trend_ds, but that is not very readable 411 | variable = ds.attrs["variable_id"] 412 | da_detrended = ds.assign( 413 | { 414 | variable: remove_trend( 415 | ds, trend_ds, variable, ref_date=ref_date, **detrend_kwargs 416 | ) 417 | } 418 | ) 419 | # should this just return a dataset instead? 420 | ddict_detrended[k] = da_detrended 421 | 422 | return ddict_detrended 423 | -------------------------------------------------------------------------------- /xmip/grids.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import numpy as np 4 | import pkg_resources 5 | import xarray as xr 6 | import yaml 7 | 8 | from xgcm import Grid 9 | from xgcm.autogenerate import generate_grid_ds 10 | 11 | 12 | path = "specs/staggered_grid_config.yaml" # always use slash 13 | grid_spec = pkg_resources.resource_filename(__name__, path) 14 | 15 | 16 | def _parse_bounds_vertex(da, dim="bnds", position=[0, 1]): 17 | """Convenience function to extract positions from bounds/verticies""" 18 | return tuple([da.isel({dim: i}).load().data for i in position]) 19 | 20 | 21 | def _interp_vertex_to_bounds(da, orientation): 22 | """ 23 | Convenience function to average 4 vertex points into two bound points. 24 | Helpful to recreate e.g. the latitude at the `lon_bounds` points. 25 | """ 26 | if orientation == "x": 27 | datasets = [ 28 | da.isel(vertex=[0, 1]).mean("vertex"), 29 | da.isel(vertex=[3, 2]).mean("vertex"), 30 | ] 31 | elif orientation == "y": 32 | datasets = [ 33 | da.isel(vertex=[0, 3]).mean("vertex"), 34 | da.isel(vertex=[1, 2]).mean("vertex"), 35 | ] 36 | 37 | return xr.concat(datasets, dim="bnds") 38 | 39 | 40 | def distance_deg(lon0, lat0, lon1, lat1): 41 | """Calculate the distance in degress longitude and latitude between two points 42 | 43 | Parameters 44 | ---------- 45 | lon0 : np.array 46 | Longitude of first point 47 | lat0 : np.array 48 | Latitude of first point 49 | lon1 : np.array 50 | Longitude of second point 51 | lat1 : np.array 52 | Latitude of second point 53 | """ 54 | delta_lon = lon1 - lon0 55 | delta_lat = lat1 - lat0 56 | # very small differences can end up negative, so zero them out based on a simple 57 | # criterion 58 | # this should work for CMIP6 (no 1/1 deg models) but should be based on actual grid 59 | # info in the future 60 | small_crit = 1 / 10 61 | delta_lon = np.where( 62 | abs(delta_lon) < small_crit, 0.0, delta_lon 63 | ) # , np.nan, delta_lon) 64 | delta_lat = np.where( 65 | abs(delta_lat) < small_crit, 0.0, delta_lat 66 | ) # , np.nan, delta_lat) 67 | 68 | # # some bounds are wrapped aroud the lon discontinuty. 
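    # Worked example of the wrap handled below (values chosen for illustration only):
    # lon0 = 359.5, lon1 = 0.5 gives delta_lon = -359.0, which is mapped to
    # 360 + (-359.0) = 1.0 degree; values larger than ~360 are wrapped back by the
    # second np.where.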
69 | delta_lon = np.where(delta_lon < (-small_crit * 2), 360 + delta_lon, delta_lon) # 70 | delta_lon = np.where( 71 | delta_lon > (360 + small_crit * 2), -360 + delta_lon, delta_lon 72 | ) 73 | 74 | return delta_lon, delta_lat 75 | 76 | 77 | def distance(lon0, lat0, lon1, lat1): 78 | """Calculate the distance in m between two points on a spherical globe 79 | 80 | Parameters 81 | ---------- 82 | lon0 : np.array 83 | Longitude of first point 84 | lat0 : np.array 85 | Latitude of first point 86 | lon1 : np.array 87 | Longitude of second point 88 | lat1 : np.array 89 | Latitude of second point 90 | """ 91 | Re = 6.378e6 92 | delta_lon, delta_lat = distance_deg(lon0, lat0, lon1, lat1) 93 | dy = Re * (np.pi * delta_lat / 180) 94 | dx = Re * (np.pi * delta_lon / 180) * np.cos(np.pi * lat0 / 180) 95 | return np.sqrt(dx**2 + dy**2) 96 | 97 | 98 | def recreate_metrics(ds, grid): 99 | """Recreate a full set of horizontal distance metrics. 100 | 101 | Calculates distances between points in lon/lat coordinates 102 | 103 | 104 | The naming of the metrics is as follows: 105 | [metric_axis]_t : metric centered at tracer point 106 | [metric_axis]_gx : metric at the cell face on the x-axis. 107 | For instance `dx_gx` is the x distance centered on the eastern cell face if the shift is `right` 108 | [metric_axis]_gy : As above but along the y-axis 109 | [metric_axis]_gxgy : The metric located at the corner point. 110 | For example `dy_dxdy` is the y distance on the south-west corner if both axes as shifted left. 111 | 112 | Parameters 113 | ---------- 114 | ds : xr.Dataset 115 | Input dataset. 116 | grid : xgcm.Grid 117 | xgcm Grid object matching `ds` 118 | 119 | Returns 120 | ------- 121 | xr.Dataset, dict 122 | Dataset with added metrics as coordinates and dictionary that can be passed to xgcm.Grid to recognize new metrics 123 | """ 124 | ds = ds.copy() 125 | 126 | # Since this puts out numpy arrays, the arrays need to be transposed correctly 127 | transpose_dims = ["y", "x"] 128 | dims = [di for di in ds.dims if di not in transpose_dims] 129 | 130 | ds = ds.transpose(*tuple(transpose_dims + dims)) 131 | 132 | # is the vel point on left or right? 
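    # For each horizontal axis this picks the single non-"center" position registered
    # with xgcm, e.g. an X axis with coords {"center": "x", "right": "x_right"}
    # yields axis_vel_pos == {"X": "right", ...} (coordinate names here are illustrative).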
133 | axis_vel_pos = { 134 | axis: list(set(grid.axes[axis].coords.keys()) - set(["center"]))[0] 135 | for axis in ["X", "Y"] 136 | } 137 | # determine the appropriate vertex position for the north/south and east/west edge, 138 | # based on the grid config 139 | if axis_vel_pos["Y"] in ["left"]: 140 | ns_vertex_idx = [0, 3] 141 | ns_bound_idx = [0] 142 | elif axis_vel_pos["Y"] in ["right"]: 143 | ns_vertex_idx = [1, 2] 144 | ns_bound_idx = [1] 145 | 146 | if axis_vel_pos["X"] in ["left"]: 147 | ew_vertex_idx = [0, 1] 148 | ew_bound_idx = [0] 149 | elif axis_vel_pos["X"] in ["right"]: 150 | ew_vertex_idx = [3, 2] 151 | ew_bound_idx = [1] 152 | 153 | # infer dx at tracer points 154 | if "lon_bounds" in ds.coords and "lat_verticies" in ds.coords: 155 | lon0, lon1 = _parse_bounds_vertex(ds["lon_bounds"]) 156 | lat0, lat1 = _parse_bounds_vertex( 157 | _interp_vertex_to_bounds(ds["lat_verticies"], "x") 158 | ) 159 | dist = distance(lon0, lat0, lon1, lat1) 160 | ds.coords["dx_t"] = xr.DataArray(dist, coords=ds.lon.coords) 161 | 162 | # infer dy at tracer points 163 | if "lat_bounds" in ds.coords and "lon_verticies" in ds.coords: 164 | lat0, lat1 = _parse_bounds_vertex(ds["lat_bounds"]) 165 | lon0, lon1 = _parse_bounds_vertex( 166 | _interp_vertex_to_bounds(ds["lon_verticies"], "y") 167 | ) 168 | dist = distance(lon0, lat0, lon1, lat1) 169 | ds.coords["dy_t"] = xr.DataArray(dist, coords=ds.lon.coords) 170 | 171 | if "lon_verticies" in ds.coords and "lat_verticies" in ds.coords: 172 | # infer dx at the north/south face 173 | lon0, lon1 = _parse_bounds_vertex( 174 | ds["lon_verticies"], dim="vertex", position=ns_vertex_idx 175 | ) 176 | lat0, lat1 = _parse_bounds_vertex( 177 | ds["lat_verticies"], dim="vertex", position=ns_vertex_idx 178 | ) 179 | dist = distance(lon0, lat0, lon1, lat1) 180 | ds.coords["dx_gy"] = xr.DataArray( 181 | dist, coords=grid.interp(ds.lon, "Y", boundary="extrapolate").coords 182 | ) 183 | 184 | # infer dy at the east/west face 185 | lon0, lon1 = _parse_bounds_vertex( 186 | ds["lon_verticies"], dim="vertex", position=ew_vertex_idx 187 | ) 188 | lat0, lat1 = _parse_bounds_vertex( 189 | ds["lat_verticies"], dim="vertex", position=ew_vertex_idx 190 | ) 191 | dist = distance(lon0, lat0, lon1, lat1) 192 | ds.coords["dy_gx"] = xr.DataArray( 193 | dist, coords=grid.interp(ds.lon, "X", boundary="extrapolate").coords 194 | ) 195 | 196 | # for the distances that dont line up with the cell boundaries we need some different logic 197 | boundary = "extend" 198 | # TODO: This should be removed once we have the default boundary merged in xgcm 199 | 200 | # infer dx at eastern/western bound from tracer points 201 | lon0, lon1 = grid.axes["X"]._get_neighbor_data_pairs( 202 | ds.lon.load(), axis_vel_pos["X"] 203 | ) 204 | lat0, lat1 = grid.axes["X"]._get_neighbor_data_pairs( 205 | ds.lat.load(), axis_vel_pos["X"] 206 | ) 207 | dx = distance(lon0, lat0, lon1, lat1) 208 | ds.coords["dx_gx"] = xr.DataArray( 209 | dx, coords=grid.interp(ds.lon, "X", boundary=boundary).coords 210 | ) 211 | 212 | # infer dy at northern bound from tracer points 213 | lat0, lat1 = grid.axes["Y"]._get_neighbor_data_pairs( 214 | ds.lat.load(), axis_vel_pos["Y"], boundary=boundary 215 | ) 216 | lon0, lon1 = grid.axes["Y"]._get_neighbor_data_pairs( 217 | ds.lon.load(), axis_vel_pos["Y"], boundary=boundary 218 | ) 219 | dy = distance(lon0, lat0, lon1, lat1) 220 | ds.coords["dy_gy"] = xr.DataArray( 221 | dy, coords=grid.interp(ds.lat, "Y", boundary=boundary).coords 222 | ) 223 | 224 | # infer dx at the corner point 225 | 
lon0, lon1 = grid.axes["X"]._get_neighbor_data_pairs( 226 | _interp_vertex_to_bounds(ds.lon_verticies.load(), "y") 227 | .isel(bnds=ns_bound_idx) 228 | .squeeze(), 229 | axis_vel_pos["X"], 230 | ) 231 | lat0, lat1 = grid.axes["X"]._get_neighbor_data_pairs( 232 | ds.lat_bounds.isel(bnds=ns_bound_idx).squeeze().load(), axis_vel_pos["X"] 233 | ) 234 | dx = distance(lon0, lat0, lon1, lat1) 235 | ds.coords["dx_gxgy"] = xr.DataArray( 236 | dx, 237 | coords=grid.interp( 238 | grid.interp(ds.lon, "X", boundary=boundary), "Y", boundary=boundary 239 | ).coords, 240 | ) 241 | 242 | # infer dy at the corner point 243 | lat0, lat1 = grid.axes["Y"]._get_neighbor_data_pairs( 244 | _interp_vertex_to_bounds(ds.lat_verticies.load(), "x") 245 | .isel(bnds=ew_bound_idx) 246 | .squeeze(), 247 | axis_vel_pos["Y"], 248 | ) 249 | lon0, lon1 = grid.axes["Y"]._get_neighbor_data_pairs( 250 | ds.lon_bounds.isel(bnds=ew_bound_idx).squeeze().load(), axis_vel_pos["Y"] 251 | ) 252 | dy = distance(lon0, lat0, lon1, lat1) 253 | ds.coords["dy_gxgy"] = xr.DataArray( 254 | dy, 255 | coords=grid.interp( 256 | grid.interp(ds.lon, "X", boundary=boundary), "Y", boundary=boundary 257 | ).coords, 258 | ) 259 | 260 | # infer dz at tracer point 261 | if "lev_bounds" in ds.coords: 262 | ds = ds.assign_coords( 263 | dz_t=("lev", ds["lev_bounds"].diff("bnds").squeeze(drop=True).data) 264 | ) 265 | 266 | metrics_dict = { 267 | "X": [co for co in ["dx_t", "dx_gy", "dx_gx"] if co in ds.coords], 268 | "Y": [co for co in ["dy_t", "dy_gy", "dy_gx"] if co in ds.coords], 269 | "Z": [co for co in ["dz_t"] if co in ds.coords], 270 | } 271 | # # only put out axes that have entries 272 | metrics_dict = {k: v for k, v in metrics_dict.items() if len(v) > 0} 273 | 274 | return ds, metrics_dict 275 | 276 | 277 | def detect_shift(ds_base, ds, axis): 278 | """Detects the shift of `ds` relative to `ds` on logical grid axes, using 279 | lon and lat positions. 280 | 281 | Parameters 282 | ---------- 283 | ds_base : xr.Dataset 284 | Reference ('base') dataset to compare to. Assumed that this is located at the 'center' coordinate. 285 | ds : xr.Dataset 286 | Comparison dataset. The resulting shift will be computed as this dataset relative to `ds_base` 287 | axis : str 288 | xgcm logical axis on which to detect the shift 289 | 290 | Returns 291 | ------- 292 | str 293 | Shift string output, in xgcm conventions. 294 | """ 295 | ds_base = ds_base.copy() 296 | ds = ds.copy() 297 | axis = axis.lower() 298 | axis_coords = {"x": "lon", "y": "lat"} 299 | 300 | # check the shift only for one point, somewhat in the center to avoid the 301 | # distorted polar regions 302 | check_point = {"x": len(ds_base.x) // 2, "y": len(ds_base.y) // 2} 303 | check_point_diff = {k: [v, v + 1] for k, v in check_point.items()} 304 | 305 | shift = ( 306 | ds.isel(**check_point)[axis_coords[axis]].load().data 307 | - ds_base.isel(**check_point)[axis_coords[axis]].load().data 308 | ) 309 | diff = ds[axis].isel({axis: check_point_diff[axis]}).diff(axis).data.tolist()[0] 310 | threshold = 0.1 311 | # the fraction of full cell distance, that a point has to be shifted in order to 312 | # be recognized. 313 | # This avoids detection of shifts for very small differences that sometimes happen 314 | # if the coordinates were written e.g. 
by different modulel of a model 315 | 316 | axis_shift = "center" 317 | 318 | if shift > (diff * threshold): 319 | axis_shift = "right" 320 | elif shift < -(diff * threshold): 321 | axis_shift = "left" 322 | return axis_shift 323 | 324 | 325 | def create_full_grid(base_ds, grid_dict=None): 326 | """Generate a full xgcm-compatible dataset from a reference datasets `base_ds`. 327 | This dataset should be representing a tracer fields, e.g. the cell center. 328 | 329 | Parameters 330 | ---------- 331 | base_ds : xr.Dataset 332 | The reference ('base') datasets, assumed to be at the tracer position/cell center 333 | grid_dict : dict, optional 334 | Dictionary with info about the grid staggering. 335 | Must be encoded using the base_ds attrs (e.g. {'model_name':{'axis_shift':{'X':'left',...}}}). 336 | If deactivated (default), will load from the internal database for CMIP6 models, by default None 337 | 338 | Returns 339 | ------- 340 | xr.Dataset 341 | xgcm compatible dataset 342 | """ 343 | 344 | # load dict with grid shift info for each axis 345 | if grid_dict is None: 346 | ff = open(grid_spec, "r") 347 | grid_dict = yaml.safe_load(ff) 348 | ff.close() 349 | 350 | source_id = base_ds.attrs["source_id"] 351 | grid_label = base_ds.attrs["grid_label"] 352 | 353 | # if source_id not in dict, and grid label is gn, warn and ask to submit an issue 354 | try: 355 | axis_shift = grid_dict[source_id][grid_label]["axis_shift"] 356 | except KeyError: 357 | warnings.warn( 358 | f"Could not find the source_id/grid_label ({source_id}/{grid_label}) combo in `grid_dict`, returning `None`. Please submit an issue to github: https://github.com/jbusecke/xmip/issues" 359 | ) 360 | return None 361 | 362 | position = {k: ("center", axis_shift[k]) for k in axis_shift.keys()} 363 | 364 | axis_dict = {"X": "x", "Y": "y"} 365 | 366 | ds_grid = generate_grid_ds( 367 | base_ds, axis_dict, position=position, boundary_discontinuity={"X": 360} 368 | ) 369 | 370 | # TODO: man parse lev and lev_bounds as center and outer dims. 371 | # I should also be able to do this with `generate_grid_ds`, but here we 372 | # have the `lev_bounds` with most models, so that is probably more reliable. 373 | # cheapest solution right now 374 | if "lev" in ds_grid.dims: 375 | ds_grid["lev"].attrs["axis"] = "Z" 376 | 377 | return ds_grid 378 | 379 | 380 | def combine_staggered_grid( 381 | ds_base, other_ds=None, recalculate_metrics=False, grid_dict=None, **kwargs 382 | ): 383 | """Combine a reference datasets with a list of other datasets to a full xgcm-compatible staggered grid datasets. 384 | 385 | 386 | Parameters 387 | ---------- 388 | ds_base : xr.Dataset 389 | The reference ('base') datasets, assumed to be at the tracer position/cell center 390 | other_ds : list,xr.Dataset, optional 391 | List of datasets representing different variables. Their grid position will be 392 | automatically detected relative to `ds_base`. Coordinates and attrs of these added datasets will be lost 393 | , by default None 394 | recalculate_metrics : bool, optional 395 | nables the reconstruction of grid metrics usign simple 396 | spherical geometry, by default False 397 | 398 | !!! Check your results carefully when using reconstructed values, 399 | these might differe substantially if the grid geometry is complicated. 400 | grid_dict : dict, optional 401 | Dictionary for staggered grid setup. 
See `create_full_grid` for detauls 402 | If None (default), will load staggered grid info from internal database, by default None 403 | 404 | Returns 405 | ------- 406 | xr.Dataset 407 | Single xgcm-compatible dataset, containing all variables on their respective staggered grid position. 408 | """ 409 | ds_base = ds_base.copy() 410 | if isinstance(other_ds, xr.Dataset): 411 | other_ds = [other_ds] 412 | 413 | ds_g = create_full_grid(ds_base, grid_dict=grid_dict) 414 | 415 | if ds_g is None: 416 | warnings.warn("Staggered Grid creation failed. Returning `None`") 417 | return None, None 418 | 419 | # save attrs out for later (something during alignment destroys them) 420 | dim_attrs_dict = {} 421 | for di in ds_g.dims: 422 | dim_attrs_dict[di] = ds_g[di].attrs 423 | 424 | # TODO: metrics and interpolation of metrics if they are parsed 425 | 426 | # parse other variables 427 | if other_ds is not None: 428 | for ds_new in other_ds: 429 | ds_new = ds_new.copy() 430 | # strip everything but the variable_id (perhaps I would want to 431 | # loosen this in the future) 432 | ds_new = ds_new[ds_new.attrs["variable_id"]] 433 | 434 | if not all( 435 | [ 436 | len(ds_new[di]) == len(ds_g[di]) 437 | for di in ds_new.dims 438 | if di not in ["member_id", "time"] 439 | ] 440 | ): 441 | warnings.warn( 442 | f"Could not parse `{ds_new.name}`, due to a size mismatch. If this is the MRI model, the grid convention is currently not supported." 443 | ) 444 | else: 445 | # detect shift and rename accordingly 446 | rename_dict = {} 447 | for axis in ["X", "Y"]: 448 | shift = detect_shift(ds_base, ds_new, axis) 449 | 450 | if shift != "center": 451 | rename_dict[axis.lower()] = axis.lower() + "_" + shift 452 | ds_new = ds_new.rename(rename_dict) 453 | ds_new = ds_new.reset_coords(drop=True) 454 | # TODO: This needs to be coded more generally, for now hardcode x and y 455 | force_align_dims = [di for di in ds_new.dims if "x" in di or "y" in di] 456 | _, ds_new = xr.align( 457 | ds_g.copy(), 458 | ds_new, 459 | join="override", 460 | exclude=[di for di in ds_new.dims if di not in force_align_dims], 461 | ) 462 | additional_dims = [di for di in ds_new.dims if di not in ds_g.dims] 463 | if len(additional_dims) > 0: 464 | raise RuntimeError( 465 | f"While trying to parse `{ds_new.name}`, detected dims that are not in the base dataset:[{additional_dims}]" 466 | ) 467 | ds_g[ds_new.name] = ds_new 468 | 469 | # Restore dims attrs from the beginning 470 | for di in ds_g.dims: 471 | ds_g.coords[di].attrs.update(dim_attrs_dict[di]) 472 | 473 | grid_kwargs = {"periodic": ["X"]} 474 | grid_kwargs.update(kwargs) 475 | grid = Grid(ds_g, grid_kwargs) 476 | 477 | # if activated calculate metrics 478 | if recalculate_metrics: 479 | grid_kwargs.pop( 480 | "metrics", None 481 | ) # remove any passed metrics when recalculating them 482 | # I might be able to refine this more to e.g. allow axes that are not recreated. 
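        # recreate_metrics attaches the reconstructed distance coordinates (dx_t,
        # dy_gx, etc.) to the dataset and returns a dict like
        # {"X": [...], "Y": [...], "Z": [...]} that is passed back to xgcm.Grid as
        # `metrics`, so grid operations that need cell spacings can use the
        # recalculated values.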
483 | 484 | ds_g, metrics_dict = recreate_metrics(ds_g, grid) 485 | # this might fail in circumstances, where the 486 | grid_kwargs["metrics"] = metrics_dict 487 | grid = Grid(ds_g, **grid_kwargs) 488 | return grid, ds_g 489 | -------------------------------------------------------------------------------- /xmip/preprocessing.py: -------------------------------------------------------------------------------- 1 | # Preprocessing for CMIP6 models 2 | import warnings 3 | 4 | import cf_xarray.units # noqa: F401 5 | import numpy as np 6 | import pint # noqa: F401 7 | import pint_xarray # noqa: F401 8 | import xarray as xr 9 | 10 | from xmip.utils import cmip6_dataset_id 11 | 12 | 13 | # global object for units 14 | _desired_units = {"lev": "m"} 15 | _unit_overrides = {name: None for name in ["so"]} 16 | 17 | 18 | _drop_coords = ["bnds", "vertex"] 19 | 20 | 21 | def cmip6_renaming_dict(): 22 | """a universal renaming dict. Keys correspond to source id (model name) 23 | and valuse are a dict of target name (key) and a list of variables that 24 | should be renamed into the target.""" 25 | rename_dict = { 26 | # dim labels (order represents the priority when checking for the dim labels) 27 | "x": ["i", "ni", "xh", "nlon"], 28 | "y": ["j", "nj", "yh", "nlat"], 29 | "lev": ["deptht", "olevel", "zlev", "olev", "depth"], 30 | "bnds": ["bnds", "axis_nbounds", "d2"], 31 | "vertex": ["vertex", "nvertex", "vertices", "nvertices"], 32 | # coordinate labels 33 | "lon": ["longitude", "nav_lon"], 34 | "lat": ["latitude", "nav_lat"], 35 | "lev_bounds": [ 36 | "deptht_bounds", 37 | "lev_bnds", 38 | "olevel_bounds", 39 | "zlev_bnds", 40 | ], 41 | "lon_bounds": [ 42 | "bounds_lon", 43 | "bounds_nav_lon", 44 | "lon_bnds", 45 | "x_bnds", 46 | "vertices_longitude", 47 | "longitude_bnds", 48 | ], 49 | "lat_bounds": [ 50 | "bounds_lat", 51 | "bounds_nav_lat", 52 | "lat_bnds", 53 | "y_bnds", 54 | "vertices_latitude", 55 | "latitude_bnds", 56 | ], 57 | "time_bounds": ["time_bnds"], 58 | } 59 | return rename_dict 60 | 61 | 62 | def rename_cmip6(ds, rename_dict=None): 63 | """Homogenizes cmip6 dataasets to common naming""" 64 | attrs = {k: v for k, v in ds.attrs.items()} 65 | ds_id = cmip6_dataset_id(ds) 66 | 67 | if rename_dict is None: 68 | rename_dict = cmip6_renaming_dict() 69 | 70 | # TODO: Be even stricter here and reset every variable except the one given in the attr 71 | # as variable_id 72 | # ds_reset = ds.reset_coords() 73 | 74 | def _maybe_rename_dims(da, rdict): 75 | for di in da.dims: 76 | for target, candidates in rdict.items(): 77 | if di in candidates: 78 | da = da.swap_dims({di: target}) 79 | if di in da.coords: 80 | if not di == target: 81 | da = da.rename({di: target}).set_xindex(target) 82 | return da 83 | 84 | # first take care of the dims and reconstruct a clean ds 85 | ds = xr.Dataset( 86 | { 87 | k: _maybe_rename_dims(ds[k], rename_dict) 88 | for k in list(ds.data_vars) + list(set(ds.coords) - set(ds.dims)) 89 | } 90 | ) 91 | 92 | rename_vars = list(set(ds.variables) - set(ds.dims)) 93 | 94 | for target, candidates in rename_dict.items(): 95 | if target not in ds: 96 | matching_candidates = [ca for ca in candidates if ca in rename_vars] 97 | if len(matching_candidates) > 0: 98 | if len(matching_candidates) > 1: 99 | warnings.warn( 100 | f"{ds_id}:While renaming to target `{target}`, more than one candidate was found {matching_candidates}. Renaming {matching_candidates[0]} to {target}. Please double check results." 
101 | ) 102 | ds = ds.rename({matching_candidates[0]: target}) 103 | 104 | # special treatment for 'lon'/'lat' if there is no 'x'/'y' after renaming process 105 | for di, co in [("x", "lon"), ("y", "lat")]: 106 | if di not in ds.dims and co in ds.dims: 107 | ds = ds.rename({co: di}) 108 | 109 | # restore attributes 110 | ds.attrs = attrs 111 | return ds 112 | 113 | 114 | def promote_empty_dims(ds): 115 | """Convert empty dimensions to actual coordinates""" 116 | ds = ds.copy() 117 | for di in ds.dims: 118 | if di not in ds.coords: 119 | ds = ds.assign_coords({di: ds[di]}) 120 | return ds 121 | 122 | 123 | # some of the models do not have 2d lon lats, correct that. 124 | def broadcast_lonlat(ds, verbose=True): 125 | """Some models (all `gr` grid_labels) have 1D lon lat arrays 126 | This functions broadcasts those so lon/lat are always 2d arrays.""" 127 | if "lon" not in ds.variables: 128 | ds.coords["lon"] = ds["x"] 129 | if "lat" not in ds.variables: 130 | ds.coords["lat"] = ds["y"] 131 | 132 | if len(ds["lon"].dims) < 2: 133 | ds.coords["lon"] = ds["lon"] * xr.ones_like(ds["lat"]) 134 | if len(ds["lat"].dims) < 2: 135 | ds.coords["lat"] = xr.ones_like(ds["lon"]) * ds["lat"] 136 | 137 | return ds 138 | 139 | 140 | def _interp_nominal_lon(lon_1d: np.ndarray) -> np.ndarray: 141 | x = np.arange(len(lon_1d)) 142 | idx = np.isnan(lon_1d) 143 | # Assume that longitudes are cyclic (i.e. that the period equals the length of lon) 144 | return np.interp(x, x[~idx], lon_1d[~idx], period=len(lon_1d)) 145 | 146 | 147 | def replace_x_y_nominal_lat_lon(ds): 148 | """Approximate the dimensional values of x and y with mean lat and lon at the equator""" 149 | ds = ds.copy() 150 | 151 | def maybe_fix_non_unique(data, pad=False): 152 | """remove duplicate values by linear interpolation 153 | if values are non-unique. `pad` if the last two points are the same 154 | pad with -90 or 90. This is only applicable to lat values""" 155 | if len(data) == len(np.unique(data)): 156 | return data 157 | else: 158 | # pad each end with the other end. 159 | if pad: 160 | if len(np.unique([data[0:2]])) < 2: 161 | data[0] = -90 162 | if len(np.unique([data[-2:]])) < 2: 163 | data[-1] = 90 164 | 165 | ii_range = np.arange(len(data)) 166 | _, indicies = np.unique(data, return_index=True) 167 | double_idx = np.array([ii not in indicies for ii in ii_range]) 168 | # print(f"non-unique values found at:{ii_range[double_idx]})") 169 | data[double_idx] = np.interp( 170 | ii_range[double_idx], ii_range[~double_idx], data[~double_idx] 171 | ) 172 | return data 173 | 174 | if "x" in ds.dims and "y" in ds.dims: 175 | # define 'nominal' longitude/latitude values 176 | # latitude is defined as the max value of `lat` in the zonal direction 177 | # longitude is taken from the `middle` of the meridonal direction, to 178 | # get values close to the equator 179 | 180 | # pick the nominal lon/lat values from the eastern 181 | # and southern edge, and 182 | eq_idx = len(ds.y) // 2 183 | 184 | nominal_x = ds.isel(y=eq_idx).lon.load() 185 | nominal_y = ds.lat.max("x").load() 186 | 187 | # interpolate nans 188 | # Special treatment for gaps in longitude 189 | nominal_x = _interp_nominal_lon(nominal_x.data) 190 | nominal_y = nominal_y.interpolate_na("y").data 191 | 192 | # eliminate non unique values 193 | # these occour e.g. 
in "MPI-ESM1-2-HR" 194 | nominal_y = maybe_fix_non_unique(nominal_y) 195 | nominal_x = maybe_fix_non_unique(nominal_x) 196 | 197 | ds = ds.assign_coords(x=nominal_x, y=nominal_y) 198 | ds = ds.sortby("x") 199 | ds = ds.sortby("y") 200 | 201 | # do one more interpolation for the x values, in case the boundary values were 202 | # affected 203 | ds = ds.assign_coords( 204 | x=maybe_fix_non_unique(ds.x.load().data), 205 | y=maybe_fix_non_unique(ds.y.load().data, pad=True), 206 | ) 207 | 208 | else: 209 | warnings.warn( 210 | "No x and y found in dimensions for source_id:%s. This likely means that you forgot to rename the dataset or this is the German unstructured model" 211 | % ds.attrs["source_id"] 212 | ) 213 | return ds 214 | 215 | 216 | def correct_units(ds): 217 | "Converts coordinates into SI units using pint-xarray" 218 | # codify units with pint 219 | # Perhaps this should be kept separately from the fixing? 220 | # See https://github.com/jbusecke/xmip/pull/160#discussion_r667041858 221 | try: 222 | # exclude salinity from the quantification (see https://github.com/jbusecke/xmip/pull/160#issuecomment-878627027 for details) 223 | quantified = ds.pint.quantify(_unit_overrides) 224 | target_units = { 225 | var: target_unit 226 | for var, target_unit in _desired_units.items() 227 | if var in quantified 228 | } 229 | 230 | converted = quantified.pint.to(target_units) 231 | ds = converted.pint.dequantify(format="~P") 232 | except ValueError as e: 233 | warnings.warn( 234 | f"{cmip6_dataset_id(ds)}: Unit correction failed with: {e}", UserWarning 235 | ) 236 | return ds 237 | 238 | 239 | def correct_coordinates(ds, verbose=False): 240 | """converts wrongly assigned data_vars to coordinates""" 241 | ds = ds.copy() 242 | for co in [ 243 | "x", 244 | "y", 245 | "lon", 246 | "lat", 247 | "lev", 248 | "bnds", 249 | "lev_bounds", 250 | "lon_bounds", 251 | "lat_bounds", 252 | "time_bounds", 253 | "lat_verticies", 254 | "lon_verticies", 255 | ]: 256 | if co in ds.variables: 257 | if verbose: 258 | print("setting %s as coord" % (co)) 259 | ds = ds.set_coords(co) 260 | return ds 261 | 262 | 263 | def correct_lon(ds): 264 | """Wraps negative x and lon values around to have 0-360 lons. 265 | longitude names expected to be corrected with `rename_cmip6`""" 266 | ds = ds.copy() 267 | 268 | # remove out of bounds values found in some 269 | # models as missing values 270 | ds["lon"] = ds["lon"].where(abs(ds["lon"]) <= 1000) 271 | ds["lat"] = ds["lat"].where(abs(ds["lat"]) <= 1000) 272 | 273 | # adjust lon convention 274 | lon = ds["lon"].where(ds["lon"] > 0, 360 + ds["lon"]) 275 | ds = ds.assign_coords(lon=lon) 276 | 277 | if "lon_bounds" in ds.variables: 278 | lon_b = ds["lon_bounds"].where(ds["lon_bounds"] > 0, 360 + ds["lon_bounds"]) 279 | ds = ds.assign_coords(lon_bounds=lon_b) 280 | 281 | return ds 282 | 283 | 284 | def parse_lon_lat_bounds(ds): 285 | """both `regular` 2d bounds and vertex bounds are parsed as `*_bounds`. 286 | This function renames them to `*_verticies` if the vertex dimension is found. 287 | Also removes time dimension from static bounds as found in e.g. `SAM0-UNICON` model. 
288 | """ 289 | if "source_id" in ds.attrs.keys(): 290 | if ds.attrs["source_id"] == "FGOALS-f3-L": 291 | warnings.warn("`FGOALS-f3-L` does not provide lon or lat bounds.") 292 | 293 | ds = ds.copy() 294 | 295 | if "lat_bounds" in ds.variables: 296 | if "x" not in ds.lat_bounds.dims: 297 | ds.coords["lat_bounds"] = ds.coords["lat_bounds"] * xr.ones_like(ds.x) 298 | 299 | if "lon_bounds" in ds.variables: 300 | if "y" not in ds.lon_bounds.dims: 301 | ds.coords["lon_bounds"] = ds.coords["lon_bounds"] * xr.ones_like(ds.y) 302 | 303 | # I am assuming that all bound fields with time were broadcasted in error (except time bounds obviously), 304 | # and will drop the time dimension. 305 | error_dims = ["time"] 306 | for ed in error_dims: 307 | for co in ["lon_bounds", "lat_bounds", "lev_bounds"]: 308 | if co in ds.variables: 309 | if ed in ds[co].dims: 310 | warnings.warn( 311 | f"Found {ed} as dimension in `{co}`. Assuming this is an error and just picking the first step along that dimension." 312 | ) 313 | stripped_coord = ds[co].isel({ed: 0}).squeeze() 314 | # make sure that dimension is actually dropped 315 | if ed in stripped_coord.coords: 316 | stripped_coord = stripped_coord.drop(ed) 317 | 318 | ds = ds.assign_coords({co: stripped_coord}) 319 | 320 | # Finally rename the bounds that are given in vertex convention 321 | for va in ["lon", "lat"]: 322 | va_name = va + "_bounds" 323 | if va_name in ds.variables and "vertex" in ds[va_name].dims: 324 | ds = ds.rename({va_name: va + "_verticies"}) 325 | 326 | return ds 327 | 328 | 329 | def maybe_convert_bounds_to_vertex(ds): 330 | """Converts renamed lon and lat bounds into verticies, by copying 331 | the values into the corners. Assumes a rectangular cell.""" 332 | ds = ds.copy() 333 | if "bnds" in ds.dims: 334 | if "lon_bounds" in ds.variables and "lat_bounds" in ds.variables: 335 | if ( 336 | "lon_verticies" not in ds.variables 337 | and "lat_verticies" not in ds.variables 338 | ): 339 | lon_b = xr.ones_like(ds.lat) * ds.coords["lon_bounds"] 340 | lat_b = xr.ones_like(ds.lon) * ds.coords["lat_bounds"] 341 | 342 | lon_bb = xr.concat( 343 | [lon_b.isel(bnds=ii).squeeze(drop=True) for ii in [0, 0, 1, 1]], 344 | dim="vertex", 345 | ) 346 | lon_bb = lon_bb.reset_coords(drop=True) 347 | 348 | lat_bb = xr.concat( 349 | [lat_b.isel(bnds=ii).squeeze(drop=True) for ii in [0, 1, 1, 0]], 350 | dim="vertex", 351 | ) 352 | lat_bb = lat_bb.reset_coords(drop=True) 353 | 354 | ds = ds.assign_coords(lon_verticies=lon_bb, lat_verticies=lat_bb) 355 | 356 | return ds 357 | 358 | 359 | def maybe_convert_vertex_to_bounds(ds): 360 | """Converts lon and lat verticies to bounds by averaging corner points 361 | on the appropriate cell face center.""" 362 | 363 | ds = ds.copy() 364 | if "vertex" in ds.dims: 365 | if "lon_verticies" in ds.variables and "lat_verticies" in ds.variables: 366 | if "lon_bounds" not in ds.variables and "lat_bounds" not in ds.variables: 367 | lon_b = xr.concat( 368 | [ 369 | ds["lon_verticies"].isel(vertex=[0, 1]).mean("vertex"), 370 | ds["lon_verticies"].isel(vertex=[2, 3]).mean("vertex"), 371 | ], 372 | dim="bnds", 373 | ) 374 | lat_b = xr.concat( 375 | [ 376 | ds["lat_verticies"].isel(vertex=[0, 3]).mean("vertex"), 377 | ds["lat_verticies"].isel(vertex=[1, 2]).mean("vertex"), 378 | ], 379 | dim="bnds", 380 | ) 381 | 382 | ds = ds.assign_coords(lon_bounds=lon_b, lat_bounds=lat_b) 383 | ds = promote_empty_dims(ds) 384 | return ds 385 | 386 | 387 | def sort_vertex_order(ds): 388 | """sorts the vertex dimension in a coherent order: 389 | 0: lower 
left 390 | 1: upper left 391 | 2: upper right 392 | 3: lower right 393 | """ 394 | ds = ds.copy() 395 | if ( 396 | "vertex" in ds.dims 397 | and "lon_verticies" in ds.variables 398 | and "lat_verticies" in ds.variables 399 | ): 400 | # pick a vertex in the middle of the domain, to avoid the pole areas 401 | x_idx = len(ds.x) // 2 402 | y_idx = len(ds.y) // 2 403 | 404 | lon_b = ds.lon_verticies.isel(x=x_idx, y=y_idx).load().data 405 | lat_b = ds.lat_verticies.isel(x=x_idx, y=y_idx).load().data 406 | vert = ds.vertex.load().data 407 | 408 | points = np.vstack((lon_b, lat_b, vert)).T 409 | 410 | # split into left and right 411 | lon_sorted = points[np.argsort(points[:, 0]), :] 412 | right = lon_sorted[:2, :] 413 | left = lon_sorted[2:, :] 414 | # sort again on each side to get top and bottom 415 | bl, tl = left[np.argsort(left[:, 1]), :] 416 | br, tr = right[np.argsort(right[:, 1]), :] 417 | 418 | points_sorted = np.vstack((bl, tl, tr, br)) 419 | 420 | idx_sorted = (points_sorted.shape[0] - 1) - np.argsort(points_sorted[:, 2]) 421 | ds = ds.assign_coords(vertex=idx_sorted) 422 | ds = ds.sortby("vertex") 423 | 424 | return ds 425 | 426 | 427 | # TODO: Implement this in a sleeker way with daops 428 | def fix_metadata(ds): 429 | """ 430 | Fix known issues (from errata) with the metadata. 431 | """ 432 | 433 | # https://errata.es-doc.org/static/view.html?uid=2f6b5963-f87e-b2df-a5b0-2f12b6b68d32 434 | if ds.attrs["source_id"] == "GFDL-CM4" and ds.attrs["experiment_id"] in [ 435 | "1pctCO2", 436 | "abrupt-4xCO2", 437 | "historical", 438 | ]: 439 | ds.attrs["branch_time_in_parent"] = 91250 440 | # https://errata.es-doc.org/static/view.html?uid=61fb170e-91bb-4c64-8f1d-6f5e342ee421 441 | if ds.attrs["source_id"] == "GFDL-CM4" and ds.attrs["experiment_id"] in [ 442 | "ssp245", 443 | "ssp585", 444 | ]: 445 | ds.attrs["branch_time_in_child"] = 60225 446 | return ds 447 | 448 | 449 | def combined_preprocessing(ds): 450 | # fix naming 451 | ds = rename_cmip6(ds) 452 | # promote empty dims to actual coordinates 453 | ds = promote_empty_dims(ds) 454 | # demote coordinates from data_variables 455 | ds = correct_coordinates(ds) 456 | # broadcast lon/lat 457 | ds = broadcast_lonlat(ds) 458 | # shift all lons to consistent 0-360 459 | ds = correct_lon(ds) 460 | # fix the units 461 | ds = correct_units(ds) 462 | # rename the `bounds` according to their style (bound or vertex) 463 | ds = parse_lon_lat_bounds(ds) 464 | # sort verticies in a consistent manner 465 | ds = sort_vertex_order(ds) 466 | # convert vertex into bounds and vice versa, so both are available 467 | ds = maybe_convert_bounds_to_vertex(ds) 468 | ds = maybe_convert_vertex_to_bounds(ds) 469 | ds = fix_metadata(ds) 470 | ds = ds.drop_vars(_drop_coords, errors="ignore") 471 | return ds 472 | -------------------------------------------------------------------------------- /xmip/regionmask.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import xarray as xr 3 | 4 | 5 | def _default_merge_dict(): 6 | return { 7 | "North Atlantic Ocean": [ 8 | "Caribbean Sea", 9 | "Gulf of Mexico", 10 | "Labrador Sea", 11 | "Hudson Bay", 12 | "Baffin Bay", 13 | "Norwegian Sea", 14 | "Greenland Sea", 15 | "Bay of Biscay", 16 | "Norwegian Sea", 17 | "Greenland Sea", 18 | "Gulf of Guinea", 19 | "Irish Sea", 20 | "North Sea", 21 | "Bahía de Campeche", 22 | "Davis Strait", 23 | "Sargasso Sea", 24 | "Hudson Strait", 25 | "English Channel", 26 | "Gulf of Honduras", 27 | "Bristol Channel", 28 | "Inner Seas", 29 | 
"Straits of Florida", 30 | "Gulf of Saint Lawrence", 31 | "Bay of Fundy", 32 | "Melville Bay", 33 | "Gulf of Maine", 34 | "Chesapeake Bay", 35 | "Amazon River", 36 | "James Bay", 37 | "Ungava Bay", 38 | ], 39 | "South Atlantic Ocean": ["Río de la Plata", "Golfo San Jorge"], 40 | "North Pacific Ocean": [ 41 | "Philippine Sea", 42 | "Gulf of Alaska", 43 | "Sea of Okhotsk", 44 | "East China Sea", 45 | "Yellow Sea", 46 | "Bering Sea", 47 | "Golfo de California", 48 | "Korea Strait", 49 | "Cook Inlet", 50 | "Bristol Bay", 51 | "Shelikhova Gulf", 52 | "Bo Hai", 53 | "Golfo de Panamá", 54 | "Yangtze River", 55 | "Columbia River", 56 | "Sea of Japan", 57 | "Inner Sea", 58 | ], 59 | "South Pacific Ocean": [ 60 | "Coral Sea", 61 | "Tasman Sea", 62 | "Bay of Plenty", 63 | "Bismarck Sea", 64 | "Solomon Sea", 65 | "Great Barrier Reef", 66 | ], 67 | "Maritime Continent": [ 68 | "Celebes Sea", 69 | "Sulu Sea", 70 | "Banda Sea", 71 | "Luzon Strait", 72 | "Java Sea", 73 | "Arafura Sea", 74 | "Timor Sea", 75 | "Gulf of Thailand", 76 | "Gulf of Carpentaria", 77 | "Molucca Sea", 78 | "Gulf of Tonkin", 79 | "Strait of Malacca", 80 | "Strait of Singapore", 81 | "Makassar Strait", 82 | "Ceram Sea", 83 | "Taiwan Strait", 84 | "South China Sea", 85 | ], 86 | "INDIAN OCEAN": [ 87 | "Mozambique Channel", 88 | "Bay of Bengal", 89 | "Arabian Sea", 90 | "Persian Gulf", 91 | "Andaman Sea", 92 | "Laccadive Sea", 93 | "Gulf of Aden", 94 | "Gulf of Oman", 95 | "Gulf of Mannar", 96 | "Gulf of Kutch", 97 | "Great Australian Bight", 98 | ], 99 | "Arctic Ocean": [ 100 | "Beaufort Sea", 101 | "Chukchi Sea", 102 | "Barents Sea", 103 | "Kara Sea", 104 | "Laptev Sea", 105 | "White Sea", 106 | "The North Western Passages", 107 | "Amundsen Gulf", 108 | "Viscount Melville Sound", 109 | ], 110 | "SOUTHERN OCEAN": [ 111 | "Ross Sea Eastern Basin", 112 | "Ross Sea Western Basin", 113 | "Weddell Sea", 114 | "Bellingshausen Sea", 115 | "Amundsen Sea", 116 | "Scotia Sea", 117 | "Drake Passage", 118 | ], 119 | "Black Sea": None, 120 | "Mediterranean Sea": [ 121 | "Mediterranean Sea Eastern Basin", 122 | "Mediterranean Sea Western Basin", 123 | "Tyrrhenian Sea", 124 | "Adriatic Sea", 125 | "Golfe du Lion", 126 | "Ionian Sea", 127 | "Strait of Gibraltar", 128 | "Balearic Sea", 129 | "Aegean Sea", 130 | ], 131 | "Red Sea": None, 132 | "Caspian Sea": None, 133 | "Baltic Sea": ["Gulf of Bothnia", "Gulf of Finland"], 134 | } 135 | 136 | 137 | def merged_mask( 138 | basins, ds, lon_name="lon", lat_name="lat", merge_dict=None, verbose=False 139 | ): 140 | """Combine geographical basins (from regionmask) to larger ocean basins. 141 | 142 | Parameters 143 | ---------- 144 | basins : regionmask.core.regions.Regions object 145 | Loaded basin data from regionmask, e.g. `import regionmask;basins = regionmask.defined_regions.natural_earth.ocean_basins_50` 146 | ds : xr.Dataset 147 | Input dataset on which to construct the mask 148 | lon_name : str, optional 149 | Name of the longitude coordinate in `ds`, defaults to `lon` 150 | lat_name : str, optional 151 | Name of the latitude coordinate in `ds`, defaults to `lat` 152 | merge_dict : dict, optional 153 | dictionary defining new aggregated regions (as keys) and the regions to be merge into that region as as values (list of names). 154 | Defaults to large scale ocean basins defined by `xmip.regionmask.default_merge_dict` 155 | verbose : bool, optional 156 | Prints more output, e.g. the regions in `basins` that were not used in the merging step. Defaults to False. 
157 | 158 | Returns 159 | ------- 160 | mask : xr.DataArray 161 | The mask contains ascending numeric value for each key ( merged region) in `merge_dict`. 162 | When the default is used the numeric values correspond to the following regions: 163 | * 0: North Atlantic 164 | 165 | * 1: South Atlantic 166 | 167 | * 2: North Pacific 168 | 169 | * 3: South Pacific 170 | 171 | * 4: Maritime Continent 172 | 173 | * 5: Indian Ocean 174 | 175 | * 6: Arctic Ocean 176 | 177 | * 7: Southern Ocean 178 | 179 | * 8: Black Sea 180 | 181 | * 9: Mediterranean Sea 182 | 183 | *10: Red Sea 184 | 185 | *11: Caspian Sea 186 | 187 | """ 188 | mask = basins.mask(ds, lon_name=lon_name, lat_name=lat_name) 189 | 190 | if merge_dict is None: 191 | merge_dict = _default_merge_dict() 192 | 193 | dict_keys = list(merge_dict.keys()) 194 | number_dict = {k: None for k in dict_keys} 195 | merged_basins = [] 196 | for ocean, small_basins in merge_dict.items(): 197 | try: 198 | ocean_idx = basins.map_keys(ocean) 199 | except KeyError: 200 | # The ocean key is new and cant be found in the previous keys (e.g. for Atlantic full or maritime continent) 201 | ocean_idx = mask.max().data + 1 202 | number_dict[ocean] = ocean_idx 203 | if small_basins: 204 | for sb in small_basins: 205 | sb_idx = basins.map_keys(sb) 206 | # set the index of each small basin to the ocean value 207 | mask = mask.where(mask != sb_idx, ocean_idx) 208 | merged_basins.append(sb) 209 | 210 | if verbose: 211 | remaining_basins = [ 212 | str(basins.regions[ri].name) 213 | for ri in range(len(basins.regions)) 214 | if (basins.regions[ri].name not in merged_basins) 215 | and (basins.regions[ri].name not in list(merge_dict.keys())) 216 | ] 217 | print(remaining_basins) 218 | 219 | # reset the mask indicies to the order of the passed dictionary keys 220 | mask_reordered = xr.ones_like(mask.copy()) * np.nan 221 | for new_idx, k in enumerate(dict_keys): 222 | old_idx = number_dict[k] 223 | mask_reordered = mask_reordered.where(mask != old_idx, new_idx) 224 | 225 | return mask_reordered 226 | -------------------------------------------------------------------------------- /xmip/specs/staggered_grid_config.yaml: -------------------------------------------------------------------------------- 1 | ACCESS-CM2: 2 | gn: 3 | axis_shift: 4 | X: right 5 | Y: right 6 | ACCESS-ESM1-5: 7 | gn: 8 | axis_shift: 9 | X: right 10 | Y: right 11 | BCC-CSM2-MR: 12 | gn: 13 | axis_shift: 14 | X: right 15 | Y: right 16 | BCC-ESM1: 17 | gn: 18 | axis_shift: 19 | X: right 20 | Y: right 21 | CAMS-CSM1-0: 22 | gn: 23 | axis_shift: 24 | X: right 25 | Y: right 26 | CAS-ESM2-0: 27 | gn: 28 | axis_shift: 29 | X: left 30 | Y: left 31 | CESM1-1-CAM5-CMIP5: 32 | gn: 33 | axis_shift: 34 | X: right 35 | Y: right 36 | gr: 37 | axis_shift: 38 | X: left 39 | Y: left 40 | CESM2: 41 | gn: 42 | axis_shift: 43 | X: right 44 | Y: right 45 | gr: 46 | axis_shift: 47 | X: left 48 | Y: left 49 | CESM2-FV2: 50 | gn: 51 | axis_shift: 52 | X: right 53 | Y: right 54 | gr: 55 | axis_shift: 56 | X: left 57 | Y: left 58 | CESM2-WACCM: 59 | gn: 60 | axis_shift: 61 | X: right 62 | Y: right 63 | gr: 64 | axis_shift: 65 | X: left 66 | Y: left 67 | CESM2-WACCM-FV2: 68 | gn: 69 | axis_shift: 70 | X: right 71 | Y: right 72 | gr: 73 | axis_shift: 74 | X: left 75 | Y: left 76 | CIESM: 77 | gn: 78 | axis_shift: 79 | X: left 80 | Y: left 81 | CNRM-CM6-1: 82 | gn: 83 | axis_shift: 84 | X: right 85 | Y: right 86 | gr1: 87 | axis_shift: 88 | X: left 89 | Y: left 90 | CNRM-CM6-1-HR: 91 | gn: 92 | axis_shift: 93 | X: left 94 | Y: 
right 95 | CNRM-ESM2-1: 96 | gn: 97 | axis_shift: 98 | X: right 99 | Y: right 100 | gr1: 101 | axis_shift: 102 | X: left 103 | Y: left 104 | CanESM5: 105 | gn: 106 | axis_shift: 107 | X: right 108 | Y: right 109 | CanESM5-CanOE: 110 | gn: 111 | axis_shift: 112 | X: right 113 | Y: right 114 | E3SM-1-0: 115 | gr: 116 | axis_shift: 117 | X: left 118 | Y: left 119 | E3SM-1-1: 120 | gr: 121 | axis_shift: 122 | X: left 123 | Y: left 124 | E3SM-1-1-ECA: 125 | gr: 126 | axis_shift: 127 | X: left 128 | Y: left 129 | EC-Earth3: 130 | gn: 131 | axis_shift: 132 | X: right 133 | Y: right 134 | gr: 135 | axis_shift: 136 | X: left 137 | Y: left 138 | EC-Earth3-LR: 139 | gn: 140 | axis_shift: 141 | X: right 142 | Y: right 143 | EC-Earth3-Veg: 144 | gn: 145 | axis_shift: 146 | X: right 147 | Y: right 148 | EC-Earth3-Veg-LR: 149 | gn: 150 | axis_shift: 151 | X: left 152 | Y: left 153 | FGOALS-f3-L: 154 | gn: 155 | axis_shift: 156 | X: left 157 | Y: left 158 | FGOALS-g3: 159 | gn: 160 | axis_shift: 161 | X: left 162 | Y: left 163 | FIO-ESM-2-0: 164 | gn: 165 | axis_shift: 166 | X: left 167 | Y: right 168 | GFDL-CM4: 169 | gn: 170 | axis_shift: 171 | X: left 172 | Y: left 173 | gr: 174 | axis_shift: 175 | X: left 176 | Y: left 177 | GFDL-ESM4: 178 | gn: 179 | axis_shift: 180 | X: left 181 | Y: left 182 | gr: 183 | axis_shift: 184 | X: left 185 | Y: left 186 | GFDL-OM4p5B: 187 | gn: 188 | axis_shift: 189 | X: left 190 | Y: left 191 | gr: 192 | axis_shift: 193 | X: left 194 | Y: left 195 | GISS-E2-1-G: 196 | gn: 197 | axis_shift: 198 | X: left 199 | Y: left 200 | GISS-E2-1-G-CC: 201 | gn: 202 | axis_shift: 203 | X: right 204 | Y: left 205 | GISS-E2-1-H: 206 | gn: 207 | axis_shift: 208 | X: left 209 | Y: left 210 | gr: 211 | axis_shift: 212 | X: left 213 | Y: left 214 | GISS-E2-2-G: 215 | gn: 216 | axis_shift: 217 | X: right 218 | Y: left 219 | HadGEM3-GC31-LL: 220 | gn: 221 | axis_shift: 222 | X: right 223 | Y: right 224 | HadGEM3-GC31-MM: 225 | gn: 226 | axis_shift: 227 | X: left 228 | Y: right 229 | IITM-ESM: 230 | gn: 231 | axis_shift: 232 | X: left 233 | Y: left 234 | INM-CM4-8: 235 | gr1: 236 | axis_shift: 237 | X: left 238 | Y: left 239 | INM-CM5-0: 240 | gr1: 241 | axis_shift: 242 | X: left 243 | Y: left 244 | IPSL-CM6A-LR: 245 | gn: 246 | axis_shift: 247 | X: right 248 | Y: right 249 | KACE-1-0-G: 250 | gr: 251 | axis_shift: 252 | X: left 253 | Y: left 254 | MCM-UA-1-0: 255 | gn: 256 | axis_shift: 257 | X: right 258 | Y: right 259 | MIROC-ES2L: 260 | gn: 261 | axis_shift: 262 | X: right 263 | Y: right 264 | MIROC6: 265 | gn: 266 | axis_shift: 267 | X: right 268 | Y: right 269 | MPI-ESM1-2-HR: 270 | gn: 271 | axis_shift: 272 | X: right 273 | Y: left 274 | MRI-ESM2-0: 275 | gn: 276 | axis_shift: 277 | X: left 278 | Y: right 279 | gr: 280 | axis_shift: 281 | X: left 282 | Y: left 283 | NESM3: 284 | gn: 285 | axis_shift: 286 | X: right 287 | Y: right 288 | NorCPM1: 289 | gn: 290 | axis_shift: 291 | X: left 292 | Y: left 293 | gr: 294 | axis_shift: 295 | X: left 296 | Y: left 297 | NorESM1-F: 298 | gn: 299 | axis_shift: 300 | X: left 301 | Y: left 302 | NorESM2-LM: 303 | gn: 304 | axis_shift: 305 | X: right 306 | Y: left 307 | gr: 308 | axis_shift: 309 | X: right 310 | Y: left 311 | NorESM2-MM: 312 | gn: 313 | axis_shift: 314 | X: right 315 | Y: left 316 | gr: 317 | axis_shift: 318 | X: right 319 | Y: left 320 | SAM0-UNICON: 321 | gn: 322 | axis_shift: 323 | X: left 324 | Y: right 325 | TaiESM1: 326 | gn: 327 | axis_shift: 328 | X: left 329 | Y: right 330 | UKESM1-0-LL: 331 | gn: 332 | axis_shift: 333 | X: 
right 334 | Y: right 335 | # This is manually added (due to missing velocity data in the cloud). Might have to adjust if it causes issues. 336 | MPI-ESM1-2-LR: 337 | gn: 338 | axis_shift: 339 | X: left 340 | Y: left 341 | -------------------------------------------------------------------------------- /xmip/utils.py: -------------------------------------------------------------------------------- 1 | try: 2 | import intake 3 | except ImportError: 4 | intake = None 5 | 6 | 7 | def google_cmip_col(catalog="main"): 8 | """A tiny utility function to point to the 'official' pangeo cmip6 cloud files.""" 9 | if intake is None: 10 | raise ImportError( 11 | "This functionality requires intake-esm. Install with `conda install -c conda-forge intake-esm" 12 | ) 13 | if catalog == "main": 14 | return intake.open_esm_datastore( 15 | "https://storage.googleapis.com/cmip6/pangeo-cmip6.json" 16 | ) 17 | # this doesnt work anymore, but ill leave it here as an example for the future 18 | # elif catalog == "testing": 19 | # return intake.open_esm_datastore( 20 | # "https://storage.googleapis.com/cmip6/pangeo-cmip6-testing.json" 21 | # ) 22 | else: 23 | raise ValueError("Catalog not recognized. Should be `main` or `testing`") 24 | 25 | 26 | def model_id_match(match_list, id_tuple): 27 | """Matches `id_tuple` to the list of tuples `exception_list`, which can contain 28 | wildcards (match any entry) and lists (match any entry that is in the list). 29 | 30 | Parameters 31 | ---------- 32 | match_list : list 33 | list of tuples with id strings corresponding to e.g. `source_id`, `grid_label`... 34 | id_tuple : tuple 35 | single tuple with id strings. 36 | """ 37 | # Check the size of tuples 38 | if any([len(t) != len(id_tuple) for t in match_list]): 39 | raise ValueError( 40 | "Each tuple in `match_list` must have the same number of elements as `match_id`" 41 | ) 42 | 43 | match_list_checked = [] 44 | for ml in match_list: 45 | ml_processed = [] 46 | for i in range(len(ml)): 47 | match_element = ml[i] 48 | if isinstance(match_element, str) and match_element != "*": 49 | match_element = [match_element] 50 | if id_tuple[i] in match_element or match_element == "*": 51 | ml_processed.append(True) 52 | else: 53 | ml_processed.append(False) 54 | match_list_checked.append(all(ml_processed)) 55 | return any(match_list_checked) 56 | 57 | 58 | def _key_from_attrs(ds, attrs, sep="."): 59 | return sep.join([ds.attrs[i] if i in ds.attrs.keys() else "none" for i in attrs]) 60 | 61 | 62 | def cmip6_dataset_id( 63 | ds, 64 | sep=".", 65 | id_attrs=[ 66 | "activity_id", 67 | "institution_id", 68 | "source_id", 69 | "experiment_id", 70 | "variant_label", 71 | "table_id", 72 | "grid_label", 73 | "version", 74 | "variable_id", 75 | ], 76 | ): 77 | """Creates a unique string id for e.g. saving files to disk from CMIP6 output 78 | 79 | Parameters 80 | ---------- 81 | ds : xr.Dataset 82 | Input dataset 83 | sep : str, optional 84 | String/Symbol to seperate fields in resulting string, by default "." 85 | 86 | Returns 87 | ------- 88 | str 89 | Concatenated 90 | """ 91 | return _key_from_attrs(ds, id_attrs, sep=sep) 92 | 93 | 94 | def _maybe_make_list(item): 95 | "utility function to make sure output is a list" 96 | if isinstance(item, str): 97 | return [item] 98 | elif isinstance(item, list): 99 | return item 100 | else: 101 | return list(item) 102 | --------------------------------------------------------------------------------
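The pieces above are typically chained together. The following sketch is not part of the repository; the search filters, the example model, and the chosen variable are illustrative assumptions, and it additionally requires `intake-esm`, `regionmask`, and access to the Pangeo CMIP6 cloud catalog. It shows one plausible way to combine `google_cmip_col`, `combined_preprocessing`, and `merged_mask`:

import regionmask

from xmip.preprocessing import combined_preprocessing
from xmip.regionmask import merged_mask
from xmip.utils import google_cmip_col

# Open the Pangeo CMIP6 catalog and narrow it down to a single (hypothetical) dataset.
col = google_cmip_col()
cat = col.search(
    source_id="CanESM5",  # example model; any ocean model in the catalog should work
    variable_id="thetao",
    table_id="Omon",
    experiment_id="historical",
    member_id="r1i1p1f1",
    grid_label="gn",
)

# `preprocess` runs combined_preprocessing on every dataset while it is loaded,
# so names, coordinates, units and bounds come out homogenized.
ddict = cat.to_dataset_dict(preprocess=combined_preprocessing)
_, ds = ddict.popitem()

# Build a large-scale basin mask on the homogenized lon/lat coordinates
# (the regionmask accessor may differ between regionmask versions).
basins = regionmask.defined_regions.natural_earth.ocean_basins_50
mask = merged_mask(basins, ds)

# With the default merge_dict, index 0 corresponds to the North Atlantic.
na_mean = ds["thetao"].where(mask == 0).mean(["x", "y"])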