├── .circleci └── config.yml ├── .gitattributes ├── .github └── workflows │ └── tests.yml ├── .gitignore ├── LICENSE ├── README.md ├── alembic.ini ├── conda ├── environment.yml └── meta.yaml ├── cosima_cookbook ├── __init__.py ├── database.py ├── database_update.py ├── database_utils.py ├── date_utils.py ├── diagnostics │ ├── __init__.py │ ├── mean_tau_x.py │ ├── overturning.py │ └── simple.py ├── distributed.py ├── explore.py ├── memory.py ├── netcdf_index.py ├── netcdf_utils.py ├── plots │ ├── __init__.py │ ├── lineplots.py │ ├── maps.py │ ├── overturning.py │ └── scalar.py ├── querying.py └── summary │ ├── __init__.py │ ├── nml_diff.py │ └── nml_summary.py ├── docs ├── Makefile ├── make.bat ├── requirements.txt └── source │ ├── conf.py │ ├── cosima_cookbook.rst │ ├── getting_started.rst │ ├── index.rst │ ├── modules.rst │ └── related_projects.rst ├── readthedocs.yml ├── requirements.txt ├── sandbox ├── alembic │ ├── README │ ├── env.py │ ├── script.py.mako │ └── versions │ │ └── 16223b92479e_add_keywords.py └── diag-vis.py ├── setup.py └── test ├── conftest.py ├── data ├── explore │ ├── duplicate │ │ └── one │ │ │ ├── metadata.yaml │ │ │ └── ocean │ │ │ └── ocean_age.nc │ ├── one │ │ ├── atmosphere │ │ │ └── ty_trans.nc │ │ ├── ice │ │ │ └── hi_m.nc │ │ ├── metadata.yaml │ │ ├── ocean │ │ │ └── ocean.nc │ │ └── restart │ │ │ └── ocean_velocity_advection.res.nc │ └── two │ │ ├── atm │ │ └── hi_m.nc │ │ ├── metadata.yaml │ │ ├── nomodel │ │ └── ty_trans.nc │ │ ├── ocn │ │ ├── ocean.nc │ │ └── ocean_month.nc │ │ └── restart │ │ └── ocean_velocity_advection.res.nc ├── indexing │ ├── alternate │ │ └── experiment_a │ │ │ └── test2.nc │ ├── broken_file │ │ └── output000 │ │ │ └── test.nc │ ├── broken_metadata │ │ ├── metadata.yaml │ │ └── test1.nc │ ├── empty_file │ │ └── output000 │ │ │ └── empty.nc │ ├── longnames │ │ └── output000 │ │ │ ├── test1.nc │ │ │ └── test2.nc │ ├── metadata │ │ ├── metadata.yaml │ │ └── test1.nc │ ├── multiple │ │ ├── experiment_a │ │ │ └── test1.nc │ │ └── experiment_b │ │ │ └── test1.nc │ ├── single_broken_file │ │ └── output000 │ │ │ ├── broken.nc │ │ │ └── test.nc │ ├── symlinked │ │ └── experiment_a │ ├── time │ │ ├── t1.nc │ │ ├── t2.nc │ │ ├── t3.nc │ │ ├── t4.nc │ │ └── t5.nc │ └── time_bounds │ │ └── file001.nc ├── metadata │ ├── keywords │ │ ├── metadata.yaml │ │ └── test1.nc │ ├── keywords2 │ │ ├── metadata.yaml │ │ └── test1.nc │ ├── string_keyword │ │ └── metadata.yaml │ └── upcase │ │ └── metadata.yaml ├── ocean_sealevel.nc ├── querying │ ├── output000 │ │ ├── hi_m.nc │ │ └── ocean.nc │ └── restart000 │ │ └── ty_trans.nc ├── querying_disambiguation │ └── output000 │ │ ├── ocean.nc │ │ └── ocean_month.nc └── update │ ├── experiment_a │ └── test1.nc │ └── experiment_b │ └── test2.nc ├── test_database.py ├── test_dates.py ├── test_explore.py ├── test_indexing.py ├── test_metadata.py ├── test_querying.py ├── test_sqa14.py └── test_update.py /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.0 2 | jobs: 3 | py36: 4 | working_directory: ~/circleci 5 | docker: 6 | - image: circleci/python 7 | environment: 8 | PYTHON_VER: 3.6 9 | resource_class: medium+ 10 | steps: 11 | - checkout 12 | 13 | - run: | 14 | wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O conda.sh 15 | bash conda.sh -b -p ~/conda 16 | ~/conda/bin/conda config --system --add channels conda-forge 17 | ~/conda/bin/conda config --system --add channels coecms 18 | ~/conda/bin/conda update conda 19 | 
~/conda/bin/conda install --yes conda-build conda-verify 20 | 21 | - run: | 22 | ~/conda/bin/conda build -c coecms conda --python=${PYTHON_VER} 23 | 24 | - run: | 25 | mkdir ~/artefacts 26 | cp $(~/conda/bin/conda build conda --python=${PYTHON_VER} --output) ~/artefacts 27 | 28 | - persist_to_workspace: 29 | root: ~/artefacts 30 | paths: '*' 31 | 32 | publish: 33 | working_directory: /circleci 34 | docker: 35 | - image: scottwales/conda-build 36 | resource_class: medium+ 37 | steps: 38 | - attach_workspace: 39 | at: /artefacts 40 | 41 | - run: 42 | anaconda --token "${ANACONDA_TOKEN}" upload --user "${ANACONDA_USER}" /artefacts/*.tar.bz2 43 | 44 | workflows: 45 | version: 2 46 | build_and_publsh: 47 | jobs: 48 | - py36: 49 | filters: 50 | tags: 51 | only: /.*/ 52 | 53 | - publish: 54 | requires: 55 | - py36 56 | filters: 57 | tags: 58 | only: /.*/ 59 | branches: 60 | ignore: /.*/ 61 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | 2 | *.ipynb diff=jupyternotebook 3 | 4 | *.ipynb merge=jupyternotebook 5 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Testing 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | formatting: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Code formatting 18 | uses: lgeiger/black-action@v1.0.1 19 | with: 20 | args: "--check --verbose cosima_cookbook test" 21 | 22 | testing: 23 | needs: formatting 24 | runs-on: ubuntu-latest 25 | strategy: 26 | matrix: 27 | python-version: ['3.8', '3.9', '3.10'] 28 | sqa-version: ['<1.4', '==1.4.*'] 29 | 30 | steps: 31 | - uses: actions/checkout@v2 32 | - name: Set up Python ${{ matrix.python-version }} 33 | uses: actions/setup-python@v2 34 | with: 35 | python-version: ${{ matrix.python-version }} 36 | - name: Install dependencies 37 | run: | 38 | python -m pip install '.[build]' 39 | python -m pip install "sqlalchemy${{ matrix.sqa-version}}" 40 | - name: Unit tests 41 | run: | 42 | python -m pytest --cov cosima_cookbook test 43 | - name: Upload coverage reports to Codecov with GitHub Action 44 | uses: codecov/codecov-action@v3 45 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | doc/_build 92 | doc/diagnostics 93 | doc/configurations 94 | doc/notebooks 95 | doc/gen_modules 96 | 97 | dask-worker-space 98 | 99 | # from https://github.com/github/gitignore/blob/master/Global/macOS.gitignore 100 | 101 | # General 102 | .DS_Store 103 | .AppleDouble 104 | .LSOverride 105 | 106 | # Icon must end with two \r 107 | Icon 108 | 109 | 110 | # Thumbnails 111 | ._* 112 | 113 | # Files that might appear in the root of a volume 114 | .DocumentRevisions-V100 115 | .fseventsd 116 | .Spotlight-V100 117 | .TemporaryItems 118 | .Trashes 119 | .VolumeIcon.icns 120 | .com.apple.timemachine.donotpresent 121 | 122 | # Directories potentially created on remote AFP share 123 | .AppleDB 124 | .AppleDesktop 125 | Network Trash Folder 126 | Temporary Items 127 | .apdisk 128 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 |

3 | 4 | 5 | latest docs 6 | 7 | 8 | # cosima-cookbook package 9 | 10 | This repository hosts the `cosima_cookbook` which is a [Python package](https://anaconda.org/coecms/cosima-cookbook) for managing a database of ocean model output and loading the output via xarray. 11 | 12 | ⚠️ **The `cosima_cookbook` Python package is deprecated and no longer being developed!** ⚠️ 13 | 14 | Use the [ACCESS-NRI Intake catalog](https://cosima-recipes.readthedocs.io/en/latest/Tutorials/ACCESS-NRI_Intake_Catalog.html) instead. 15 | 16 | ## What now? Where should I go? 17 | 18 | We refer users to [COSIMA Cookbook repository](https://github.com/COSIMA/cosima-recipes) where they will find tutorials and 'recipes' (that is, examples) of various analyses that one can do using ocean-sea ice model output. 19 | 20 | [![Documentation Status](https://readthedocs.org/projects/cosima-cookbook/badge/?version=latest)](https://cosima-cookbook.readthedocs.org/en/latest) 21 | -------------------------------------------------------------------------------- /alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = sandbox/alembic 6 | 7 | # template used to generate migration files 8 | # file_template = %%(rev)s_%%(slug)s 9 | 10 | # timezone to use when rendering the date 11 | # within the migration file as well as the filename. 12 | # string value is passed to dateutil.tz.gettz() 13 | # leave blank for localtime 14 | # timezone = 15 | 16 | # max length of characters to apply to the 17 | # "slug" field 18 | # truncate_slug_length = 40 19 | 20 | # set to 'true' to run the environment during 21 | # the 'revision' command, regardless of autogenerate 22 | # revision_environment = false 23 | 24 | # set to 'true' to allow .pyc and .pyo files without 25 | # a source .py file to be detected as revisions in the 26 | # versions/ directory 27 | # sourceless = false 28 | 29 | # version location specification; this defaults 30 | # to sandbox/alembic/versions. When using multiple version 31 | # directories, initial revisions must be specified with --version-path 32 | # version_locations = %(here)s/bar %(here)s/bat sandbox/alembic/versions 33 | 34 | # the output encoding used when revision files 35 | # are written from script.py.mako 36 | # output_encoding = utf-8 37 | 38 | sqlalchemy.url = sqlite:///path-to-db.db 39 | 40 | 41 | [post_write_hooks] 42 | # post_write_hooks defines scripts or Python functions that are run 43 | # on newly generated revision scripts. 
See the documentation for further 44 | # detail and examples 45 | 46 | # format using "black" - use the console_scripts runner, against the "black" entrypoint 47 | # hooks=black 48 | # black.type=console_scripts 49 | # black.entrypoint=black 50 | # black.options=-l 79 51 | 52 | # Logging configuration 53 | [loggers] 54 | keys = root,sqlalchemy,alembic 55 | 56 | [handlers] 57 | keys = console 58 | 59 | [formatters] 60 | keys = generic 61 | 62 | [logger_root] 63 | level = WARN 64 | handlers = console 65 | qualname = 66 | 67 | [logger_sqlalchemy] 68 | level = WARN 69 | handlers = 70 | qualname = sqlalchemy.engine 71 | 72 | [logger_alembic] 73 | level = INFO 74 | handlers = 75 | qualname = alembic 76 | 77 | [handler_console] 78 | class = StreamHandler 79 | args = (sys.stderr,) 80 | level = NOTSET 81 | formatter = generic 82 | 83 | [formatter_generic] 84 | format = %(levelname)-5.5s [%(name)s] %(message)s 85 | datefmt = %H:%M:%S 86 | -------------------------------------------------------------------------------- /conda/environment.yml: -------------------------------------------------------------------------------- 1 | name: ncimonitor 2 | 3 | channels: 4 | - coecms 5 | - conda-forge 6 | - defaults 7 | 8 | dependencies: 9 | - python 10 | - numpy 11 | - dask 12 | - distributed 13 | - xarray 14 | - netcdf4 15 | - joblib 16 | - tqdm 17 | - sqlalchemy 18 | 19 | -------------------------------------------------------------------------------- /conda/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: cosima-cookbook 3 | version: {{ GIT_DESCRIBE_TAG}} 4 | 5 | source: 6 | git_rev: master 7 | git_url: ../ 8 | 9 | build: 10 | noarch: python 11 | number: {{ GIT_DESCRIBE_NUMBER }} 12 | script: python setup.py install --single-version-externally-managed --record=record.txt 13 | 14 | requirements: 15 | build: 16 | - python>=3.6 17 | - setuptools 18 | - setuptools_scm 19 | - pbr 20 | run: 21 | - python>=3.6 22 | - numpy 23 | - dask 24 | - distributed 25 | - xarray 26 | - netcdf4 27 | - joblib 28 | - tqdm 29 | - sqlalchemy<2.0 30 | - ipywidgets 31 | - cftime>1.2.1 32 | - lxml 33 | 34 | about: 35 | home: http://cosima-cookbook.readthedocs.io 36 | license: Apache License 2.0 37 | 38 | -------------------------------------------------------------------------------- /cosima_cookbook/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Common tools for working with COSIMA model output 4 | """ 5 | 6 | from . import database 7 | from . import querying 8 | from . import explore 9 | 10 | from importlib.metadata import version, PackageNotFoundError 11 | 12 | try: 13 | __version__ = version("cosima-cookbook") 14 | except PackageNotFoundError: 15 | pass 16 | -------------------------------------------------------------------------------- /cosima_cookbook/database_update.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pathlib 3 | 4 | import cosima_cookbook as cc 5 | 6 | 7 | def main(argv=None): 8 | parser = argparse.ArgumentParser(description="Update COSIMA cookbook database.") 9 | parser.add_argument( 10 | "dirs", type=pathlib.Path, nargs="+", help="Directories to index." 
11 | ) 12 | parser.add_argument( 13 | "-db", 14 | "--database", 15 | dest="db", 16 | action="store", 17 | default="cosima_master.db", 18 | help="Database to update.", 19 | ) 20 | args = parser.parse_args(argv) 21 | 22 | print(cc) 23 | 24 | print("Establishing a DB connection to: {}".format(args.db)) 25 | session = cc.database.create_session(args.db, timeout=30) 26 | 27 | for dir in args.dirs: 28 | print("Indexing: {}".format(dir)) 29 | cc.database.build_index( 30 | dir, session, prune="delete", force=False, followsymlinks=True, nfiles=1000 31 | ) 32 | -------------------------------------------------------------------------------- /cosima_cookbook/database_utils.py: -------------------------------------------------------------------------------- 1 | # enforce unique ORM objects: https://github.com/sqlalchemy/sqlalchemy/wiki/UniqueObject 2 | 3 | 4 | def _unique(session, cls, hashfunc, queryfunc, constructor, arg, kw): 5 | cache = getattr(session, "_unique_cache", None) 6 | if cache is None: 7 | session._unique_cache = cache = {} 8 | 9 | key = (cls, hashfunc(*arg, **kw)) 10 | if key in cache: 11 | return cache[key] 12 | else: 13 | with session.no_autoflush: 14 | q = session.query(cls) 15 | q = queryfunc(q, *arg, **kw) 16 | obj = q.first() 17 | if not obj: 18 | obj = constructor(*arg, **kw) 19 | session.add(obj) 20 | cache[key] = obj 21 | return obj 22 | 23 | 24 | class UniqueMixin(object): 25 | @classmethod 26 | def unique_hash(cls, *arg, **kw): 27 | return NotImplementedError() 28 | 29 | @classmethod 30 | def unique_filter(cls, query, *arg, **kw): 31 | return NotImplementedError() 32 | 33 | @classmethod 34 | def as_unique(cls, session, *arg, **kw): 35 | return _unique(session, cls, cls.unique_hash, cls.unique_filter, cls, arg, kw) 36 | -------------------------------------------------------------------------------- /cosima_cookbook/date_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2018 ARC Centre of Excellence for Climate Systems Science 3 | author: Aidan Heerdegen 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | Unless required by applicable law or agreed to in writing, software 9 | distributed under the License is distributed on an "AS IS" BASIS, 10 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | See the License for the specific language governing permissions and 12 | limitations under the License. 
13 | """ 14 | 15 | from __future__ import print_function 16 | 17 | import datetime 18 | 19 | import cftime 20 | from cftime import num2date, date2num 21 | import numpy as np 22 | import xarray as xr 23 | from xarray.coding.cftime_offsets import to_cftime_datetime 24 | 25 | rebase_attr = "_rebased_units" 26 | rebase_shift_attr = "_rebased_shift" 27 | bounds = "bounds" 28 | boundsvar = "bounds_var" 29 | 30 | datetimeformat = "%Y-%m-%d %H:%M:%S" 31 | 32 | # Code adapted from https://github.com/spencerahill/aospy/issues/212 33 | 34 | 35 | def date2num_round(dates, units, calendar): 36 | return np.round(date2num(dates, units, calendar), 8) 37 | 38 | 39 | def rebase_times(values, input_units, calendar, output_units): 40 | dates = num2date(values, input_units, calendar) 41 | return date2num_round(dates, output_units, calendar) 42 | 43 | 44 | def is_bounds(var): 45 | """ 46 | Return True if the xarray variable has been flagged as a bounds 47 | variable (has a bounds_var attribute) 48 | """ 49 | return boundsvar in var.attrs 50 | 51 | 52 | def set_bounds(var, varname): 53 | """ 54 | Set the bounds_var attribute to the name of the dimension for which 55 | it is the bounds 56 | """ 57 | var.attrs[boundsvar] = varname 58 | 59 | 60 | def flag_bounds(ds): 61 | """ 62 | Cycle through all the variables in a dataset and mark variables which 63 | are bounds as such by adding a bounds_var attribute 64 | """ 65 | for name in ds.variables: 66 | if is_bounds(ds[name]): 67 | # This is a bounds variable and has been flagged as such 68 | continue 69 | if bounds in ds[name].attrs: 70 | # Flag bounds variable as such 71 | try: 72 | set_bounds(ds[ds[name].attrs[bounds]], name) 73 | except KeyError: 74 | # Ignore if bounds variable not present 75 | pass 76 | 77 | 78 | def unflag_bounds(ds): 79 | """ 80 | Cycle through all the variables in a dataset and unflag variables which 81 | are bounds by deleting any bounds_var 82 | """ 83 | for name in ds.variables: 84 | try: 85 | del ds[name].attrs[boundsvar] 86 | except KeyError: 87 | pass 88 | 89 | 90 | def rebase_variable(var, calendar=None, target_units=None, src_units=None, offset=None): 91 | """ 92 | Create rebased time variable 93 | """ 94 | attributes = var.attrs 95 | 96 | # If no target_units are specified check if the variable has been previously 97 | # rebased and use this as the target, which will undo the previous rebasing 98 | if calendar == None: 99 | try: 100 | calendar = var.attrs["calendar"] 101 | except KeyError: 102 | try: 103 | calendar = var.encoding["calendar"] 104 | except KeyError: 105 | raise AttributeError("No calendar attribute found and none specified") 106 | 107 | # Default to src_units being the units for the variable (bounds variables 108 | # may not have correct units so in this case it has to be specified) 109 | if src_units is None: 110 | src_units = attributes["units"] 111 | 112 | # If no target_units are specified check if the variable has been previously 113 | # rebased and use this as the target, which will undo the previous rebasing 114 | if target_units == None: 115 | try: 116 | target_units = attributes[rebase_attr] 117 | except KeyError: 118 | raise AttributeError( 119 | "No existing rebase found and target_units not specified" 120 | ) 121 | finally: 122 | del attributes[rebase_attr] 123 | else: 124 | attributes[rebase_attr] = src_units 125 | 126 | # Rebase 127 | newvar = xr.apply_ufunc( 128 | rebase_times, var, src_units, calendar, target_units, dask="allowed" 129 | ) 130 | 131 | if rebase_shift_attr in attributes: 132 | newvar = 
newvar - attributes[rebase_shift_attr] 133 | del attributes[rebase_shift_attr] 134 | else: 135 | if offset is not None: 136 | # Offset can be an integer, 'auto', or datetime.delta 137 | 138 | if offset == "auto": 139 | # Generate a timedelta offset based on the calendars of src 140 | # and target 141 | offset = num2date(0, target_units, calendar) - num2date( 142 | 0, src_units, calendar 143 | ) 144 | 145 | if isinstance(offset, datetime.timedelta): 146 | # Add delta to src calendar origin and convert to integer offset 147 | offset = date2num_round( 148 | num2date(0, src_units, calendar) + offset, src_units, calendar 149 | ) 150 | 151 | newvar = newvar + offset 152 | attributes[rebase_shift_attr] = offset 153 | 154 | if newvar.min() < 0: 155 | raise ValueError( 156 | "Rebase creates negative dates, specify offset=auto to shift dates appropriately" 157 | ) 158 | 159 | # Save the values back into the variable, put back the attributes and update 160 | # the units 161 | newvar.attrs = attributes 162 | newvar.attrs["units"] = target_units 163 | 164 | return newvar 165 | 166 | 167 | def rebase_dataset(ds, target_units=None, timevar="time", offset=None): 168 | """ 169 | Rebase the time dimension variable in a dataset to a different start date. 170 | This is useful to overcome limitations in pandas datetime indices used in 171 | xarray, and to place two datasets with different date indices onto a common 172 | date index 173 | """ 174 | 175 | # The units are defined as the units used by timevar 176 | units = ds[timevar].attrs["units"] 177 | calendar = ds[timevar].attrs["calendar"] 178 | 179 | newds = ds.copy() 180 | 181 | # Cycle through all variables, setting a flag if they are a bounds variable 182 | flag_bounds(newds) 183 | 184 | for name in newds.variables: 185 | if is_bounds(newds[name]): 186 | # This is a bounds variable and has been flagged as such so ignore 187 | # as it will be processed by the variable for which it is the bounds 188 | continue 189 | if newds[name].attrs["units"] == units: 190 | newds[name] = rebase_variable( 191 | newds[name], calendar, target_units, offset=offset 192 | ) 193 | if bounds in newds[name].attrs: 194 | # Must make the same adjustment to the bounds variable 195 | bvarname = newds[name].attrs[bounds] 196 | try: 197 | newds[bvarname] = rebase_variable( 198 | newds[bvarname], 199 | calendar, 200 | target_units, 201 | src_units=units, 202 | offset=offset, 203 | ) 204 | except KeyError: 205 | # Ignore if bounds_var missing 206 | pass 207 | 208 | # Unset bounds flags 209 | unflag_bounds(newds) 210 | 211 | # newds = xr.decode_cf(newds, decode_coords=False, decode_times=True) 212 | 213 | return newds 214 | 215 | 216 | def shift_time(ds): 217 | """ 218 | Apply time shift to un-decoded time axis, to align datasets and 219 | """ 220 | pass 221 | 222 | 223 | def format_datetime(datetime, format=datetimeformat): 224 | """ 225 | Standard method to convert cftime.datetime objects to strings for 226 | storage in SQL database. Hard code the length as some datetime 227 | objects don't space pad when formatted! 228 | """ 229 | return "{:0>19}".format(datetime.strftime(format).lstrip()) 230 | 231 | 232 | def parse_datetime(datetimestring, calendar="proleptic_gregorian"): 233 | """ 234 | Standard method to convert datetime obkects stored as strings in SQL database 235 | back into cftime.datetime objects 236 | """ 237 | # xarray supports parsing dates strings to cftime.datetime objects, but 238 | # requires ISO-8601 format (https://en.wikipedia.org/wiki/ISO_8601). 
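    # Strings are written to the database by format_datetime above using
    # "%Y-%m-%d %H:%M:%S" (e.g. "0001-01-01 00:00:00"), so only the "T"
    # separator is missing from an ISO-8601 form.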
239 | # Convert string to ISO-8601 before parsing by adding separator 240 | # between date and time elements 241 | datetimestring = datetimestring[:10] + "T" + datetimestring[11:] 242 | 243 | # Note: uses non-public xarray method that may change or be deleted 244 | # in the future 245 | return to_cftime_datetime(datetimestring, calendar) 246 | -------------------------------------------------------------------------------- /cosima_cookbook/diagnostics/__init__.py: -------------------------------------------------------------------------------- 1 | from .mean_tau_x import mean_tau_x 2 | from .simple import * 3 | from .overturning import * 4 | 5 | __all__ = [ 6 | "mean_tau_x", 7 | "annual_scalar", 8 | "drake_passage", 9 | "sea_surface_temperature", 10 | "sea_surface_salinity", 11 | "psi_avg", 12 | "zonal_mean", 13 | "mixed_layer_depth", 14 | ] 15 | -------------------------------------------------------------------------------- /cosima_cookbook/diagnostics/mean_tau_x.py: -------------------------------------------------------------------------------- 1 | from ..memory import memory 2 | from ..querying import getvar 3 | 4 | 5 | @memory.cache 6 | def mean_tau_x(expt): 7 | """ 8 | 10-year zonal average of horizontal wind stress. 9 | """ 10 | tau_x = get_nc_variable( 11 | expt, "ocean_month.nc", "tau_x", time_units="days since 1900-01-01", n=10 12 | ) 13 | 14 | mean_tau_x = tau_x.mean("xu_ocean").mean("time") 15 | mean_tau_x = mean_tau_x.compute() 16 | mean_tau_x.name = "mean_tau_x" 17 | 18 | return mean_tau_x 19 | -------------------------------------------------------------------------------- /cosima_cookbook/diagnostics/overturning.py: -------------------------------------------------------------------------------- 1 | from ..querying import getvar, get_variables 2 | from ..memory import memory 3 | 4 | 5 | @memory.cache 6 | def psi_avg(expt, n=10): 7 | def op(p): 8 | summed_p = p.sum("grid_xt_ocean") 9 | # summed_p.attrs['units'] = p.units 10 | return summed_p 11 | 12 | psi = get_nc_variable( 13 | expt, 14 | "ocean.nc", 15 | "ty_trans_rho", 16 | # op=op, 17 | chunks={"potrho": None}, 18 | n=n, 19 | time_units="days since 1900-01-01", 20 | ) 21 | psi = psi.sum("grid_xt_ocean") 22 | 23 | varlist = get_variables(expt, "ocean.nc") 24 | if "ty_trans_rho_gm" in varlist: 25 | GM = True 26 | psiGM = get_nc_variable( 27 | expt, 28 | "ocean.nc", 29 | "ty_trans_rho_gm", 30 | # op=op, 31 | chunks={"potrho": None}, 32 | n=n, 33 | time_units="days since 1900-01-01", 34 | ) 35 | psiGM = psiGM.sum("grid_xt_ocean") 36 | else: 37 | GM = False 38 | 39 | # if psi.units == 'kg/s': 40 | # print('WARNING: Changing units for ', expt) 41 | # assume units of kg/s, convert to Sv. 
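    # (1 Sv = 1e6 m^3/s, roughly 1e9 kg/s of seawater, hence the 1.0e-9 factor below)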
42 | psi = psi * 1.0e-9 43 | if GM: 44 | psiGM = psiGM * 1.0e-9 45 | 46 | psi_avg = psi.cumsum("potrho").mean("time") - psi.sum("potrho").mean("time") 47 | if GM: 48 | psi_avg = psi_avg + psiGM.mean("time") 49 | 50 | psi_avg.load() 51 | 52 | return psi_avg 53 | 54 | 55 | @memory.cache 56 | def calc_aabw(expt): 57 | print("Calculating {} timeseries of AABW transport at 55S ".format(expt)) 58 | 59 | def op(p): 60 | summed_p = p.sum("grid_xt_ocean") 61 | # summed_p.attrs['units'] = p.units 62 | return summed_p 63 | 64 | psi = get_nc_variable( 65 | expt, 66 | "ocean.nc", 67 | "ty_trans_rho", 68 | # op=op, 69 | chunks={"potrho": None}, 70 | time_units="days since 1900-01-01", 71 | ) 72 | psi = psi.sum("grid_xt_ocean") 73 | 74 | varlist = get_variables(expt, "ocean.nc") 75 | if "ty_trans_rho_gm" in varlist: 76 | GM = True 77 | psiGM = get_nc_variable( 78 | expt, 79 | "ocean.nc", 80 | "ty_trans_rho_gm", 81 | # op=op, 82 | chunks={"potrho": None}, 83 | time_units="days since 1900-01-01", 84 | ) 85 | psiGM = psiGM.sum("grid_xt_ocean") 86 | else: 87 | GM = False 88 | 89 | # if psi.units == 'kg/s': 90 | # print('WARNING: Changing units for ', expt) 91 | # assume units of kg/s, convert to Sv. 92 | 93 | psi = psi * 1.0e-9 94 | if GM: 95 | psiGM = psiGM * 1.0e-9 96 | 97 | psi_sum = psi.cumsum("potrho") - psi.sum("potrho") 98 | if GM: 99 | psi_sum = psi_sum + psiGM 100 | 101 | psi_aabw = ( 102 | psi_sum.sel(method="Nearest", grid_yu_ocean=-40) 103 | .sel(potrho=slice(1036, None)) 104 | .min("potrho") 105 | .resample("3A", dim="time") 106 | ) 107 | psi_aabw = psi_aabw.compute() 108 | 109 | return psi_aabw 110 | 111 | 112 | @memory.cache 113 | def calc_amoc(expt): 114 | print("Calculating {} timeseries of AMOC transport at 26N ".format(expt)) 115 | 116 | def op(p): 117 | summed_p = p.sum("grid_xt_ocean") 118 | # summed_p.attrs['units'] = p.units 119 | return summed_p 120 | 121 | psi = get_nc_variable( 122 | expt, 123 | "ocean.nc", 124 | "ty_trans_rho", 125 | # op=op, 126 | chunks={"potrho": None}, 127 | time_units="days since 1900-01-01", 128 | ) 129 | psi = psi.sum("grid_xt_ocean") 130 | 131 | varlist = get_variables(expt, "ocean.nc") 132 | if "ty_trans_rho_gm" in varlist: 133 | GM = True 134 | psiGM = get_nc_variable( 135 | expt, 136 | "ocean.nc", 137 | "ty_trans_rho_gm", 138 | # op=op, 139 | chunks={"potrho": None}, 140 | time_units="days since 1900-01-01", 141 | ) 142 | psiGM = psiGM.sum("grid_xt_ocean") 143 | else: 144 | GM = False 145 | 146 | # if psi.units == 'kg/s': 147 | # print('WARNING: Changing units for ', expt) 148 | # assume units of kg/s, convert to Sv. 
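    # After converting to Sv, the transport is accumulated over density classes and
    # referenced to zero at the densest class; AMOC is then taken as the maximum of
    # that streamfunction at 26N over potrho > 1035.5, resampled to 3-yearly means.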
149 | 150 | psi = psi * 1.0e-9 151 | if GM: 152 | psiGM = psiGM * 1.0e-9 153 | 154 | psi_sum = psi.cumsum("potrho") - psi.sum("potrho") 155 | if GM: 156 | psi_sum = psi_sum + psiGM 157 | 158 | psi_amoc = ( 159 | psi_sum.sel(method="Nearest", grid_yu_ocean=26) 160 | .sel(potrho=slice(1035.5, None)) 161 | .max("potrho") 162 | .resample("3A", dim="time") 163 | ) 164 | psi_amoc = psi_amoc.compute() 165 | 166 | return psi_amoc 167 | 168 | 169 | @memory.cache 170 | def calc_amoc_south(expt): 171 | print("Calculating {} timeseries of AMOC transport at 35S ".format(expt)) 172 | 173 | def op(p): 174 | summed_p = p.sum("grid_xt_ocean") 175 | # summed_p.attrs['units'] = p.units 176 | return summed_p 177 | 178 | psi = get_nc_variable( 179 | expt, 180 | "ocean.nc", 181 | "ty_trans_rho", 182 | # op=op, 183 | chunks={"potrho": None}, 184 | time_units="days since 1900-01-01", 185 | ) 186 | psi = psi.sum("grid_xt_ocean") 187 | 188 | varlist = get_variables(expt, "ocean.nc") 189 | if "ty_trans_rho_gm" in varlist: 190 | GM = True 191 | psiGM = get_nc_variable( 192 | expt, 193 | "ocean.nc", 194 | "ty_trans_rho_gm", 195 | # op=op, 196 | chunks={"potrho": None}, 197 | time_units="days since 1900-01-01", 198 | ) 199 | psiGM = psiGM.sum("grid_xt_ocean") 200 | else: 201 | GM = False 202 | 203 | # if psi.units == 'kg/s': 204 | # print('WARNING: Changing units for ', expt) 205 | # assume units of kg/s, convert to Sv. 206 | 207 | psi = psi * 1.0e-9 208 | if GM: 209 | psiGM = psiGM * 1.0e-9 210 | 211 | psi_sum = psi.cumsum("potrho") - psi.sum("potrho") 212 | if GM: 213 | psi_sum = psi_sum + psiGM 214 | 215 | psi_amoc_south = ( 216 | psi_sum.sel(method="Nearest", grid_yu_ocean=-35) 217 | .sel(potrho=slice(1035.5, None)) 218 | .max("potrho") 219 | .resample("3A", dim="time") 220 | ) 221 | psi_amoc_south = psi_amoc_south.compute() 222 | 223 | return psi_amoc_south 224 | 225 | 226 | @memory.cache 227 | def zonal_mean(expt, variable, n=10, resolution=1): 228 | zonal_var = get_nc_variable( 229 | expt, 230 | "ocean.nc", 231 | variable, 232 | chunks={"st_ocean": None}, 233 | n=n, 234 | time_units="days since 1900-01-01", 235 | ) 236 | 237 | # Annual Average WOA13 long-term climatology. 
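    # Select the WOA13 climatology regridded to the model resolution (1, 0.25 or 0.1 degree).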
238 | if resolution == 1: 239 | zonal_WOA13 = ( 240 | get_nc_variable("woa13/10", "woa13_ts_??_mom10.nc", variable) 241 | .mean("GRID_X_T") 242 | .mean("time") 243 | ) 244 | elif resolution == 0.25: 245 | zonal_WOA13 = ( 246 | get_nc_variable("woa13/025", "woa13_ts_??_mom025.nc", variable) 247 | .mean("GRID_X_T") 248 | .mean("time") 249 | ) 250 | elif resolution == 0.1: 251 | zonal_WOA13 = ( 252 | get_nc_variable("woa13/01", "woa13_ts_??_mom01.nc", variable) 253 | .mean("GRID_X_T") 254 | .mean("time") 255 | ) 256 | else: 257 | print("WARNING: Sorry, we dont seem to recognise resolution ", resolution) 258 | 259 | zonal_WOA13.compute() 260 | if variable == "temp": 261 | zonal_WOA13 = zonal_WOA13 + 273.15 262 | 263 | zonal_mean = zonal_var.mean("xt_ocean").mean("time") 264 | zonal_mean.compute() 265 | zonal_diff = zonal_mean - zonal_WOA13.values 266 | 267 | return zonal_mean, zonal_diff 268 | -------------------------------------------------------------------------------- /cosima_cookbook/diagnostics/simple.py: -------------------------------------------------------------------------------- 1 | from ..querying import getvar, get_variables 2 | from ..memory import memory 3 | 4 | import logging 5 | 6 | 7 | @memory.cache 8 | def annual_scalar(expt, variables): 9 | """ """ 10 | 11 | logging.debug("Building dataset") 12 | darray = get_nc_variable( 13 | expt, 14 | "ocean_scalar.nc", 15 | variables, 16 | time_units="days since 1900-01-01", 17 | use_bag=True, 18 | ) 19 | 20 | logging.debug("Resampling in time") 21 | annual_average = darray.resample(time="A").mean("time") 22 | 23 | for v in annual_average.data_vars: 24 | avar = annual_average.variables[v] 25 | dvar = darray.variables[v] 26 | avar.attrs["long_name"] = dvar.attrs["long_name"] + " (annual average)" 27 | avar.attrs["units"] = dvar.attrs["units"] 28 | 29 | return annual_average 30 | 31 | 32 | @memory.cache 33 | def drake_passage(expt): 34 | "Calculate transport through Drake Passage" 35 | 36 | tx = get_nc_variable( 37 | expt, 38 | "ocean_month.nc", 39 | "tx_trans_int_z", 40 | chunks={"yt_ocean": 200}, 41 | time_units="days since 1900-01-01", 42 | use_bag=False, 43 | ) 44 | 45 | tx_trans = tx.sel(xu_ocean=-69, method="nearest").sel(yt_ocean=slice(-72, -52)) 46 | 47 | if tx_trans.units == "Sv (10^9 kg/s)": 48 | transport = tx_trans.sum("yt_ocean").resample(time="A").mean("time") 49 | else: 50 | # print('WARNING: Changing units for ', expt) 51 | transport = tx_trans.sum("yt_ocean").resample(time="A").mean("time") * 1.0e-9 52 | 53 | transport.load() 54 | 55 | return transport 56 | 57 | 58 | @memory.cache 59 | def bering_strait(expt): 60 | ty = get_nc_variable( 61 | expt, 62 | "ocean_month.nc", 63 | "ty_trans_int_z", 64 | chunks={"yu_ocean": 200}, 65 | time_units="days since 1900-01-01", 66 | ) 67 | ty_trans = ty.sel(yu_ocean=67, method="nearest").sel(xt_ocean=slice(-171, -167)) 68 | if ty_trans.units == "Sv (10^9 kg/s)": 69 | transport = ty_trans.sum("xt_ocean").resample(time="A").mean("time") 70 | else: 71 | # print('WARNING: Changing units for ', expt) 72 | transport = ty_trans.sum("xt_ocean").resample(time="A").mean("time") * 1.0e-9 73 | 74 | transport.load() 75 | 76 | return transport 77 | 78 | 79 | @memory.cache 80 | def sea_surface_temperature(expt, resolution=1): 81 | ## Load SST from expt 82 | varlist = get_variables(expt, "ocean_month.nc") 83 | if "surface_temp" in varlist: 84 | SST = get_nc_variable( 85 | expt, 86 | "ocean_month.nc", 87 | "surface_temp", 88 | n=10, 89 | time_units="days since 1900-01-01", 90 | ) 91 | else: 92 | SST = 
get_nc_variable( 93 | expt, "ocean.nc", "temp", n=10, time_units="days since 1900-01-01" 94 | ).isel(st_ocean=0) 95 | 96 | if SST.units == "degrees K": 97 | SST = SST - 273.15 98 | 99 | # Annual Average WOA13 long-term climatology. 100 | if resolution == 1: 101 | SST_WOA13 = get_nc_variable("woa13/10", "woa13_ts_??_mom10.nc", "temp").isel( 102 | ZT=0 103 | ) 104 | elif resolution == 0.25: 105 | SST_WOA13 = get_nc_variable("woa13/025", "woa13_ts_??_mom025.nc", "temp").isel( 106 | ZT=0 107 | ) 108 | elif resolution == 0.1: 109 | SST_WOA13 = get_nc_variable("woa13/01", "woa13_ts_??_mom01.nc", "temp").isel( 110 | ZT=0 111 | ) 112 | else: 113 | print("WARNING: Sorry, we dont seem to recognise resolution ", resolution) 114 | 115 | # Average 116 | SST = SST.mean("time") 117 | SSTdiff = SST - SST_WOA13.mean("time").values 118 | 119 | return SST, SSTdiff 120 | 121 | 122 | @memory.cache 123 | def sea_surface_salinity(expt, resolution=1): 124 | ## Load SSS from expt 125 | varlist = get_variables(expt, "ocean_month.nc") 126 | if "surface_salt" in varlist: 127 | SSS = get_nc_variable(expt, "ocean_month.nc", "surface_salt", n=10) 128 | else: 129 | SSS = get_nc_variable(expt, "ocean.nc", "salt", n=10).isel(st_ocean=0) 130 | 131 | # Annual Average WOA13 long-term climatology. 132 | if resolution == 1: 133 | SSS_WOA13 = get_nc_variable("woa13/10", "woa13_ts_??_mom10.nc", "salt").isel( 134 | ZT=0 135 | ) 136 | elif resolution == 0.25: 137 | SSS_WOA13 = get_nc_variable("woa13/025", "woa13_ts_??_mom025.nc", "salt").isel( 138 | ZT=0 139 | ) 140 | elif resolution == 0.1: 141 | SSS_WOA13 = get_nc_variable("woa13/01", "woa13_ts_??_mom01.nc", "salt").isel( 142 | ZT=0 143 | ) 144 | else: 145 | print("WARNING: Sorry, we dont seem to recognise resolution ", resolution) 146 | 147 | # Average over last 10 time slices - prefer to do this by year. 148 | SSS = SSS.mean("time") 149 | SSSdiff = SSS - SSS_WOA13.mean("time").values 150 | 151 | return SSS, SSSdiff 152 | 153 | 154 | @memory.cache 155 | def mixed_layer_depth(expt): 156 | ## Load MLD from expt 157 | varlist = get_variables(expt, "ocean_month.nc") 158 | if "mld" in varlist: 159 | MLD = get_nc_variable(expt, "ocean_month.nc", "mld", n=10) 160 | 161 | # Average over last 10 time slices - prefer to do this by year. 
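    # NB: if "mld" is not in varlist above, MLD is never assigned and the
    # time mean below will raise a NameError.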
162 | MLD = MLD.mean("time") 163 | 164 | return MLD 165 | -------------------------------------------------------------------------------- /cosima_cookbook/distributed.py: -------------------------------------------------------------------------------- 1 | import os, socket, getpass 2 | from distributed import Client, LocalCluster 3 | 4 | from itertools import product 5 | import numpy as np 6 | import xarray as xr 7 | 8 | from tqdm import tqdm_notebook 9 | 10 | 11 | def start_cluster(diagnostics_port=0): 12 | "Set up a LocalCluster for distributed" 13 | 14 | hostname = socket.gethostname() 15 | n_workers = os.cpu_count() // 2 16 | cluster = LocalCluster( 17 | ip="localhost", 18 | n_workers=n_workers, 19 | diagnostics_port=diagnostics_port, 20 | memory_limit=6e9, 21 | ) 22 | client = Client(cluster) 23 | 24 | params = { 25 | "bokeh_port": cluster.scheduler.services["bokeh"].port, 26 | "user": getpass.getuser(), 27 | "scheduler_ip": cluster.scheduler.ip, 28 | "hostname": hostname, 29 | } 30 | 31 | print( 32 | "If the link to the dashboard below doesn't work, run this command on a local terminal to set up a SSH tunnel:" 33 | ) 34 | print() 35 | print( 36 | " ssh -N -L {bokeh_port}:{scheduler_ip}:{bokeh_port} {hostname}.nci.org.au -l {user}".format( 37 | **params 38 | ) 39 | ) 40 | 41 | return client 42 | 43 | 44 | def compute_by_block(dsx): 45 | """ """ 46 | 47 | # determine index key for each chunk 48 | slices = [] 49 | for chunks in dsx.chunks: 50 | L = [ 51 | 0, 52 | ] + list(np.cumsum(chunks)) 53 | slices.append([slice(a, b) for a, b in (zip(L[:-1], L[1:]))]) 54 | indexes = list(product(*slices)) 55 | 56 | # allocate memory to receive result 57 | if isinstance(dsx, xr.DataArray): 58 | result = xr.zeros_like(dsx).load() 59 | else: 60 | result = np.zeros(dsx.shape) 61 | 62 | # evaluate each chunk one at a time 63 | for index in tqdm_notebook(indexes, leave=False): 64 | block = dsx.__getitem__(index).compute() 65 | result.__setitem__(index, block) 66 | 67 | return result 68 | -------------------------------------------------------------------------------- /cosima_cookbook/memory.py: -------------------------------------------------------------------------------- 1 | """ 2 | Caching 3 | 4 | The memory object lives in this module. 5 | Other components of the cookbook access by 6 | 7 | from ..memory import memory 8 | """ 9 | 10 | from joblib import Memory 11 | 12 | import os, getpass, tempfile 13 | 14 | username = getpass.getuser() 15 | 16 | 17 | # pick up cachedir from an environment variable? 18 | # Append username to prevent clashes with others users 19 | cachedir = os.path.join(tempfile.gettempdir(), username) 20 | memory = Memory(cachedir=cachedir, verbose=0) 21 | -------------------------------------------------------------------------------- /cosima_cookbook/netcdf_utils.py: -------------------------------------------------------------------------------- 1 | def find_record_dimension(d): 2 | """Find the record dimension (i.e. 
time) in a netCDF4 Dataset.""" 3 | 4 | for dim in d.dimensions: 5 | if d.dimensions[dim].isunlimited(): 6 | return dim 7 | 8 | return None 9 | 10 | 11 | def find_dimension_with_attribute(d, attribute, value): 12 | """Find a matching dimension with attribute=value, or None.""" 13 | 14 | for dim in d.dimensions: 15 | if dim not in d.variables: 16 | continue 17 | 18 | if getattr(d.variables[dim], attribute, None) == value: 19 | return dim 20 | 21 | return None 22 | 23 | 24 | def find_time_dimension(d): 25 | """Find a time dimension in a netCDF4 Dataset.""" 26 | 27 | # this is a bit heuristic, but we cascade through some checks, guided by 28 | # the CF conventions 29 | 30 | dim = find_dimension_with_attribute(d, "standard_name", "time") 31 | if dim is not None: 32 | return dim 33 | 34 | dim = find_dimension_with_attribute(d, "axis", "T") 35 | if dim is not None: 36 | return dim 37 | 38 | dim = find_record_dimension(d) 39 | if dim is not None: 40 | return dim 41 | 42 | for dim in d.dimensions: 43 | if dim.lower() == "time": 44 | return dim 45 | 46 | # CF conventions also suggests the units attribute, 47 | # but time_bounds may have the same units, and a false positive 48 | # here could be very confusing... 49 | return None 50 | -------------------------------------------------------------------------------- /cosima_cookbook/plots/__init__.py: -------------------------------------------------------------------------------- 1 | from .lineplots import * 2 | from .overturning import * 3 | from .maps import sea_surface_temperature, sea_surface_salinity, mixed_layer_depth 4 | 5 | # __all__ = ['wind_stress'] 6 | -------------------------------------------------------------------------------- /cosima_cookbook/plots/lineplots.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import cosima_cookbook as cc 3 | from tqdm import tqdm_notebook 4 | import IPython.display 5 | 6 | 7 | def wind_stress(expts=[]): 8 | """ 9 | Plot zonally averaged wind stress. 10 | 11 | Parameters 12 | ---------- 13 | expts : str or list of str 14 | Experiment name(s). 15 | """ 16 | 17 | if not isinstance(expts, list): 18 | expts = [expts] 19 | 20 | # computing 21 | results = [] 22 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"): 23 | result = {"mean_tau_x": cc.diagnostics.mean_tau_x(expt), "expt": expt} 24 | results.append(result) 25 | 26 | IPython.display.clear_output() 27 | 28 | plt.figure(figsize=(12, 6)) 29 | 30 | # plotting 31 | for result in results: 32 | mean_tau_x = result["mean_tau_x"] 33 | expt = result["expt"] 34 | plt.plot(mean_tau_x, mean_tau_x.yu_ocean, linewidth=2, label=expt) 35 | plt.ylim([-70, 65]) 36 | plt.xlim([-0.08, 0.20]) 37 | plt.ylabel("Latitude ($^\circ$N)") 38 | plt.xlabel("Stress (N m$^{-2}$)") 39 | plt.legend(fontsize=10, loc="best") 40 | 41 | 42 | def annual_scalar(expts=[], variables=[]): 43 | """ 44 | Calculate and plot annual average of variable(s) for experiment(s). 45 | 46 | Parameters 47 | ---------- 48 | expts : str or list of str 49 | Experiment name(s). 50 | variable : str or list of str 51 | Variable name(s). 
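
    Each variable is plotted in its own figure, with one line per experiment,
    using annual averages from cc.diagnostics.annual_scalar.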
52 | """ 53 | 54 | if not isinstance(expts, list): 55 | expts = [expts] 56 | 57 | if not isinstance(variables, list): 58 | variables = [variables] 59 | 60 | # computing 61 | results = [] 62 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"): 63 | annual_average = cc.diagnostics.annual_scalar(expt, variables) 64 | 65 | result = {"annual_average": annual_average, "expt": expt} 66 | results.append(result) 67 | 68 | IPython.display.clear_output() 69 | 70 | # plotting each variable in a separate plot 71 | for variable in variables: 72 | plt.figure(figsize=(12, 6)) 73 | 74 | for result in results: 75 | annual_average = result["annual_average"] 76 | expt = result["expt"] 77 | 78 | annual_average[variable].plot(label=expt) 79 | 80 | plt.title(annual_average[variable].long_name) 81 | plt.legend(fontsize=10, bbox_to_anchor=(1, 1), loc="best", borderaxespad=0.0) 82 | 83 | plt.xlabel("Time") 84 | 85 | 86 | def drake_passage(expts=[]): 87 | """ 88 | Plot Drake Passage transport. 89 | 90 | Parameters 91 | ---------- 92 | expts : str or list of str 93 | Experiment name(s). 94 | """ 95 | 96 | plt.figure(figsize=(12, 6)) 97 | 98 | if not isinstance(expts, list): 99 | expts = [expts] 100 | 101 | # computing 102 | results = [] 103 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"): 104 | transport = cc.diagnostics.drake_passage(expt) 105 | 106 | result = {"transport": transport, "expt": expt} 107 | results.append(result) 108 | 109 | IPython.display.clear_output() 110 | 111 | # plotting 112 | for result in results: 113 | transport = result["transport"] 114 | expt = result["expt"] 115 | transport.plot(label=expt) 116 | 117 | plt.title("Drake Passage Transport") 118 | plt.xlabel("Time") 119 | plt.ylabel("Transport (Sv)") 120 | plt.legend(fontsize=10, loc="best") 121 | 122 | 123 | def bering_strait(expts=[]): 124 | """ 125 | Plot Bering Strait transport. 126 | 127 | Parameters 128 | ---------- 129 | expts : str or list of str 130 | Experiment name(s). 131 | """ 132 | 133 | plt.figure(figsize=(12, 6)) 134 | 135 | if not isinstance(expts, list): 136 | expts = [expts] 137 | 138 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"): 139 | transport = cc.diagnostics.bering_strait(expt) 140 | transport.plot(label=expt) 141 | 142 | IPython.display.clear_output() 143 | 144 | plt.title("Bering Strait Transport") 145 | plt.xlabel("Time") 146 | plt.ylabel("Transport (Sv)") 147 | plt.legend(fontsize=10, loc="best") 148 | 149 | 150 | def aabw(expts=[]): 151 | """ 152 | Plot timeseries of AABW transport measured at 55S. 153 | 154 | Parameters 155 | ---------- 156 | expts : str or list of str 157 | Experiment name(s). 158 | """ 159 | 160 | plt.figure(figsize=(12, 6)) 161 | 162 | if not isinstance(expts, list): 163 | expts = [expts] 164 | 165 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"): 166 | psi_aabw = cc.diagnostics.calc_aabw(expt) 167 | psi_aabw.plot(label=expt) 168 | 169 | IPython.display.clear_output() 170 | 171 | plt.title("AABW Transport at 40S") 172 | plt.xlabel("Time") 173 | plt.ylabel("Transport (Sv)") 174 | plt.legend(fontsize=10, loc="best") 175 | 176 | 177 | def amoc(expts=[]): 178 | """ 179 | Plot timeseries of AMOC transport measured at 26N. 180 | 181 | Parameters 182 | ---------- 183 | expts : str or list of str 184 | Experiment name(s). 
185 | """ 186 | 187 | plt.figure(figsize=(12, 6)) 188 | 189 | if not isinstance(expts, list): 190 | expts = [expts] 191 | 192 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"): 193 | psi_amoc = cc.diagnostics.calc_amoc(expt) 194 | psi_amoc.plot(label=expt) 195 | 196 | IPython.display.clear_output() 197 | 198 | plt.title("AMOC Transport at 26N") 199 | plt.xlabel("Time") 200 | plt.ylabel("Transport (Sv)") 201 | plt.legend(fontsize=10, loc="best") 202 | 203 | 204 | def amoc_south(expts=[]): 205 | """ 206 | Plot timeseries of AMOC transport measured at 35S. 207 | 208 | Parameters 209 | ---------- 210 | expts : str or list of str 211 | Experiment name(s). 212 | """ 213 | 214 | plt.figure(figsize=(12, 6)) 215 | 216 | if not isinstance(expts, list): 217 | expts = [expts] 218 | 219 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"): 220 | psi_amoc_south = cc.diagnostics.calc_amoc_south(expt) 221 | psi_amoc_south.plot(label=expt) 222 | 223 | IPython.display.clear_output() 224 | 225 | plt.title("AMOC Transport at 35S") 226 | plt.xlabel("Time") 227 | plt.ylabel("Transport (Sv)") 228 | plt.legend(fontsize=10, loc="best") 229 | -------------------------------------------------------------------------------- /cosima_cookbook/plots/maps.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import cosima_cookbook as cc 3 | from tqdm import tqdm_notebook 4 | 5 | import IPython.display 6 | 7 | 8 | def sea_surface_temperature(expts=[], resolution=1): 9 | """ 10 | Plot a map of SST from last decade of run. 11 | """ 12 | 13 | if not isinstance(expts, list): 14 | expts = [expts] 15 | 16 | # computing 17 | results = [] 18 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"): 19 | SST, SSTdiff = cc.diagnostics.sea_surface_temperature(expt, resolution) 20 | 21 | result = {"SST": SST, "SSTdiff": SSTdiff, "expt": expt} 22 | results.append(result) 23 | 24 | IPython.display.clear_output() 25 | 26 | # plotting 27 | for result in results: 28 | SST = result["SST"] 29 | SSTdiff = result["SSTdiff"] 30 | expt = result["expt"] 31 | 32 | plt.figure(figsize=(12, 4)) 33 | plt.subplot(121) 34 | SST.plot() 35 | plt.title(expt) 36 | plt.subplot(122) 37 | SSTdiff.plot(robust=True) 38 | plt.title(expt) 39 | 40 | 41 | def sea_surface_salinity(expts=[], resolution=1): 42 | """ 43 | Plot a map of SSS from last decade of run. 44 | """ 45 | 46 | if not isinstance(expts, list): 47 | expts = [expts] 48 | 49 | # computing 50 | results = [] 51 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"): 52 | SSS, SSSdiff = cc.diagnostics.sea_surface_salinity(expt, resolution) 53 | 54 | result = {"SSS": SSS, "SSSdiff": SSSdiff, "expt": expt} 55 | results.append(result) 56 | 57 | IPython.display.clear_output() 58 | 59 | # plotting 60 | for result in results: 61 | SSS = result["SSS"] 62 | SSSdiff = result["SSSdiff"] 63 | expt = result["expt"] 64 | 65 | plt.figure(figsize=(12, 4)) 66 | plt.subplot(121) 67 | SSS.plot() 68 | plt.title(expt) 69 | plt.subplot(122) 70 | SSSdiff.plot(robust=True) 71 | plt.title(expt) 72 | 73 | 74 | def mixed_layer_depth(expts=[]): 75 | """ 76 | Plot a map of MLD from last decade of run. 
77 | """ 78 | 79 | if not isinstance(expts, list): 80 | expts = [expts] 81 | 82 | # computing 83 | results = [] 84 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"): 85 | MLD = cc.diagnostics.mixed_layer_depth(expt) 86 | 87 | result = {"MLD": MLD, "expt": expt} 88 | results.append(result) 89 | 90 | IPython.display.clear_output() 91 | 92 | # plotting 93 | for result in results: 94 | MLD = result["MLD"] 95 | expt = result["expt"] 96 | 97 | plt.figure(figsize=(6, 4)) 98 | MLD.plot() 99 | plt.title(expt) 100 | -------------------------------------------------------------------------------- /cosima_cookbook/plots/overturning.py: -------------------------------------------------------------------------------- 1 | import cosima_cookbook as cc 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | from tqdm import tqdm_notebook 5 | 6 | import IPython.display 7 | 8 | 9 | def psi_avg(expts, n=10, clev=np.arange(-20, 20, 2)): 10 | if not isinstance(expts, list): 11 | expts = [expts] 12 | 13 | # computing 14 | results = [] 15 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"): 16 | psi_avg = cc.diagnostics.psi_avg(expt, n) 17 | 18 | result = {"psi_avg": psi_avg, "expt": expt} 19 | results.append(result) 20 | 21 | IPython.display.clear_output() 22 | 23 | # plotting 24 | for result in results: 25 | psi_avg = result["psi_avg"] 26 | expt = result["expt"] 27 | 28 | plt.figure(figsize=(10, 5)) 29 | plt.contourf( 30 | psi_avg.grid_yu_ocean, 31 | psi_avg.potrho, 32 | psi_avg, 33 | cmap=plt.cm.PiYG, 34 | levels=clev, 35 | extend="both", 36 | ) 37 | cb = plt.colorbar(orientation="vertical", shrink=0.7) 38 | 39 | cb.ax.set_xlabel("Sv") 40 | plt.contour( 41 | psi_avg.grid_yu_ocean, 42 | psi_avg.potrho, 43 | psi_avg, 44 | levels=clev, 45 | colors="k", 46 | linewidths=0.25, 47 | ) 48 | plt.contour( 49 | psi_avg.grid_yu_ocean, 50 | psi_avg.potrho, 51 | psi_avg, 52 | levels=[ 53 | 0.0, 54 | ], 55 | colors="k", 56 | linewidths=0.5, 57 | ) 58 | plt.gca().invert_yaxis() 59 | 60 | plt.ylim((1037.5, 1034)) 61 | plt.ylabel("Potential Density (kg m$^{-3}$)") 62 | plt.xlabel("Latitude ($^\circ$N)") 63 | plt.xlim([-75, 85]) 64 | plt.title("Overturning in %s" % expt) 65 | 66 | 67 | def zonal_mean(expts, variable, n=10, resolution=1): 68 | if not isinstance(expts, list): 69 | expts = [expts] 70 | 71 | # computing 72 | results = [] 73 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"): 74 | zonal_mean, zonal_diff = cc.diagnostics.zonal_mean( 75 | expt, variable, n, resolution 76 | ) 77 | 78 | result = {"zonal_mean": zonal_mean, "zonal_diff": zonal_diff, "expt": expt} 79 | results.append(result) 80 | 81 | IPython.display.clear_output() 82 | 83 | # plotting 84 | for result in results: 85 | zonal_mean = result["zonal_mean"] 86 | zonal_diff = result["zonal_diff"] 87 | expt = result["expt"] 88 | 89 | plt.figure(figsize=(12, 5)) 90 | plt.subplot(121) 91 | zonal_mean.plot() 92 | plt.title(expt) 93 | plt.gca().invert_yaxis() 94 | plt.title("{}: Zonal Mean {}".format(expt, variable)) 95 | plt.subplot(122) 96 | zonal_diff.plot() 97 | plt.title(expt) 98 | plt.gca().invert_yaxis() 99 | plt.title("{}: Zonal Mean {} Change".format(expt, variable)) 100 | -------------------------------------------------------------------------------- /cosima_cookbook/plots/scalar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/cosima_cookbook/plots/scalar.py 
-------------------------------------------------------------------------------- /cosima_cookbook/querying.py: -------------------------------------------------------------------------------- 1 | """querying.py 2 | 3 | Functions for data discovery. 4 | 5 | """ 6 | 7 | import logging 8 | import os.path 9 | import pandas as pd 10 | from sqlalchemy import func, distinct, or_ 11 | from sqlalchemy.orm import aliased 12 | from sqlalchemy.sql.selectable import subquery 13 | import warnings 14 | import xarray as xr 15 | 16 | from . import database 17 | from .database import NCExperiment, NCFile, CFVariable, NCVar, Keyword 18 | from .database import NCAttribute, NCAttributeString 19 | 20 | 21 | class VariableNotFoundError(Exception): 22 | pass 23 | 24 | 25 | class QueryWarning(UserWarning): 26 | pass 27 | 28 | 29 | # By default all ambiguous queries will raise an exception 30 | warnings.simplefilter("error", category=QueryWarning, lineno=0, append=False) 31 | 32 | 33 | def get_experiments( 34 | session, 35 | experiment=True, 36 | keywords=None, 37 | variables=None, 38 | all=False, 39 | exptname=None, 40 | **kwargs, 41 | ): 42 | """ 43 | Returns a DataFrame of all experiments and the number of netCDF4 files contained 44 | within each experiment. 45 | 46 | Optionally one or more keywords can be specified, and only experiments with all the 47 | specified keywords will be return. The keyword strings can utilise SQL wildcard 48 | characters, "%" and "_", to match multiple keywords. 49 | 50 | Optionally variables can also be specified, and only experiments containing all those 51 | variables will be returned. 52 | 53 | All metadata fields will be returned if all=True, or individual metadata fields 54 | can be selected by passing field=True, where available fields are: 55 | contact, email, created, description, notes, url and root_dir 56 | """ 57 | 58 | # Determine which attributes to return. Special case experiment 59 | # as this is the only one that defaults to True 60 | columns = [] 61 | if experiment: 62 | columns.append(NCExperiment.experiment) 63 | 64 | for f in NCExperiment.metadata_keys + ["root_dir"]: 65 | # Explicitly don't support returning keyword metadata 66 | if f == "keywords": 67 | continue 68 | if kwargs.get(f, all): 69 | columns.append(getattr(NCExperiment, f)) 70 | 71 | q = ( 72 | session.query(*columns, func.count(NCFile.experiment_id).label("ncfiles")) 73 | .join(NCFile.experiment) 74 | .group_by(NCFile.experiment_id) 75 | ) 76 | 77 | if keywords is not None: 78 | if isinstance(keywords, str): 79 | keywords = [keywords] 80 | q = q.filter(*(NCExperiment.keywords.like(k) for k in keywords)) 81 | 82 | if variables is not None: 83 | if isinstance(variables, str): 84 | variables = [variables] 85 | 86 | expt_query = ( 87 | session.query(NCExperiment.id) 88 | .join(NCFile.experiment) 89 | .join(NCFile.ncvars) 90 | .join(NCVar.variable) 91 | .group_by(NCExperiment.experiment) 92 | .having(func.count(distinct(CFVariable.name)) == len(variables)) 93 | .filter(CFVariable.name.in_(variables)) 94 | ) 95 | 96 | q = q.filter(NCExperiment.id.in_(expt_query)) 97 | 98 | if exptname is not None: 99 | q = q.filter(NCExperiment.experiment == exptname) 100 | 101 | return pd.DataFrame(q, columns=[c["name"] for c in q.column_descriptions]) 102 | 103 | 104 | def get_ncfiles(session, experiment): 105 | """ 106 | Returns a DataFrame of all netcdf files for a given experiment. 
107 | """ 108 | 109 | q = ( 110 | session.query(NCFile.ncfile, NCFile.index_time) 111 | .join(NCFile.experiment) 112 | .filter(NCExperiment.experiment == experiment) 113 | .order_by(NCFile.ncfile) 114 | ) 115 | 116 | return pd.DataFrame(q, columns=[c["name"] for c in q.column_descriptions]) 117 | 118 | 119 | def get_keywords(session, experiment=None): 120 | """ 121 | Returns a set of all keywords, and optionally only for a given experiment 122 | """ 123 | 124 | if experiment is not None: 125 | q = session.query(NCExperiment).filter(NCExperiment.experiment == experiment) 126 | return q.scalar().keywords 127 | else: 128 | q = session.query(Keyword) 129 | return {r.keyword for r in q} 130 | 131 | 132 | def get_variables( 133 | session, 134 | experiment=None, 135 | frequency=None, 136 | cellmethods=None, 137 | inferred=False, 138 | search=None, 139 | ): 140 | """ 141 | Returns a DataFrame of variables for a given experiment if experiment 142 | name is specified, and optionally a given diagnostic frequency. 143 | If inferred is True and some experiment specific properties inferred from other 144 | fields are also returned: coordinate, model and restart. 145 | - coordinate: True if coordinate, False otherwise 146 | - model: model from which variable output, possible values are ocean, 147 | atmosphere, land, ice, or none if can't be identified 148 | - restart: True if variable from a restart file, False otherwise 149 | If experiment is not specified all variables for all experiments are returned, 150 | without experiment specific data. 151 | Specifying an array of search strings will limit variables returned to any 152 | containing any of the search terms in variable name, long name, or standard name. 153 | """ 154 | 155 | # Default columns 156 | columns = [ 157 | CFVariable.name, 158 | CFVariable.long_name, 159 | CFVariable.units, 160 | ] 161 | 162 | if experiment: 163 | # Create aliases so as to able to join to the NCAttribute table 164 | # twice, for the name and value 165 | ncas1 = aliased(NCAttributeString) 166 | ncas2 = aliased(NCAttributeString) 167 | subq = ( 168 | session.query( 169 | NCAttribute.ncvar_id.label("ncvar_id"), 170 | ncas2.value.label("value"), 171 | ) 172 | .join(ncas1, NCAttribute.name_id == ncas1.id) 173 | .join(ncas2, NCAttribute.value_id == ncas2.id) 174 | .filter(ncas1.value == "cell_methods") 175 | ).subquery(name="attrs") 176 | 177 | columns.extend( 178 | [ 179 | NCFile.frequency, 180 | NCFile.ncfile, 181 | subq.c.value.label("cell_methods"), 182 | func.count(NCFile.ncfile).label("# ncfiles"), 183 | func.min(NCFile.time_start).label("time_start"), 184 | func.max(NCFile.time_end).label("time_end"), 185 | ] 186 | ) 187 | 188 | if inferred: 189 | # Return inferred information 190 | columns.extend( 191 | [ 192 | CFVariable.is_coordinate.label("coordinate"), 193 | NCFile.model, 194 | NCFile.is_restart.label("restart"), 195 | ] 196 | ) 197 | 198 | # Base query 199 | q = ( 200 | session.query(*columns) 201 | .join(NCFile.experiment) 202 | .join(NCFile.ncvars) 203 | .join(NCVar.variable) 204 | ) 205 | 206 | if experiment is not None: 207 | # Join against the NCAttribute table above. 
Outer join ensures 208 | # variables without cell_methods attribute still appear with NULL 209 | q = q.outerjoin(subq, subq.c.ncvar_id == NCVar.id) 210 | 211 | q = q.order_by(NCFile.frequency, CFVariable.name, NCFile.time_start, NCFile.ncfile) 212 | q = q.group_by(CFVariable, NCFile.frequency) 213 | 214 | if experiment is not None: 215 | q = q.group_by(subq.c.value) 216 | q = q.filter(NCExperiment.experiment == experiment) 217 | 218 | # Filtering on frequency only makes sense if experiment is specified 219 | if frequency is not None: 220 | q = q.filter(NCFile.frequency == frequency) 221 | 222 | # Filtering on cell methods only makes sense if experiment is specified 223 | if cellmethods is not None: 224 | q = q.filter(subq.c.value == cellmethods) 225 | 226 | if search is not None: 227 | # Filter based on search term appearing in name, long_name or standard_name 228 | if isinstance(search, str): 229 | search = [ 230 | search, 231 | ] 232 | q = q.filter( 233 | or_( 234 | column.contains(word) 235 | for word in search 236 | for column in ( 237 | CFVariable.name, 238 | CFVariable.long_name, 239 | CFVariable.standard_name, 240 | ) 241 | ) 242 | ) 243 | 244 | default_dtypes = { 245 | "# ncfiles": "int64", 246 | "coordinate": "boolean", 247 | "model": "category", 248 | "restart": "boolean", 249 | } 250 | 251 | df = pd.DataFrame(q, columns=[c["name"] for c in q.column_descriptions]) 252 | 253 | return df.astype({k: v for k, v in default_dtypes.items() if k in df.columns}) 254 | 255 | 256 | def get_frequencies(session, experiment=None): 257 | """ 258 | Returns a DataFrame with all diagnostics frequencies and optionally 259 | for a given experiment. 260 | """ 261 | 262 | if experiment is None: 263 | q = session.query(NCFile.frequency).group_by(NCFile.frequency) 264 | else: 265 | q = ( 266 | session.query(NCFile.frequency) 267 | .join(NCFile.experiment) 268 | .filter(NCExperiment.experiment == experiment) 269 | .group_by(NCFile.frequency) 270 | ) 271 | 272 | return pd.DataFrame(q, columns=[c["name"] for c in q.column_descriptions]) 273 | 274 | 275 | def getvar( 276 | expt, 277 | variable, 278 | session, 279 | ncfile=None, 280 | start_time=None, 281 | end_time=None, 282 | n=None, 283 | frequency=None, 284 | attrs=None, 285 | attrs_unique=None, 286 | return_dataset=False, 287 | **kwargs, 288 | ): 289 | """For a given experiment, return an xarray DataArray containing the 290 | specified variable. 291 | 292 | expt - text string indicating the name of the experiment 293 | variable - text string indicating the name of the variable to load 294 | session - a database session created by cc.database.create_session() 295 | ncfile - an optional text string indicating the pattern for filenames 296 | to load. All filenames containing this string will match, so 297 | be specific. '/' can be used to match the start of the 298 | filename, and '%' is a wildcard character. 299 | start_time - only load data after this date. specify as a text string, 300 | e.g. '1900-01-01' 301 | end_time - only load data before this date. specify as a text string, 302 | e.g. '1900-01-01' 303 | n - after all other queries, restrict the total number of files to the 304 | first n. 
pass a negative value to restrict to the last n 305 | frequency - specify frequency to disambiguate identical variables saved 306 | at different temporal resolution 307 | attrs - a dictionary of attribute names and their values that must be 308 | present on the returned variables 309 | attrs_unique - a dictionary of attribute names and their values that 310 | must be unique on the returned variables. Defaults to 311 | {'cell_methods': 'time: mean'} and should not generally be 312 | changed. 313 | return_dataset - if True, return xarray.Dataset, containing the 314 | requested variable, along with its time_bounds, 315 | if present. Otherwise (default), return 316 | xarray.DataArray containing only the variable 317 | 318 | Note that if start_time and/or end_time are used, the time range 319 | of the resulting dataset may not be bounded exactly on those 320 | values, depending on where the underlying files start/end. Use 321 | dataset.sel() to exactly select times from the dataset. 322 | 323 | Other kwargs are passed through to xarray.open_mfdataset, including: 324 | 325 | chunks - Override any chunking by passing a chunks dictionary. 326 | decode_times - Time decoding can be disabled by passing decode_times=False 327 | 328 | """ 329 | 330 | if attrs_unique is None: 331 | attrs_unique = {"cell_methods": "time: mean"} 332 | 333 | ncfiles = _ncfiles_for_variable( 334 | expt, 335 | variable, 336 | session, 337 | ncfile, 338 | start_time, 339 | end_time, 340 | n, 341 | frequency, 342 | attrs, 343 | attrs_unique, 344 | ) 345 | 346 | variables = [variable] 347 | if return_dataset: 348 | # we know at least one variable was returned, so we can index ncfiles 349 | # ask for the extra variables associated with cell_methods, etc. 350 | variables += _bounds_vars_for_variable(*ncfiles[0]) 351 | 352 | # chunking -- use first row/file and assume it's the same across the whole dataset 353 | xr_kwargs = {"chunks": _parse_chunks(ncfiles[0].NCVar)} 354 | xr_kwargs.update(kwargs) 355 | 356 | def _preprocess(d): 357 | if variable in d.coords: 358 | # just return coordinate data 359 | return d 360 | 361 | # otherwise, figure out if we need any ancilliary data 362 | # like time_bounds 363 | return d[variables] 364 | 365 | ncfiles = list(str(f.NCFile.ncfile_path) for f in ncfiles) 366 | 367 | ds = xr.open_mfdataset( 368 | ncfiles, 369 | parallel=True, 370 | combine="by_coords", 371 | preprocess=_preprocess, 372 | **xr_kwargs, 373 | ) 374 | 375 | if return_dataset: 376 | da = ds 377 | else: 378 | # if we want a dataarray, we'll strip off the extra info 379 | da = ds[variable] 380 | 381 | # Check the chunks given were actually in the data 382 | chunks = xr_kwargs.get("chunks", None) 383 | if chunks is not None: 384 | missing_chunk_dims = set(chunks.keys()) - set(da.dims) 385 | if len(missing_chunk_dims) > 0: 386 | logging.warning( 387 | f"chunking along dimensions {missing_chunk_dims} is not possible. 
Available dimensions for chunking are {set(da.dims)}" 388 | ) 389 | 390 | da.attrs["ncfiles"] = ncfiles 391 | 392 | # Get experiment metadata, delete extraneous fields and add 393 | # to attributes 394 | metadata = get_experiments( 395 | session, experiment=False, exptname=expt, all=True 396 | ).to_dict(orient="records")[0] 397 | 398 | metadata = { 399 | k: v 400 | for k, v in metadata.items() 401 | if k not in ["ncfiles", "index", "root_dir"] 402 | and (v is not None and v != "None" and v != "") 403 | } 404 | 405 | da.attrs.update(metadata) 406 | 407 | return da 408 | 409 | 410 | def _bounds_vars_for_variable(ncfile, ncvar): 411 | """Return a list of names for a variable and its bounds""" 412 | 413 | variables = [] 414 | 415 | if "cell_methods" not in ncvar.attrs: 416 | # no cell methods, so no need to look for bounds 417 | return variables 418 | 419 | # [cell methods] is a string attribute comprising a list of 420 | # blank-separated words of the form "name: method" 421 | cell_methods = iter(ncvar.attrs["cell_methods"].split()) 422 | 423 | # for the moment, we're only looking for a time mean 424 | for dim, method in zip(cell_methods, cell_methods): 425 | if not (dim[:-1] == "time" and method == "mean"): 426 | continue 427 | 428 | bounds_var = ncfile.ncvars["time"].attrs.get("bounds") 429 | if bounds_var is not None: 430 | variables.append(bounds_var) 431 | 432 | return variables 433 | 434 | 435 | def _ncfiles_for_variable( 436 | expt, 437 | variable, 438 | session, 439 | ncfile=None, 440 | start_time=None, 441 | end_time=None, 442 | n=None, 443 | frequency=None, 444 | attrs=None, 445 | attrs_unique=None, 446 | ): 447 | """Return a list of (NCFile, NCVar) pairs corresponding to the 448 | database objects for a given variable. 449 | 450 | Optionally, pass ncfile, start_time, end_time, frequency, attrs, 451 | attrs_unique, or n for additional disambiguation (see getvar 452 | documentation for their semantics). 
453 | """ 454 | 455 | if attrs is None: 456 | attrs = {} 457 | 458 | if attrs_unique is None: 459 | attrs_unique = {} 460 | 461 | f, v = database.NCFile, database.NCVar 462 | q = ( 463 | session.query(f, v) 464 | .join(f.ncvars) 465 | .join(f.experiment) 466 | .filter(v.varname == variable) 467 | .filter(database.NCExperiment.experiment == expt) 468 | .filter(f.present) 469 | .order_by(f.time_start) 470 | ) 471 | 472 | # additional disambiguation 473 | if ncfile is not None: 474 | q = q.filter(f.ncfile.like("%" + ncfile)) 475 | if start_time is not None: 476 | q = q.filter(f.time_end >= start_time) 477 | if end_time is not None: 478 | q = q.filter(f.time_start <= end_time) 479 | if frequency is not None: 480 | q = q.filter(f.frequency == frequency) 481 | 482 | # Attributes that are required to be unique to ensure disambiguation 483 | for attr, val in attrs_unique.items(): 484 | # If default attribute present and not currently in filter 485 | # add to attributes filter 486 | if attr not in attrs: 487 | if q.filter(v.ncvar_attrs.any(name=attr, value=val)).first(): 488 | attrs.update({attr: val}) 489 | 490 | # requested specific attribute values 491 | for attr, val in attrs.items(): 492 | q = q.filter(v.ncvar_attrs.any(name=attr, value=val)) 493 | 494 | ncfiles = q.all() 495 | 496 | if n is not None: 497 | if n > 0: 498 | ncfiles = ncfiles[:n] 499 | else: 500 | ncfiles = ncfiles[n:] 501 | 502 | # ensure we actually got a result 503 | if not ncfiles: 504 | raise VariableNotFoundError( 505 | "No files were found containing '{}' in the '{}' experiment".format( 506 | variable, expt 507 | ) 508 | ) 509 | 510 | # check whether the results are unique 511 | for attr in attrs_unique: 512 | unique_attributes = set() 513 | for f in ncfiles: 514 | if attr in f.NCVar.attrs: 515 | unique_attributes.add(str(f.NCVar.attrs[attr])) 516 | else: 517 | unique_attributes.add(None) 518 | if len(unique_attributes) > 1: 519 | warnings.warn( 520 | f"Your query returns variables from files with different {attr}: {unique_attributes}. " 521 | "This could lead to unexpected behaviour! Disambiguate by passing " 522 | f"attrs={{'{attr}':''}} to getvar, specifying the desired attribute value.", 523 | QueryWarning, 524 | ) 525 | 526 | unique_freqs = set(f.NCFile.frequency for f in ncfiles) 527 | if len(unique_freqs) > 1: 528 | warnings.warn( 529 | f"Your query returns files with differing frequencies: {unique_freqs}. " 530 | "This could lead to unexpected behaviour! 
Disambiguate by passing " 531 | "frequency= to getvar, specifying the desired frequency.", 532 | QueryWarning, 533 | ) 534 | 535 | return ncfiles 536 | 537 | 538 | def _parse_chunks(ncvar): 539 | """Parse an NCVar, returning a dictionary mapping dimensions to chunking along that dimension.""" 540 | 541 | try: 542 | # this should give either a list, or 'None' (other values will raise an exception) 543 | var_chunks = eval(ncvar.chunking) 544 | if var_chunks is not None: 545 | return dict(zip(eval(ncvar.dimensions), var_chunks)) 546 | 547 | return None 548 | 549 | except NameError: 550 | # chunking could be 'contiguous', which doesn't evaluate 551 | return None 552 | -------------------------------------------------------------------------------- /cosima_cookbook/summary/__init__.py: -------------------------------------------------------------------------------- 1 | from .nml_diff import * 2 | from .nml_summary import * 3 | 4 | # __all__ = [] 5 | -------------------------------------------------------------------------------- /cosima_cookbook/summary/nml_diff.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Tools to read a set of namelist files and find their superset and difference. 4 | # The functions are general-purpose (i.e. no ACCESS-OM2-related assumptions). 5 | # Andrew Kiss https://github.com/aekiss 6 | 7 | 8 | import f90nml # from http://f90nml.readthedocs.io/en/latest/ 9 | import os 10 | 11 | 12 | def nmldict(nmlfnames): 13 | """Return dict of the groups/group members of multiple 14 | FORTRAN namelist files. 15 | 16 | Input: tuple of any number of namelist file path strings 17 | (non-existent files are silently ignored) 18 | Output: dict with key:value pairs where 19 | key is filename path string 20 | value is complete Namelist from filename 21 | """ 22 | nmlfnames = set(nmlfnames) # remove any duplicates from nmlfnames 23 | 24 | nmlall = {} # dict keys are nml paths, values are Namelist dicts 25 | for nml in nmlfnames: 26 | if os.path.exists(nml): 27 | nmlall[nml] = f90nml.read(nml) 28 | return nmlall 29 | 30 | 31 | def superset(nmlall): 32 | """Return dict of groups/group members present in any of the input Namelists. 33 | 34 | Input: dict with key:value pairs where 35 | key is arbitrary (typically a filename string) 36 | value is Namelist (typically from filename) 37 | Output: dict with key:value pairs where 38 | key is group name (including all groups present in any input Namelist) 39 | value is Namelist for group (with nothing common to all other files) 40 | """ 41 | nmlsuperset = {} 42 | for nml in nmlall: 43 | nmlsuperset.update(nmlall[nml]) 44 | # nmlsuperset now contains all groups that were in any nml 45 | for group in nmlsuperset: 46 | # to avoid the next bit changing the original groups 47 | nmlsuperset[group] = nmlsuperset[group].copy() 48 | for nml in nmlall: 49 | if group in nmlall[nml]: 50 | nmlsuperset[group].update(nmlall[nml][group]) 51 | # nmlsuperset groups now contain all keys that were in any nml 52 | return nmlsuperset 53 | 54 | 55 | def nmldiff(nmlall): 56 | """Remove every group/group member that is the same in all file Namelists. 57 | 58 | Parameter 59 | --------- 60 | Input : dict 61 | (e.g. 
returned by nmldict) with key:value pairs where 62 | key is filename path string 63 | value is complete Namelist from filename 64 | Output : dict 65 | modified input dict with key:value pairs where 66 | key is filename strings 67 | value is Namelist from filename, with any group/group member 68 | common to all other files removed 69 | """ 70 | 71 | # Create diff by removing common groups/members from nmlall. 72 | # This is complicated by the fact group names / member names may differ 73 | # or be absent across different nml files. 74 | 75 | # First make a superset that has all group names and group members that 76 | # appear in any nml file 77 | nmlsuperset = superset(nmlall) 78 | 79 | # now go through nmlall and remove any groups / members from nmlall that 80 | # are identical to superset in all nmls 81 | # first delete any group members that are common to all nmls, then delete 82 | # any empty groups common to all nmls 83 | for group in nmlsuperset: 84 | # init: whether group is present and identical in all namelist files 85 | deletegroup = True 86 | for nml in nmlall: 87 | deletegroup = deletegroup and (group in nmlall[nml]) 88 | if deletegroup: # group present in all namelist files 89 | for mem in nmlsuperset[group]: 90 | # init: whether group member is present and identical 91 | # in all namelist files 92 | deletemem = True 93 | for nml in nmlall: 94 | deletemem = deletemem and (mem in nmlall[nml][group]) 95 | if deletemem: # group member is present in all namelist files 96 | for nml in nmlall: 97 | # ... now check if values match in all namelist files 98 | deletemem = deletemem and ( 99 | nmlall[nml][group][mem] == nmlsuperset[group][mem] 100 | ) 101 | if deletemem: 102 | for nml in nmlall: 103 | # delete mem from this group in all nmls 104 | del nmlall[nml][group][mem] 105 | for nml in nmlall: 106 | deletegroup = deletegroup and (len(nmlall[nml][group]) == 0) 107 | if deletegroup: 108 | # group is common to all nmls and now empty so delete 109 | for nml in nmlall: 110 | del nmlall[nml][group] 111 | return nmlall 112 | -------------------------------------------------------------------------------- /cosima_cookbook/summary/nml_summary.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Create tabulated summary of namelists for a set of files. 4 | # These functions assume we are dealing with ACCESS-OM2 data. 5 | # Andrew Kiss https://github.com/aekiss 6 | 7 | 8 | import cosima_cookbook as cc 9 | from IPython.display import display, Markdown 10 | import os 11 | 12 | 13 | def summary_md( 14 | configuration, 15 | expts, 16 | path="/g/data3/hh5/tmp/cosima/", 17 | search="https://github.com/OceansAus/access-om2/search?&q=", 18 | nmls=[ 19 | "atmosphere/input_atm.nml", 20 | "ice/cice_in.nml", 21 | "ice/input_ice.nml", 22 | "ice/input_ice_gfdl.nml", 23 | "ice/input_ice_monin.nml", 24 | "ocean/input.nml", 25 | ], 26 | ): 27 | for nml in nmls: 28 | epaths = [] 29 | for e in expts: 30 | # NB: only look at output000 31 | epaths.append(os.path.join(path, configuration, e, "output000", nml)) 32 | nmld = cc.nmldiff(cc.nmldict(tuple(epaths))) 33 | epaths = list(nmld.keys()) # redefine to handle missing paths 34 | epaths.sort() 35 | nmldss = cc.superset(nmld) 36 | display(Markdown("### " + nml + " namelist differences")) 37 | if len(nmldss) == 0: 38 | display(Markdown("no differences")) 39 | else: 40 | mdstr = "| group | variable | " 41 | for e in epaths: 42 | mdstr = mdstr + e.replace("/", "/
") + " | " 43 | mdstr = mdstr + "\n|---|:--|" + ":-:|" * len(epaths) 44 | for group in sorted(nmldss): 45 | for mem in sorted(nmldss[group]): 46 | mdstr = mdstr + "\n| " + "&" + group + " | " + mem + " | " 47 | # search doesn't work on github submodules or forks 48 | # '[' + group + '](' + search + group + ')' + ' | ' + \ 49 | # '[' + mem + '](' + search + mem + ')' + ' | ' 50 | for e in epaths: 51 | if group in nmld[e]: 52 | if mem in nmld[e][group]: 53 | mdstr = mdstr + repr(nmld[e][group][mem]) 54 | mdstr = mdstr + " | " 55 | display(Markdown(mdstr)) 56 | return 57 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx==2 2 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # http://www.sphinx-doc.org/en/master/config 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | # 13 | #import os 14 | #import sys 15 | #sys.path.insert(0, os.path.abspath('../../cosima-cookbook')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'cosima-cookbook' 21 | copyright = '2019, James Munroe' 22 | author = 'James Munroe' 23 | 24 | # The full version, including alpha/beta/rc tags 25 | # release = '0.3.1' 26 | 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | 'sphinx.ext.autodoc', 35 | 'sphinx.ext.napoleon', 36 | ] 37 | 38 | napoleon_google_docstring = False 39 | napoleon_use_param = False 40 | napoleon_use_ivar = True 41 | 42 | # Add any paths that contain templates here, relative to this directory. 43 | templates_path = ['_templates'] 44 | 45 | # List of patterns, relative to source directory, that match files and 46 | # directories to ignore when looking for source files. 47 | # This pattern also affects html_static_path and html_extra_path. 48 | exclude_patterns = [] 49 | 50 | 51 | # -- Options for HTML output ------------------------------------------------- 52 | 53 | # The theme to use for HTML and HTML Help pages. See the documentation for 54 | # a list of builtin themes. 55 | # 56 | html_theme = 'sphinx_rtd_theme' 57 | 58 | # Add any paths that contain custom static files (such as style sheets) here, 59 | # relative to this directory. They are copied after the builtin static files, 60 | # so a file named "default.css" will overwrite the builtin "default.css". 61 | html_static_path = ['_static'] 62 | -------------------------------------------------------------------------------- /docs/source/cosima_cookbook.rst: -------------------------------------------------------------------------------- 1 | cosima\_cookbook package 2 | ======================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | cosima_cookbook.diagnostics 10 | cosima_cookbook.plots 11 | cosima_cookbook.summary 12 | 13 | Submodules 14 | ---------- 15 | 16 | cosima\_cookbook.database module 17 | -------------------------------- 18 | 19 | .. automodule:: cosima_cookbook.database 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | 24 | cosima\_cookbook.database\_utils module 25 | --------------------------------------- 26 | 27 | .. automodule:: cosima_cookbook.database_utils 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | 32 | cosima\_cookbook.date\_utils module 33 | ----------------------------------- 34 | 35 | .. automodule:: cosima_cookbook.date_utils 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | 40 | cosima\_cookbook.distributed module 41 | ----------------------------------- 42 | 43 | .. automodule:: cosima_cookbook.distributed 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | 48 | cosima\_cookbook.memory module 49 | ------------------------------ 50 | 51 | .. automodule:: cosima_cookbook.memory 52 | :members: 53 | :undoc-members: 54 | :show-inheritance: 55 | 56 | cosima\_cookbook.netcdf\_index module 57 | ------------------------------------- 58 | 59 | .. automodule:: cosima_cookbook.netcdf_index 60 | :members: 61 | :undoc-members: 62 | :show-inheritance: 63 | 64 | cosima\_cookbook.netcdf\_utils module 65 | ------------------------------------- 66 | 67 | .. 
automodule:: cosima_cookbook.netcdf_utils 68 | :members: 69 | :undoc-members: 70 | :show-inheritance: 71 | 72 | cosima\_cookbook.querying module 73 | -------------------------------- 74 | 75 | .. automodule:: cosima_cookbook.querying 76 | :members: 77 | :undoc-members: 78 | :show-inheritance: 79 | 80 | 81 | Module contents 82 | --------------- 83 | 84 | .. automodule:: cosima_cookbook 85 | :members: 86 | :undoc-members: 87 | :show-inheritance: 88 | -------------------------------------------------------------------------------- /docs/source/getting_started.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | Getting Started 3 | =============== 4 | 5 | The cookbook consists of a Python 3 package that contains infrastructure 6 | for indexing COSIMA model output and convenient methods for searching for 7 | and loading the data into `xarray `_ data structures. 8 | 9 | Some users may find it sufficient to browse through the examples and tutorials 10 | in the `COSIMA recipes `_ repository. 11 | The Jupyter notebooks that can be downloaded from COSIMA recipes need this package 12 | (called cosima_cookbook) to be installed. 13 | 14 | Choosing your platform 15 | ====================== 16 | 17 | COSIMA ocean and ice models are typically run on `NCI `_, an HPC 18 | computing centre in Australia. The output data is very large and it is 19 | assumed that this data resides on an NCI storage system. 20 | 21 | The cookbook is supported on two NCI systems: 22 | 23 | #. `Virtual Desktop Infrastructure (VDI) `_ 24 | #. `gadi (gadi.nci.org.au) `_ 25 | 26 | Connecting 27 | ========== 28 | 29 | For both VDI and gadi, scripts are used to start a `jupyter notebook `_ 30 | or `jupyter lab `_ session on the chosen system 31 | and automatically create an `ssh tunnel `_ 32 | so that the jupyter session can be opened in your local browser using a URL 33 | that appears to be on your own local machine. 34 | 35 | Scripts for this purpose are provided by the CLEX CMS team in this repository: 36 | 37 | https://github.com/coecms/nci_scripts 38 | 39 | Clone the repository to your local computer. There are instructions in the repository 40 | on the requirements for each script and how to use them. 41 | 42 | Alternatively, if you are using the VDI Strudel environment and accessing the VDI 43 | through a virtual desktop, you can load the same Python conda environment that is 44 | used in the scripts above and start a jupyter notebook session like so: 45 | :: 46 | 47 | module use /g/data3/hh5/public/modules 48 | module load conda/analysis3 49 | 50 | jupyter notebook 51 | 52 | Finding data 53 | ============ 54 | 55 | Most of the infrastructure the COSIMA Cookbook provides revolves around indexing 56 | data output from COSIMA models and providing a Python-based API to access the 57 | data in a convenient and straightforward way. 58 | 59 | There are graphical user interface (GUI) tools to help with data discovery and 60 | exploration. There is a 61 | `tutorial `_ 62 | in the COSIMA recipes repository which demonstrates the available tools. 63 | 64 | Tutorials and examples 65 | ====================== 66 | 67 | COSIMA recipes provides `tutorials `_ 68 | and `documented examples `_ 69 | which can be used to learn how to use the Cookbook, and for ideas and inspiration for your own analysis.
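For example, once model output has been indexed into a database, a session can be opened and a variable loaded into xarray with the querying API. The database path, experiment name and variable name below are placeholders:
::

    import cosima_cookbook as cc

    # open a session against an existing index database
    session = cc.database.create_session('/path/to/cosima.db')

    # list the variables indexed for one experiment
    cc.querying.get_variables(session, experiment='my_experiment')

    # load a single variable as an xarray DataArray
    temp = cc.querying.getvar('my_experiment', 'temp', session)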
70 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. cosima-cookbook documentation master file, created by 2 | sphinx-quickstart on Mon Aug 12 20:35:06 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | COSIMA Cookbook 7 | =============== 8 | 9 | Ocean and ice models are evaluated and compared by using diagnostics. 10 | `COSIMA recipes `_ is a collection of 11 | diagnostics by the `COSIMA `_ community. 12 | 13 | The recipes rely on infrastructure that is provided by the COSIMA Cookbook. 14 | The cookbook is under active development on 15 | Github: `COSIMA-Cookbook `_ 16 | 17 | .. toctree:: 18 | :maxdepth: 2 19 | :caption: Contents: 20 | 21 | getting_started 22 | related_projects 23 | -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | cosima_cookbook 2 | =============== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | cosima_cookbook 8 | -------------------------------------------------------------------------------- /docs/source/related_projects.rst: -------------------------------------------------------------------------------- 1 | Related Projects 2 | ================ 3 | 4 | COSIMA-Cookbook is a solution for efficient calculation of diagnostics from the 5 | output of high-resolution ocean and ice models. It is targeted 6 | at the COSIMA community where models (e.g. MOM5) are run on NCI 7 | resources such as raijin. 8 | 9 | The problem of dealing with increasingly large output from atmosphere, 10 | climate, and ocean models is being addressed by several groups. 11 | Active development is currently underway as part of 12 | the PangeoData_ initiative. COSIMA-Cookbook logically sits on top 13 | of that project. 14 | 15 | 16 | Underlying Python technologies 17 | ------------------------------ 18 | 19 | Dask 20 | 21 | xarray 22 | 23 | Jupyter 24 | 25 | 26 | .. _PangeoData: https://pangeo-data.github.io 27 | -------------------------------------------------------------------------------- /readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Build documentation in the docs/ directory with Sphinx 9 | sphinx: 10 | configuration: docs/source/conf.py 11 | 12 | # Optionally set the version of Python and requirements required to build your docs 13 | python: 14 | version: 3.7 15 | install: 16 | - requirements: docs/requirements.txt 17 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # install dependencies from setup.py, and the cookbook in editable mode 2 | -e .[build] 3 | -------------------------------------------------------------------------------- /sandbox/alembic/README: -------------------------------------------------------------------------------- 1 | Generic single-database configuration. 2 | 3 | Make sure to edit sqlalchemy.url in alembic.ini!
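For example, pointing Alembic at a local SQLite cookbook database would use a line along these lines in alembic.ini (the path is a placeholder):

sqlalchemy.url = sqlite:////path/to/cosima-cookbook.db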
-------------------------------------------------------------------------------- /sandbox/alembic/env.py: -------------------------------------------------------------------------------- 1 | from logging.config import fileConfig 2 | 3 | from sqlalchemy import engine_from_config 4 | from sqlalchemy import pool 5 | 6 | from alembic import context 7 | 8 | # this is the Alembic Config object, which provides 9 | # access to the values within the .ini file in use. 10 | config = context.config 11 | 12 | # Interpret the config file for Python logging. 13 | # This line sets up loggers basically. 14 | fileConfig(config.config_file_name) 15 | 16 | # add your model's MetaData object here 17 | # for 'autogenerate' support 18 | # from myapp import mymodel 19 | # target_metadata = mymodel.Base.metadata 20 | import cosima_cookbook as cc 21 | target_metadata = cc.database.Base.metadata 22 | 23 | # other values from the config, defined by the needs of env.py, 24 | # can be acquired: 25 | # my_important_option = config.get_main_option("my_important_option") 26 | # ... etc. 27 | 28 | 29 | def run_migrations_offline(): 30 | """Run migrations in 'offline' mode. 31 | 32 | This configures the context with just a URL 33 | and not an Engine, though an Engine is acceptable 34 | here as well. By skipping the Engine creation 35 | we don't even need a DBAPI to be available. 36 | 37 | Calls to context.execute() here emit the given string to the 38 | script output. 39 | 40 | """ 41 | url = config.get_main_option("sqlalchemy.url") 42 | context.configure( 43 | url=url, 44 | target_metadata=target_metadata, 45 | literal_binds=True, 46 | dialect_opts={"paramstyle": "named"}, 47 | ) 48 | 49 | with context.begin_transaction(): 50 | context.run_migrations() 51 | 52 | 53 | def run_migrations_online(): 54 | """Run migrations in 'online' mode. 55 | 56 | In this scenario we need to create an Engine 57 | and associate a connection with the context. 58 | 59 | """ 60 | connectable = engine_from_config( 61 | config.get_section(config.config_ini_section), 62 | prefix="sqlalchemy.", 63 | poolclass=pool.NullPool, 64 | ) 65 | 66 | with connectable.connect() as connection: 67 | context.configure( 68 | connection=connection, target_metadata=target_metadata 69 | ) 70 | 71 | with context.begin_transaction(): 72 | context.run_migrations() 73 | 74 | 75 | if context.is_offline_mode(): 76 | run_migrations_offline() 77 | else: 78 | run_migrations_online() 79 | -------------------------------------------------------------------------------- /sandbox/alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | ${imports if imports else ""} 11 | 12 | # revision identifiers, used by Alembic. 
13 | revision = ${repr(up_revision)} 14 | down_revision = ${repr(down_revision)} 15 | branch_labels = ${repr(branch_labels)} 16 | depends_on = ${repr(depends_on)} 17 | 18 | 19 | def upgrade(): 20 | ${upgrades if upgrades else "pass"} 21 | 22 | 23 | def downgrade(): 24 | ${downgrades if downgrades else "pass"} 25 | -------------------------------------------------------------------------------- /sandbox/alembic/versions/16223b92479e_add_keywords.py: -------------------------------------------------------------------------------- 1 | """add keywords 2 | 3 | Revision ID: 16223b92479e 4 | Revises: 5 | Create Date: 2020-06-30 13:22:36.407339 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | from sqlalchemy import orm 11 | 12 | import cosima_cookbook as cc 13 | 14 | # revision identifiers, used by Alembic. 15 | revision = '16223b92479e' 16 | down_revision = None 17 | branch_labels = None 18 | depends_on = None 19 | 20 | 21 | def upgrade(): 22 | bind = op.get_bind() 23 | session = orm.Session(bind=bind) 24 | 25 | # ### commands auto generated by Alembic - please adjust! ### 26 | op.create_table('keywords', 27 | sa.Column('id', sa.Integer(), nullable=False), 28 | sa.Column('_keyword', sa.String(collation='NOCASE'), nullable=False), 29 | sa.PrimaryKeyConstraint('id') 30 | ) 31 | op.create_index(op.f('ix_keywords__keyword'), 'keywords', ['_keyword'], unique=True) 32 | op.create_table('keyword_assoc', 33 | sa.Column('expt_id', sa.Integer(), nullable=True), 34 | sa.Column('keyword_id', sa.Integer(), nullable=True), 35 | sa.ForeignKeyConstraint(['expt_id'], ['experiments.id'], ), 36 | sa.ForeignKeyConstraint(['keyword_id'], ['keywords.id'], ) 37 | ) 38 | # ### end Alembic commands ### 39 | op.execute('PRAGMA user_version=3') 40 | 41 | # reindex metadata for experiments 42 | for expt in session.query(cc.database.NCExperiment): 43 | cc.database.update_metadata(expt, session) 44 | session.commit() 45 | 46 | def downgrade(): 47 | # ### commands auto generated by Alembic - please adjust! 
### 48 | op.drop_table('keyword_assoc') 49 | op.drop_index(op.f('ix_keywords__keyword'), table_name='keywords') 50 | op.drop_table('keywords') 51 | # ### end Alembic commands ### 52 | op.execute('PRAGMA user_version=2') 53 | -------------------------------------------------------------------------------- /sandbox/diag-vis.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from sys import argv 3 | 4 | import cosima_cookbook as cc 5 | import pandas as pd 6 | from sqlalchemy import select, distinct, bindparam 7 | 8 | from bokeh.io import curdoc 9 | from bokeh.layouts import column 10 | from bokeh.models.callbacks import CustomJS 11 | from bokeh.models.sources import ColumnDataSource 12 | from bokeh.models.tools import BoxSelectTool, HoverTool, TapTool 13 | from bokeh.models.widgets import Select, Button, Div 14 | import bokeh.palettes 15 | from bokeh.plotting import figure 16 | from bokeh.transform import factor_cmap 17 | 18 | if len(argv) < 2: 19 | raise Exception('Usage: bokeh serve diag-vis.py --args ') 20 | db = argv[1] 21 | 22 | conn, tables = cc.database.create_database(db) 23 | 24 | expt_query = select([distinct(tables['ncfiles'].c.experiment)]) 25 | vars_query = select([distinct(tables['ncvars'].c.variable)]) \ 26 | .select_from(tables['ncvars'].join(tables['ncfiles'])) \ 27 | .where(tables['ncfiles'].c.experiment == bindparam('expt')) 28 | data_query = select([tables['ncfiles'].c.ncfile, tables['ncfiles'].c.run, tables['ncvars'].c.variable, 29 | tables['ncfiles'].c.time_start, tables['ncfiles'].c.time_end, tables['ncfiles'].c.frequency]) \ 30 | .select_from(tables['ncfiles'].join(tables['ncvars'])) \ 31 | .where(tables['ncfiles'].c.experiment == bindparam('expt')) \ 32 | .where(tables['ncfiles'].c.time_start is not None) \ 33 | .where(tables['ncfiles'].c.frequency != 'static') \ 34 | .order_by(tables['ncvars'].c.variable, tables['ncfiles'].c.time_start) 35 | 36 | expts = [e[0] for e in conn.execute(expt_query)] 37 | 38 | def get_data(expt): 39 | data = conn.execute(data_query, expt=expt).fetchall() 40 | df = pd.DataFrame(data, columns=['ncfile', 'run', 'variable', 'time_start', 'time_end', 'frequency']) 41 | df[['time_start', 'time_end']] = df[['time_start', 'time_end']].applymap( 42 | lambda s: datetime.strptime(s, '%Y-%m-%d %H:%M:%S')) 43 | 44 | return df 45 | 46 | def print_selected(div): 47 | return CustomJS(args=dict(div=div), code=""" 48 | var source = cb_obj; 49 | var unique_vars = {}; 50 | for (var i of source.selected['1d'].indices) { 51 | var v = source.data['variable'][i]; 52 | if (v in unique_vars) { 53 | unique_vars[v]['time_start'] = Math.min(unique_vars[v]['time_start'], source.data['time_start'][i]); 54 | unique_vars[v]['time_end'] = Math.max(unique_vars[v]['time_end'], source.data['time_end'][i]); 55 | } else { 56 | unique_vars[v] = { time_start: source.data['time_start'][i], 57 | time_end: source.data['time_end'][i] }; 58 | } 59 | } 60 | 61 | var text = ''; 62 | for (var p in unique_vars) { 63 | var ts = new Date(unique_vars[p]['time_start']); 64 | var te = new Date(unique_vars[p]['time_end']); 65 | text = text.concat(''); 66 | } 67 | text = text.concat('
</table>
') 68 | div.text = text; 69 | """) 70 | 71 | 72 | # create widgets 73 | expt_select = Select(title='Experiment:', options=expts, value=expts[0]) 74 | refresh = Button(label='Update') 75 | div = Div(width=1000) 76 | 77 | # hover tools 78 | hover = HoverTool(tooltips=[ 79 | ('variable', '@variable'), ('start', '@time_start{%F}'), 80 | ('end', '@time_end{%F}'), ('run', '@run'), ('file', '@ncfile')], 81 | formatters={ 82 | 'time_start': 'datetime', 83 | 'time_end': 'datetime' 84 | }) 85 | tap = TapTool() 86 | box_select = BoxSelectTool() 87 | tools = [hover, box_select, tap, 'pan', 'box_zoom', 'wheel_zoom', 'reset'] 88 | 89 | df = get_data(expt_select.value) 90 | freqs = df.frequency.unique() 91 | cmap = factor_cmap('frequency', palette=bokeh.palettes.Category10[10], factors=freqs) 92 | cds = ColumnDataSource(df, callback=print_selected(div)) 93 | 94 | p = figure(y_range=df.variable.unique(), x_range=(df.iloc[0].time_start, df.iloc[-1].time_end), 95 | title=expt_select.value, tools=tools) 96 | cmap = factor_cmap('frequency', palette=bokeh.palettes.Category10[10], factors=freqs) 97 | hb = p.hbar(y='variable', left='time_start', right='time_end', height=0.4, source=cds, 98 | fill_color=cmap, legend='frequency') 99 | 100 | # callback routines to repopulate list of variables 101 | def get_vars(expt): 102 | return [e[0] for e in conn.execute(vars_query, expt=expt)] 103 | 104 | def refresh_output(): 105 | # get new data 106 | df = get_data(expt_select.value) 107 | freqs = df.frequency.unique() 108 | cmap = factor_cmap('frequency', palette=bokeh.palettes.Category10[10], factors=freqs) 109 | 110 | # update figure itself 111 | p.y_range.factors = list(df.variable.unique()) 112 | (p.x_range.start, p.x_range.end) = (df.iloc[0].time_start, df.iloc[-1].time_end) 113 | p.title.text = expt_select.value 114 | 115 | # update data source for plot 116 | hb.data_source.data = hb.data_source.from_df(df) 117 | # update colourmap if necessary 118 | hb.glyph.fill_color = cmap 119 | 120 | refresh.on_click(refresh_output) 121 | 122 | # layout and show 123 | layout = column(expt_select, refresh, p, div) 124 | curdoc().add_root(layout) 125 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='cosima_cookbook', 5 | description='Diagnostics for COSIMA: Consortium for Ocean-Sea Ice Modelling in Australia', 6 | url='https://github.com/COSIMA/cosima-cookbook', 7 | author='COSIMA', 8 | license='Apache License 2.0', 9 | use_scm_version=True, 10 | packages=find_packages(), 11 | setup_requires=["setuptools_scm"], 12 | 13 | install_requires=[ 14 | 'dask', 15 | 'xarray', 16 | 'numpy', 17 | 'matplotlib', 18 | 'bokeh', 19 | 'netcdf4', 20 | 'tqdm', 21 | 'sqlalchemy<2.0', 22 | 'cftime', 23 | 'f90nml', 24 | 'joblib', 25 | 'ipywidgets', 26 | 'lxml', 27 | ], 28 | entry_points={ 29 | 'console_scripts': [ 30 | 'cosima_cookbook-update_db = cosima_cookbook.database_update:main', 31 | ] 32 | }, 33 | extras_require = { 34 | 'build': ['distributed', 'pytest', 'pytest-cov'] 35 | } 36 | ) 37 | -------------------------------------------------------------------------------- /test/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dask.distributed import Client 3 | 4 | from cosima_cookbook import database 5 | 6 | 7 | @pytest.fixture(scope="module") 8 | def client(): 9 | client = 
Client(processes=False, dashboard_address=None) 10 | yield client 11 | client.close() 12 | 13 | 14 | @pytest.fixture(scope="function") 15 | def session_db(tmp_path): 16 | db = tmp_path / "test.db" 17 | s = database.create_session(str(db)) 18 | yield s, db 19 | 20 | s.close() 21 | -------------------------------------------------------------------------------- /test/data/explore/duplicate/one/metadata.yaml: -------------------------------------------------------------------------------- 1 | contact: The ACCESS Oracle 2 | email: oracle@example.com 3 | created: 2018-01-01 4 | description: Description 5 | notes: Notes 6 | keywords: 7 | - cosima 8 | - ACCESS-OM2-01 9 | - ryf9091 10 | -------------------------------------------------------------------------------- /test/data/explore/duplicate/one/ocean/ocean_age.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/duplicate/one/ocean/ocean_age.nc -------------------------------------------------------------------------------- /test/data/explore/one/atmosphere/ty_trans.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/one/atmosphere/ty_trans.nc -------------------------------------------------------------------------------- /test/data/explore/one/ice/hi_m.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/one/ice/hi_m.nc -------------------------------------------------------------------------------- /test/data/explore/one/metadata.yaml: -------------------------------------------------------------------------------- 1 | contact: The ACCESS Oracle 2 | email: oracle@example.com 3 | created: 2018-01-01 4 | description: Description 5 | notes: Notes 6 | keywords: 7 | - cosima 8 | - ACCESS-OM2-01 9 | - ryf9091 10 | -------------------------------------------------------------------------------- /test/data/explore/one/ocean/ocean.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/one/ocean/ocean.nc -------------------------------------------------------------------------------- /test/data/explore/one/restart/ocean_velocity_advection.res.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/one/restart/ocean_velocity_advection.res.nc -------------------------------------------------------------------------------- /test/data/explore/two/atm/hi_m.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/two/atm/hi_m.nc -------------------------------------------------------------------------------- /test/data/explore/two/metadata.yaml: -------------------------------------------------------------------------------- 1 | contact: The ACCESS Oracle 2 | email: oracle@example.com 3 | created: 2020-01-01 4 | description: Description again! 
5 | notes: Notes 6 | keywords: 7 | - cosima 8 | - another-keyword 9 | -------------------------------------------------------------------------------- /test/data/explore/two/nomodel/ty_trans.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/two/nomodel/ty_trans.nc -------------------------------------------------------------------------------- /test/data/explore/two/ocn/ocean.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/two/ocn/ocean.nc -------------------------------------------------------------------------------- /test/data/explore/two/ocn/ocean_month.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/two/ocn/ocean_month.nc -------------------------------------------------------------------------------- /test/data/explore/two/restart/ocean_velocity_advection.res.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/two/restart/ocean_velocity_advection.res.nc -------------------------------------------------------------------------------- /test/data/indexing/alternate/experiment_a/test2.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/alternate/experiment_a/test2.nc -------------------------------------------------------------------------------- /test/data/indexing/broken_file/output000/test.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/broken_file/output000/test.nc -------------------------------------------------------------------------------- /test/data/indexing/broken_metadata/metadata.yaml: -------------------------------------------------------------------------------- 1 | this: is: broken! 
2 | -------------------------------------------------------------------------------- /test/data/indexing/broken_metadata/test1.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/broken_metadata/test1.nc -------------------------------------------------------------------------------- /test/data/indexing/empty_file/output000/empty.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/empty_file/output000/empty.nc -------------------------------------------------------------------------------- /test/data/indexing/longnames/output000/test1.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/longnames/output000/test1.nc -------------------------------------------------------------------------------- /test/data/indexing/longnames/output000/test2.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/longnames/output000/test2.nc -------------------------------------------------------------------------------- /test/data/indexing/metadata/metadata.yaml: -------------------------------------------------------------------------------- 1 | contact: The ACCESS Oracle 2 | email: oracle@example.com 3 | created: 2018-01-01 4 | url: https://github.com/COSIMA/oracle 5 | description: >- 6 | Attempted spinup, using salt flux fix 7 | https://arccss.slack.com/archives/C6PP0GU9Y/p1515460656000124 and 8 | https://github.com/mom-ocean/MOM5/pull/208/commits/9f4ee6f8b72b76c96a25bf26f3f6cdf773b424d2 9 | from the start. 
Used mushy ice from July year 1 onwards to avoid 10 | vertical thermo error in cice 11 | https://arccss.slack.com/archives/C6PP0GU9Y/p1515842016000079 12 | notes: >- 13 | Stripy salt restoring: 14 | https://github.com/OceansAus/access-om2/issues/74 tripole seam bug: 15 | https://github.com/OceansAus/access-om2/issues/86 requires dt=300s 16 | in May, dt=240s in Aug to maintain CFL in CICE near tripoles (storms 17 | in those months in 8485RYF); all other months work with dt=400s 18 | -------------------------------------------------------------------------------- /test/data/indexing/metadata/test1.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/metadata/test1.nc -------------------------------------------------------------------------------- /test/data/indexing/multiple/experiment_a/test1.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/multiple/experiment_a/test1.nc -------------------------------------------------------------------------------- /test/data/indexing/multiple/experiment_b/test1.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/multiple/experiment_b/test1.nc -------------------------------------------------------------------------------- /test/data/indexing/single_broken_file/output000/broken.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/single_broken_file/output000/broken.nc -------------------------------------------------------------------------------- /test/data/indexing/single_broken_file/output000/test.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/single_broken_file/output000/test.nc -------------------------------------------------------------------------------- /test/data/indexing/symlinked/experiment_a: -------------------------------------------------------------------------------- 1 | ../multiple/experiment_a -------------------------------------------------------------------------------- /test/data/indexing/time/t1.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/time/t1.nc -------------------------------------------------------------------------------- /test/data/indexing/time/t2.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/time/t2.nc -------------------------------------------------------------------------------- /test/data/indexing/time/t3.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/time/t3.nc 
-------------------------------------------------------------------------------- /test/data/indexing/time/t4.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/time/t4.nc -------------------------------------------------------------------------------- /test/data/indexing/time/t5.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/time/t5.nc -------------------------------------------------------------------------------- /test/data/indexing/time_bounds/file001.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/time_bounds/file001.nc -------------------------------------------------------------------------------- /test/data/metadata/keywords/metadata.yaml: -------------------------------------------------------------------------------- 1 | contact: The ACCESS Oracle 2 | email: oracle@example.com 3 | created: 2018-01-01 4 | description: Description 5 | notes: Notes 6 | keywords: 7 | - cosima 8 | - ACCESS-OM2-01 9 | - ryf9091 10 | -------------------------------------------------------------------------------- /test/data/metadata/keywords/test1.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/metadata/keywords/test1.nc -------------------------------------------------------------------------------- /test/data/metadata/keywords2/metadata.yaml: -------------------------------------------------------------------------------- 1 | contact: The ACCESS Oracle 2 | email: oracle@example.com 3 | created: 2020-01-01 4 | description: Description again! 5 | notes: Notes 6 | keywords: 7 | - cosima 8 | - another-keyword 9 | -------------------------------------------------------------------------------- /test/data/metadata/keywords2/test1.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/metadata/keywords2/test1.nc -------------------------------------------------------------------------------- /test/data/metadata/string_keyword/metadata.yaml: -------------------------------------------------------------------------------- 1 | contact: The ACCESS Oracle 2 | email: oracle@example.com 3 | created: 2020-01-02 4 | description: String keywords 5 | notes: Notes 6 | keywords: cosima 7 | -------------------------------------------------------------------------------- /test/data/metadata/upcase/metadata.yaml: -------------------------------------------------------------------------------- 1 | contact: The ACCESS Oracle 2 | email: oracle@example.com 3 | created: 2018-01-01 4 | description: Description (with uppercase keywords!) 
5 | notes: Notes 6 | keywords: 7 | - COSIMA 8 | - access-om2-01 9 | - RYF9091 10 | -------------------------------------------------------------------------------- /test/data/ocean_sealevel.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/ocean_sealevel.nc -------------------------------------------------------------------------------- /test/data/querying/output000/hi_m.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/querying/output000/hi_m.nc -------------------------------------------------------------------------------- /test/data/querying/output000/ocean.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/querying/output000/ocean.nc -------------------------------------------------------------------------------- /test/data/querying/restart000/ty_trans.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/querying/restart000/ty_trans.nc -------------------------------------------------------------------------------- /test/data/querying_disambiguation/output000/ocean.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/querying_disambiguation/output000/ocean.nc -------------------------------------------------------------------------------- /test/data/querying_disambiguation/output000/ocean_month.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/querying_disambiguation/output000/ocean_month.nc -------------------------------------------------------------------------------- /test/data/update/experiment_a/test1.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/update/experiment_a/test1.nc -------------------------------------------------------------------------------- /test/data/update/experiment_b/test2.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/update/experiment_b/test2.nc -------------------------------------------------------------------------------- /test/test_database.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | import sqlalchemy as sa 4 | from cosima_cookbook import database 5 | 6 | 7 | @pytest.fixture 8 | def db_env(tmp_path): 9 | old_db = os.getenv("COSIMA_COOKBOOK_DB") 10 | db = tmp_path / "test.db" 11 | os.environ["COSIMA_COOKBOOK_DB"] = str(db) 12 | 13 | yield db 14 | 15 | # clean up by resetting the env var 16 | if old_db: 17 | os.environ["COSIMA_COOKBOOK_DB"] = old_db 18 | else: 19 | del os.environ["COSIMA_COOKBOOK_DB"] 20 | 21 | 22 | def test_default(tmp_path): 23 | db = 
tmp_path / "test.db" 24 | # override the NCI-specific default 25 | database.__DEFAULT_DB__ = str(db) 26 | 27 | s = database.create_session() 28 | 29 | assert db.exists() 30 | 31 | 32 | def test_env_var(db_env): 33 | # make sure we use the environment variable 34 | # override with no arguments supplied 35 | s = database.create_session() 36 | assert db_env.exists() 37 | 38 | 39 | def test_arg_override(tmp_path, db_env): 40 | # check that if we supply an argument, that 41 | # is used rather than the environment variable 42 | db = tmp_path / "test_other.db" 43 | s = database.create_session(str(db)) 44 | 45 | assert not db_env.exists() 46 | assert db.exists() 47 | 48 | 49 | def test_creation(session_db): 50 | """Test that a database file is created with a session 51 | when the session file doesn't exist.""" 52 | 53 | s, db = session_db 54 | assert db.exists() 55 | 56 | # we should be able to query against a table that exists 57 | # with no error 58 | s.execute("SELECT * FROM ncfiles") 59 | 60 | # but not a non-existent table 61 | with pytest.raises(sa.exc.OperationalError, match="no such table"): 62 | s.execute("SELECT * FROM ncfiles_notfound") 63 | 64 | 65 | def test_reopen(tmp_path): 66 | """Test that we can reopen a database of the correct version.""" 67 | 68 | db = tmp_path / "test.db" 69 | s = database.create_session(str(db)) 70 | 71 | s.close() 72 | s = database.create_session(str(db)) 73 | s.close() 74 | 75 | 76 | def test_outdated(tmp_path): 77 | """Test that we can't use an outdated database""" 78 | 79 | db = tmp_path / "test.db" 80 | s = database.create_session(str(db)) 81 | 82 | # check that the current version matches that defined in the module 83 | ver = s.execute("PRAGMA user_version").fetchone()[0] 84 | assert ver == database.__DB_VERSION__ 85 | 86 | # reset version to one prior 87 | s.execute("PRAGMA user_version={}".format(database.__DB_VERSION__ - 1)) 88 | s.close() 89 | 90 | # recreate the session 91 | with pytest.raises(Exception, match="Incompatible database versions"): 92 | s = database.create_session(str(db)) 93 | 94 | 95 | def test_outdated_notmodified(tmp_path): 96 | """Test that we don't try to modify an outdated database. 97 | This includes adding tables that don't yet exist because 98 | it's a previous version. 99 | """ 100 | 101 | # set up an empty database with a previous version 102 | db = tmp_path / "test.db" 103 | conn = sa.create_engine("sqlite:///" + str(db)).connect() 104 | conn.execute("PRAGMA user_version={}".format(database.__DB_VERSION__ - 1)) 105 | conn.close() 106 | 107 | # try to create the session 108 | # this should fail and not modify the existing database 109 | with pytest.raises(Exception): 110 | s = database.create_session(str(db)) 111 | 112 | # reopen the connection and ensure tables weren't created 113 | conn = sa.create_engine("sqlite:///" + str(db)).connect() 114 | with pytest.raises(sa.exc.OperationalError, match="no such table"): 115 | conn.execute("SELECT * FROM ncfiles") 116 | 117 | 118 | def test_delete_experiment(session_db): 119 | """Test that we can completely delete an experiment 120 | and its associated data. 
121 | """ 122 | 123 | session, db = session_db 124 | database.build_index("test/data/indexing/longnames", session) 125 | 126 | # make sure we actually did index something 127 | expt = ( 128 | session.query(database.NCExperiment) 129 | .filter(database.NCExperiment.experiment == "longnames") 130 | .one_or_none() 131 | ) 132 | assert expt is not None 133 | 134 | database.delete_experiment("longnames", session) 135 | expt = ( 136 | session.query(database.NCExperiment) 137 | .filter(database.NCExperiment.experiment == "longnames") 138 | .one_or_none() 139 | ) 140 | assert expt is None 141 | 142 | # check that all files are removed 143 | files = session.query(sa.func.count(database.NCFile.id)).scalar() 144 | assert files == 0 145 | 146 | # make sure all ncvars are removed 147 | vars = session.query(sa.func.count(database.NCVar.id)).scalar() 148 | assert vars == 0 149 | -------------------------------------------------------------------------------- /test/test_dates.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Copyright 2017 ARC Centre of Excellence for Climate Systems Science 5 | author: Aidan Heerdegen 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | from __future__ import print_function 18 | 19 | import pytest 20 | import sys, os, time, glob 21 | import shutil 22 | import pdb # Add pdb.set_trace() to set breakpoints 23 | import xarray as xr 24 | import numpy as np 25 | import cftime 26 | from datetime import datetime, timedelta 27 | 28 | from cosima_cookbook.date_utils import ( 29 | rebase_times, 30 | rebase_dataset, 31 | rebase_variable, 32 | rebase_shift_attr, 33 | format_datetime, 34 | parse_datetime, 35 | ) 36 | 37 | from xarray.testing import assert_equal 38 | 39 | verbose = True 40 | 41 | times = [] 42 | 43 | 44 | def setup_module(module): 45 | if verbose: 46 | print("setup_module module:%s" % module.__name__) 47 | if verbose: 48 | print("Python version: {}".format(sys.version)) 49 | # Put any setup code in here, like making temporary files 50 | # Make 5 years of a noleap calendar on the first of each month 51 | global times 52 | for y in range(1, 6): 53 | for m in range(1, 13): 54 | times.append( 55 | np.round( 56 | cftime.date2num( 57 | cftime.datetime(y, m, 1), 58 | units="days since 01-01-01", 59 | calendar="noleap", 60 | ), 61 | 8, 62 | ) 63 | ) 64 | times = np.array(times) 65 | 66 | 67 | def teardown_module(module): 68 | if verbose: 69 | print("teardown_module module:%s" % module.__name__) 70 | # Put any teardown code in here, like deleting temporary files 71 | 72 | 73 | def test_format_parse_datetime(): 74 | dates = [ 75 | cftime.num2date(t, units="days since 01-01-01", calendar="noleap") 76 | for t in times 77 | ] 78 | assert format_datetime(dates[0]) == "0001-01-01 00:00:00" 79 | assert format_datetime(dates[-1]) == "0005-12-01 00:00:00" 80 | 81 | for d in dates: 82 | assert parse_datetime(format_datetime(d), "noleap") == d 83 | 84 | dates = [ 85 | cftime.num2date(t, units="days 
since 01-01-01", calendar="proleptic_gregorian") 86 | for t in times 87 | ] 88 | assert format_datetime(dates[0]) == "0001-01-01 00:00:00" 89 | assert format_datetime(dates[-1]) == "0005-11-30 00:00:00" 90 | 91 | for d in dates: 92 | assert parse_datetime(format_datetime(d), "proleptic_gregorian") == d 93 | 94 | 95 | def test_rebase_times(): 96 | # Should be a 10 year offset between original times and rebased times 97 | assert not np.any( 98 | (times + 365 * 10) 99 | - rebase_times( 100 | times, "days since 1980-01-01", "noleap", "days since 1970-01-01" 101 | ) 102 | ) 103 | 104 | # Should be a -10 year offset between original times and rebased times 105 | assert not np.any( 106 | (times - 365 * 10) 107 | - rebase_times( 108 | times, "days since 1980-01-01", "noleap", "days since 1990-01-01" 109 | ) 110 | ) 111 | 112 | 113 | def test_rebase_variable(): 114 | timesvar = xr.DataArray( 115 | times, attrs={"units": "days since 1980-01-01", "calendar": "noleap"} 116 | ) 117 | 118 | print("att:", timesvar.attrs) 119 | 120 | # Test we can rebase with and without explicitly setting a calendar 121 | timesvar_rebased = rebase_variable(timesvar, target_units="days since 1970-01-01") 122 | assert timesvar_rebased.equals( 123 | rebase_variable(timesvar, "noleap", target_units="days since 1970-01-01") 124 | ) 125 | 126 | assert not timesvar.equals(timesvar_rebased) 127 | 128 | # Should be a 10 year offset between original times and rebased times 129 | assert not np.any((times + 365 * 10) - timesvar_rebased.values) 130 | # assert(not np.any((times + 365*10) - rebase_variable(timesvar, 'noleap', target_units='days since 1970-01-01').values)) 131 | 132 | with pytest.raises(ValueError): 133 | timesvar_rebased = rebase_variable( 134 | timesvar, "noleap", target_units="days since 1990-01-01" 135 | ) 136 | 137 | # Rebase with an offset otherwise would have negative dates 138 | timesvar_rebased = rebase_variable( 139 | timesvar, "noleap", target_units="days since 1990-01-01", offset=365 * 10 140 | ) 141 | 142 | # Values should be the same 143 | assert not np.any(times - timesvar_rebased.values) 144 | 145 | # But the rebase_shift_attr should be set to 10 years 146 | assert timesvar_rebased.attrs[rebase_shift_attr] == 365 * 10 147 | 148 | # Check we get back timesvar if rebased again with no arguments (rebases to previous 149 | # units and applies offset if required in this instance) 150 | assert timesvar.equals(rebase_variable(timesvar_rebased)) 151 | 152 | 153 | def test_matching_time_units(): 154 | testfile = "test/data/ocean_sealevel.nc" 155 | 156 | ds = xr.open_dataset(testfile, decode_times=False) 157 | target_units = "days since 1800-01-01" 158 | 159 | ds1 = rebase_dataset(ds, target_units) 160 | # s1.to_netcdf('tmp.nc') 161 | 162 | ds2 = rebase_dataset(ds1) 163 | # ds2.to_netcdf('tmp2.nc') 164 | 165 | # Rebasing again without target_units specified should 166 | # un-do previous rebase 167 | assert ds.equals(ds2) 168 | 169 | # An offset is required as the target units are ahead of the data in time 170 | target_units = "days since 2000-01-01" 171 | 172 | # Offset can be automatically generated as difference between target and src units 173 | ds1 = rebase_dataset(ds, target_units, offset="auto") 174 | ds2 = rebase_dataset(ds1) 175 | 176 | assert ds.equals(ds2) 177 | 178 | # Offset can be an integer, but need to know what units are being used, days, hours etc 179 | ds1 = rebase_dataset(ds, target_units, offset=100 * 365) 180 | ds2 = rebase_dataset(ds1) 181 | 182 | assert ds.equals(ds2) 183 | 184 | # Offset 
can be a datetime.timedelta object, but this would need some knowledge of 185 | # the calendar 186 | ds1 = rebase_dataset(ds, target_units, offset=timedelta(days=100 * 365)) 187 | ds2 = rebase_dataset(ds1) 188 | 189 | # A different offset will yield a different dataset, but upon rebasing a second time 190 | # should still be the same as the original regardless of offset. 191 | ds3 = rebase_dataset(ds, target_units, offset=timedelta(days=200 * 365)) 192 | ds4 = rebase_dataset(ds3) 193 | 194 | assert ds.equals(ds4) 195 | assert not ds1.equals(ds3) 196 | 197 | # Test graceful recovery if time_bounds missing. 198 | del ds["time_bounds"] 199 | ds3 = rebase_dataset(ds, target_units, offset=timedelta(days=200 * 365)) 200 | ds4 = rebase_dataset(ds3) 201 | 202 | assert ds.equals(ds4) 203 | assert not ds1.equals(ds3) 204 | 205 | ds = xr.open_dataset(testfile, decode_times=False)[["sea_level"]] 206 | target_units = "days since 1800-01-01" 207 | 208 | ds1 = rebase_dataset(ds, target_units) 209 | 210 | 211 | def test_chunking(): 212 | # An offset is required as the target units are ahead of the data in time 213 | target_units = "days since 2000-01-01" 214 | 215 | testfile = "test/data/ocean_sealevel.nc" 216 | 217 | ds = xr.open_dataset(testfile, decode_times=False, chunks={"time": 10}) 218 | target_units = "days since 1800-01-01" 219 | 220 | ds1 = rebase_dataset(ds, target_units) 221 | -------------------------------------------------------------------------------- /test/test_explore.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from datetime import datetime 4 | import os.path 5 | import shutil 6 | 7 | import xarray as xr 8 | import pandas as pd 9 | from pandas.testing import assert_frame_equal, assert_series_equal 10 | 11 | import cosima_cookbook as cc 12 | 13 | from cosima_cookbook.database import NCExperiment, NCFile 14 | 15 | 16 | def metadata_for_experiment( 17 | path, session, metadata_file=None, name="test", commit=True 18 | ): 19 | """Method to read metadata for an experiment without requiring 20 | the rest of the indexing infrastructure. 
21 | """ 22 | 23 | expt = NCExperiment(experiment=name, root_dir=path) 24 | 25 | # look for this experiment in the database 26 | q = ( 27 | session.query(NCExperiment) 28 | .filter(NCExperiment.experiment == expt.experiment) 29 | .filter(NCExperiment.root_dir == expt.root_dir) 30 | ) 31 | r = q.one_or_none() 32 | if r is not None: 33 | expt = r 34 | 35 | cc.database.update_metadata(expt, session, metadata_file) 36 | 37 | if commit: 38 | session.add(expt) 39 | session.commit() 40 | else: 41 | return expt 42 | 43 | 44 | @pytest.fixture(scope="module") 45 | def session(tmp_path_factory): 46 | # index test directory into temp database 47 | d = tmp_path_factory.mktemp("database") 48 | db = d / "test.db" 49 | session = cc.database.create_session(str(db)) 50 | 51 | # build index for entire module 52 | cc.database.build_index( 53 | [ 54 | "test/data/explore/one", 55 | "test/data/explore/two", 56 | "test/data/explore/duplicate/one", 57 | ], 58 | session, 59 | ) 60 | 61 | # force all files to be marked as present, even if they're empty 62 | ncfiles = session.query(cc.database.NCFile).all() 63 | for f in ncfiles: 64 | f.present = True 65 | session.commit() 66 | 67 | return session 68 | 69 | 70 | def test_database_explorer(session): 71 | dbx = cc.explore.DatabaseExplorer(session=session) 72 | 73 | assert dbx.session is session 74 | 75 | # Experiment selector 76 | assert dbx.expt_selector.options == ("one", "two") 77 | 78 | # Keyword filter selector 79 | assert dbx.filter_widget.options == tuple(dbx.keywords) 80 | 81 | in_one = set(cc.querying.get_variables(session, "one").name) 82 | in_two = set(cc.querying.get_variables(session, "two").name) 83 | 84 | # The variable filter box 85 | assert len(dbx.var_filter.selector.variables) == len((in_one | in_two)) 86 | 87 | # Turn off filtering so all variables are present in the filter selector 88 | dbx.var_filter.selector._filter_variables(coords=False, restarts=False, model="") 89 | 90 | truth = { 91 | "age_global": "Age (global) (yr)", 92 | "diff_cbt_t": "total vert diff_cbt(temp) (w/o neutral included) (m^2/s)", 93 | "dzt": "t-cell thickness (m)", 94 | "hi_m": "grid cell mean ice thickness (m)", 95 | "neutral": "neutral density (kg/m^3)", 96 | "neutralrho_edges": "neutral density edges (kg/m^3)", 97 | "nv": "vertex number", 98 | "pot_rho_0": "potential density referenced to 0 dbar (kg/m^3)", 99 | "pot_rho_2": "potential density referenced to 2000 dbar (kg/m^3)", 100 | "salt": "Practical Salinity (psu)", 101 | "st_edges_ocean": "tcell zstar depth edges (meters)", 102 | "st_ocean": "tcell zstar depth (meters)", 103 | } 104 | 105 | for var, label in truth.items(): 106 | assert dbx.var_filter.selector.selector.options[var] == label 107 | 108 | # Add all variables common to both experiments and ensure after filter 109 | # experiment selector still contains both 110 | for var in in_one & in_two: 111 | dbx.var_filter.selector.selector.label = var 112 | dbx.var_filter._add_var_to_selected(None) 113 | 114 | dbx._filter_experiments(None) 115 | assert dbx.expt_selector.options == ("one", "two") 116 | 117 | dbx.var_filter.delete(in_one & in_two) 118 | assert len(dbx.var_filter.var_filter_selected.options) == 0 119 | 120 | # Now all variables only in experiment two and ensure after filter 121 | # experiment selector only contains two 122 | for var in in_two - in_one: 123 | dbx.var_filter.selector.selector.label = var 124 | dbx.var_filter._add_var_to_selected(None) 125 | 126 | dbx._filter_experiments(None) 127 | assert dbx.expt_selector.options == ("two",) 128 | 129 | 
130 | def test_experiment_explorer(session): 131 | ee1 = cc.explore.ExperimentExplorer(session=session) 132 | 133 | # Experiment selector 134 | assert ee1.expt_selector.options == ("one", "two") 135 | 136 | assert len(ee1.var_selector.selector.options) == 24 137 | assert "pot_rho_0" in ee1.var_selector.selector.options 138 | assert "ty_trans_rho" not in ee1.var_selector.selector.options 139 | 140 | # Simulate selecting a different experiment from menu 141 | ee1._load_experiment("two") 142 | assert len(ee1.var_selector.selector.options) == 28 143 | assert "pot_rho_0" in ee1.var_selector.selector.options 144 | assert "ty_trans_rho" in ee1.var_selector.selector.options 145 | 146 | # Check frequency drop down changes when variable selector assigned a value 147 | assert ee1.frequency.options == () 148 | ee1.var_selector.selector.label = "ty_trans" 149 | ee1.var_selector._set_frequency_selector("ty_trans") 150 | assert ee1.frequency.options == ("1 yearly",) 151 | ee1.var_selector._set_cellmethods_selector("ty_trans", "1 yearly") 152 | assert ee1.cellmethods.options == ("time: mean",) 153 | ee1.var_selector._set_daterange_selector("ty_trans", "1 yearly", "time: mean") 154 | assert ee1.frequency.options == ("1 yearly",) 155 | 156 | # Check frequency drop down changes when variable selector assigned a value 157 | ee1.var_selector.selector.label = "tx_trans" 158 | ee1.var_selector._set_frequency_selector("tx_trans") 159 | assert ee1.frequency.options == (None,) 160 | ee1.var_selector._set_cellmethods_selector("tx_trans", None) 161 | assert ee1.cellmethods.options == ("time: mean",) 162 | ee1.var_selector._set_daterange_selector("tx_trans", None, "time: mean") 163 | print(ee1.daterange) 164 | 165 | ee2 = cc.explore.ExperimentExplorer(session=session) 166 | assert id(ee1.var_selector) != id(ee2.var_selector) 167 | 168 | 169 | def test_get_data(session): 170 | ee = cc.explore.ExperimentExplorer(session=session) 171 | 172 | assert ee.data is None 173 | 174 | ee._load_experiment("one") 175 | ee.var_selector.selector.label = "ty_trans" 176 | ee.var_selector._set_frequency_selector("ty_trans") 177 | ee.var_selector._set_cellmethods_selector("ty_trans", "1 yearly") 178 | ee.var_selector._set_daterange_selector("ty_trans", "1 yearly", "time: mean") 179 | ee._load_data(None) 180 | 181 | assert ee.frequency.options == ("1 yearly",) 182 | assert ee.daterange.options[0][0] == "0166/12/31" 183 | assert ee.daterange.options[1][0] == "0167/12/31" 184 | 185 | assert ee.data is not None 186 | assert ee.data.shape == (2, 1, 1, 1) 187 | 188 | 189 | def test_model_property(session): 190 | # Grab all variables and ensure the SQL classification matches the python version 191 | # May be some holes, as not ensured all cases covered 192 | for expt in cc.querying.get_experiments(session, all=True).experiment: 193 | for index, row in cc.querying.get_variables( 194 | session, experiment=expt, inferred=True 195 | ).iterrows(): 196 | ncfile = NCFile( 197 | index_time=datetime.now(), 198 | ncfile=row.ncfile, 199 | present=True, 200 | ) 201 | assert ncfile.model == row.model 202 | -------------------------------------------------------------------------------- /test/test_indexing.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pytest 4 | import shutil 5 | import time 6 | import xarray as xr 7 | from pathlib import Path 8 | from cosima_cookbook import database 9 | from sqlalchemy import func, inspect 10 | 11 | LOGGER = logging.getLogger(__name__) 12 | 
13 | 14 | def rm_tree(pth): 15 | pth = Path(pth) 16 | for child in pth.glob("*"): 17 | if child.is_file(): 18 | child.unlink() 19 | else: 20 | rm_tree(child) 21 | pth.rmdir() 22 | 23 | 24 | def assert_dictionaries_same(expected, actual): 25 | for key in expected.keys(): 26 | if key not in actual or expected[key] != actual[key]: 27 | return False 28 | 29 | return True 30 | 31 | 32 | @pytest.fixture 33 | def unreadable_dir(tmp_path): 34 | expt_path = tmp_path / "expt_dir" 35 | expt_path.mkdir() 36 | idx_dir = expt_path / "unreadable" 37 | idx_dir.mkdir() 38 | idx_dir.chmod(0o300) 39 | 40 | yield idx_dir 41 | 42 | idx_dir.chmod(0o700) 43 | rm_tree(expt_path) 44 | 45 | 46 | def test_find_files(): 47 | files = database.find_files("test/data/indexing/") 48 | assert len(files) == 17 49 | 50 | for f in files: 51 | assert Path(f).suffix == ".nc" 52 | 53 | # No python source files in data subdirectory 54 | assert len(database.find_files("test/data/indexing/", "*.py")) == 0 55 | 56 | # Test works with alternative suffix 57 | files = database.find_files("test/", "*.py") 58 | assert len(files) == 9 59 | 60 | for f in files: 61 | assert Path(f).suffix == ".py" 62 | 63 | 64 | def test_find_experiment(session_db): 65 | session, db = session_db 66 | 67 | directory = Path("test/data/indexing/broken_file") 68 | 69 | assert None == database.find_experiment(session, directory) 70 | 71 | expt = database.NCExperiment( 72 | experiment=str(directory.name), root_dir=str(directory.resolve()) 73 | ) 74 | session.add(expt) 75 | session.flush() 76 | 77 | assert expt == database.find_experiment(session, directory) 78 | 79 | 80 | def test_index_experiment(session_db): 81 | session, db = session_db 82 | 83 | directory = Path("test/data/indexing/longnames") 84 | expt = database.NCExperiment( 85 | experiment=str(directory.name), root_dir=str(directory.resolve()) 86 | ) 87 | 88 | files = database.find_files(directory) 89 | 90 | # Index just one file 91 | database.index_experiment(set(list(files)[:1]), session, expt) 92 | session.flush() 93 | 94 | assert expt == database.find_experiment(session, directory) 95 | assert len(database.find_experiment(session, directory).ncfiles) == 1 96 | 97 | # Index the other file 98 | database.index_experiment(set(list(files)[1:]), session, expt) 99 | 100 | assert expt == database.find_experiment(session, directory) 101 | assert len(database.find_experiment(session, directory).ncfiles) == 2 102 | 103 | 104 | def test_unreadable(session_db, unreadable_dir): 105 | session, db = session_db 106 | 107 | with pytest.warns(UserWarning, match="Some files or directories could not be read"): 108 | indexed = database.build_index(str(unreadable_dir), session) 109 | 110 | 111 | def test_broken(session_db): 112 | session, db = session_db 113 | indexed = database.build_index("test/data/indexing/broken_file", session) 114 | 115 | # make sure the database was created 116 | assert db.exists() 117 | 118 | # we indexed a single file 119 | assert indexed == 1 120 | 121 | # query ncfiles table -- should have a single file, marked as empty 122 | q = session.query(database.NCFile) 123 | r = q.all() 124 | assert len(r) == 1 125 | assert not r[0].present 126 | 127 | # query ncvars table -- should be empty 128 | q = session.query(func.count(database.NCVar.id)) 129 | assert q.scalar() == 0 130 | 131 | 132 | def test_empty_file(session_db): 133 | session, db = session_db 134 | indexed = database.build_index("test/data/indexing/empty_file", session) 135 | 136 | # as with test_broken, we should have seen a single file, 137 
| # but it should be marked as empty 138 | assert db.exists() 139 | assert indexed == 1 140 | q = session.query(database.NCFile) 141 | r = q.all() 142 | assert len(r) == 1 143 | assert not r[0].present 144 | 145 | # but there should be a valid variable 146 | q = session.query(func.count(database.NCVar.id)).filter( 147 | database.NCVar.varname == "ty_trans_rho" 148 | ) 149 | assert q.scalar() == 1 150 | 151 | 152 | def test_update_nonew(session_db): 153 | session, db = session_db 154 | database.build_index("test/data/indexing/broken_file", session) 155 | assert db.exists() 156 | 157 | # re-run the index, make sure we don't re-index anything 158 | reindexed = database.build_index( 159 | "test/data/indexing/broken_file", session, prune="flag" 160 | ) 161 | assert reindexed == 0 162 | 163 | 164 | def test_reindex_force(session_db): 165 | session, db = session_db 166 | database.build_index("test/data/indexing/broken_file", session) 167 | assert db.exists() 168 | 169 | # re-run the index, make sure re-index 170 | reindexed = database.build_index( 171 | "test/data/indexing/broken_file", session, force=True 172 | ) 173 | assert reindexed == 1 174 | 175 | 176 | def test_update_newfile(session_db, tmp_path): 177 | session, db = session_db 178 | shutil.copy( 179 | "test/data/indexing/longnames/output000/test1.nc", str(tmp_path / "test1.nc") 180 | ) 181 | database.build_index(str(tmp_path), session) 182 | 183 | # add another file 184 | shutil.copy( 185 | "test/data/indexing/longnames/output000/test2.nc", str(tmp_path / "test2.nc") 186 | ) 187 | reindexed = database.build_index(str(tmp_path), session) 188 | assert reindexed == 1 189 | 190 | 191 | def test_updated_file(session_db, tmp_path, caplog): 192 | session, db = session_db 193 | 194 | # Make tmp_path a concrete path otherwise filesystem ops won't work 195 | tmp_path = Path(tmp_path) 196 | 197 | ncfile = "test1.nc" 198 | ncpath = Path("test/data/indexing/longnames/output000/") / ncfile 199 | shutil.copy(str(ncpath), str(tmp_path / ncfile)) 200 | indexed = database.build_index(str(tmp_path), session) 201 | assert indexed == 1 202 | 203 | # Should not reindex 204 | reindexed = database.build_index(str(tmp_path), session) 205 | assert reindexed == 0 206 | 207 | # Should reindex as file is updated 208 | time.sleep(1) 209 | (tmp_path / ncfile).touch() 210 | reindexed = database.build_index(str(tmp_path), session) 211 | assert reindexed == 1 212 | 213 | # Should not reindex as flagging as missing will not remove 214 | # file from the database, so will not be reindexed 215 | time.sleep(1) 216 | (tmp_path / ncfile).touch() 217 | with caplog.at_level(logging.WARNING): 218 | reindexed = database.build_index(str(tmp_path), session, prune="flag") 219 | assert reindexed == 0 220 | assert "Set prune to 'delete' to reindex updated files" in caplog.text 221 | 222 | 223 | def test_single_broken(session_db): 224 | session, db = session_db 225 | database.build_index("test/data/indexing/single_broken_file", session) 226 | 227 | # query ncfiles table -- should have two entries 228 | q = session.query(func.count(database.NCFile.id)) 229 | assert q.scalar() == 2 230 | 231 | # query ncvars table -- should have a single entry 232 | q = session.query(func.count(database.NCVar.id)) 233 | assert q.scalar() == 1 234 | 235 | 236 | def test_longnames(session_db): 237 | session, db = session_db 238 | database.build_index("test/data/indexing/longnames", session) 239 | 240 | # query ncvars table -- should have two entries 241 | q = session.query(func.count(database.NCVar.id)) 242 
| assert q.scalar() == 2 243 | 244 | # query generic table -- should only be a single variable 245 | q = session.query(database.CFVariable) 246 | r = q.all() 247 | assert len(r) == 1 248 | assert r[0].long_name == "Test Variable" 249 | 250 | 251 | def test_multiple_experiments(session_db): 252 | session, db = session_db 253 | # index multiple experiments, which have duplicate data and therefore push 254 | # against some unique constraints 255 | database.build_index( 256 | [ 257 | "test/data/indexing/multiple/experiment_a", 258 | "test/data/indexing/multiple/experiment_b", 259 | ], 260 | session, 261 | ) 262 | 263 | q = session.query(database.NCExperiment) 264 | assert q.count() == 2 265 | 266 | 267 | def test_same_expt_name(session_db): 268 | session, db = session_db 269 | # index multiple experiments with different root directories, but the same 270 | # final path component (experiment name) 271 | database.build_index( 272 | [ 273 | "test/data/indexing/multiple/experiment_a", 274 | "test/data/indexing/alternate/experiment_a", 275 | ], 276 | session, 277 | ) 278 | 279 | # the indexing shouldn't fail, and we should have two distinct experiments 280 | # with the same name 281 | 282 | q = session.query(database.NCExperiment).filter( 283 | database.NCExperiment.experiment == "experiment_a" 284 | ) 285 | r = q.all() 286 | assert len(r) == 2 287 | assert r[0].root_dir != r[1].root_dir 288 | 289 | 290 | def test_following_symlinks(session_db): 291 | session, db = session_db 292 | 293 | # Indexing symlinked experiment should fail with default arguments 294 | database.build_index("test/data/indexing/symlinked/experiment_a", session) 295 | 296 | q = session.query(database.NCExperiment) 297 | assert q.count() == 0 298 | 299 | # Now specify to follow symlinks 300 | database.build_index( 301 | "test/data/indexing/symlinked/experiment_a", session, followsymlinks=True 302 | ) 303 | 304 | q = session.query(database.NCExperiment) 305 | assert q.count() == 1 306 | 307 | 308 | def test_broken_metadata(session_db): 309 | session, db = session_db 310 | indexed = database.build_index("test/data/indexing/broken_metadata", session) 311 | 312 | assert indexed == 1 313 | 314 | 315 | def test_time_dimension(session_db): 316 | session, db = session_db 317 | database.build_index("test/data/indexing/time", session) 318 | 319 | q = session.query(database.NCFile.time_start, database.NCFile.time_end) 320 | assert q.count() == 5 # should pick up 5 files 321 | 322 | q = q.filter( 323 | (database.NCFile.time_start.is_(None)) | (database.NCFile.time_end.is_(None)) 324 | ) 325 | assert q.count() == 0 # but all of them should have times populated 326 | 327 | # there should be 5 separate time variables 328 | q = session.query(database.CFVariable) 329 | assert q.count() == 5 330 | 331 | # each file should have exactly one time dimension 332 | q = ( 333 | session.query(func.count(database.NCFile.ncvars)) 334 | .join(database.NCFile.ncvars) 335 | .group_by(database.NCFile.id) 336 | ) 337 | for r in q.all(): 338 | assert r[0] == 1 339 | 340 | 341 | def test_missing_time_bounds(session_db): 342 | session, db = session_db 343 | database.build_index("test/data/indexing/time_bounds", session) 344 | 345 | # Should have one experiment 346 | q = session.query(database.NCExperiment) 347 | assert q.count() == 1 348 | 349 | # And one correctly indexed (present) file 350 | q = session.query(database.NCFile) 351 | r = q.all() 352 | assert len(r) == 1 353 | assert r[0].present 354 | 355 | 356 | def test_index_attributes(session_db): 357 | session, 
db = session_db 358 | database.build_index("test/data/querying", session) 359 | 360 | inspector = inspect(session.get_bind()) 361 | assert assert_dictionaries_same( 362 | { 363 | "name": "ix_ncattributes_ncvar_id", 364 | "column_names": ["ncvar_id"], 365 | "unique": 0, 366 | }, 367 | inspector.get_indexes("ncattributes")[0], 368 | ) 369 | 370 | ncfile = "output000/ocean.nc" 371 | 372 | # check that we have the right attributes for a file (just use a subset) 373 | f = session.query(database.NCFile).filter(database.NCFile.ncfile == ncfile).one() 374 | 375 | file_attrs = { 376 | "filename": "ocean.nc", 377 | "title": "MOM5", 378 | "grid_type": "mosaic", 379 | "grid_tile": "1", 380 | } 381 | for attr, attr_val in file_attrs.items(): 382 | assert attr in f.attrs and f.attrs[attr] == attr_val 383 | 384 | # and check a particular variable 385 | v = ( 386 | session.query(database.NCVar) 387 | .join(database.NCFile) 388 | .filter(database.NCFile.ncfile == ncfile) 389 | .filter(database.NCVar.varname == "temp") 390 | .one() 391 | ) 392 | var_attrs = { 393 | "long_name": "Potential temperature", 394 | "cell_methods": "time: mean", 395 | "coordinates": "geolon_t geolat_t", 396 | } 397 | for attr, attr_val in var_attrs.items(): 398 | assert attr in v.attrs and v.attrs[attr] == attr_val 399 | 400 | 401 | def test_prune_broken(session_db): 402 | session, db = session_db 403 | database.build_index("test/data/indexing/broken_file", session) 404 | 405 | assert db.exists() 406 | 407 | # check that we have one file 408 | q = session.query(database.NCFile) 409 | r = q.all() 410 | assert len(r) == 1 411 | 412 | # prune experiment 413 | database.prune_experiment("broken_file", session) 414 | 415 | # now the database should be empty 416 | q = session.query(database.NCFile) 417 | r = q.all() 418 | assert len(r) == 0 419 | 420 | 421 | def test_prune_missing_experiment(session_db): 422 | session, db = session_db 423 | database.build_index("test/data/indexing/broken_file", session) 424 | 425 | assert db.exists() 426 | 427 | # check that we have one file 428 | q = session.query(database.NCFile) 429 | r = q.all() 430 | assert len(r) == 1 431 | 432 | # prune experiment 433 | experiment = "incorrect_experiment" 434 | with pytest.raises(RuntimeError, match="No such experiment: {}".format(experiment)): 435 | database.prune_experiment(experiment, session) 436 | 437 | 438 | def test_prune_nodelete(session_db, tmp_path): 439 | session, db = session_db 440 | expt_dir = tmp_path / "expt" 441 | expt_dir.mkdir() 442 | 443 | # copy the file to a new experiment directory and index 444 | shutil.copy( 445 | "test/data/indexing/longnames/output000/test1.nc", str(expt_dir / "test1.nc") 446 | ) 447 | database.build_index(str(expt_dir), session) 448 | 449 | # check that we have a valid file 450 | q = session.query(database.NCFile).filter(database.NCFile.present) 451 | r = q.all() 452 | assert len(r) == 1 453 | 454 | # remove the file and prune 455 | os.remove(expt_dir / "test1.nc") 456 | database.prune_experiment("expt", session, delete=False) 457 | 458 | # now we should still have one file, but now not present 459 | q = session.query(database.NCFile) 460 | r = q.one_or_none() 461 | assert r is not None 462 | assert not r.present 463 | 464 | 465 | def test_prune_delete(session_db, tmp_path): 466 | session, db = session_db 467 | expt_dir = tmp_path / "expt" 468 | expt_dir.mkdir() 469 | 470 | # copy the file to a new experiment directory and index 471 | shutil.copy( 472 | "test/data/indexing/longnames/output000/test1.nc", str(expt_dir / 
"test1.nc") 473 | ) 474 | database.build_index(str(expt_dir), session) 475 | 476 | # check that we have a valid file 477 | q = session.query(database.NCFile).filter(database.NCFile.present) 478 | r = q.all() 479 | assert len(r) == 1 480 | 481 | # remove the file and prune 482 | os.remove(expt_dir / "test1.nc") 483 | database.prune_experiment("expt", session) 484 | 485 | # now we should still have no files 486 | q = session.query(database.NCFile) 487 | r = q.one_or_none() 488 | assert r is None 489 | 490 | 491 | def test_index_with_prune_nodelete(session_db, tmp_path): 492 | session, db = session_db 493 | expt_dir = tmp_path / "expt" 494 | expt_dir.mkdir() 495 | 496 | # copy the file to a new experiment directory and index 497 | shutil.copy( 498 | "test/data/indexing/longnames/output000/test1.nc", str(expt_dir / "test1.nc") 499 | ) 500 | database.build_index(str(expt_dir), session) 501 | 502 | # check that we have a valid file 503 | q = session.query(database.NCFile).filter(database.NCFile.present) 504 | r = q.all() 505 | assert len(r) == 1 506 | 507 | # remove the file and build with pruning 508 | os.remove(expt_dir / "test1.nc") 509 | database.build_index(str(expt_dir), session, prune="flag") 510 | 511 | # now we should still have one file, but now not present 512 | q = session.query(database.NCFile) 513 | r = q.one_or_none() 514 | assert r is not None 515 | assert not r.present 516 | 517 | 518 | def test_index_with_prune_delete(session_db, tmp_path): 519 | session, db = session_db 520 | expt_dir = tmp_path / "expt" 521 | expt_dir.mkdir() 522 | 523 | # copy the file to a new experiment directory and index 524 | shutil.copy( 525 | "test/data/indexing/longnames/output000/test1.nc", str(expt_dir / "test1.nc") 526 | ) 527 | database.build_index(str(expt_dir), session) 528 | 529 | # check that we have a valid file 530 | q = session.query(database.NCFile).filter(database.NCFile.present) 531 | r = q.all() 532 | assert len(r) == 1 533 | 534 | # remove the file and build with pruning 535 | os.remove(expt_dir / "test1.nc") 536 | database.build_index(str(expt_dir), session, prune="delete") 537 | 538 | # now we should still have no files 539 | q = session.query(database.NCFile) 540 | r = q.one_or_none() 541 | assert r is None 542 | -------------------------------------------------------------------------------- /test/test_metadata.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from datetime import datetime 3 | 4 | import pandas as pd 5 | from pandas.testing import assert_frame_equal 6 | 7 | from cosima_cookbook import database, querying 8 | 9 | 10 | def metadata_for_experiment(path, session, name="test", commit=True): 11 | """Method to read metadata for an experiment without requiring 12 | the rest of the indexing infrastructure. 
13 | """ 14 | 15 | expt = database.NCExperiment(experiment=name, root_dir=path) 16 | database.update_metadata(expt, session) 17 | 18 | if commit: 19 | session.add(expt) 20 | session.commit() 21 | else: 22 | return expt 23 | 24 | 25 | def test_metadata(session_db): 26 | """Test that metadata.yaml is read for an experiment during indexing""" 27 | 28 | session, db = session_db 29 | database.build_index("test/data/indexing/metadata", session) 30 | 31 | # query metadata 32 | q = session.query( 33 | database.NCExperiment.contact, 34 | database.NCExperiment.created, 35 | database.NCExperiment.description, 36 | ) 37 | r = q.one() 38 | assert r[0] == "The ACCESS Oracle" 39 | assert r[1] == "2018-01-01" 40 | assert len(r[2]) > 0 41 | 42 | 43 | def test_get_experiments_metadata(session_db): 44 | """Test that get_experiments returns metadata correctly""" 45 | 46 | session, db = session_db 47 | database.build_index("test/data/indexing/metadata", session) 48 | 49 | r = querying.get_experiments(session, contact=True) 50 | df = pd.DataFrame.from_dict( 51 | {"experiment": ["metadata"], "contact": ["The ACCESS Oracle"], "ncfiles": [1]} 52 | ) 53 | assert_frame_equal(r, df) 54 | 55 | r = querying.get_experiments(session, email=True) 56 | df = pd.DataFrame.from_dict( 57 | {"experiment": ["metadata"], "email": ["oracle@example.com"], "ncfiles": [1]} 58 | ) 59 | assert_frame_equal(r, df) 60 | 61 | r = querying.get_experiments(session, url=True) 62 | df = pd.DataFrame.from_dict( 63 | { 64 | "experiment": ["metadata"], 65 | "url": ["https://github.com/COSIMA/oracle"], 66 | "ncfiles": [1], 67 | } 68 | ) 69 | assert_frame_equal(r, df) 70 | 71 | r = querying.get_experiments(session, description=True) 72 | df = pd.DataFrame.from_dict( 73 | { 74 | "experiment": ["metadata"], 75 | "description": [ 76 | ( 77 | "Attempted spinup, using salt flux fix " 78 | "https://arccss.slack.com/archives/C6PP0GU9Y/p1515460656000124 " 79 | "and https://github.com/mom-ocean/MOM5/pull/208/commits/9f4ee6f8b72b76c96a25bf26f3f6cdf773b424d2 " 80 | "from the start. Used mushy ice from July year 1 onwards to avoid vertical thermo error in cice " 81 | "https://arccss.slack.com/archives/C6PP0GU9Y/p1515842016000079" 82 | ) 83 | ], 84 | "ncfiles": [1], 85 | } 86 | ) 87 | assert_frame_equal(r, df) 88 | 89 | r = querying.get_experiments(session, notes=True) 90 | df = pd.DataFrame.from_dict( 91 | { 92 | "experiment": ["metadata"], 93 | "notes": [ 94 | ( 95 | "Stripy salt restoring: " 96 | "https://github.com/OceansAus/access-om2/issues/74 tripole seam bug: " 97 | "https://github.com/OceansAus/access-om2/issues/86 requires dt=300s " 98 | "in May, dt=240s in Aug to maintain CFL in CICE near tripoles (storms " 99 | "in those months in 8485RYF); all other months work with dt=400s" 100 | ) 101 | ], 102 | "ncfiles": [1], 103 | } 104 | ) 105 | assert_frame_equal(r, df) 106 | 107 | r = querying.get_experiments(session, created=True) 108 | df = pd.DataFrame.from_dict( 109 | {"experiment": ["metadata"], "created": ["2018-01-01"], "ncfiles": [1]} 110 | ) 111 | assert_frame_equal(r, df) 112 | 113 | r = querying.get_experiments(session, root_dir=True) 114 | # Won't try and match a path that can change on different platforms 115 | # assert_frame_equal(r, df) 116 | assert r.shape == (1, 3) 117 | 118 | r = querying.get_experiments(session, all=True) 119 | # Won't try and match everything, just check dimensions are correct 120 | assert r.shape == (1, 9) 121 | 122 | # Test turning off returning experiment (bit dumb, but hey ...) 
123 | r = querying.get_experiments(session, experiment=False) 124 | df = pd.DataFrame.from_dict({"ncfiles": [1]}) 125 | assert_frame_equal(r, df) 126 | 127 | 128 | def test_keywords(session_db): 129 | """Test that keywords are read for an experiment""" 130 | 131 | session, db = session_db 132 | metadata_for_experiment("test/data/metadata/keywords", session) 133 | 134 | q = session.query(database.NCExperiment).filter( 135 | database.NCExperiment.experiment == "test" 136 | ) 137 | r = q.one() 138 | assert len(r.keywords) == 3 139 | assert "cosima" in r.keywords 140 | assert "not-a-keyword" not in r.keywords 141 | 142 | 143 | def test_duplicate_keywords_commit(session_db): 144 | """Test that the uniqueness constraint works across experiments. 145 | This simulates separate index calls, where the session is committed in between. 146 | """ 147 | 148 | session, db = session_db 149 | metadata_for_experiment("test/data/metadata/keywords", session, name="e1") 150 | metadata_for_experiment("test/data/metadata/keywords2", session, name="e2") 151 | 152 | q = session.query(database.Keyword) 153 | r = q.all() 154 | assert len(r) == 4 155 | 156 | 157 | def test_duplicate_keywords_nocommit(session_db): 158 | """Test that the uniqueness constraint works across experiments. 159 | This simulates multiple experiments being added in a single call. 160 | """ 161 | 162 | session, db = session_db 163 | e1 = metadata_for_experiment( 164 | "test/data/metadata/keywords", session, name="e1", commit=False 165 | ) 166 | e2 = metadata_for_experiment( 167 | "test/data/metadata/keywords2", session, name="e2", commit=False 168 | ) 169 | session.add_all([e1, e2]) 170 | session.commit() 171 | 172 | q = session.query(database.Keyword) 173 | r = q.all() 174 | assert len(r) == 4 175 | 176 | 177 | def test_keyword_upcast(session_db): 178 | """Test that a string keyword is added correctly.""" 179 | 180 | session, db = session_db 181 | metadata_for_experiment("test/data/metadata/string_keyword", session) 182 | 183 | q = session.query(database.NCExperiment).filter( 184 | database.NCExperiment.experiment == "test" 185 | ) 186 | r = q.one() 187 | assert "cosima" in r.keywords 188 | assert "c" not in r.keywords # make sure it wasn't added as a string 189 | 190 | 191 | def test_keyword_case_sensitivity(session_db): 192 | """Test that keywords are treated in a case-insensitive manner, 193 | both for metadata retrieval and querying. 
194 | """ 195 | 196 | session, db = session_db 197 | metadata_for_experiment("test/data/metadata/keywords", session, name="e1") 198 | metadata_for_experiment("test/data/metadata/upcase", session, name="e2") 199 | 200 | # we should be able to find the keyword in lowercase 201 | q = session.query(database.Keyword).filter(database.Keyword.keyword == "cosima") 202 | k1 = q.one_or_none() 203 | assert k1 is not None 204 | 205 | # and in uppercase 206 | q = session.query(database.Keyword).filter(database.Keyword.keyword == "COSIMA") 207 | k2 = q.one_or_none() 208 | assert k2 is not None 209 | 210 | # but they should resolve to the same keyword 211 | assert k1 is k2 212 | 213 | # finally, the set of keywords should all be lowercase 214 | q = session.query(database.NCExperiment).filter( 215 | database.NCExperiment.experiment == "e2" 216 | ) 217 | r = q.one() 218 | for kw in r.keywords: 219 | assert kw == kw.lower() 220 | 221 | 222 | def test_get_keywords(session_db): 223 | """Test retrieval of keywords""" 224 | 225 | session, db = session_db 226 | metadata_for_experiment("test/data/metadata/keywords", session, name="e1") 227 | metadata_for_experiment("test/data/metadata/keywords2", session, name="e2") 228 | 229 | # Grab keywords for individual experiments 230 | r = querying.get_keywords(session, "e1") 231 | assert r == {"access-om2-01", "ryf9091", "cosima"} 232 | 233 | r = querying.get_keywords(session, "e2") 234 | assert r == {"another-keyword", "cosima"} 235 | 236 | # Test retrieving all keywords 237 | r = querying.get_keywords(session) 238 | assert r == {"access-om2-01", "ryf9091", "another-keyword", "cosima"} 239 | 240 | 241 | def test_get_experiments_with_keywords(session_db): 242 | """Test retrieval of experiments with keyword filtering""" 243 | session, db = session_db 244 | database.build_index("test/data/metadata/keywords", session) 245 | database.build_index("test/data/metadata/keywords2", session) 246 | 247 | # Test keyword common to both experiments 248 | r = querying.get_experiments(session, keywords="cosima") 249 | df = pd.DataFrame.from_dict( 250 | {"experiment": ["keywords", "keywords2"], "ncfiles": [1, 1]} 251 | ) 252 | assert_frame_equal(r, df) 253 | 254 | # Test keyword common to both experiments using wildcard 255 | r = querying.get_experiments(session, keywords="cos%") 256 | df = pd.DataFrame.from_dict( 257 | {"experiment": ["keywords", "keywords2"], "ncfiles": [1, 1]} 258 | ) 259 | assert_frame_equal(r, df) 260 | 261 | r = querying.get_experiments(session, keywords="%-%") 262 | df = pd.DataFrame.from_dict( 263 | {"experiment": ["keywords", "keywords2"], "ncfiles": [1, 1]} 264 | ) 265 | assert_frame_equal(r, df) 266 | 267 | r = querying.get_experiments(session, keywords="access-om2%") 268 | df = pd.DataFrame.from_dict({"experiment": ["keywords"], "ncfiles": [1]}) 269 | assert_frame_equal(r, df) 270 | 271 | # Test keyword in only one experiment 272 | r = querying.get_experiments(session, keywords="another-keyword") 273 | df = pd.DataFrame.from_dict({"experiment": ["keywords2"], "ncfiles": [1]}) 274 | assert_frame_equal(r, df) 275 | 276 | r = querying.get_experiments(session, keywords="ryf9091") 277 | df = pd.DataFrame.from_dict({"experiment": ["keywords"], "ncfiles": [1]}) 278 | assert_frame_equal(r, df) 279 | 280 | # Test passing an array of keywords that match only one experiment 281 | r = querying.get_experiments(session, keywords=["cosima", "another-keyword"]) 282 | df = pd.DataFrame.from_dict({"experiment": ["keywords2"], "ncfiles": [1]}) 283 | assert_frame_equal(r, df) 284 
| 285 | # Test passing an array of keywords that will not match any one experiment 286 | r = querying.get_experiments(session, keywords=["another-keyword", "ryf9091"]) 287 | df = pd.DataFrame(columns=["experiment", "ncfiles"]) 288 | assert_frame_equal(r, df) 289 | 290 | # Test passing a non-existent keyword along with one present. Should return 291 | # nothing as no experiment contains it 292 | r = querying.get_experiments(session, keywords=["ryf9091", "not-a-keyword"]) 293 | df = pd.DataFrame(columns=["experiment", "ncfiles"]) 294 | assert_frame_equal(r, df) 295 | 296 | # Test passing only a non-existent keyword 297 | r = querying.get_experiments(session, keywords=["not-a-keyword"]) 298 | df = pd.DataFrame(columns=["experiment", "ncfiles"]) 299 | assert_frame_equal(r, df) 300 | 301 | # Test passing only a non-existent wildcard keyword 302 | r = querying.get_experiments(session, keywords=["z%"]) 303 | df = pd.DataFrame(columns=["experiment", "ncfiles"]) 304 | assert_frame_equal(r, df) 305 | 306 | 307 | def test_getvar_with_metadata(session_db): 308 | session, db = session_db 309 | database.build_index("test/data/indexing/metadata", session) 310 | 311 | with querying.getvar("metadata", "test", session, decode_times=False) as v: 312 | assert v.attrs["long_name"] == "Test Variable" 313 | assert v.attrs["contact"] == "The ACCESS Oracle" 314 | assert v.attrs["email"] == "oracle@example.com" 315 | assert v.attrs["created"] == "2018-01-01" 316 | assert "description" in v.attrs 317 | -------------------------------------------------------------------------------- /test/test_querying.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from datetime import datetime 4 | 5 | import pytest 6 | 7 | import xarray as xr 8 | import pandas as pd 9 | from pandas.testing import assert_frame_equal, assert_series_equal 10 | import numpy as np 11 | 12 | import cosima_cookbook as cc 13 | from cosima_cookbook.querying import QueryWarning 14 | from cosima_cookbook.database import NCFile, CFVariable 15 | 16 | 17 | @pytest.fixture(scope="module") 18 | def session(tmp_path_factory): 19 | # index test directory into temp database 20 | d = tmp_path_factory.mktemp("database") 21 | db = d / "test.db" 22 | session = cc.database.create_session(str(db)) 23 | 24 | # build index for entire module 25 | cc.database.build_index( 26 | ["test/data/querying", "test/data/querying_disambiguation"], session 27 | ) 28 | 29 | # force all files to be marked as present, even though they're empty 30 | ncfiles = session.query(cc.database.NCFile).all() 31 | for f in ncfiles: 32 | f.present = True 33 | session.commit() 34 | 35 | return session 36 | 37 | 38 | def test_valid_query(session): 39 | with cc.querying.getvar("querying", "temp", session, decode_times=False) as v: 40 | assert isinstance(v, xr.DataArray) 41 | assert len(v.attrs["ncfiles"]) == 1 42 | assert v.attrs["ncfiles"][0].endswith("test/data/querying/output000/ocean.nc") 43 | # Make sure other fields aren't included in attributes 44 | assert "index" not in v.attrs 45 | assert "root_dir" not in v.attrs 46 | # Make sure empty metadata fields haven't been included as attributes 47 | assert "contact" not in v.attrs 48 | assert "notes" not in v.attrs 49 | assert "description" not in v.attrs 50 | assert "email" not in v.attrs 51 | 52 | 53 | def test_invalid_query(session): 54 | with pytest.raises(cc.querying.VariableNotFoundError): 55 | cc.querying.getvar("querying", "notfound", session, decode_times=False) 56 | 57 | 58 | def 
test_warning_on_ambiguous_attr(session): 59 | with pytest.warns(QueryWarning) as record: 60 | cc.querying._ncfiles_for_variable( 61 | "querying_disambiguation", 62 | "v", 63 | session, 64 | attrs_unique={"cell_methods": "bar"}, 65 | ) 66 | 67 | assert len(record) == 1 68 | assert ( 69 | record[0] 70 | .message.args[0] 71 | .startswith( 72 | "Your query returns variables from files with different cell_methods" 73 | ) 74 | ) 75 | 76 | with pytest.warns(QueryWarning) as record: 77 | files = cc.querying._ncfiles_for_variable( 78 | "querying_disambiguation", 79 | "u", 80 | session, 81 | attrs_unique={"cell_methods": "time: no_valid"}, 82 | ) 83 | 84 | assert len(files) == 2 85 | assert len(record) == 1 86 | assert ( 87 | record[0] 88 | .message.args[0] 89 | .startswith( 90 | "Your query returns variables from files with different cell_methods" 91 | ) 92 | ) 93 | 94 | # Raise an exception if QueryWarning set to error 95 | warnings.simplefilter("error", QueryWarning) 96 | with pytest.raises(QueryWarning) as record: 97 | cc.querying._ncfiles_for_variable( 98 | "querying_disambiguation", 99 | "v", 100 | session, 101 | attrs_unique={"cell_methods": "bar"}, 102 | ) 103 | 104 | with warnings.catch_warnings(record=True) as record: 105 | # Turn off warnings, will run without exception 106 | # and record will be empty 107 | warnings.simplefilter("ignore", QueryWarning) 108 | 109 | cc.querying._ncfiles_for_variable( 110 | "querying_disambiguation", 111 | "v", 112 | session, 113 | attrs_unique={"cell_methods": "bar"}, 114 | ) 115 | 116 | assert len(record) == 0 117 | 118 | 119 | def test_disambiguation_on_default_attr(session): 120 | files = cc.querying._ncfiles_for_variable( 121 | "querying_disambiguation", 122 | "v", 123 | session, 124 | attrs_unique={"cell_methods": "mean_pow(02)"}, 125 | ) 126 | 127 | assert len(files) == 1 128 | assert files[0].NCVar.attrs["cell_methods"] == "mean_pow(02)" 129 | 130 | files = cc.querying._ncfiles_for_variable( 131 | "querying_disambiguation", 132 | "v", 133 | session, 134 | attrs_unique={"cell_methods": "time: mean"}, 135 | ) 136 | 137 | assert len(files) == 1 138 | assert files[0].NCVar.attrs["cell_methods"] == "time: mean" 139 | 140 | # One file has no cell_methods attribute 141 | files = cc.querying._ncfiles_for_variable( 142 | "querying_disambiguation", 143 | "u", 144 | session, 145 | attrs_unique={"cell_methods": "time: mean"}, 146 | ) 147 | 148 | assert len(files) == 1 149 | assert files[0].NCVar.attrs["cell_methods"] == "time: mean" 150 | 151 | # Add another unique attribute not present (should be ignored) 152 | files = cc.querying._ncfiles_for_variable( 153 | "querying_disambiguation", 154 | "v", 155 | session, 156 | attrs_unique={"cell_methods": "time: mean", "foo": "bar"}, 157 | ) 158 | 159 | assert len(files) == 1 160 | assert files[0].NCVar.attrs["cell_methods"] == "time: mean" 161 | 162 | 163 | def test_query_times(session): 164 | with cc.querying.getvar("querying", "ty_trans", session) as v: 165 | assert isinstance(v, xr.DataArray) 166 | 167 | 168 | def test_chunk_parsing_chunked(session): 169 | var = ( 170 | session.query(cc.database.NCVar) 171 | .filter(cc.database.NCVar.varname == "salt") 172 | .first() 173 | ) 174 | 175 | chunk_dict = { 176 | "time": 1, 177 | "st_ocean": 15, 178 | "yt_ocean": 216, 179 | "xt_ocean": 288, 180 | } 181 | 182 | assert cc.querying._parse_chunks(var) == chunk_dict 183 | 184 | 185 | def test_chunk_parsing_contiguous(session): 186 | var = ( 187 | session.query(cc.database.NCVar) 188 | .filter(cc.database.NCVar.varname == 
"potrho") 189 | .first() 190 | ) 191 | 192 | assert var.chunking == "contiguous" 193 | assert cc.querying._parse_chunks(var) is None 194 | 195 | 196 | def test_chunk_parsing_unchunked(session): 197 | var = ( 198 | session.query(cc.database.NCVar) 199 | .filter(cc.database.NCVar.varname == "hi_m") 200 | .first() 201 | ) 202 | 203 | assert var.chunking == "None" 204 | assert cc.querying._parse_chunks(var) is None 205 | 206 | 207 | def test_get_experiments(session): 208 | r = cc.querying.get_experiments(session) 209 | 210 | df = pd.DataFrame.from_dict( 211 | {"experiment": ["querying", "querying_disambiguation"], "ncfiles": [3, 2]} 212 | ) 213 | assert_frame_equal(r, df) 214 | 215 | metadata_keys = [ 216 | "root_dir", 217 | "contact", 218 | "email", 219 | "created", 220 | "url", 221 | "description", 222 | "notes", 223 | ] 224 | 225 | # Won't try and match everything, there is not much useful metadata, just 226 | # check dimensions are correct. Metadata correctness checked in test_metadata 227 | for k in metadata_keys: 228 | r = cc.querying.get_experiments(session, **{k: True}) 229 | assert k == r.columns[1] 230 | assert r.shape == (2, 3) 231 | 232 | # Test all = True to select all available metadata 233 | r = cc.querying.get_experiments(session, all=True) 234 | assert r.shape == (2, 9) 235 | 236 | # Functionally equivalent to above 237 | r = cc.querying.get_experiments(session, **{k: True for k in metadata_keys}) 238 | assert r.shape == (2, 9) 239 | 240 | # Functionally equivalent to above 241 | r = cc.querying.get_experiments( 242 | session, experiment=False, exptname="querying", all=True 243 | ) 244 | assert r.shape == (1, 8) 245 | assert "experiment" not in r 246 | 247 | # Test for filtering by variables 248 | in_both = {"potrho_edges", "age_global", "tx_trans_rho"} 249 | only_in_querying = {"hi_m", "ty_trans"} 250 | 251 | r = cc.querying.get_experiments(session, variables=in_both) 252 | assert r.shape == (2, 2) 253 | 254 | r = cc.querying.get_experiments(session, variables=(in_both | only_in_querying)) 255 | assert r.shape == (1, 2) 256 | 257 | r = cc.querying.get_experiments( 258 | session, variables=(in_both | only_in_querying | {"none"}) 259 | ) 260 | assert r.shape == (0, 2) 261 | 262 | 263 | def test_get_ncfiles(session): 264 | r = cc.querying.get_ncfiles(session, "querying") 265 | 266 | df = pd.DataFrame.from_dict( 267 | { 268 | "ncfile": [ 269 | "output000/hi_m.nc", 270 | "output000/ocean.nc", 271 | "restart000/ty_trans.nc", 272 | ], 273 | "index_time": [ 274 | pd.Timestamp("2019-08-09 21:51:12.090930"), 275 | pd.Timestamp("2019-08-09 21:51:12.143794"), 276 | pd.Timestamp("2019-08-09 21:51:12.148942"), 277 | ], 278 | } 279 | ) 280 | 281 | # The Timestamps will not be the same so check only that the ncfiles are correct 282 | assert_series_equal(r["ncfile"], df["ncfile"]) 283 | 284 | 285 | def test_get_variables(session): 286 | r = cc.querying.get_variables(session, "querying", "1 monthly") 287 | 288 | df = pd.DataFrame.from_dict( 289 | { 290 | "name": ["TLAT", "TLON", "hi_m", "tarea", "time", "time_bounds"], 291 | "long_name": [ 292 | "T grid center latitude", 293 | "T grid center longitude", 294 | "grid cell mean ice thickness", 295 | "area of T grid cells", 296 | "model time", 297 | "boundaries for time-averaging interval", 298 | ], 299 | "units": [ 300 | "degrees_north", 301 | "degrees_east", 302 | "m", 303 | "m^2", 304 | "days since 1900-01-01 00:00:00", 305 | "days since 1900-01-01 00:00:00", 306 | ], 307 | "frequency": ["1 monthly"] * 6, 308 | "ncfile": ["output000/hi_m.nc"] * 
6, 309 | "cell_methods": [None, None, "time: mean", None, None, None], 310 | "# ncfiles": [1] * 6, 311 | "time_start": ["1900-01-01 00:00:00"] * 6, 312 | "time_end": ["1900-02-01 00:00:00"] * 6, 313 | } 314 | ) 315 | 316 | assert_frame_equal(r, df) 317 | 318 | r = cc.querying.get_variables(session, "querying", search="temp") 319 | 320 | df = pd.DataFrame.from_dict( 321 | { 322 | "name": ["diff_cbt_t", "temp", "temp_xflux_adv", "temp_yflux_adv"], 323 | "long_name": [ 324 | "total vert diff_cbt(temp) (w/o neutral included)", 325 | "Potential temperature", 326 | "cp*rho*dzt*dyt*u*temp", 327 | "cp*rho*dzt*dxt*v*temp", 328 | ], 329 | "units": ["m^2/s", "degrees K", "Watts", "Watts"], 330 | "frequency": [None] * 4, 331 | "ncfile": ["output000/ocean.nc"] * 4, 332 | "cell_methods": ["time: mean"] * 4, 333 | "# ncfiles": [1] * 4, 334 | "time_start": [None] * 4, 335 | "time_end": [None] * 4, 336 | } 337 | ) 338 | 339 | assert_frame_equal(r, df) 340 | 341 | r = cc.querying.get_variables(session, search="temp") 342 | 343 | df = pd.DataFrame.from_dict( 344 | { 345 | "name": ["diff_cbt_t", "temp", "temp_xflux_adv", "temp_yflux_adv"], 346 | "long_name": [ 347 | "total vert diff_cbt(temp) (w/o neutral included)", 348 | "Potential temperature", 349 | "cp*rho*dzt*dyt*u*temp", 350 | "cp*rho*dzt*dxt*v*temp", 351 | ], 352 | "units": ["m^2/s", "degrees K", "Watts", "Watts"], 353 | } 354 | ) 355 | 356 | assert_frame_equal(r, df) 357 | 358 | r = cc.querying.get_variables(session, search=("temp", "velocity")) 359 | 360 | df = pd.DataFrame.from_dict( 361 | { 362 | "name": [ 363 | "diff_cbt_t", 364 | "temp", 365 | "temp_xflux_adv", 366 | "temp_yflux_adv", 367 | "u", 368 | "v", 369 | "wt", 370 | ], 371 | "long_name": [ 372 | "total vert diff_cbt(temp) (w/o neutral included)", 373 | "Potential temperature", 374 | "cp*rho*dzt*dyt*u*temp", 375 | "cp*rho*dzt*dxt*v*temp", 376 | "i-current", 377 | "j-current", 378 | "dia-surface velocity T-points", 379 | ], 380 | "units": [ 381 | "m^2/s", 382 | "degrees K", 383 | "Watts", 384 | "Watts", 385 | "m/sec", 386 | "m/sec", 387 | "m/sec", 388 | ], 389 | } 390 | ) 391 | 392 | r = cc.querying.get_variables(session, search=("temp", "velocity")) 393 | 394 | df = pd.DataFrame.from_dict( 395 | { 396 | "name": [ 397 | "diff_cbt_t", 398 | "temp", 399 | "temp_xflux_adv", 400 | "temp_yflux_adv", 401 | "u", 402 | "v", 403 | "wt", 404 | ], 405 | "long_name": [ 406 | "total vert diff_cbt(temp) (w/o neutral included)", 407 | "Potential temperature", 408 | "cp*rho*dzt*dyt*u*temp", 409 | "cp*rho*dzt*dxt*v*temp", 410 | "i-current", 411 | "j-current", 412 | "dia-surface velocity T-points", 413 | ], 414 | "units": [ 415 | "m^2/s", 416 | "degrees K", 417 | "Watts", 418 | "Watts", 419 | "m/sec", 420 | "m/sec", 421 | "m/sec", 422 | ], 423 | "frequency": [None] * 7, 424 | "ncfile": ["output000/ocean.nc"] * 7, 425 | "# ncfiles": [1] * 7, 426 | "time_start": [None] * 7, 427 | "time_end": [None] * 7, 428 | } 429 | ) 430 | 431 | 432 | def test_model_property(session): 433 | filename_map = { 434 | "ocean": ( 435 | "output/ocean/ice.nc", 436 | "output/ocn/land.nc", 437 | "output/ocean/atmos.nc", 438 | "ocean/ocean_daily.nc", 439 | "output/ocean/ocean_daily.nc.0000", 440 | "ocean/atmos.nc", 441 | ), 442 | "atmosphere": ( 443 | "output/atm/fire.nc", 444 | "output/atmos/ice.nc", 445 | "output/atmosphere/ice.nc", 446 | "atmosphere/ice.nc", 447 | "atmos/ice.nc", 448 | ), 449 | "land": ( 450 | "output/land/fire.nc", 451 | "output/lnd/ice.nc", 452 | "land/fire.nc", 453 | "lnd/ice.nc", 454 | ), 455 | "ice": ( 456 | 
"output/ice/fire.nc", 457 | "output/ice/in/here/land.nc", 458 | "ice/fire.nc", 459 | "ice/in/here/land.nc", 460 | ), 461 | "none": ( 462 | "output/ocean.nc", # only a model if part of path, not filename 463 | "someotherpath/ocean_daily.nc", 464 | "lala/land_daily.nc.0000", 465 | "output/atmosphere_ice.nc", 466 | "output/noice/in/here/land.nc", 467 | ), 468 | } 469 | for model in filename_map: 470 | for fpath in filename_map[model]: 471 | ncfile = NCFile( 472 | index_time=datetime.now(), 473 | ncfile=fpath, 474 | present=True, 475 | ) 476 | assert ncfile.model == model 477 | 478 | 479 | def test_is_restart_property(session): 480 | filename_map = { 481 | True: ( 482 | "output/restart/ice.nc", 483 | "output/restart000/land.nc", 484 | "restart/land.nc", 485 | ), 486 | False: ( 487 | "output/restartice.nc", 488 | "output/lastrestart/land.nc", 489 | ), 490 | } 491 | for isrestart in filename_map: 492 | for fpath in filename_map[isrestart]: 493 | ncfile = NCFile( 494 | index_time=datetime.now(), 495 | ncfile=fpath, 496 | present=True, 497 | ) 498 | assert ncfile.is_restart == isrestart 499 | 500 | # Grab all variables and ensure the SQL classification matches the python version 501 | # May be some holes, as not ensured all cases covered 502 | for index, row in cc.querying.get_variables( 503 | session, "querying", inferred=True 504 | ).iterrows(): 505 | ncfile = NCFile( 506 | index_time=datetime.now(), 507 | ncfile=row.ncfile, 508 | present=True, 509 | ) 510 | assert ncfile.is_restart == row.restart 511 | 512 | 513 | def test_is_coordinate_property(session): 514 | units_map = { 515 | True: ( 516 | "degrees_", 517 | "degrees_E", 518 | "degrees_N", 519 | "degrees_east", 520 | "hours since a long time ago", 521 | "radians", 522 | "days", 523 | "days since a while ago", 524 | ), 525 | False: ("degrees K",), 526 | } 527 | 528 | for iscoord in units_map: 529 | for units in units_map[iscoord]: 530 | assert CFVariable(name="bogus", units=units).is_coordinate == iscoord 531 | 532 | # Grab all variables and ensure the SQL classification matches the python version 533 | # May be some holes, as not ensured all cases covered 534 | for index, row in cc.querying.get_variables(session, inferred=True).iterrows(): 535 | assert ( 536 | CFVariable(name=row["name"], units=row.units).is_coordinate 537 | == row.coordinate 538 | ) 539 | 540 | 541 | def test_get_frequencies(session): 542 | r = cc.querying.get_frequencies(session, "querying") 543 | 544 | df = pd.DataFrame.from_dict({"frequency": [None, "1 monthly", "1 yearly"]}) 545 | 546 | assert_frame_equal(r, df) 547 | 548 | 549 | def test_disambiguation_by_frequency(session): 550 | with pytest.warns(UserWarning) as record: 551 | assert len(cc.querying._ncfiles_for_variable("querying", "time", session)) == 3 552 | 553 | if len(record) != 1: 554 | raise ValueError("|".join([r.message.args[0] for r in record])) 555 | 556 | assert len(record) == 1 557 | assert ( 558 | record[0] 559 | .message.args[0] 560 | .startswith("Your query returns files with differing frequencies:") 561 | ) 562 | 563 | assert ( 564 | len( 565 | cc.querying._ncfiles_for_variable( 566 | "querying", "time", session, frequency="1 monthly" 567 | ) 568 | ) 569 | == 1 570 | ) 571 | assert ( 572 | len( 573 | cc.querying._ncfiles_for_variable( 574 | "querying", "time", session, frequency="1 yearly" 575 | ) 576 | ) 577 | == 1 578 | ) 579 | 580 | # Both of these select a single file and successfully return an xarray object 581 | assert cc.querying.getvar( 582 | "querying", "time", session, frequency="1 
monthly" 583 | ).shape == (1,) 584 | assert cc.querying.getvar( 585 | "querying", "time", session, frequency="1 yearly" 586 | ).shape == (2,) 587 | 588 | 589 | def test_time_bounds_on_dataarray(session): 590 | var_salt = cc.querying.getvar( 591 | "querying", "salt", session, decode_times=False, return_dataset=True 592 | ) 593 | 594 | # we should have added time_bounds into the DataArray's attributes 595 | assert "time_bounds" in var_salt 596 | 597 | # and time_bounds should itself be a DataArray 598 | assert isinstance(var_salt["time_bounds"], xr.DataArray) 599 | 600 | 601 | def test_query_with_attrs(session): 602 | attrs = { 603 | "long_name": "Practical Salinity", 604 | "units": "psu", 605 | } 606 | 607 | # a valid set of attributes 608 | var_salt = cc.querying.getvar( 609 | "querying", "salt", session, decode_times=False, attrs=attrs 610 | ) 611 | 612 | for attr, val in attrs.items(): 613 | assert var_salt.attrs[attr] == val 614 | 615 | # make sure that this is actually applied as an additional filter 616 | # by making failing queries 617 | # first: incorrect attribute value 618 | with pytest.raises(cc.querying.VariableNotFoundError): 619 | cc.querying.getvar( 620 | "querying", 621 | "salt", 622 | session, 623 | decode_times=False, 624 | attrs={"units": "degrees K"}, 625 | ) 626 | 627 | # second: non-present attribute name 628 | with pytest.raises(cc.querying.VariableNotFoundError): 629 | cc.querying.getvar( 630 | "querying", "salt", session, decode_times=False, attrs={"not_found": "psu"} 631 | ) 632 | 633 | 634 | def test_query_chunks(session, caplog): 635 | with cc.querying.getvar( 636 | "querying", "ty_trans", session, chunks={"invalid": 99} 637 | ) as v: 638 | assert "chunking along dimensions {'invalid'} is not possible" in caplog.text 639 | -------------------------------------------------------------------------------- /test/test_sqa14.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from cosima_cookbook.database import * 3 | 4 | 5 | def test_empty_file(session_db): 6 | session, db = session_db 7 | 8 | exp = NCExperiment(experiment="a", root_dir="b") 9 | file = NCFile() 10 | 11 | file.experiment = exp 12 | 13 | session.add(exp) 14 | session.commit() 15 | 16 | assert session.query(NCFile).count() == 1 17 | assert session.query(NCExperiment).count() == 1 18 | 19 | 20 | def test_file_one_var(session_db): 21 | session, db = session_db 22 | 23 | exp = NCExperiment(experiment="a", root_dir="b") 24 | file = NCFile() 25 | cfvar = CFVariable(name="c") 26 | var = NCVar() 27 | 28 | file.experiment = exp 29 | var.ncfile = file 30 | var.variable = cfvar 31 | 32 | session.add(exp) 33 | session.commit() 34 | 35 | assert session.query(NCFile).count() == 1 36 | assert session.query(NCVar).count() == 1 37 | 38 | 39 | def test_file_attr(session_db): 40 | session, db = session_db 41 | 42 | exp = NCExperiment(experiment="a", root_dir="b") 43 | file = NCFile() 44 | cfvar = CFVariable(name="c") 45 | var = NCVar() 46 | 47 | file.experiment = exp 48 | file.attrs["x"] = "y" 49 | 50 | session.add(exp) 51 | session.commit() 52 | 53 | assert session.query(NCFile).count() == 1 54 | assert session.query(NCAttribute).count() == 1 55 | assert session.query(NCAttributeString).count() == 2 56 | 57 | # Add another attribute with duplicate string 58 | file.attrs["z"] = "y" 59 | 60 | session.add(exp) 61 | session.commit() 62 | 63 | assert session.query(NCFile).count() == 1 64 | assert session.query(NCAttribute).count() == 2 65 | assert 
session.query(NCAttributeString).count() == 3 66 | 67 | 68 | def test_var_attr(session_db): 69 | session, db = session_db 70 | 71 | exp = NCExperiment(experiment="a", root_dir="b") 72 | file = NCFile() 73 | cfvar = CFVariable(name="c") 74 | var = NCVar() 75 | 76 | file.experiment = exp 77 | var.ncfile = file 78 | var.variable = cfvar 79 | var.attrs["x"] = "y" 80 | 81 | session.add(exp) 82 | session.commit() 83 | 84 | assert session.query(NCFile).count() == 1 85 | assert session.query(NCAttribute).count() == 1 86 | assert session.query(NCAttributeString).count() == 2 87 | 88 | # Add another attribute with duplicate string 89 | var.attrs["z"] = "y" 90 | 91 | session.add(exp) 92 | session.commit() 93 | 94 | assert session.query(NCAttribute).count() == 2 95 | assert session.query(NCAttributeString).count() == 3 96 | 97 | # Add an attribute to the file 98 | file.attrs["y"] = "x" 99 | 100 | session.add(exp) 101 | session.commit() 102 | 103 | assert session.query(NCAttribute).count() == 3 104 | assert session.query(NCAttributeString).count() == 3 105 | 106 | 107 | def test_index_file(session_db): 108 | session, db = session_db 109 | 110 | exp = NCExperiment(experiment="a", root_dir="test/data/querying") 111 | 112 | file = index_file("output000/ocean.nc", exp, session) 113 | 114 | session.add(exp) 115 | session.commit() 116 | 117 | assert session.query(NCFile).count() == 1 118 | assert session.query(CFVariable).count() == 38 119 | assert session.query(NCVar).count() == 38 120 | assert session.query(NCAttribute).count() == 243 - 18 121 | 122 | var = session.query(NCVar).filter(NCVar.varname == "temp").one() 123 | assert var.attrs["long_name"] == "Potential temperature" 124 | 125 | 126 | def test_file_delete(session_db): 127 | session, db = session_db 128 | 129 | exp = NCExperiment(experiment="a", root_dir="test/data/querying") 130 | 131 | file = index_file("output000/ocean.nc", exp, session) 132 | 133 | session.add(exp) 134 | session.commit() 135 | 136 | assert session.query(NCFile).count() == 1 137 | 138 | session.delete(file) 139 | session.commit() 140 | 141 | assert session.query(NCExperiment).count() == 1 142 | assert session.query(NCFile).count() == 0 143 | assert session.query(CFVariable).count() == 38 # Not cascaded 144 | assert session.query(NCVar).count() == 0 145 | assert session.query(NCAttribute).count() == 0 146 | assert session.query(NCAttributeString).count() == 114 # Not cascaded 147 | -------------------------------------------------------------------------------- /test/test_update.py: -------------------------------------------------------------------------------- 1 | import shlex 2 | from cosima_cookbook import database_update 3 | 4 | 5 | def test_database_update(tmp_path): 6 | args = shlex.split( 7 | "-db {db} test/data/update/experiment_a test/data/update/experiment_b".format( 8 | db=tmp_path / "test.db" 9 | ) 10 | ) 11 | 12 | database_update.main(args) 13 | --------------------------------------------------------------------------------
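Note on the session_db fixture: the tests in test/test_metadata.py and test/test_sqa14.py unpack a session_db fixture that is defined in test/conftest.py (listed in the tree but not reproduced here). A minimal sketch of a compatible fixture, assuming only the cosima_cookbook.database.create_session call already used by the module-scoped fixture in test/test_querying.py, might look like the following; it is an illustration, not the repository's actual conftest.py.

import pytest
import cosima_cookbook as cc


@pytest.fixture
def session_db(tmp_path):
    # Create an isolated SQLite database for each test and return both the
    # session and the database path, matching the "session, db = session_db"
    # unpacking used throughout the tests above.
    db = tmp_path / "test.db"
    session = cc.database.create_session(str(db))
    yield session, db
    session.close()

With a per-test database under pytest's tmp_path, commits made in one test (for example the keyword uniqueness tests) cannot leak experiments or keywords into another.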