├── .circleci └── config.yml ├── .gitattributes ├── .github └── workflows │ └── tests.yml ├── .gitignore ├── LICENSE ├── README.md ├── alembic.ini ├── conda ├── environment.yml └── meta.yaml ├── cosima_cookbook ├── __init__.py ├── database.py ├── database_update.py ├── database_utils.py ├── date_utils.py ├── diagnostics │ ├── __init__.py │ ├── mean_tau_x.py │ ├── overturning.py │ └── simple.py ├── distributed.py ├── explore.py ├── memory.py ├── netcdf_index.py ├── netcdf_utils.py ├── plots │ ├── __init__.py │ ├── lineplots.py │ ├── maps.py │ ├── overturning.py │ └── scalar.py ├── querying.py └── summary │ ├── __init__.py │ ├── nml_diff.py │ └── nml_summary.py ├── docs ├── Makefile ├── make.bat ├── requirements.txt └── source │ ├── conf.py │ ├── cosima_cookbook.rst │ ├── getting_started.rst │ ├── index.rst │ ├── modules.rst │ └── related_projects.rst ├── readthedocs.yml ├── requirements.txt ├── sandbox ├── alembic │ ├── README │ ├── env.py │ ├── script.py.mako │ └── versions │ │ └── 16223b92479e_add_keywords.py └── diag-vis.py ├── setup.py └── test ├── conftest.py ├── data ├── explore │ ├── duplicate │ │ └── one │ │ │ ├── metadata.yaml │ │ │ └── ocean │ │ │ └── ocean_age.nc │ ├── one │ │ ├── atmosphere │ │ │ └── ty_trans.nc │ │ ├── ice │ │ │ └── hi_m.nc │ │ ├── metadata.yaml │ │ ├── ocean │ │ │ └── ocean.nc │ │ └── restart │ │ │ └── ocean_velocity_advection.res.nc │ └── two │ │ ├── atm │ │ └── hi_m.nc │ │ ├── metadata.yaml │ │ ├── nomodel │ │ └── ty_trans.nc │ │ ├── ocn │ │ ├── ocean.nc │ │ └── ocean_month.nc │ │ └── restart │ │ └── ocean_velocity_advection.res.nc ├── indexing │ ├── alternate │ │ └── experiment_a │ │ │ └── test2.nc │ ├── broken_file │ │ └── output000 │ │ │ └── test.nc │ ├── broken_metadata │ │ ├── metadata.yaml │ │ └── test1.nc │ ├── empty_file │ │ └── output000 │ │ │ └── empty.nc │ ├── longnames │ │ └── output000 │ │ │ ├── test1.nc │ │ │ └── test2.nc │ ├── metadata │ │ ├── metadata.yaml │ │ └── test1.nc │ ├── multiple │ │ ├── experiment_a │ │ │ └── test1.nc │ │ └── experiment_b │ │ │ └── test1.nc │ ├── single_broken_file │ │ └── output000 │ │ │ ├── broken.nc │ │ │ └── test.nc │ ├── symlinked │ │ └── experiment_a │ ├── time │ │ ├── t1.nc │ │ ├── t2.nc │ │ ├── t3.nc │ │ ├── t4.nc │ │ └── t5.nc │ └── time_bounds │ │ └── file001.nc ├── metadata │ ├── keywords │ │ ├── metadata.yaml │ │ └── test1.nc │ ├── keywords2 │ │ ├── metadata.yaml │ │ └── test1.nc │ ├── string_keyword │ │ └── metadata.yaml │ └── upcase │ │ └── metadata.yaml ├── ocean_sealevel.nc ├── querying │ ├── output000 │ │ ├── hi_m.nc │ │ └── ocean.nc │ └── restart000 │ │ └── ty_trans.nc ├── querying_disambiguation │ └── output000 │ │ ├── ocean.nc │ │ └── ocean_month.nc └── update │ ├── experiment_a │ └── test1.nc │ └── experiment_b │ └── test2.nc ├── test_database.py ├── test_dates.py ├── test_explore.py ├── test_indexing.py ├── test_metadata.py ├── test_querying.py ├── test_sqa14.py └── test_update.py /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.0 2 | jobs: 3 | py36: 4 | working_directory: ~/circleci 5 | docker: 6 | - image: circleci/python 7 | environment: 8 | PYTHON_VER: 3.6 9 | resource_class: medium+ 10 | steps: 11 | - checkout 12 | 13 | - run: | 14 | wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O conda.sh 15 | bash conda.sh -b -p ~/conda 16 | ~/conda/bin/conda config --system --add channels conda-forge 17 | ~/conda/bin/conda config --system --add channels coecms 18 | ~/conda/bin/conda update conda 19 | 
~/conda/bin/conda install --yes conda-build conda-verify 20 | 21 | - run: | 22 | ~/conda/bin/conda build -c coecms conda --python=${PYTHON_VER} 23 | 24 | - run: | 25 | mkdir ~/artefacts 26 | cp $(~/conda/bin/conda build conda --python=${PYTHON_VER} --output) ~/artefacts 27 | 28 | - persist_to_workspace: 29 | root: ~/artefacts 30 | paths: '*' 31 | 32 | publish: 33 | working_directory: /circleci 34 | docker: 35 | - image: scottwales/conda-build 36 | resource_class: medium+ 37 | steps: 38 | - attach_workspace: 39 | at: /artefacts 40 | 41 | - run: 42 | anaconda --token "${ANACONDA_TOKEN}" upload --user "${ANACONDA_USER}" /artefacts/*.tar.bz2 43 | 44 | workflows: 45 | version: 2 46 | build_and_publsh: 47 | jobs: 48 | - py36: 49 | filters: 50 | tags: 51 | only: /.*/ 52 | 53 | - publish: 54 | requires: 55 | - py36 56 | filters: 57 | tags: 58 | only: /.*/ 59 | branches: 60 | ignore: /.*/ 61 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | 2 | *.ipynb diff=jupyternotebook 3 | 4 | *.ipynb merge=jupyternotebook 5 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Testing 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | formatting: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Code formatting 18 | uses: lgeiger/black-action@v1.0.1 19 | with: 20 | args: "--check --verbose cosima_cookbook test" 21 | 22 | testing: 23 | needs: formatting 24 | runs-on: ubuntu-latest 25 | strategy: 26 | matrix: 27 | python-version: ['3.8', '3.9', '3.10'] 28 | sqa-version: ['<1.4', '==1.4.*'] 29 | 30 | steps: 31 | - uses: actions/checkout@v2 32 | - name: Set up Python ${{ matrix.python-version }} 33 | uses: actions/setup-python@v2 34 | with: 35 | python-version: ${{ matrix.python-version }} 36 | - name: Install dependencies 37 | run: | 38 | python -m pip install '.[build]' 39 | python -m pip install "sqlalchemy${{ matrix.sqa-version}}" 40 | - name: Unit tests 41 | run: | 42 | python -m pytest --cov cosima_cookbook test 43 | - name: Upload coverage reports to Codecov with GitHub Action 44 | uses: codecov/codecov-action@v3 45 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | doc/_build 92 | doc/diagnostics 93 | doc/configurations 94 | doc/notebooks 95 | doc/gen_modules 96 | 97 | dask-worker-space 98 | 99 | # from https://github.com/github/gitignore/blob/master/Global/macOS.gitignore 100 | 101 | # General 102 | .DS_Store 103 | .AppleDouble 104 | .LSOverride 105 | 106 | # Icon must end with two \r 107 | Icon 108 | 109 | 110 | # Thumbnails 111 | ._* 112 | 113 | # Files that might appear in the root of a volume 114 | .DocumentRevisions-V100 115 | .fseventsd 116 | .Spotlight-V100 117 | .TemporaryItems 118 | .Trashes 119 | .VolumeIcon.icns 120 | .com.apple.timemachine.donotpresent 121 | 122 | # Directories potentially created on remote AFP share 123 | .AppleDB 124 | .AppleDesktop 125 | Network Trash Folder 126 | Temporary Items 127 | .apdisk 128 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 |

3 | 4 | 5 | latest docs 6 | 7 | 8 | # cosima-cookbook package 9 | 10 | This repository hosts the `cosima_cookbook` which is a [Python package](https://anaconda.org/coecms/cosima-cookbook) for managing a database of ocean model output and loading the output via xarray. 11 | 12 | ⚠️ **The `cosima_cookbook` Python package is deprecated and no longer being developed!** ⚠️ 13 | 14 | Use the [ACCESS-NRI Intake catalog](https://cosima-recipes.readthedocs.io/en/latest/Tutorials/ACCESS-NRI_Intake_Catalog.html) instead. 15 | 16 | ## What now? Where should I go? 17 | 18 | We refer users to [COSIMA Cookbook repository](https://github.com/COSIMA/cosima-recipes) where they will find tutorials and 'recipes' (that is, examples) of various analyses that one can do using ocean-sea ice model output. 19 | 20 | [![Documentation Status](https://readthedocs.org/projects/cosima-cookbook/badge/?version=latest)](https://cosima-cookbook.readthedocs.org/en/latest) 21 | -------------------------------------------------------------------------------- /alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = sandbox/alembic 6 | 7 | # template used to generate migration files 8 | # file_template = %%(rev)s_%%(slug)s 9 | 10 | # timezone to use when rendering the date 11 | # within the migration file as well as the filename. 12 | # string value is passed to dateutil.tz.gettz() 13 | # leave blank for localtime 14 | # timezone = 15 | 16 | # max length of characters to apply to the 17 | # "slug" field 18 | # truncate_slug_length = 40 19 | 20 | # set to 'true' to run the environment during 21 | # the 'revision' command, regardless of autogenerate 22 | # revision_environment = false 23 | 24 | # set to 'true' to allow .pyc and .pyo files without 25 | # a source .py file to be detected as revisions in the 26 | # versions/ directory 27 | # sourceless = false 28 | 29 | # version location specification; this defaults 30 | # to sandbox/alembic/versions. When using multiple version 31 | # directories, initial revisions must be specified with --version-path 32 | # version_locations = %(here)s/bar %(here)s/bat sandbox/alembic/versions 33 | 34 | # the output encoding used when revision files 35 | # are written from script.py.mako 36 | # output_encoding = utf-8 37 | 38 | sqlalchemy.url = sqlite:///path-to-db.db 39 | 40 | 41 | [post_write_hooks] 42 | # post_write_hooks defines scripts or Python functions that are run 43 | # on newly generated revision scripts. 
See the documentation for further 44 | # detail and examples 45 | 46 | # format using "black" - use the console_scripts runner, against the "black" entrypoint 47 | # hooks=black 48 | # black.type=console_scripts 49 | # black.entrypoint=black 50 | # black.options=-l 79 51 | 52 | # Logging configuration 53 | [loggers] 54 | keys = root,sqlalchemy,alembic 55 | 56 | [handlers] 57 | keys = console 58 | 59 | [formatters] 60 | keys = generic 61 | 62 | [logger_root] 63 | level = WARN 64 | handlers = console 65 | qualname = 66 | 67 | [logger_sqlalchemy] 68 | level = WARN 69 | handlers = 70 | qualname = sqlalchemy.engine 71 | 72 | [logger_alembic] 73 | level = INFO 74 | handlers = 75 | qualname = alembic 76 | 77 | [handler_console] 78 | class = StreamHandler 79 | args = (sys.stderr,) 80 | level = NOTSET 81 | formatter = generic 82 | 83 | [formatter_generic] 84 | format = %(levelname)-5.5s [%(name)s] %(message)s 85 | datefmt = %H:%M:%S 86 | -------------------------------------------------------------------------------- /conda/environment.yml: -------------------------------------------------------------------------------- 1 | name: ncimonitor 2 | 3 | channels: 4 | - coecms 5 | - conda-forge 6 | - defaults 7 | 8 | dependencies: 9 | - python 10 | - numpy 11 | - dask 12 | - distributed 13 | - xarray 14 | - netcdf4 15 | - joblib 16 | - tqdm 17 | - sqlalchemy 18 | 19 | -------------------------------------------------------------------------------- /conda/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: cosima-cookbook 3 | version: {{ GIT_DESCRIBE_TAG}} 4 | 5 | source: 6 | git_rev: master 7 | git_url: ../ 8 | 9 | build: 10 | noarch: python 11 | number: {{ GIT_DESCRIBE_NUMBER }} 12 | script: python setup.py install --single-version-externally-managed --record=record.txt 13 | 14 | requirements: 15 | build: 16 | - python>=3.6 17 | - setuptools 18 | - setuptools_scm 19 | - pbr 20 | run: 21 | - python>=3.6 22 | - numpy 23 | - dask 24 | - distributed 25 | - xarray 26 | - netcdf4 27 | - joblib 28 | - tqdm 29 | - sqlalchemy<2.0 30 | - ipywidgets 31 | - cftime>1.2.1 32 | - lxml 33 | 34 | about: 35 | home: http://cosima-cookbook.readthedocs.io 36 | license: Apache License 2.0 37 | 38 | -------------------------------------------------------------------------------- /cosima_cookbook/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Common tools for working with COSIMA model output 4 | """ 5 | 6 | from . import database 7 | from . import querying 8 | from . import explore 9 | 10 | from importlib.metadata import version, PackageNotFoundError 11 | 12 | try: 13 | __version__ = version("cosima-cookbook") 14 | except PackageNotFoundError: 15 | pass 16 | -------------------------------------------------------------------------------- /cosima_cookbook/database_update.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pathlib 3 | 4 | import cosima_cookbook as cc 5 | 6 | 7 | def main(argv=None): 8 | parser = argparse.ArgumentParser(description="Update COSIMA cookbook database.") 9 | parser.add_argument( 10 | "dirs", type=pathlib.Path, nargs="+", help="Directories to index." 
11 | ) 12 | parser.add_argument( 13 | "-db", 14 | "--database", 15 | dest="db", 16 | action="store", 17 | default="cosima_master.db", 18 | help="Database to update.", 19 | ) 20 | args = parser.parse_args(argv) 21 | 22 | print(cc) 23 | 24 | print("Establishing a DB connection to: {}".format(args.db)) 25 | session = cc.database.create_session(args.db, timeout=30) 26 | 27 | for dir in args.dirs: 28 | print("Indexing: {}".format(dir)) 29 | cc.database.build_index( 30 | dir, session, prune="delete", force=False, followsymlinks=True, nfiles=1000 31 | ) 32 | -------------------------------------------------------------------------------- /cosima_cookbook/database_utils.py: -------------------------------------------------------------------------------- 1 | # enforce unique ORM objects: https://github.com/sqlalchemy/sqlalchemy/wiki/UniqueObject 2 | 3 | 4 | def _unique(session, cls, hashfunc, queryfunc, constructor, arg, kw): 5 | cache = getattr(session, "_unique_cache", None) 6 | if cache is None: 7 | session._unique_cache = cache = {} 8 | 9 | key = (cls, hashfunc(*arg, **kw)) 10 | if key in cache: 11 | return cache[key] 12 | else: 13 | with session.no_autoflush: 14 | q = session.query(cls) 15 | q = queryfunc(q, *arg, **kw) 16 | obj = q.first() 17 | if not obj: 18 | obj = constructor(*arg, **kw) 19 | session.add(obj) 20 | cache[key] = obj 21 | return obj 22 | 23 | 24 | class UniqueMixin(object): 25 | @classmethod 26 | def unique_hash(cls, *arg, **kw): 27 | return NotImplementedError() 28 | 29 | @classmethod 30 | def unique_filter(cls, query, *arg, **kw): 31 | return NotImplementedError() 32 | 33 | @classmethod 34 | def as_unique(cls, session, *arg, **kw): 35 | return _unique(session, cls, cls.unique_hash, cls.unique_filter, cls, arg, kw) 36 | -------------------------------------------------------------------------------- /cosima_cookbook/date_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2018 ARC Centre of Excellence for Climate Systems Science 3 | author: Aidan Heerdegen 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | Unless required by applicable law or agreed to in writing, software 9 | distributed under the License is distributed on an "AS IS" BASIS, 10 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | See the License for the specific language governing permissions and 12 | limitations under the License. 
13 | """ 14 | 15 | from __future__ import print_function 16 | 17 | import datetime 18 | 19 | import cftime 20 | from cftime import num2date, date2num 21 | import numpy as np 22 | import xarray as xr 23 | from xarray.coding.cftime_offsets import to_cftime_datetime 24 | 25 | rebase_attr = "_rebased_units" 26 | rebase_shift_attr = "_rebased_shift" 27 | bounds = "bounds" 28 | boundsvar = "bounds_var" 29 | 30 | datetimeformat = "%Y-%m-%d %H:%M:%S" 31 | 32 | # Code adapted from https://github.com/spencerahill/aospy/issues/212 33 | 34 | 35 | def date2num_round(dates, units, calendar): 36 | return np.round(date2num(dates, units, calendar), 8) 37 | 38 | 39 | def rebase_times(values, input_units, calendar, output_units): 40 | dates = num2date(values, input_units, calendar) 41 | return date2num_round(dates, output_units, calendar) 42 | 43 | 44 | def is_bounds(var): 45 | """ 46 | Return True if the xarray variable has been flagged as a bounds 47 | variable (has a bounds_var attribute) 48 | """ 49 | return boundsvar in var.attrs 50 | 51 | 52 | def set_bounds(var, varname): 53 | """ 54 | Set the bounds_var attribute to the name of the dimension for which 55 | it is the bounds 56 | """ 57 | var.attrs[boundsvar] = varname 58 | 59 | 60 | def flag_bounds(ds): 61 | """ 62 | Cycle through all the variables in a dataset and mark variables which 63 | are bounds as such by adding a bounds_var attribute 64 | """ 65 | for name in ds.variables: 66 | if is_bounds(ds[name]): 67 | # This is a bounds variable and has been flagged as such 68 | continue 69 | if bounds in ds[name].attrs: 70 | # Flag bounds variable as such 71 | try: 72 | set_bounds(ds[ds[name].attrs[bounds]], name) 73 | except KeyError: 74 | # Ignore if bounds variable not present 75 | pass 76 | 77 | 78 | def unflag_bounds(ds): 79 | """ 80 | Cycle through all the variables in a dataset and unflag variables which 81 | are bounds by deleting any bounds_var 82 | """ 83 | for name in ds.variables: 84 | try: 85 | del ds[name].attrs[boundsvar] 86 | except KeyError: 87 | pass 88 | 89 | 90 | def rebase_variable(var, calendar=None, target_units=None, src_units=None, offset=None): 91 | """ 92 | Create rebased time variable 93 | """ 94 | attributes = var.attrs 95 | 96 | # If no target_units are specified check if the variable has been previously 97 | # rebased and use this as the target, which will undo the previous rebasing 98 | if calendar == None: 99 | try: 100 | calendar = var.attrs["calendar"] 101 | except KeyError: 102 | try: 103 | calendar = var.encoding["calendar"] 104 | except KeyError: 105 | raise AttributeError("No calendar attribute found and none specified") 106 | 107 | # Default to src_units being the units for the variable (bounds variables 108 | # may not have correct units so in this case it has to be specified) 109 | if src_units is None: 110 | src_units = attributes["units"] 111 | 112 | # If no target_units are specified check if the variable has been previously 113 | # rebased and use this as the target, which will undo the previous rebasing 114 | if target_units == None: 115 | try: 116 | target_units = attributes[rebase_attr] 117 | except KeyError: 118 | raise AttributeError( 119 | "No existing rebase found and target_units not specified" 120 | ) 121 | finally: 122 | del attributes[rebase_attr] 123 | else: 124 | attributes[rebase_attr] = src_units 125 | 126 | # Rebase 127 | newvar = xr.apply_ufunc( 128 | rebase_times, var, src_units, calendar, target_units, dask="allowed" 129 | ) 130 | 131 | if rebase_shift_attr in attributes: 132 | newvar = 
newvar - attributes[rebase_shift_attr] 133 | del attributes[rebase_shift_attr] 134 | else: 135 | if offset is not None: 136 | # Offset can be an integer, 'auto', or datetime.delta 137 | 138 | if offset == "auto": 139 | # Generate a timedelta offset based on the calendars of src 140 | # and target 141 | offset = num2date(0, target_units, calendar) - num2date( 142 | 0, src_units, calendar 143 | ) 144 | 145 | if isinstance(offset, datetime.timedelta): 146 | # Add delta to src calendar origin and convert to integer offset 147 | offset = date2num_round( 148 | num2date(0, src_units, calendar) + offset, src_units, calendar 149 | ) 150 | 151 | newvar = newvar + offset 152 | attributes[rebase_shift_attr] = offset 153 | 154 | if newvar.min() < 0: 155 | raise ValueError( 156 | "Rebase creates negative dates, specify offset=auto to shift dates appropriately" 157 | ) 158 | 159 | # Save the values back into the variable, put back the attributes and update 160 | # the units 161 | newvar.attrs = attributes 162 | newvar.attrs["units"] = target_units 163 | 164 | return newvar 165 | 166 | 167 | def rebase_dataset(ds, target_units=None, timevar="time", offset=None): 168 | """ 169 | Rebase the time dimension variable in a dataset to a different start date. 170 | This is useful to overcome limitations in pandas datetime indices used in 171 | xarray, and to place two datasets with different date indices onto a common 172 | date index 173 | """ 174 | 175 | # The units are defined as the units used by timevar 176 | units = ds[timevar].attrs["units"] 177 | calendar = ds[timevar].attrs["calendar"] 178 | 179 | newds = ds.copy() 180 | 181 | # Cycle through all variables, setting a flag if they are a bounds variable 182 | flag_bounds(newds) 183 | 184 | for name in newds.variables: 185 | if is_bounds(newds[name]): 186 | # This is a bounds variable and has been flagged as such so ignore 187 | # as it will be processed by the variable for which it is the bounds 188 | continue 189 | if newds[name].attrs["units"] == units: 190 | newds[name] = rebase_variable( 191 | newds[name], calendar, target_units, offset=offset 192 | ) 193 | if bounds in newds[name].attrs: 194 | # Must make the same adjustment to the bounds variable 195 | bvarname = newds[name].attrs[bounds] 196 | try: 197 | newds[bvarname] = rebase_variable( 198 | newds[bvarname], 199 | calendar, 200 | target_units, 201 | src_units=units, 202 | offset=offset, 203 | ) 204 | except KeyError: 205 | # Ignore if bounds_var missing 206 | pass 207 | 208 | # Unset bounds flags 209 | unflag_bounds(newds) 210 | 211 | # newds = xr.decode_cf(newds, decode_coords=False, decode_times=True) 212 | 213 | return newds 214 | 215 | 216 | def shift_time(ds): 217 | """ 218 | Apply time shift to un-decoded time axis, to align datasets and 219 | """ 220 | pass 221 | 222 | 223 | def format_datetime(datetime, format=datetimeformat): 224 | """ 225 | Standard method to convert cftime.datetime objects to strings for 226 | storage in SQL database. Hard code the length as some datetime 227 | objects don't space pad when formatted! 228 | """ 229 | return "{:0>19}".format(datetime.strftime(format).lstrip()) 230 | 231 | 232 | def parse_datetime(datetimestring, calendar="proleptic_gregorian"): 233 | """ 234 | Standard method to convert datetime obkects stored as strings in SQL database 235 | back into cftime.datetime objects 236 | """ 237 | # xarray supports parsing dates strings to cftime.datetime objects, but 238 | # requires ISO-8601 format (https://en.wikipedia.org/wiki/ISO_8601). 
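    # Strings are written to the database by format_datetime above using
    # "%Y-%m-%d %H:%M:%S" (e.g. "0001-01-01 00:00:00"), so only the "T"
    # separator is missing from an ISO-8601 form.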
239 | # Convert string to ISO-8601 before parsing by adding separator 240 | # between date and time elements 241 | datetimestring = datetimestring[:10] + "T" + datetimestring[11:] 242 | 243 | # Note: uses non-public xarray method that may change or be deleted 244 | # in the future 245 | return to_cftime_datetime(datetimestring, calendar) 246 | -------------------------------------------------------------------------------- /cosima_cookbook/diagnostics/__init__.py: -------------------------------------------------------------------------------- 1 | from .mean_tau_x import mean_tau_x 2 | from .simple import * 3 | from .overturning import * 4 | 5 | __all__ = [ 6 | "mean_tau_x", 7 | "annual_scalar", 8 | "drake_passage", 9 | "sea_surface_temperature", 10 | "sea_surface_salinity", 11 | "psi_avg", 12 | "zonal_mean", 13 | "mixed_layer_depth", 14 | ] 15 | -------------------------------------------------------------------------------- /cosima_cookbook/diagnostics/mean_tau_x.py: -------------------------------------------------------------------------------- 1 | from ..memory import memory 2 | from ..querying import getvar 3 | 4 | 5 | @memory.cache 6 | def mean_tau_x(expt): 7 | """ 8 | 10-year zonal average of horizontal wind stress. 9 | """ 10 | tau_x = get_nc_variable( 11 | expt, "ocean_month.nc", "tau_x", time_units="days since 1900-01-01", n=10 12 | ) 13 | 14 | mean_tau_x = tau_x.mean("xu_ocean").mean("time") 15 | mean_tau_x = mean_tau_x.compute() 16 | mean_tau_x.name = "mean_tau_x" 17 | 18 | return mean_tau_x 19 | -------------------------------------------------------------------------------- /cosima_cookbook/diagnostics/overturning.py: -------------------------------------------------------------------------------- 1 | from ..querying import getvar, get_variables 2 | from ..memory import memory 3 | 4 | 5 | @memory.cache 6 | def psi_avg(expt, n=10): 7 | def op(p): 8 | summed_p = p.sum("grid_xt_ocean") 9 | # summed_p.attrs['units'] = p.units 10 | return summed_p 11 | 12 | psi = get_nc_variable( 13 | expt, 14 | "ocean.nc", 15 | "ty_trans_rho", 16 | # op=op, 17 | chunks={"potrho": None}, 18 | n=n, 19 | time_units="days since 1900-01-01", 20 | ) 21 | psi = psi.sum("grid_xt_ocean") 22 | 23 | varlist = get_variables(expt, "ocean.nc") 24 | if "ty_trans_rho_gm" in varlist: 25 | GM = True 26 | psiGM = get_nc_variable( 27 | expt, 28 | "ocean.nc", 29 | "ty_trans_rho_gm", 30 | # op=op, 31 | chunks={"potrho": None}, 32 | n=n, 33 | time_units="days since 1900-01-01", 34 | ) 35 | psiGM = psiGM.sum("grid_xt_ocean") 36 | else: 37 | GM = False 38 | 39 | # if psi.units == 'kg/s': 40 | # print('WARNING: Changing units for ', expt) 41 | # assume units of kg/s, convert to Sv. 
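    # (1 Sv = 1e6 m^3/s, roughly 1e9 kg/s of seawater, hence the 1.0e-9 factor below)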
42 | psi = psi * 1.0e-9 43 | if GM: 44 | psiGM = psiGM * 1.0e-9 45 | 46 | psi_avg = psi.cumsum("potrho").mean("time") - psi.sum("potrho").mean("time") 47 | if GM: 48 | psi_avg = psi_avg + psiGM.mean("time") 49 | 50 | psi_avg.load() 51 | 52 | return psi_avg 53 | 54 | 55 | @memory.cache 56 | def calc_aabw(expt): 57 | print("Calculating {} timeseries of AABW transport at 55S ".format(expt)) 58 | 59 | def op(p): 60 | summed_p = p.sum("grid_xt_ocean") 61 | # summed_p.attrs['units'] = p.units 62 | return summed_p 63 | 64 | psi = get_nc_variable( 65 | expt, 66 | "ocean.nc", 67 | "ty_trans_rho", 68 | # op=op, 69 | chunks={"potrho": None}, 70 | time_units="days since 1900-01-01", 71 | ) 72 | psi = psi.sum("grid_xt_ocean") 73 | 74 | varlist = get_variables(expt, "ocean.nc") 75 | if "ty_trans_rho_gm" in varlist: 76 | GM = True 77 | psiGM = get_nc_variable( 78 | expt, 79 | "ocean.nc", 80 | "ty_trans_rho_gm", 81 | # op=op, 82 | chunks={"potrho": None}, 83 | time_units="days since 1900-01-01", 84 | ) 85 | psiGM = psiGM.sum("grid_xt_ocean") 86 | else: 87 | GM = False 88 | 89 | # if psi.units == 'kg/s': 90 | # print('WARNING: Changing units for ', expt) 91 | # assume units of kg/s, convert to Sv. 92 | 93 | psi = psi * 1.0e-9 94 | if GM: 95 | psiGM = psiGM * 1.0e-9 96 | 97 | psi_sum = psi.cumsum("potrho") - psi.sum("potrho") 98 | if GM: 99 | psi_sum = psi_sum + psiGM 100 | 101 | psi_aabw = ( 102 | psi_sum.sel(method="Nearest", grid_yu_ocean=-40) 103 | .sel(potrho=slice(1036, None)) 104 | .min("potrho") 105 | .resample("3A", dim="time") 106 | ) 107 | psi_aabw = psi_aabw.compute() 108 | 109 | return psi_aabw 110 | 111 | 112 | @memory.cache 113 | def calc_amoc(expt): 114 | print("Calculating {} timeseries of AMOC transport at 26N ".format(expt)) 115 | 116 | def op(p): 117 | summed_p = p.sum("grid_xt_ocean") 118 | # summed_p.attrs['units'] = p.units 119 | return summed_p 120 | 121 | psi = get_nc_variable( 122 | expt, 123 | "ocean.nc", 124 | "ty_trans_rho", 125 | # op=op, 126 | chunks={"potrho": None}, 127 | time_units="days since 1900-01-01", 128 | ) 129 | psi = psi.sum("grid_xt_ocean") 130 | 131 | varlist = get_variables(expt, "ocean.nc") 132 | if "ty_trans_rho_gm" in varlist: 133 | GM = True 134 | psiGM = get_nc_variable( 135 | expt, 136 | "ocean.nc", 137 | "ty_trans_rho_gm", 138 | # op=op, 139 | chunks={"potrho": None}, 140 | time_units="days since 1900-01-01", 141 | ) 142 | psiGM = psiGM.sum("grid_xt_ocean") 143 | else: 144 | GM = False 145 | 146 | # if psi.units == 'kg/s': 147 | # print('WARNING: Changing units for ', expt) 148 | # assume units of kg/s, convert to Sv. 
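    # After converting to Sv, the transport is accumulated over density classes and
    # referenced to zero at the densest class; AMOC is then taken as the maximum of
    # that streamfunction at 26N over potrho > 1035.5, resampled to 3-yearly means.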
149 | 150 | psi = psi * 1.0e-9 151 | if GM: 152 | psiGM = psiGM * 1.0e-9 153 | 154 | psi_sum = psi.cumsum("potrho") - psi.sum("potrho") 155 | if GM: 156 | psi_sum = psi_sum + psiGM 157 | 158 | psi_amoc = ( 159 | psi_sum.sel(method="Nearest", grid_yu_ocean=26) 160 | .sel(potrho=slice(1035.5, None)) 161 | .max("potrho") 162 | .resample("3A", dim="time") 163 | ) 164 | psi_amoc = psi_amoc.compute() 165 | 166 | return psi_amoc 167 | 168 | 169 | @memory.cache 170 | def calc_amoc_south(expt): 171 | print("Calculating {} timeseries of AMOC transport at 35S ".format(expt)) 172 | 173 | def op(p): 174 | summed_p = p.sum("grid_xt_ocean") 175 | # summed_p.attrs['units'] = p.units 176 | return summed_p 177 | 178 | psi = get_nc_variable( 179 | expt, 180 | "ocean.nc", 181 | "ty_trans_rho", 182 | # op=op, 183 | chunks={"potrho": None}, 184 | time_units="days since 1900-01-01", 185 | ) 186 | psi = psi.sum("grid_xt_ocean") 187 | 188 | varlist = get_variables(expt, "ocean.nc") 189 | if "ty_trans_rho_gm" in varlist: 190 | GM = True 191 | psiGM = get_nc_variable( 192 | expt, 193 | "ocean.nc", 194 | "ty_trans_rho_gm", 195 | # op=op, 196 | chunks={"potrho": None}, 197 | time_units="days since 1900-01-01", 198 | ) 199 | psiGM = psiGM.sum("grid_xt_ocean") 200 | else: 201 | GM = False 202 | 203 | # if psi.units == 'kg/s': 204 | # print('WARNING: Changing units for ', expt) 205 | # assume units of kg/s, convert to Sv. 206 | 207 | psi = psi * 1.0e-9 208 | if GM: 209 | psiGM = psiGM * 1.0e-9 210 | 211 | psi_sum = psi.cumsum("potrho") - psi.sum("potrho") 212 | if GM: 213 | psi_sum = psi_sum + psiGM 214 | 215 | psi_amoc_south = ( 216 | psi_sum.sel(method="Nearest", grid_yu_ocean=-35) 217 | .sel(potrho=slice(1035.5, None)) 218 | .max("potrho") 219 | .resample("3A", dim="time") 220 | ) 221 | psi_amoc_south = psi_amoc_south.compute() 222 | 223 | return psi_amoc_south 224 | 225 | 226 | @memory.cache 227 | def zonal_mean(expt, variable, n=10, resolution=1): 228 | zonal_var = get_nc_variable( 229 | expt, 230 | "ocean.nc", 231 | variable, 232 | chunks={"st_ocean": None}, 233 | n=n, 234 | time_units="days since 1900-01-01", 235 | ) 236 | 237 | # Annual Average WOA13 long-term climatology. 
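    # Select the WOA13 climatology regridded to the model resolution (1, 0.25 or 0.1 degree).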
238 | if resolution == 1: 239 | zonal_WOA13 = ( 240 | get_nc_variable("woa13/10", "woa13_ts_??_mom10.nc", variable) 241 | .mean("GRID_X_T") 242 | .mean("time") 243 | ) 244 | elif resolution == 0.25: 245 | zonal_WOA13 = ( 246 | get_nc_variable("woa13/025", "woa13_ts_??_mom025.nc", variable) 247 | .mean("GRID_X_T") 248 | .mean("time") 249 | ) 250 | elif resolution == 0.1: 251 | zonal_WOA13 = ( 252 | get_nc_variable("woa13/01", "woa13_ts_??_mom01.nc", variable) 253 | .mean("GRID_X_T") 254 | .mean("time") 255 | ) 256 | else: 257 | print("WARNING: Sorry, we dont seem to recognise resolution ", resolution) 258 | 259 | zonal_WOA13.compute() 260 | if variable == "temp": 261 | zonal_WOA13 = zonal_WOA13 + 273.15 262 | 263 | zonal_mean = zonal_var.mean("xt_ocean").mean("time") 264 | zonal_mean.compute() 265 | zonal_diff = zonal_mean - zonal_WOA13.values 266 | 267 | return zonal_mean, zonal_diff 268 | -------------------------------------------------------------------------------- /cosima_cookbook/diagnostics/simple.py: -------------------------------------------------------------------------------- 1 | from ..querying import getvar, get_variables 2 | from ..memory import memory 3 | 4 | import logging 5 | 6 | 7 | @memory.cache 8 | def annual_scalar(expt, variables): 9 | """ """ 10 | 11 | logging.debug("Building dataset") 12 | darray = get_nc_variable( 13 | expt, 14 | "ocean_scalar.nc", 15 | variables, 16 | time_units="days since 1900-01-01", 17 | use_bag=True, 18 | ) 19 | 20 | logging.debug("Resampling in time") 21 | annual_average = darray.resample(time="A").mean("time") 22 | 23 | for v in annual_average.data_vars: 24 | avar = annual_average.variables[v] 25 | dvar = darray.variables[v] 26 | avar.attrs["long_name"] = dvar.attrs["long_name"] + " (annual average)" 27 | avar.attrs["units"] = dvar.attrs["units"] 28 | 29 | return annual_average 30 | 31 | 32 | @memory.cache 33 | def drake_passage(expt): 34 | "Calculate transport through Drake Passage" 35 | 36 | tx = get_nc_variable( 37 | expt, 38 | "ocean_month.nc", 39 | "tx_trans_int_z", 40 | chunks={"yt_ocean": 200}, 41 | time_units="days since 1900-01-01", 42 | use_bag=False, 43 | ) 44 | 45 | tx_trans = tx.sel(xu_ocean=-69, method="nearest").sel(yt_ocean=slice(-72, -52)) 46 | 47 | if tx_trans.units == "Sv (10^9 kg/s)": 48 | transport = tx_trans.sum("yt_ocean").resample(time="A").mean("time") 49 | else: 50 | # print('WARNING: Changing units for ', expt) 51 | transport = tx_trans.sum("yt_ocean").resample(time="A").mean("time") * 1.0e-9 52 | 53 | transport.load() 54 | 55 | return transport 56 | 57 | 58 | @memory.cache 59 | def bering_strait(expt): 60 | ty = get_nc_variable( 61 | expt, 62 | "ocean_month.nc", 63 | "ty_trans_int_z", 64 | chunks={"yu_ocean": 200}, 65 | time_units="days since 1900-01-01", 66 | ) 67 | ty_trans = ty.sel(yu_ocean=67, method="nearest").sel(xt_ocean=slice(-171, -167)) 68 | if ty_trans.units == "Sv (10^9 kg/s)": 69 | transport = ty_trans.sum("xt_ocean").resample(time="A").mean("time") 70 | else: 71 | # print('WARNING: Changing units for ', expt) 72 | transport = ty_trans.sum("xt_ocean").resample(time="A").mean("time") * 1.0e-9 73 | 74 | transport.load() 75 | 76 | return transport 77 | 78 | 79 | @memory.cache 80 | def sea_surface_temperature(expt, resolution=1): 81 | ## Load SST from expt 82 | varlist = get_variables(expt, "ocean_month.nc") 83 | if "surface_temp" in varlist: 84 | SST = get_nc_variable( 85 | expt, 86 | "ocean_month.nc", 87 | "surface_temp", 88 | n=10, 89 | time_units="days since 1900-01-01", 90 | ) 91 | else: 92 | SST = 
get_nc_variable( 93 | expt, "ocean.nc", "temp", n=10, time_units="days since 1900-01-01" 94 | ).isel(st_ocean=0) 95 | 96 | if SST.units == "degrees K": 97 | SST = SST - 273.15 98 | 99 | # Annual Average WOA13 long-term climatology. 100 | if resolution == 1: 101 | SST_WOA13 = get_nc_variable("woa13/10", "woa13_ts_??_mom10.nc", "temp").isel( 102 | ZT=0 103 | ) 104 | elif resolution == 0.25: 105 | SST_WOA13 = get_nc_variable("woa13/025", "woa13_ts_??_mom025.nc", "temp").isel( 106 | ZT=0 107 | ) 108 | elif resolution == 0.1: 109 | SST_WOA13 = get_nc_variable("woa13/01", "woa13_ts_??_mom01.nc", "temp").isel( 110 | ZT=0 111 | ) 112 | else: 113 | print("WARNING: Sorry, we dont seem to recognise resolution ", resolution) 114 | 115 | # Average 116 | SST = SST.mean("time") 117 | SSTdiff = SST - SST_WOA13.mean("time").values 118 | 119 | return SST, SSTdiff 120 | 121 | 122 | @memory.cache 123 | def sea_surface_salinity(expt, resolution=1): 124 | ## Load SSS from expt 125 | varlist = get_variables(expt, "ocean_month.nc") 126 | if "surface_salt" in varlist: 127 | SSS = get_nc_variable(expt, "ocean_month.nc", "surface_salt", n=10) 128 | else: 129 | SSS = get_nc_variable(expt, "ocean.nc", "salt", n=10).isel(st_ocean=0) 130 | 131 | # Annual Average WOA13 long-term climatology. 132 | if resolution == 1: 133 | SSS_WOA13 = get_nc_variable("woa13/10", "woa13_ts_??_mom10.nc", "salt").isel( 134 | ZT=0 135 | ) 136 | elif resolution == 0.25: 137 | SSS_WOA13 = get_nc_variable("woa13/025", "woa13_ts_??_mom025.nc", "salt").isel( 138 | ZT=0 139 | ) 140 | elif resolution == 0.1: 141 | SSS_WOA13 = get_nc_variable("woa13/01", "woa13_ts_??_mom01.nc", "salt").isel( 142 | ZT=0 143 | ) 144 | else: 145 | print("WARNING: Sorry, we dont seem to recognise resolution ", resolution) 146 | 147 | # Average over last 10 time slices - prefer to do this by year. 148 | SSS = SSS.mean("time") 149 | SSSdiff = SSS - SSS_WOA13.mean("time").values 150 | 151 | return SSS, SSSdiff 152 | 153 | 154 | @memory.cache 155 | def mixed_layer_depth(expt): 156 | ## Load MLD from expt 157 | varlist = get_variables(expt, "ocean_month.nc") 158 | if "mld" in varlist: 159 | MLD = get_nc_variable(expt, "ocean_month.nc", "mld", n=10) 160 | 161 | # Average over last 10 time slices - prefer to do this by year. 
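    # NB: if "mld" is not in varlist above, MLD is never assigned and the
    # time mean below will raise a NameError.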
162 | MLD = MLD.mean("time") 163 | 164 | return MLD 165 | -------------------------------------------------------------------------------- /cosima_cookbook/distributed.py: -------------------------------------------------------------------------------- 1 | import os, socket, getpass 2 | from distributed import Client, LocalCluster 3 | 4 | from itertools import product 5 | import numpy as np 6 | import xarray as xr 7 | 8 | from tqdm import tqdm_notebook 9 | 10 | 11 | def start_cluster(diagnostics_port=0): 12 | "Set up a LocalCluster for distributed" 13 | 14 | hostname = socket.gethostname() 15 | n_workers = os.cpu_count() // 2 16 | cluster = LocalCluster( 17 | ip="localhost", 18 | n_workers=n_workers, 19 | diagnostics_port=diagnostics_port, 20 | memory_limit=6e9, 21 | ) 22 | client = Client(cluster) 23 | 24 | params = { 25 | "bokeh_port": cluster.scheduler.services["bokeh"].port, 26 | "user": getpass.getuser(), 27 | "scheduler_ip": cluster.scheduler.ip, 28 | "hostname": hostname, 29 | } 30 | 31 | print( 32 | "If the link to the dashboard below doesn't work, run this command on a local terminal to set up a SSH tunnel:" 33 | ) 34 | print() 35 | print( 36 | " ssh -N -L {bokeh_port}:{scheduler_ip}:{bokeh_port} {hostname}.nci.org.au -l {user}".format( 37 | **params 38 | ) 39 | ) 40 | 41 | return client 42 | 43 | 44 | def compute_by_block(dsx): 45 | """ """ 46 | 47 | # determine index key for each chunk 48 | slices = [] 49 | for chunks in dsx.chunks: 50 | L = [ 51 | 0, 52 | ] + list(np.cumsum(chunks)) 53 | slices.append([slice(a, b) for a, b in (zip(L[:-1], L[1:]))]) 54 | indexes = list(product(*slices)) 55 | 56 | # allocate memory to receive result 57 | if isinstance(dsx, xr.DataArray): 58 | result = xr.zeros_like(dsx).load() 59 | else: 60 | result = np.zeros(dsx.shape) 61 | 62 | # evaluate each chunk one at a time 63 | for index in tqdm_notebook(indexes, leave=False): 64 | block = dsx.__getitem__(index).compute() 65 | result.__setitem__(index, block) 66 | 67 | return result 68 | -------------------------------------------------------------------------------- /cosima_cookbook/memory.py: -------------------------------------------------------------------------------- 1 | """ 2 | Caching 3 | 4 | The memory object lives in this module. 5 | Other components of the cookbook access by 6 | 7 | from ..memory import memory 8 | """ 9 | 10 | from joblib import Memory 11 | 12 | import os, getpass, tempfile 13 | 14 | username = getpass.getuser() 15 | 16 | 17 | # pick up cachedir from an environment variable? 18 | # Append username to prevent clashes with others users 19 | cachedir = os.path.join(tempfile.gettempdir(), username) 20 | memory = Memory(cachedir=cachedir, verbose=0) 21 | -------------------------------------------------------------------------------- /cosima_cookbook/netcdf_utils.py: -------------------------------------------------------------------------------- 1 | def find_record_dimension(d): 2 | """Find the record dimension (i.e. 
time) in a netCDF4 Dataset.""" 3 | 4 | for dim in d.dimensions: 5 | if d.dimensions[dim].isunlimited(): 6 | return dim 7 | 8 | return None 9 | 10 | 11 | def find_dimension_with_attribute(d, attribute, value): 12 | """Find a matching dimension with attribute=value, or None.""" 13 | 14 | for dim in d.dimensions: 15 | if dim not in d.variables: 16 | continue 17 | 18 | if getattr(d.variables[dim], attribute, None) == value: 19 | return dim 20 | 21 | return None 22 | 23 | 24 | def find_time_dimension(d): 25 | """Find a time dimension in a netCDF4 Dataset.""" 26 | 27 | # this is a bit heuristic, but we cascade through some checks, guided by 28 | # the CF conventions 29 | 30 | dim = find_dimension_with_attribute(d, "standard_name", "time") 31 | if dim is not None: 32 | return dim 33 | 34 | dim = find_dimension_with_attribute(d, "axis", "T") 35 | if dim is not None: 36 | return dim 37 | 38 | dim = find_record_dimension(d) 39 | if dim is not None: 40 | return dim 41 | 42 | for dim in d.dimensions: 43 | if dim.lower() == "time": 44 | return dim 45 | 46 | # CF conventions also suggests the units attribute, 47 | # but time_bounds may have the same units, and a false positive 48 | # here could be very confusing... 49 | return None 50 | -------------------------------------------------------------------------------- /cosima_cookbook/plots/__init__.py: -------------------------------------------------------------------------------- 1 | from .lineplots import * 2 | from .overturning import * 3 | from .maps import sea_surface_temperature, sea_surface_salinity, mixed_layer_depth 4 | 5 | # __all__ = ['wind_stress'] 6 | -------------------------------------------------------------------------------- /cosima_cookbook/plots/lineplots.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import cosima_cookbook as cc 3 | from tqdm import tqdm_notebook 4 | import IPython.display 5 | 6 | 7 | def wind_stress(expts=[]): 8 | """ 9 | Plot zonally averaged wind stress. 10 | 11 | Parameters 12 | ---------- 13 | expts : str or list of str 14 | Experiment name(s). 15 | """ 16 | 17 | if not isinstance(expts, list): 18 | expts = [expts] 19 | 20 | # computing 21 | results = [] 22 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"): 23 | result = {"mean_tau_x": cc.diagnostics.mean_tau_x(expt), "expt": expt} 24 | results.append(result) 25 | 26 | IPython.display.clear_output() 27 | 28 | plt.figure(figsize=(12, 6)) 29 | 30 | # plotting 31 | for result in results: 32 | mean_tau_x = result["mean_tau_x"] 33 | expt = result["expt"] 34 | plt.plot(mean_tau_x, mean_tau_x.yu_ocean, linewidth=2, label=expt) 35 | plt.ylim([-70, 65]) 36 | plt.xlim([-0.08, 0.20]) 37 | plt.ylabel("Latitude ($^\circ$N)") 38 | plt.xlabel("Stress (N m$^{-2}$)") 39 | plt.legend(fontsize=10, loc="best") 40 | 41 | 42 | def annual_scalar(expts=[], variables=[]): 43 | """ 44 | Calculate and plot annual average of variable(s) for experiment(s). 45 | 46 | Parameters 47 | ---------- 48 | expts : str or list of str 49 | Experiment name(s). 50 | variable : str or list of str 51 | Variable name(s). 
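
    Each variable is plotted in its own figure, with one line per experiment,
    using annual averages from cc.diagnostics.annual_scalar.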
52 | """ 53 | 54 | if not isinstance(expts, list): 55 | expts = [expts] 56 | 57 | if not isinstance(variables, list): 58 | variables = [variables] 59 | 60 | # computing 61 | results = [] 62 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"): 63 | annual_average = cc.diagnostics.annual_scalar(expt, variables) 64 | 65 | result = {"annual_average": annual_average, "expt": expt} 66 | results.append(result) 67 | 68 | IPython.display.clear_output() 69 | 70 | # plotting each variable in a separate plot 71 | for variable in variables: 72 | plt.figure(figsize=(12, 6)) 73 | 74 | for result in results: 75 | annual_average = result["annual_average"] 76 | expt = result["expt"] 77 | 78 | annual_average[variable].plot(label=expt) 79 | 80 | plt.title(annual_average[variable].long_name) 81 | plt.legend(fontsize=10, bbox_to_anchor=(1, 1), loc="best", borderaxespad=0.0) 82 | 83 | plt.xlabel("Time") 84 | 85 | 86 | def drake_passage(expts=[]): 87 | """ 88 | Plot Drake Passage transport. 89 | 90 | Parameters 91 | ---------- 92 | expts : str or list of str 93 | Experiment name(s). 94 | """ 95 | 96 | plt.figure(figsize=(12, 6)) 97 | 98 | if not isinstance(expts, list): 99 | expts = [expts] 100 | 101 | # computing 102 | results = [] 103 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"): 104 | transport = cc.diagnostics.drake_passage(expt) 105 | 106 | result = {"transport": transport, "expt": expt} 107 | results.append(result) 108 | 109 | IPython.display.clear_output() 110 | 111 | # plotting 112 | for result in results: 113 | transport = result["transport"] 114 | expt = result["expt"] 115 | transport.plot(label=expt) 116 | 117 | plt.title("Drake Passage Transport") 118 | plt.xlabel("Time") 119 | plt.ylabel("Transport (Sv)") 120 | plt.legend(fontsize=10, loc="best") 121 | 122 | 123 | def bering_strait(expts=[]): 124 | """ 125 | Plot Bering Strait transport. 126 | 127 | Parameters 128 | ---------- 129 | expts : str or list of str 130 | Experiment name(s). 131 | """ 132 | 133 | plt.figure(figsize=(12, 6)) 134 | 135 | if not isinstance(expts, list): 136 | expts = [expts] 137 | 138 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"): 139 | transport = cc.diagnostics.bering_strait(expt) 140 | transport.plot(label=expt) 141 | 142 | IPython.display.clear_output() 143 | 144 | plt.title("Bering Strait Transport") 145 | plt.xlabel("Time") 146 | plt.ylabel("Transport (Sv)") 147 | plt.legend(fontsize=10, loc="best") 148 | 149 | 150 | def aabw(expts=[]): 151 | """ 152 | Plot timeseries of AABW transport measured at 55S. 153 | 154 | Parameters 155 | ---------- 156 | expts : str or list of str 157 | Experiment name(s). 158 | """ 159 | 160 | plt.figure(figsize=(12, 6)) 161 | 162 | if not isinstance(expts, list): 163 | expts = [expts] 164 | 165 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"): 166 | psi_aabw = cc.diagnostics.calc_aabw(expt) 167 | psi_aabw.plot(label=expt) 168 | 169 | IPython.display.clear_output() 170 | 171 | plt.title("AABW Transport at 40S") 172 | plt.xlabel("Time") 173 | plt.ylabel("Transport (Sv)") 174 | plt.legend(fontsize=10, loc="best") 175 | 176 | 177 | def amoc(expts=[]): 178 | """ 179 | Plot timeseries of AMOC transport measured at 26N. 180 | 181 | Parameters 182 | ---------- 183 | expts : str or list of str 184 | Experiment name(s). 
185 | """ 186 | 187 | plt.figure(figsize=(12, 6)) 188 | 189 | if not isinstance(expts, list): 190 | expts = [expts] 191 | 192 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"): 193 | psi_amoc = cc.diagnostics.calc_amoc(expt) 194 | psi_amoc.plot(label=expt) 195 | 196 | IPython.display.clear_output() 197 | 198 | plt.title("AMOC Transport at 26N") 199 | plt.xlabel("Time") 200 | plt.ylabel("Transport (Sv)") 201 | plt.legend(fontsize=10, loc="best") 202 | 203 | 204 | def amoc_south(expts=[]): 205 | """ 206 | Plot timeseries of AMOC transport measured at 35S. 207 | 208 | Parameters 209 | ---------- 210 | expts : str or list of str 211 | Experiment name(s). 212 | """ 213 | 214 | plt.figure(figsize=(12, 6)) 215 | 216 | if not isinstance(expts, list): 217 | expts = [expts] 218 | 219 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"): 220 | psi_amoc_south = cc.diagnostics.calc_amoc_south(expt) 221 | psi_amoc_south.plot(label=expt) 222 | 223 | IPython.display.clear_output() 224 | 225 | plt.title("AMOC Transport at 35S") 226 | plt.xlabel("Time") 227 | plt.ylabel("Transport (Sv)") 228 | plt.legend(fontsize=10, loc="best") 229 | -------------------------------------------------------------------------------- /cosima_cookbook/plots/maps.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import cosima_cookbook as cc 3 | from tqdm import tqdm_notebook 4 | 5 | import IPython.display 6 | 7 | 8 | def sea_surface_temperature(expts=[], resolution=1): 9 | """ 10 | Plot a map of SST from last decade of run. 11 | """ 12 | 13 | if not isinstance(expts, list): 14 | expts = [expts] 15 | 16 | # computing 17 | results = [] 18 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"): 19 | SST, SSTdiff = cc.diagnostics.sea_surface_temperature(expt, resolution) 20 | 21 | result = {"SST": SST, "SSTdiff": SSTdiff, "expt": expt} 22 | results.append(result) 23 | 24 | IPython.display.clear_output() 25 | 26 | # plotting 27 | for result in results: 28 | SST = result["SST"] 29 | SSTdiff = result["SSTdiff"] 30 | expt = result["expt"] 31 | 32 | plt.figure(figsize=(12, 4)) 33 | plt.subplot(121) 34 | SST.plot() 35 | plt.title(expt) 36 | plt.subplot(122) 37 | SSTdiff.plot(robust=True) 38 | plt.title(expt) 39 | 40 | 41 | def sea_surface_salinity(expts=[], resolution=1): 42 | """ 43 | Plot a map of SSS from last decade of run. 44 | """ 45 | 46 | if not isinstance(expts, list): 47 | expts = [expts] 48 | 49 | # computing 50 | results = [] 51 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"): 52 | SSS, SSSdiff = cc.diagnostics.sea_surface_salinity(expt, resolution) 53 | 54 | result = {"SSS": SSS, "SSSdiff": SSSdiff, "expt": expt} 55 | results.append(result) 56 | 57 | IPython.display.clear_output() 58 | 59 | # plotting 60 | for result in results: 61 | SSS = result["SSS"] 62 | SSSdiff = result["SSSdiff"] 63 | expt = result["expt"] 64 | 65 | plt.figure(figsize=(12, 4)) 66 | plt.subplot(121) 67 | SSS.plot() 68 | plt.title(expt) 69 | plt.subplot(122) 70 | SSSdiff.plot(robust=True) 71 | plt.title(expt) 72 | 73 | 74 | def mixed_layer_depth(expts=[]): 75 | """ 76 | Plot a map of MLD from last decade of run. 
77 | """ 78 | 79 | if not isinstance(expts, list): 80 | expts = [expts] 81 | 82 | # computing 83 | results = [] 84 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"): 85 | MLD = cc.diagnostics.mixed_layer_depth(expt) 86 | 87 | result = {"MLD": MLD, "expt": expt} 88 | results.append(result) 89 | 90 | IPython.display.clear_output() 91 | 92 | # plotting 93 | for result in results: 94 | MLD = result["MLD"] 95 | expt = result["expt"] 96 | 97 | plt.figure(figsize=(6, 4)) 98 | MLD.plot() 99 | plt.title(expt) 100 | -------------------------------------------------------------------------------- /cosima_cookbook/plots/overturning.py: -------------------------------------------------------------------------------- 1 | import cosima_cookbook as cc 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | from tqdm import tqdm_notebook 5 | 6 | import IPython.display 7 | 8 | 9 | def psi_avg(expts, n=10, clev=np.arange(-20, 20, 2)): 10 | if not isinstance(expts, list): 11 | expts = [expts] 12 | 13 | # computing 14 | results = [] 15 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"): 16 | psi_avg = cc.diagnostics.psi_avg(expt, n) 17 | 18 | result = {"psi_avg": psi_avg, "expt": expt} 19 | results.append(result) 20 | 21 | IPython.display.clear_output() 22 | 23 | # plotting 24 | for result in results: 25 | psi_avg = result["psi_avg"] 26 | expt = result["expt"] 27 | 28 | plt.figure(figsize=(10, 5)) 29 | plt.contourf( 30 | psi_avg.grid_yu_ocean, 31 | psi_avg.potrho, 32 | psi_avg, 33 | cmap=plt.cm.PiYG, 34 | levels=clev, 35 | extend="both", 36 | ) 37 | cb = plt.colorbar(orientation="vertical", shrink=0.7) 38 | 39 | cb.ax.set_xlabel("Sv") 40 | plt.contour( 41 | psi_avg.grid_yu_ocean, 42 | psi_avg.potrho, 43 | psi_avg, 44 | levels=clev, 45 | colors="k", 46 | linewidths=0.25, 47 | ) 48 | plt.contour( 49 | psi_avg.grid_yu_ocean, 50 | psi_avg.potrho, 51 | psi_avg, 52 | levels=[ 53 | 0.0, 54 | ], 55 | colors="k", 56 | linewidths=0.5, 57 | ) 58 | plt.gca().invert_yaxis() 59 | 60 | plt.ylim((1037.5, 1034)) 61 | plt.ylabel("Potential Density (kg m$^{-3}$)") 62 | plt.xlabel("Latitude ($^\circ$N)") 63 | plt.xlim([-75, 85]) 64 | plt.title("Overturning in %s" % expt) 65 | 66 | 67 | def zonal_mean(expts, variable, n=10, resolution=1): 68 | if not isinstance(expts, list): 69 | expts = [expts] 70 | 71 | # computing 72 | results = [] 73 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"): 74 | zonal_mean, zonal_diff = cc.diagnostics.zonal_mean( 75 | expt, variable, n, resolution 76 | ) 77 | 78 | result = {"zonal_mean": zonal_mean, "zonal_diff": zonal_diff, "expt": expt} 79 | results.append(result) 80 | 81 | IPython.display.clear_output() 82 | 83 | # plotting 84 | for result in results: 85 | zonal_mean = result["zonal_mean"] 86 | zonal_diff = result["zonal_diff"] 87 | expt = result["expt"] 88 | 89 | plt.figure(figsize=(12, 5)) 90 | plt.subplot(121) 91 | zonal_mean.plot() 92 | plt.title(expt) 93 | plt.gca().invert_yaxis() 94 | plt.title("{}: Zonal Mean {}".format(expt, variable)) 95 | plt.subplot(122) 96 | zonal_diff.plot() 97 | plt.title(expt) 98 | plt.gca().invert_yaxis() 99 | plt.title("{}: Zonal Mean {} Change".format(expt, variable)) 100 | -------------------------------------------------------------------------------- /cosima_cookbook/plots/scalar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/cosima_cookbook/plots/scalar.py 
-------------------------------------------------------------------------------- /cosima_cookbook/querying.py: -------------------------------------------------------------------------------- 1 | """querying.py 2 | 3 | Functions for data discovery. 4 | 5 | """ 6 | 7 | import logging 8 | import os.path 9 | import pandas as pd 10 | from sqlalchemy import func, distinct, or_ 11 | from sqlalchemy.orm import aliased 12 | from sqlalchemy.sql.selectable import subquery 13 | import warnings 14 | import xarray as xr 15 | 16 | from . import database 17 | from .database import NCExperiment, NCFile, CFVariable, NCVar, Keyword 18 | from .database import NCAttribute, NCAttributeString 19 | 20 | 21 | class VariableNotFoundError(Exception): 22 | pass 23 | 24 | 25 | class QueryWarning(UserWarning): 26 | pass 27 | 28 | 29 | # By default all ambiguous queries will raise an exception 30 | warnings.simplefilter("error", category=QueryWarning, lineno=0, append=False) 31 | 32 | 33 | def get_experiments( 34 | session, 35 | experiment=True, 36 | keywords=None, 37 | variables=None, 38 | all=False, 39 | exptname=None, 40 | **kwargs, 41 | ): 42 | """ 43 | Returns a DataFrame of all experiments and the number of netCDF4 files contained 44 | within each experiment. 45 | 46 | Optionally one or more keywords can be specified, and only experiments with all the 47 | specified keywords will be return. The keyword strings can utilise SQL wildcard 48 | characters, "%" and "_", to match multiple keywords. 49 | 50 | Optionally variables can also be specified, and only experiments containing all those 51 | variables will be returned. 52 | 53 | All metadata fields will be returned if all=True, or individual metadata fields 54 | can be selected by passing field=True, where available fields are: 55 | contact, email, created, description, notes, url and root_dir 56 | """ 57 | 58 | # Determine which attributes to return. Special case experiment 59 | # as this is the only one that defaults to True 60 | columns = [] 61 | if experiment: 62 | columns.append(NCExperiment.experiment) 63 | 64 | for f in NCExperiment.metadata_keys + ["root_dir"]: 65 | # Explicitly don't support returning keyword metadata 66 | if f == "keywords": 67 | continue 68 | if kwargs.get(f, all): 69 | columns.append(getattr(NCExperiment, f)) 70 | 71 | q = ( 72 | session.query(*columns, func.count(NCFile.experiment_id).label("ncfiles")) 73 | .join(NCFile.experiment) 74 | .group_by(NCFile.experiment_id) 75 | ) 76 | 77 | if keywords is not None: 78 | if isinstance(keywords, str): 79 | keywords = [keywords] 80 | q = q.filter(*(NCExperiment.keywords.like(k) for k in keywords)) 81 | 82 | if variables is not None: 83 | if isinstance(variables, str): 84 | variables = [variables] 85 | 86 | expt_query = ( 87 | session.query(NCExperiment.id) 88 | .join(NCFile.experiment) 89 | .join(NCFile.ncvars) 90 | .join(NCVar.variable) 91 | .group_by(NCExperiment.experiment) 92 | .having(func.count(distinct(CFVariable.name)) == len(variables)) 93 | .filter(CFVariable.name.in_(variables)) 94 | ) 95 | 96 | q = q.filter(NCExperiment.id.in_(expt_query)) 97 | 98 | if exptname is not None: 99 | q = q.filter(NCExperiment.experiment == exptname) 100 | 101 | return pd.DataFrame(q, columns=[c["name"] for c in q.column_descriptions]) 102 | 103 | 104 | def get_ncfiles(session, experiment): 105 | """ 106 | Returns a DataFrame of all netcdf files for a given experiment. 
107 | """ 108 | 109 | q = ( 110 | session.query(NCFile.ncfile, NCFile.index_time) 111 | .join(NCFile.experiment) 112 | .filter(NCExperiment.experiment == experiment) 113 | .order_by(NCFile.ncfile) 114 | ) 115 | 116 | return pd.DataFrame(q, columns=[c["name"] for c in q.column_descriptions]) 117 | 118 | 119 | def get_keywords(session, experiment=None): 120 | """ 121 | Returns a set of all keywords, and optionally only for a given experiment 122 | """ 123 | 124 | if experiment is not None: 125 | q = session.query(NCExperiment).filter(NCExperiment.experiment == experiment) 126 | return q.scalar().keywords 127 | else: 128 | q = session.query(Keyword) 129 | return {r.keyword for r in q} 130 | 131 | 132 | def get_variables( 133 | session, 134 | experiment=None, 135 | frequency=None, 136 | cellmethods=None, 137 | inferred=False, 138 | search=None, 139 | ): 140 | """ 141 | Returns a DataFrame of variables for a given experiment if experiment 142 | name is specified, and optionally a given diagnostic frequency. 143 | If inferred is True and some experiment specific properties inferred from other 144 | fields are also returned: coordinate, model and restart. 145 | - coordinate: True if coordinate, False otherwise 146 | - model: model from which variable output, possible values are ocean, 147 | atmosphere, land, ice, or none if can't be identified 148 | - restart: True if variable from a restart file, False otherwise 149 | If experiment is not specified all variables for all experiments are returned, 150 | without experiment specific data. 151 | Specifying an array of search strings will limit variables returned to any 152 | containing any of the search terms in variable name, long name, or standard name. 153 | """ 154 | 155 | # Default columns 156 | columns = [ 157 | CFVariable.name, 158 | CFVariable.long_name, 159 | CFVariable.units, 160 | ] 161 | 162 | if experiment: 163 | # Create aliases so as to able to join to the NCAttribute table 164 | # twice, for the name and value 165 | ncas1 = aliased(NCAttributeString) 166 | ncas2 = aliased(NCAttributeString) 167 | subq = ( 168 | session.query( 169 | NCAttribute.ncvar_id.label("ncvar_id"), 170 | ncas2.value.label("value"), 171 | ) 172 | .join(ncas1, NCAttribute.name_id == ncas1.id) 173 | .join(ncas2, NCAttribute.value_id == ncas2.id) 174 | .filter(ncas1.value == "cell_methods") 175 | ).subquery(name="attrs") 176 | 177 | columns.extend( 178 | [ 179 | NCFile.frequency, 180 | NCFile.ncfile, 181 | subq.c.value.label("cell_methods"), 182 | func.count(NCFile.ncfile).label("# ncfiles"), 183 | func.min(NCFile.time_start).label("time_start"), 184 | func.max(NCFile.time_end).label("time_end"), 185 | ] 186 | ) 187 | 188 | if inferred: 189 | # Return inferred information 190 | columns.extend( 191 | [ 192 | CFVariable.is_coordinate.label("coordinate"), 193 | NCFile.model, 194 | NCFile.is_restart.label("restart"), 195 | ] 196 | ) 197 | 198 | # Base query 199 | q = ( 200 | session.query(*columns) 201 | .join(NCFile.experiment) 202 | .join(NCFile.ncvars) 203 | .join(NCVar.variable) 204 | ) 205 | 206 | if experiment is not None: 207 | # Join against the NCAttribute table above. 
Outer join ensures 208 | # variables without cell_methods attribute still appear with NULL 209 | q = q.outerjoin(subq, subq.c.ncvar_id == NCVar.id) 210 | 211 | q = q.order_by(NCFile.frequency, CFVariable.name, NCFile.time_start, NCFile.ncfile) 212 | q = q.group_by(CFVariable, NCFile.frequency) 213 | 214 | if experiment is not None: 215 | q = q.group_by(subq.c.value) 216 | q = q.filter(NCExperiment.experiment == experiment) 217 | 218 | # Filtering on frequency only makes sense if experiment is specified 219 | if frequency is not None: 220 | q = q.filter(NCFile.frequency == frequency) 221 | 222 | # Filtering on cell methods only makes sense if experiment is specified 223 | if cellmethods is not None: 224 | q = q.filter(subq.c.value == cellmethods) 225 | 226 | if search is not None: 227 | # Filter based on search term appearing in name, long_name or standard_name 228 | if isinstance(search, str): 229 | search = [ 230 | search, 231 | ] 232 | q = q.filter( 233 | or_( 234 | column.contains(word) 235 | for word in search 236 | for column in ( 237 | CFVariable.name, 238 | CFVariable.long_name, 239 | CFVariable.standard_name, 240 | ) 241 | ) 242 | ) 243 | 244 | default_dtypes = { 245 | "# ncfiles": "int64", 246 | "coordinate": "boolean", 247 | "model": "category", 248 | "restart": "boolean", 249 | } 250 | 251 | df = pd.DataFrame(q, columns=[c["name"] for c in q.column_descriptions]) 252 | 253 | return df.astype({k: v for k, v in default_dtypes.items() if k in df.columns}) 254 | 255 | 256 | def get_frequencies(session, experiment=None): 257 | """ 258 | Returns a DataFrame with all diagnostics frequencies and optionally 259 | for a given experiment. 260 | """ 261 | 262 | if experiment is None: 263 | q = session.query(NCFile.frequency).group_by(NCFile.frequency) 264 | else: 265 | q = ( 266 | session.query(NCFile.frequency) 267 | .join(NCFile.experiment) 268 | .filter(NCExperiment.experiment == experiment) 269 | .group_by(NCFile.frequency) 270 | ) 271 | 272 | return pd.DataFrame(q, columns=[c["name"] for c in q.column_descriptions]) 273 | 274 | 275 | def getvar( 276 | expt, 277 | variable, 278 | session, 279 | ncfile=None, 280 | start_time=None, 281 | end_time=None, 282 | n=None, 283 | frequency=None, 284 | attrs=None, 285 | attrs_unique=None, 286 | return_dataset=False, 287 | **kwargs, 288 | ): 289 | """For a given experiment, return an xarray DataArray containing the 290 | specified variable. 291 | 292 | expt - text string indicating the name of the experiment 293 | variable - text string indicating the name of the variable to load 294 | session - a database session created by cc.database.create_session() 295 | ncfile - an optional text string indicating the pattern for filenames 296 | to load. All filenames containing this string will match, so 297 | be specific. '/' can be used to match the start of the 298 | filename, and '%' is a wildcard character. 299 | start_time - only load data after this date. specify as a text string, 300 | e.g. '1900-01-01' 301 | end_time - only load data before this date. specify as a text string, 302 | e.g. '1900-01-01' 303 | n - after all other queries, restrict the total number of files to the 304 | first n. 
pass a negative value to restrict to the last n 305 | frequency - specify frequency to disambiguate identical variables saved 306 | at different temporal resolution 307 | attrs - a dictionary of attribute names and their values that must be 308 | present on the returned variables 309 | attrs_unique - a dictionary of attribute names and their values that 310 | must be unique on the returned variables. Defaults to 311 | {'cell_methods': 'time: mean'} and should not generally be 312 | changed. 313 | return_dataset - if True, return xarray.Dataset, containing the 314 | requested variable, along with its time_bounds, 315 | if present. Otherwise (default), return 316 | xarray.DataArray containing only the variable 317 | 318 | Note that if start_time and/or end_time are used, the time range 319 | of the resulting dataset may not be bounded exactly on those 320 | values, depending on where the underlying files start/end. Use 321 | dataset.sel() to exactly select times from the dataset. 322 | 323 | Other kwargs are passed through to xarray.open_mfdataset, including: 324 | 325 | chunks - Override any chunking by passing a chunks dictionary. 326 | decode_times - Time decoding can be disabled by passing decode_times=False 327 | 328 | """ 329 | 330 | if attrs_unique is None: 331 | attrs_unique = {"cell_methods": "time: mean"} 332 | 333 | ncfiles = _ncfiles_for_variable( 334 | expt, 335 | variable, 336 | session, 337 | ncfile, 338 | start_time, 339 | end_time, 340 | n, 341 | frequency, 342 | attrs, 343 | attrs_unique, 344 | ) 345 | 346 | variables = [variable] 347 | if return_dataset: 348 | # we know at least one variable was returned, so we can index ncfiles 349 | # ask for the extra variables associated with cell_methods, etc. 350 | variables += _bounds_vars_for_variable(*ncfiles[0]) 351 | 352 | # chunking -- use first row/file and assume it's the same across the whole dataset 353 | xr_kwargs = {"chunks": _parse_chunks(ncfiles[0].NCVar)} 354 | xr_kwargs.update(kwargs) 355 | 356 | def _preprocess(d): 357 | if variable in d.coords: 358 | # just return coordinate data 359 | return d 360 | 361 | # otherwise, figure out if we need any ancilliary data 362 | # like time_bounds 363 | return d[variables] 364 | 365 | ncfiles = list(str(f.NCFile.ncfile_path) for f in ncfiles) 366 | 367 | ds = xr.open_mfdataset( 368 | ncfiles, 369 | parallel=True, 370 | combine="by_coords", 371 | preprocess=_preprocess, 372 | **xr_kwargs, 373 | ) 374 | 375 | if return_dataset: 376 | da = ds 377 | else: 378 | # if we want a dataarray, we'll strip off the extra info 379 | da = ds[variable] 380 | 381 | # Check the chunks given were actually in the data 382 | chunks = xr_kwargs.get("chunks", None) 383 | if chunks is not None: 384 | missing_chunk_dims = set(chunks.keys()) - set(da.dims) 385 | if len(missing_chunk_dims) > 0: 386 | logging.warning( 387 | f"chunking along dimensions {missing_chunk_dims} is not possible. 
Available dimensions for chunking are {set(da.dims)}" 388 | ) 389 | 390 | da.attrs["ncfiles"] = ncfiles 391 | 392 | # Get experiment metadata, delete extraneous fields and add 393 | # to attributes 394 | metadata = get_experiments( 395 | session, experiment=False, exptname=expt, all=True 396 | ).to_dict(orient="records")[0] 397 | 398 | metadata = { 399 | k: v 400 | for k, v in metadata.items() 401 | if k not in ["ncfiles", "index", "root_dir"] 402 | and (v is not None and v != "None" and v != "") 403 | } 404 | 405 | da.attrs.update(metadata) 406 | 407 | return da 408 | 409 | 410 | def _bounds_vars_for_variable(ncfile, ncvar): 411 | """Return a list of names for a variable and its bounds""" 412 | 413 | variables = [] 414 | 415 | if "cell_methods" not in ncvar.attrs: 416 | # no cell methods, so no need to look for bounds 417 | return variables 418 | 419 | # [cell methods] is a string attribute comprising a list of 420 | # blank-separated words of the form "name: method" 421 | cell_methods = iter(ncvar.attrs["cell_methods"].split()) 422 | 423 | # for the moment, we're only looking for a time mean 424 | for dim, method in zip(cell_methods, cell_methods): 425 | if not (dim[:-1] == "time" and method == "mean"): 426 | continue 427 | 428 | bounds_var = ncfile.ncvars["time"].attrs.get("bounds") 429 | if bounds_var is not None: 430 | variables.append(bounds_var) 431 | 432 | return variables 433 | 434 | 435 | def _ncfiles_for_variable( 436 | expt, 437 | variable, 438 | session, 439 | ncfile=None, 440 | start_time=None, 441 | end_time=None, 442 | n=None, 443 | frequency=None, 444 | attrs=None, 445 | attrs_unique=None, 446 | ): 447 | """Return a list of (NCFile, NCVar) pairs corresponding to the 448 | database objects for a given variable. 449 | 450 | Optionally, pass ncfile, start_time, end_time, frequency, attrs, 451 | attrs_unique, or n for additional disambiguation (see getvar 452 | documentation for their semantics). 
453 | """ 454 | 455 | if attrs is None: 456 | attrs = {} 457 | 458 | if attrs_unique is None: 459 | attrs_unique = {} 460 | 461 | f, v = database.NCFile, database.NCVar 462 | q = ( 463 | session.query(f, v) 464 | .join(f.ncvars) 465 | .join(f.experiment) 466 | .filter(v.varname == variable) 467 | .filter(database.NCExperiment.experiment == expt) 468 | .filter(f.present) 469 | .order_by(f.time_start) 470 | ) 471 | 472 | # additional disambiguation 473 | if ncfile is not None: 474 | q = q.filter(f.ncfile.like("%" + ncfile)) 475 | if start_time is not None: 476 | q = q.filter(f.time_end >= start_time) 477 | if end_time is not None: 478 | q = q.filter(f.time_start <= end_time) 479 | if frequency is not None: 480 | q = q.filter(f.frequency == frequency) 481 | 482 | # Attributes that are required to be unique to ensure disambiguation 483 | for attr, val in attrs_unique.items(): 484 | # If default attribute present and not currently in filter 485 | # add to attributes filter 486 | if attr not in attrs: 487 | if q.filter(v.ncvar_attrs.any(name=attr, value=val)).first(): 488 | attrs.update({attr: val}) 489 | 490 | # requested specific attribute values 491 | for attr, val in attrs.items(): 492 | q = q.filter(v.ncvar_attrs.any(name=attr, value=val)) 493 | 494 | ncfiles = q.all() 495 | 496 | if n is not None: 497 | if n > 0: 498 | ncfiles = ncfiles[:n] 499 | else: 500 | ncfiles = ncfiles[n:] 501 | 502 | # ensure we actually got a result 503 | if not ncfiles: 504 | raise VariableNotFoundError( 505 | "No files were found containing '{}' in the '{}' experiment".format( 506 | variable, expt 507 | ) 508 | ) 509 | 510 | # check whether the results are unique 511 | for attr in attrs_unique: 512 | unique_attributes = set() 513 | for f in ncfiles: 514 | if attr in f.NCVar.attrs: 515 | unique_attributes.add(str(f.NCVar.attrs[attr])) 516 | else: 517 | unique_attributes.add(None) 518 | if len(unique_attributes) > 1: 519 | warnings.warn( 520 | f"Your query returns variables from files with different {attr}: {unique_attributes}. " 521 | "This could lead to unexpected behaviour! Disambiguate by passing " 522 | f"attrs={{'{attr}':''}} to getvar, specifying the desired attribute value.", 523 | QueryWarning, 524 | ) 525 | 526 | unique_freqs = set(f.NCFile.frequency for f in ncfiles) 527 | if len(unique_freqs) > 1: 528 | warnings.warn( 529 | f"Your query returns files with differing frequencies: {unique_freqs}. " 530 | "This could lead to unexpected behaviour! 
Disambiguate by passing " 531 | "frequency= to getvar, specifying the desired frequency.", 532 | QueryWarning, 533 | ) 534 | 535 | return ncfiles 536 | 537 | 538 | def _parse_chunks(ncvar): 539 | """Parse an NCVar, returning a dictionary mapping dimensions to chunking along that dimension.""" 540 | 541 | try: 542 | # this should give either a list, or 'None' (other values will raise an exception) 543 | var_chunks = eval(ncvar.chunking) 544 | if var_chunks is not None: 545 | return dict(zip(eval(ncvar.dimensions), var_chunks)) 546 | 547 | return None 548 | 549 | except NameError: 550 | # chunking could be 'contiguous', which doesn't evaluate 551 | return None 552 | -------------------------------------------------------------------------------- /cosima_cookbook/summary/__init__.py: -------------------------------------------------------------------------------- 1 | from .nml_diff import * 2 | from .nml_summary import * 3 | 4 | # __all__ = [] 5 | -------------------------------------------------------------------------------- /cosima_cookbook/summary/nml_diff.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Tools to read a set of namelist files and find their superset and difference. 4 | # The functions are general-purpose (i.e. no ACCESS-OM2-related assumptions). 5 | # Andrew Kiss https://github.com/aekiss 6 | 7 | 8 | import f90nml # from http://f90nml.readthedocs.io/en/latest/ 9 | import os 10 | 11 | 12 | def nmldict(nmlfnames): 13 | """Return dict of the groups/group members of multiple 14 | FORTRAN namelist files. 15 | 16 | Input: tuple of any number of namelist file path strings 17 | (non-existent files are silently ignored) 18 | Output: dict with key:value pairs where 19 | key is filename path string 20 | value is complete Namelist from filename 21 | """ 22 | nmlfnames = set(nmlfnames) # remove any duplicates from nmlfnames 23 | 24 | nmlall = {} # dict keys are nml paths, values are Namelist dicts 25 | for nml in nmlfnames: 26 | if os.path.exists(nml): 27 | nmlall[nml] = f90nml.read(nml) 28 | return nmlall 29 | 30 | 31 | def superset(nmlall): 32 | """Return dict of groups/group members present in any of the input Namelists. 33 | 34 | Input: dict with key:value pairs where 35 | key is arbitrary (typically a filename string) 36 | value is Namelist (typically from filename) 37 | Output: dict with key:value pairs where 38 | key is group name (including all groups present in any input Namelist) 39 | value is Namelist for group (with nothing common to all other files) 40 | """ 41 | nmlsuperset = {} 42 | for nml in nmlall: 43 | nmlsuperset.update(nmlall[nml]) 44 | # nmlsuperset now contains all groups that were in any nml 45 | for group in nmlsuperset: 46 | # to avoid the next bit changing the original groups 47 | nmlsuperset[group] = nmlsuperset[group].copy() 48 | for nml in nmlall: 49 | if group in nmlall[nml]: 50 | nmlsuperset[group].update(nmlall[nml][group]) 51 | # nmlsuperset groups now contain all keys that were in any nml 52 | return nmlsuperset 53 | 54 | 55 | def nmldiff(nmlall): 56 | """Remove every group/group member that is the same in all file Namelists. 57 | 58 | Parameter 59 | --------- 60 | Input : dict 61 | (e.g. 
returned by nmldict) with key:value pairs where 62 | key is filename path string 63 | value is complete Namelist from filename 64 | Output : dict 65 | modified input dict with key:value pairs where 66 | key is filename strings 67 | value is Namelist from filename, with any group/group member 68 | common to all other files removed 69 | """ 70 | 71 | # Create diff by removing common groups/members from nmlall. 72 | # This is complicated by the fact group names / member names may differ 73 | # or be absent across different nml files. 74 | 75 | # First make a superset that has all group names and group members that 76 | # appear in any nml file 77 | nmlsuperset = superset(nmlall) 78 | 79 | # now go through nmlall and remove any groups / members from nmlall that 80 | # are identical to superset in all nmls 81 | # first delete any group members that are common to all nmls, then delete 82 | # any empty groups common to all nmls 83 | for group in nmlsuperset: 84 | # init: whether group is present and identical in all namelist files 85 | deletegroup = True 86 | for nml in nmlall: 87 | deletegroup = deletegroup and (group in nmlall[nml]) 88 | if deletegroup: # group present in all namelist files 89 | for mem in nmlsuperset[group]: 90 | # init: whether group member is present and identical 91 | # in all namelist files 92 | deletemem = True 93 | for nml in nmlall: 94 | deletemem = deletemem and (mem in nmlall[nml][group]) 95 | if deletemem: # group member is present in all namelist files 96 | for nml in nmlall: 97 | # ... now check if values match in all namelist files 98 | deletemem = deletemem and ( 99 | nmlall[nml][group][mem] == nmlsuperset[group][mem] 100 | ) 101 | if deletemem: 102 | for nml in nmlall: 103 | # delete mem from this group in all nmls 104 | del nmlall[nml][group][mem] 105 | for nml in nmlall: 106 | deletegroup = deletegroup and (len(nmlall[nml][group]) == 0) 107 | if deletegroup: 108 | # group is common to all nmls and now empty so delete 109 | for nml in nmlall: 110 | del nmlall[nml][group] 111 | return nmlall 112 | -------------------------------------------------------------------------------- /cosima_cookbook/summary/nml_summary.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Create tabulated summary of namelists for a set of files. 4 | # These functions assume we are dealing with ACCESS-OM2 data. 5 | # Andrew Kiss https://github.com/aekiss 6 | 7 | 8 | import cosima_cookbook as cc 9 | from IPython.display import display, Markdown 10 | import os 11 | 12 | 13 | def summary_md( 14 | configuration, 15 | expts, 16 | path="/g/data3/hh5/tmp/cosima/", 17 | search="https://github.com/OceansAus/access-om2/search?&q=", 18 | nmls=[ 19 | "atmosphere/input_atm.nml", 20 | "ice/cice_in.nml", 21 | "ice/input_ice.nml", 22 | "ice/input_ice_gfdl.nml", 23 | "ice/input_ice_monin.nml", 24 | "ocean/input.nml", 25 | ], 26 | ): 27 | for nml in nmls: 28 | epaths = [] 29 | for e in expts: 30 | # NB: only look at output000 31 | epaths.append(os.path.join(path, configuration, e, "output000", nml)) 32 | nmld = cc.nmldiff(cc.nmldict(tuple(epaths))) 33 | epaths = list(nmld.keys()) # redefine to handle missing paths 34 | epaths.sort() 35 | nmldss = cc.superset(nmld) 36 | display(Markdown("### " + nml + " namelist differences")) 37 | if len(nmldss) == 0: 38 | display(Markdown("no differences")) 39 | else: 40 | mdstr = "| group | variable | " 41 | for e in epaths: 42 | mdstr = mdstr + e.replace("/", "/
") + " | " 43 | mdstr = mdstr + "\n|---|:--|" + ":-:|" * len(epaths) 44 | for group in sorted(nmldss): 45 | for mem in sorted(nmldss[group]): 46 | mdstr = mdstr + "\n| " + "&" + group + " | " + mem + " | " 47 | # search doesn't work on github submodules or forks 48 | # '[' + group + '](' + search + group + ')' + ' | ' + \ 49 | # '[' + mem + '](' + search + mem + ')' + ' | ' 50 | for e in epaths: 51 | if group in nmld[e]: 52 | if mem in nmld[e][group]: 53 | mdstr = mdstr + repr(nmld[e][group][mem]) 54 | mdstr = mdstr + " | " 55 | display(Markdown(mdstr)) 56 | return 57 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx==2 2 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # http://www.sphinx-doc.org/en/master/config 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | # 13 | #import os 14 | #import sys 15 | #sys.path.insert(0, os.path.abspath('../../cosima-cookbook')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'cosima-cookbook' 21 | copyright = '2019, James Munroe' 22 | author = 'James Munroe' 23 | 24 | # The full version, including alpha/beta/rc tags 25 | # release = '0.3.1' 26 | 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | 'sphinx.ext.autodoc', 35 | 'sphinx.ext.napoleon', 36 | ] 37 | 38 | napoleon_google_docstring = False 39 | napoleon_use_param = False 40 | napoleon_use_ivar = True 41 | 42 | # Add any paths that contain templates here, relative to this directory. 43 | templates_path = ['_templates'] 44 | 45 | # List of patterns, relative to source directory, that match files and 46 | # directories to ignore when looking for source files. 47 | # This pattern also affects html_static_path and html_extra_path. 48 | exclude_patterns = [] 49 | 50 | 51 | # -- Options for HTML output ------------------------------------------------- 52 | 53 | # The theme to use for HTML and HTML Help pages. See the documentation for 54 | # a list of builtin themes. 55 | # 56 | html_theme = 'sphinx_rtd_theme' 57 | 58 | # Add any paths that contain custom static files (such as style sheets) here, 59 | # relative to this directory. They are copied after the builtin static files, 60 | # so a file named "default.css" will overwrite the builtin "default.css". 61 | html_static_path = ['_static'] 62 | -------------------------------------------------------------------------------- /docs/source/cosima_cookbook.rst: -------------------------------------------------------------------------------- 1 | cosima\_cookbook package 2 | ======================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | cosima_cookbook.diagnostics 10 | cosima_cookbook.plots 11 | cosima_cookbook.summary 12 | 13 | Submodules 14 | ---------- 15 | 16 | cosima\_cookbook.database module 17 | -------------------------------- 18 | 19 | .. automodule:: cosima_cookbook.database 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | 24 | cosima\_cookbook.database\_utils module 25 | --------------------------------------- 26 | 27 | .. automodule:: cosima_cookbook.database_utils 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | 32 | cosima\_cookbook.date\_utils module 33 | ----------------------------------- 34 | 35 | .. automodule:: cosima_cookbook.date_utils 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | 40 | cosima\_cookbook.distributed module 41 | ----------------------------------- 42 | 43 | .. automodule:: cosima_cookbook.distributed 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | 48 | cosima\_cookbook.memory module 49 | ------------------------------ 50 | 51 | .. automodule:: cosima_cookbook.memory 52 | :members: 53 | :undoc-members: 54 | :show-inheritance: 55 | 56 | cosima\_cookbook.netcdf\_index module 57 | ------------------------------------- 58 | 59 | .. automodule:: cosima_cookbook.netcdf_index 60 | :members: 61 | :undoc-members: 62 | :show-inheritance: 63 | 64 | cosima\_cookbook.netcdf\_utils module 65 | ------------------------------------- 66 | 67 | .. 
automodule:: cosima_cookbook.netcdf_utils 68 | :members: 69 | :undoc-members: 70 | :show-inheritance: 71 | 72 | cosima\_cookbook.querying module 73 | -------------------------------- 74 | 75 | .. automodule:: cosima_cookbook.querying 76 | :members: 77 | :undoc-members: 78 | :show-inheritance: 79 | 80 | 81 | Module contents 82 | --------------- 83 | 84 | .. automodule:: cosima_cookbook 85 | :members: 86 | :undoc-members: 87 | :show-inheritance: 88 | -------------------------------------------------------------------------------- /docs/source/getting_started.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | Getting Started 3 | =============== 4 | 5 | The cookbook consists of a Python 3 package that contains infrastructure 6 | for indexing COSIMA model output and convenient methods for searching for 7 | and loading the data into `xarray `_ data structures. 8 | 9 | Some users may find it sufficient to browse through the examples and tutorials 10 | in the `COSIMA recipes `_ repository. 11 | The Jupyter notebooks that can be downloaded from COSIMA recipes need this package 12 | (called cosima_cookbook) to be installed. 13 | 14 | Choosing your platform 15 | ====================== 16 | 17 | COSIMA ocean and ice models are typically run on `NCI `_, an HPC 18 | computing centre in Australia. The output data is very large and it is 19 | assumed that this data resides on an NCI storage system. 20 | 21 | The cookbook is supported on two NCI systems: 22 | 23 | #. `Virtual Desktop Infrastructure (VDI) `_ 24 | #. `gadi (gadi.nci.org.au) `_ 25 | 26 | Connecting 27 | ========== 28 | 29 | For both VDI and gadi, scripts are used to start a `jupyter notebook `_ 30 | or `jupyter lab `_ session on the chosen system 31 | and automatically create an `ssh tunnel `_ 32 | so that the jupyter session can be opened in your local browser using a URL 33 | that appears to be on your own local machine. 34 | 35 | Scripts for this purpose are provided by the CLEX CMS team in this repository: 36 | 37 | https://github.com/coecms/nci_scripts 38 | 39 | Clone the repository to your local computer. There are instructions in the repository 40 | on the requirements for each script and how to use them. 41 | 42 | Alternatively, if you are using the VDI Strudel environment and accessing the VDI 43 | through a virtual desktop, you can load the same Python conda environment that is 44 | used in the scripts above and start a jupyter notebook session like so: 45 | :: 46 | 47 | module use /g/data3/hh5/public/modules 48 | module load conda/analysis3 49 | 50 | jupyter notebook 51 | 52 | Finding data 53 | ============ 54 | 55 | Most of the infrastructure the COSIMA Cookbook provides revolves around indexing 56 | data output from COSIMA models and providing a Python-based API to access the 57 | data in a convenient and straightforward way. 58 | 59 | There are graphical user interface (GUI) tools to help with data discovery and 60 | exploration. There is a 61 | `tutorial `_ 62 | in the COSIMA recipes repository which demonstrates the available tools. 63 | 64 | Tutorials and examples 65 | ====================== 66 | 67 | COSIMA recipes provides `tutorials `_ 68 | and `documented examples `_ 69 | which can be used to learn how to use the Cookbook, and for ideas and inspiration for your own analysis.
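For example, once model output has been indexed into a database, a session can be opened and a variable loaded into xarray with the querying API. The database path, experiment name and variable name below are placeholders:
::

    import cosima_cookbook as cc

    # open a session against an existing index database
    session = cc.database.create_session('/path/to/cosima.db')

    # list the variables indexed for one experiment
    cc.querying.get_variables(session, experiment='my_experiment')

    # load a single variable as an xarray DataArray
    temp = cc.querying.getvar('my_experiment', 'temp', session)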
70 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. cosima-cookbook documentation master file, created by 2 | sphinx-quickstart on Mon Aug 12 20:35:06 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | COSIMA Cookbook 7 | =============== 8 | 9 | Ocean and ice models are evaluated and compared by using diagnostics. 10 | `COSIMA recipes `_ is a collection of 11 | diagnostics by the `COSIMA `_ community. 12 | 13 | The recipes rely on infrastructure that is provided by the COSIMA Cookbook. 14 | The cookbook is under active development on 15 | Github: `COSIMA-Cookbook `_ 16 | 17 | .. toctree:: 18 | :maxdepth: 2 19 | :caption: Contents: 20 | 21 | getting_started 22 | related_projects 23 | -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | cosima_cookbook 2 | =============== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | cosima_cookbook 8 | -------------------------------------------------------------------------------- /docs/source/related_projects.rst: -------------------------------------------------------------------------------- 1 | Related Projects 2 | ================ 3 | 4 | COSIMA-Cookbook is a solution for efficient calculation of diagnostics from the 5 | output of high-resolution ocean and ice models. It is targeted 6 | at the COSIMA community where models (e.g. MOM5) are run on NCI 7 | resources such as raijin. 8 | 9 | The problem of dealing with increasingly large output from atmosphere, 10 | climate, and ocean models is being addressed by several groups. 11 | Active development is currently underway as part of 12 | the PangeoData_ initiative. COSIMA-Cookbook logically sits on top 13 | of that project. 14 | 15 | 16 | Underlying Python technologies 17 | ------------------------------ 18 | 19 | Dask 20 | 21 | xarray 22 | 23 | Jupyter 24 | 25 | 26 | .. _PangeoData: https://pangeo-data.github.io 27 | -------------------------------------------------------------------------------- /readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Build documentation in the docs/ directory with Sphinx 9 | sphinx: 10 | configuration: docs/source/conf.py 11 | 12 | # Optionally set the version of Python and requirements required to build your docs 13 | python: 14 | version: 3.7 15 | install: 16 | - requirements: docs/requirements.txt 17 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # install dependencies from setup.py, and the cookbook in editable mode 2 | -e .[build] 3 | -------------------------------------------------------------------------------- /sandbox/alembic/README: -------------------------------------------------------------------------------- 1 | Generic single-database configuration. 2 | 3 | Make sure to edit sqlalchemy.url in alembic.ini!
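For example, pointing Alembic at a local SQLite cookbook database would use a line along these lines in alembic.ini (the path is a placeholder):

sqlalchemy.url = sqlite:////path/to/cosima-cookbook.db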
-------------------------------------------------------------------------------- /sandbox/alembic/env.py: -------------------------------------------------------------------------------- 1 | from logging.config import fileConfig 2 | 3 | from sqlalchemy import engine_from_config 4 | from sqlalchemy import pool 5 | 6 | from alembic import context 7 | 8 | # this is the Alembic Config object, which provides 9 | # access to the values within the .ini file in use. 10 | config = context.config 11 | 12 | # Interpret the config file for Python logging. 13 | # This line sets up loggers basically. 14 | fileConfig(config.config_file_name) 15 | 16 | # add your model's MetaData object here 17 | # for 'autogenerate' support 18 | # from myapp import mymodel 19 | # target_metadata = mymodel.Base.metadata 20 | import cosima_cookbook as cc 21 | target_metadata = cc.database.Base.metadata 22 | 23 | # other values from the config, defined by the needs of env.py, 24 | # can be acquired: 25 | # my_important_option = config.get_main_option("my_important_option") 26 | # ... etc. 27 | 28 | 29 | def run_migrations_offline(): 30 | """Run migrations in 'offline' mode. 31 | 32 | This configures the context with just a URL 33 | and not an Engine, though an Engine is acceptable 34 | here as well. By skipping the Engine creation 35 | we don't even need a DBAPI to be available. 36 | 37 | Calls to context.execute() here emit the given string to the 38 | script output. 39 | 40 | """ 41 | url = config.get_main_option("sqlalchemy.url") 42 | context.configure( 43 | url=url, 44 | target_metadata=target_metadata, 45 | literal_binds=True, 46 | dialect_opts={"paramstyle": "named"}, 47 | ) 48 | 49 | with context.begin_transaction(): 50 | context.run_migrations() 51 | 52 | 53 | def run_migrations_online(): 54 | """Run migrations in 'online' mode. 55 | 56 | In this scenario we need to create an Engine 57 | and associate a connection with the context. 58 | 59 | """ 60 | connectable = engine_from_config( 61 | config.get_section(config.config_ini_section), 62 | prefix="sqlalchemy.", 63 | poolclass=pool.NullPool, 64 | ) 65 | 66 | with connectable.connect() as connection: 67 | context.configure( 68 | connection=connection, target_metadata=target_metadata 69 | ) 70 | 71 | with context.begin_transaction(): 72 | context.run_migrations() 73 | 74 | 75 | if context.is_offline_mode(): 76 | run_migrations_offline() 77 | else: 78 | run_migrations_online() 79 | -------------------------------------------------------------------------------- /sandbox/alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | ${imports if imports else ""} 11 | 12 | # revision identifiers, used by Alembic. 
13 | revision = ${repr(up_revision)} 14 | down_revision = ${repr(down_revision)} 15 | branch_labels = ${repr(branch_labels)} 16 | depends_on = ${repr(depends_on)} 17 | 18 | 19 | def upgrade(): 20 | ${upgrades if upgrades else "pass"} 21 | 22 | 23 | def downgrade(): 24 | ${downgrades if downgrades else "pass"} 25 | -------------------------------------------------------------------------------- /sandbox/alembic/versions/16223b92479e_add_keywords.py: -------------------------------------------------------------------------------- 1 | """add keywords 2 | 3 | Revision ID: 16223b92479e 4 | Revises: 5 | Create Date: 2020-06-30 13:22:36.407339 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | from sqlalchemy import orm 11 | 12 | import cosima_cookbook as cc 13 | 14 | # revision identifiers, used by Alembic. 15 | revision = '16223b92479e' 16 | down_revision = None 17 | branch_labels = None 18 | depends_on = None 19 | 20 | 21 | def upgrade(): 22 | bind = op.get_bind() 23 | session = orm.Session(bind=bind) 24 | 25 | # ### commands auto generated by Alembic - please adjust! ### 26 | op.create_table('keywords', 27 | sa.Column('id', sa.Integer(), nullable=False), 28 | sa.Column('_keyword', sa.String(collation='NOCASE'), nullable=False), 29 | sa.PrimaryKeyConstraint('id') 30 | ) 31 | op.create_index(op.f('ix_keywords__keyword'), 'keywords', ['_keyword'], unique=True) 32 | op.create_table('keyword_assoc', 33 | sa.Column('expt_id', sa.Integer(), nullable=True), 34 | sa.Column('keyword_id', sa.Integer(), nullable=True), 35 | sa.ForeignKeyConstraint(['expt_id'], ['experiments.id'], ), 36 | sa.ForeignKeyConstraint(['keyword_id'], ['keywords.id'], ) 37 | ) 38 | # ### end Alembic commands ### 39 | op.execute('PRAGMA user_version=3') 40 | 41 | # reindex metadata for experiments 42 | for expt in session.query(cc.database.NCExperiment): 43 | cc.database.update_metadata(expt, session) 44 | session.commit() 45 | 46 | def downgrade(): 47 | # ### commands auto generated by Alembic - please adjust! 
### 48 | op.drop_table('keyword_assoc') 49 | op.drop_index(op.f('ix_keywords__keyword'), table_name='keywords') 50 | op.drop_table('keywords') 51 | # ### end Alembic commands ### 52 | op.execute('PRAGMA user_version=2') 53 | -------------------------------------------------------------------------------- /sandbox/diag-vis.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from sys import argv 3 | 4 | import cosima_cookbook as cc 5 | import pandas as pd 6 | from sqlalchemy import select, distinct, bindparam 7 | 8 | from bokeh.io import curdoc 9 | from bokeh.layouts import column 10 | from bokeh.models.callbacks import CustomJS 11 | from bokeh.models.sources import ColumnDataSource 12 | from bokeh.models.tools import BoxSelectTool, HoverTool, TapTool 13 | from bokeh.models.widgets import Select, Button, Div 14 | import bokeh.palettes 15 | from bokeh.plotting import figure 16 | from bokeh.transform import factor_cmap 17 | 18 | if len(argv) < 2: 19 | raise Exception('Usage: bokeh serve diag-vis.py --args ') 20 | db = argv[1] 21 | 22 | conn, tables = cc.database.create_database(db) 23 | 24 | expt_query = select([distinct(tables['ncfiles'].c.experiment)]) 25 | vars_query = select([distinct(tables['ncvars'].c.variable)]) \ 26 | .select_from(tables['ncvars'].join(tables['ncfiles'])) \ 27 | .where(tables['ncfiles'].c.experiment == bindparam('expt')) 28 | data_query = select([tables['ncfiles'].c.ncfile, tables['ncfiles'].c.run, tables['ncvars'].c.variable, 29 | tables['ncfiles'].c.time_start, tables['ncfiles'].c.time_end, tables['ncfiles'].c.frequency]) \ 30 | .select_from(tables['ncfiles'].join(tables['ncvars'])) \ 31 | .where(tables['ncfiles'].c.experiment == bindparam('expt')) \ 32 | .where(tables['ncfiles'].c.time_start is not None) \ 33 | .where(tables['ncfiles'].c.frequency != 'static') \ 34 | .order_by(tables['ncvars'].c.variable, tables['ncfiles'].c.time_start) 35 | 36 | expts = [e[0] for e in conn.execute(expt_query)] 37 | 38 | def get_data(expt): 39 | data = conn.execute(data_query, expt=expt).fetchall() 40 | df = pd.DataFrame(data, columns=['ncfile', 'run', 'variable', 'time_start', 'time_end', 'frequency']) 41 | df[['time_start', 'time_end']] = df[['time_start', 'time_end']].applymap( 42 | lambda s: datetime.strptime(s, '%Y-%m-%d %H:%M:%S')) 43 | 44 | return df 45 | 46 | def print_selected(div): 47 | return CustomJS(args=dict(div=div), code=""" 48 | var source = cb_obj; 49 | var unique_vars = {}; 50 | for (var i of source.selected['1d'].indices) { 51 | var v = source.data['variable'][i]; 52 | if (v in unique_vars) { 53 | unique_vars[v]['time_start'] = Math.min(unique_vars[v]['time_start'], source.data['time_start'][i]); 54 | unique_vars[v]['time_end'] = Math.max(unique_vars[v]['time_end'], source.data['time_end'][i]); 55 | } else { 56 | unique_vars[v] = { time_start: source.data['time_start'][i], 57 | time_end: source.data['time_end'][i] }; 58 | } 59 | } 60 | 61 | var text = ''; 62 | for (var p in unique_vars) { 63 | var ts = new Date(unique_vars[p]['time_start']); 64 | var te = new Date(unique_vars[p]['time_end']); 65 | text = text.concat(''); 66 | } 67 | text = text.concat('
</table>
') 68 | div.text = text; 69 | """) 70 | 71 | 72 | # create widgets 73 | expt_select = Select(title='Experiment:', options=expts, value=expts[0]) 74 | refresh = Button(label='Update') 75 | div = Div(width=1000) 76 | 77 | # hover tools 78 | hover = HoverTool(tooltips=[ 79 | ('variable', '@variable'), ('start', '@time_start{%F}'), 80 | ('end', '@time_end{%F}'), ('run', '@run'), ('file', '@ncfile')], 81 | formatters={ 82 | 'time_start': 'datetime', 83 | 'time_end': 'datetime' 84 | }) 85 | tap = TapTool() 86 | box_select = BoxSelectTool() 87 | tools = [hover, box_select, tap, 'pan', 'box_zoom', 'wheel_zoom', 'reset'] 88 | 89 | df = get_data(expt_select.value) 90 | freqs = df.frequency.unique() 91 | cmap = factor_cmap('frequency', palette=bokeh.palettes.Category10[10], factors=freqs) 92 | cds = ColumnDataSource(df, callback=print_selected(div)) 93 | 94 | p = figure(y_range=df.variable.unique(), x_range=(df.iloc[0].time_start, df.iloc[-1].time_end), 95 | title=expt_select.value, tools=tools) 96 | cmap = factor_cmap('frequency', palette=bokeh.palettes.Category10[10], factors=freqs) 97 | hb = p.hbar(y='variable', left='time_start', right='time_end', height=0.4, source=cds, 98 | fill_color=cmap, legend='frequency') 99 | 100 | # callback routines to repopulate list of variables 101 | def get_vars(expt): 102 | return [e[0] for e in conn.execute(vars_query, expt=expt)] 103 | 104 | def refresh_output(): 105 | # get new data 106 | df = get_data(expt_select.value) 107 | freqs = df.frequency.unique() 108 | cmap = factor_cmap('frequency', palette=bokeh.palettes.Category10[10], factors=freqs) 109 | 110 | # update figure itself 111 | p.y_range.factors = list(df.variable.unique()) 112 | (p.x_range.start, p.x_range.end) = (df.iloc[0].time_start, df.iloc[-1].time_end) 113 | p.title.text = expt_select.value 114 | 115 | # update data source for plot 116 | hb.data_source.data = hb.data_source.from_df(df) 117 | # update colourmap if necessary 118 | hb.glyph.fill_color = cmap 119 | 120 | refresh.on_click(refresh_output) 121 | 122 | # layout and show 123 | layout = column(expt_select, refresh, p, div) 124 | curdoc().add_root(layout) 125 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='cosima_cookbook', 5 | description='Diagnostics for COSIMA: Consortium for Ocean-Sea Ice Modelling in Australia', 6 | url='https://github.com/COSIMA/cosima-cookbook', 7 | author='COSIMA', 8 | license='Apache License 2.0', 9 | use_scm_version=True, 10 | packages=find_packages(), 11 | setup_requires=["setuptools_scm"], 12 | 13 | install_requires=[ 14 | 'dask', 15 | 'xarray', 16 | 'numpy', 17 | 'matplotlib', 18 | 'bokeh', 19 | 'netcdf4', 20 | 'tqdm', 21 | 'sqlalchemy<2.0', 22 | 'cftime', 23 | 'f90nml', 24 | 'joblib', 25 | 'ipywidgets', 26 | 'lxml', 27 | ], 28 | entry_points={ 29 | 'console_scripts': [ 30 | 'cosima_cookbook-update_db = cosima_cookbook.database_update:main', 31 | ] 32 | }, 33 | extras_require = { 34 | 'build': ['distributed', 'pytest', 'pytest-cov'] 35 | } 36 | ) 37 | -------------------------------------------------------------------------------- /test/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from dask.distributed import Client 3 | 4 | from cosima_cookbook import database 5 | 6 | 7 | @pytest.fixture(scope="module") 8 | def client(): 9 | client = 
Client(processes=False, dashboard_address=None) 10 | yield client 11 | client.close() 12 | 13 | 14 | @pytest.fixture(scope="function") 15 | def session_db(tmp_path): 16 | db = tmp_path / "test.db" 17 | s = database.create_session(str(db)) 18 | yield s, db 19 | 20 | s.close() 21 | -------------------------------------------------------------------------------- /test/data/explore/duplicate/one/metadata.yaml: -------------------------------------------------------------------------------- 1 | contact: The ACCESS Oracle 2 | email: oracle@example.com 3 | created: 2018-01-01 4 | description: Description 5 | notes: Notes 6 | keywords: 7 | - cosima 8 | - ACCESS-OM2-01 9 | - ryf9091 10 | -------------------------------------------------------------------------------- /test/data/explore/duplicate/one/ocean/ocean_age.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/duplicate/one/ocean/ocean_age.nc -------------------------------------------------------------------------------- /test/data/explore/one/atmosphere/ty_trans.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/one/atmosphere/ty_trans.nc -------------------------------------------------------------------------------- /test/data/explore/one/ice/hi_m.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/one/ice/hi_m.nc -------------------------------------------------------------------------------- /test/data/explore/one/metadata.yaml: -------------------------------------------------------------------------------- 1 | contact: The ACCESS Oracle 2 | email: oracle@example.com 3 | created: 2018-01-01 4 | description: Description 5 | notes: Notes 6 | keywords: 7 | - cosima 8 | - ACCESS-OM2-01 9 | - ryf9091 10 | -------------------------------------------------------------------------------- /test/data/explore/one/ocean/ocean.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/one/ocean/ocean.nc -------------------------------------------------------------------------------- /test/data/explore/one/restart/ocean_velocity_advection.res.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/one/restart/ocean_velocity_advection.res.nc -------------------------------------------------------------------------------- /test/data/explore/two/atm/hi_m.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/two/atm/hi_m.nc -------------------------------------------------------------------------------- /test/data/explore/two/metadata.yaml: -------------------------------------------------------------------------------- 1 | contact: The ACCESS Oracle 2 | email: oracle@example.com 3 | created: 2020-01-01 4 | description: Description again! 
5 | notes: Notes 6 | keywords: 7 | - cosima 8 | - another-keyword 9 | -------------------------------------------------------------------------------- /test/data/explore/two/nomodel/ty_trans.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/two/nomodel/ty_trans.nc -------------------------------------------------------------------------------- /test/data/explore/two/ocn/ocean.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/two/ocn/ocean.nc -------------------------------------------------------------------------------- /test/data/explore/two/ocn/ocean_month.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/two/ocn/ocean_month.nc -------------------------------------------------------------------------------- /test/data/explore/two/restart/ocean_velocity_advection.res.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/two/restart/ocean_velocity_advection.res.nc -------------------------------------------------------------------------------- /test/data/indexing/alternate/experiment_a/test2.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/alternate/experiment_a/test2.nc -------------------------------------------------------------------------------- /test/data/indexing/broken_file/output000/test.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/broken_file/output000/test.nc -------------------------------------------------------------------------------- /test/data/indexing/broken_metadata/metadata.yaml: -------------------------------------------------------------------------------- 1 | this: is: broken! 
2 | -------------------------------------------------------------------------------- /test/data/indexing/broken_metadata/test1.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/broken_metadata/test1.nc -------------------------------------------------------------------------------- /test/data/indexing/empty_file/output000/empty.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/empty_file/output000/empty.nc -------------------------------------------------------------------------------- /test/data/indexing/longnames/output000/test1.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/longnames/output000/test1.nc -------------------------------------------------------------------------------- /test/data/indexing/longnames/output000/test2.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/longnames/output000/test2.nc -------------------------------------------------------------------------------- /test/data/indexing/metadata/metadata.yaml: -------------------------------------------------------------------------------- 1 | contact: The ACCESS Oracle 2 | email: oracle@example.com 3 | created: 2018-01-01 4 | url: https://github.com/COSIMA/oracle 5 | description: >- 6 | Attempted spinup, using salt flux fix 7 | https://arccss.slack.com/archives/C6PP0GU9Y/p1515460656000124 and 8 | https://github.com/mom-ocean/MOM5/pull/208/commits/9f4ee6f8b72b76c96a25bf26f3f6cdf773b424d2 9 | from the start. 
Used mushy ice from July year 1 onwards to avoid 10 | vertical thermo error in cice 11 | https://arccss.slack.com/archives/C6PP0GU9Y/p1515842016000079 12 | notes: >- 13 | Stripy salt restoring: 14 | https://github.com/OceansAus/access-om2/issues/74 tripole seam bug: 15 | https://github.com/OceansAus/access-om2/issues/86 requires dt=300s 16 | in May, dt=240s in Aug to maintain CFL in CICE near tripoles (storms 17 | in those months in 8485RYF); all other months work with dt=400s 18 | -------------------------------------------------------------------------------- /test/data/indexing/metadata/test1.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/metadata/test1.nc -------------------------------------------------------------------------------- /test/data/indexing/multiple/experiment_a/test1.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/multiple/experiment_a/test1.nc -------------------------------------------------------------------------------- /test/data/indexing/multiple/experiment_b/test1.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/multiple/experiment_b/test1.nc -------------------------------------------------------------------------------- /test/data/indexing/single_broken_file/output000/broken.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/single_broken_file/output000/broken.nc -------------------------------------------------------------------------------- /test/data/indexing/single_broken_file/output000/test.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/single_broken_file/output000/test.nc -------------------------------------------------------------------------------- /test/data/indexing/symlinked/experiment_a: -------------------------------------------------------------------------------- 1 | ../multiple/experiment_a -------------------------------------------------------------------------------- /test/data/indexing/time/t1.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/time/t1.nc -------------------------------------------------------------------------------- /test/data/indexing/time/t2.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/time/t2.nc -------------------------------------------------------------------------------- /test/data/indexing/time/t3.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/time/t3.nc 
-------------------------------------------------------------------------------- /test/data/indexing/time/t4.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/time/t4.nc -------------------------------------------------------------------------------- /test/data/indexing/time/t5.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/time/t5.nc -------------------------------------------------------------------------------- /test/data/indexing/time_bounds/file001.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/time_bounds/file001.nc -------------------------------------------------------------------------------- /test/data/metadata/keywords/metadata.yaml: -------------------------------------------------------------------------------- 1 | contact: The ACCESS Oracle 2 | email: oracle@example.com 3 | created: 2018-01-01 4 | description: Description 5 | notes: Notes 6 | keywords: 7 | - cosima 8 | - ACCESS-OM2-01 9 | - ryf9091 10 | -------------------------------------------------------------------------------- /test/data/metadata/keywords/test1.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/metadata/keywords/test1.nc -------------------------------------------------------------------------------- /test/data/metadata/keywords2/metadata.yaml: -------------------------------------------------------------------------------- 1 | contact: The ACCESS Oracle 2 | email: oracle@example.com 3 | created: 2020-01-01 4 | description: Description again! 5 | notes: Notes 6 | keywords: 7 | - cosima 8 | - another-keyword 9 | -------------------------------------------------------------------------------- /test/data/metadata/keywords2/test1.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/metadata/keywords2/test1.nc -------------------------------------------------------------------------------- /test/data/metadata/string_keyword/metadata.yaml: -------------------------------------------------------------------------------- 1 | contact: The ACCESS Oracle 2 | email: oracle@example.com 3 | created: 2020-01-02 4 | description: String keywords 5 | notes: Notes 6 | keywords: cosima 7 | -------------------------------------------------------------------------------- /test/data/metadata/upcase/metadata.yaml: -------------------------------------------------------------------------------- 1 | contact: The ACCESS Oracle 2 | email: oracle@example.com 3 | created: 2018-01-01 4 | description: Description (with uppercase keywords!) 
5 | notes: Notes 6 | keywords: 7 | - COSIMA 8 | - access-om2-01 9 | - RYF9091 10 | -------------------------------------------------------------------------------- /test/data/ocean_sealevel.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/ocean_sealevel.nc -------------------------------------------------------------------------------- /test/data/querying/output000/hi_m.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/querying/output000/hi_m.nc -------------------------------------------------------------------------------- /test/data/querying/output000/ocean.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/querying/output000/ocean.nc -------------------------------------------------------------------------------- /test/data/querying/restart000/ty_trans.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/querying/restart000/ty_trans.nc -------------------------------------------------------------------------------- /test/data/querying_disambiguation/output000/ocean.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/querying_disambiguation/output000/ocean.nc -------------------------------------------------------------------------------- /test/data/querying_disambiguation/output000/ocean_month.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/querying_disambiguation/output000/ocean_month.nc -------------------------------------------------------------------------------- /test/data/update/experiment_a/test1.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/update/experiment_a/test1.nc -------------------------------------------------------------------------------- /test/data/update/experiment_b/test2.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/update/experiment_b/test2.nc -------------------------------------------------------------------------------- /test/test_database.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | import sqlalchemy as sa 4 | from cosima_cookbook import database 5 | 6 | 7 | @pytest.fixture 8 | def db_env(tmp_path): 9 | old_db = os.getenv("COSIMA_COOKBOOK_DB") 10 | db = tmp_path / "test.db" 11 | os.environ["COSIMA_COOKBOOK_DB"] = str(db) 12 | 13 | yield db 14 | 15 | # clean up by resetting the env var 16 | if old_db: 17 | os.environ["COSIMA_COOKBOOK_DB"] = old_db 18 | else: 19 | del os.environ["COSIMA_COOKBOOK_DB"] 20 | 21 | 22 | def test_default(tmp_path): 23 | db = 
tmp_path / "test.db" 24 | # override the NCI-specific default 25 | database.__DEFAULT_DB__ = str(db) 26 | 27 | s = database.create_session() 28 | 29 | assert db.exists() 30 | 31 | 32 | def test_env_var(db_env): 33 | # make sure we use the environment variable 34 | # override with no arguments supplied 35 | s = database.create_session() 36 | assert db_env.exists() 37 | 38 | 39 | def test_arg_override(tmp_path, db_env): 40 | # check that if we supply an argument, that 41 | # is used rather than the environment variable 42 | db = tmp_path / "test_other.db" 43 | s = database.create_session(str(db)) 44 | 45 | assert not db_env.exists() 46 | assert db.exists() 47 | 48 | 49 | def test_creation(session_db): 50 | """Test that a database file is created with a session 51 | when the session file doesn't exist.""" 52 | 53 | s, db = session_db 54 | assert db.exists() 55 | 56 | # we should be able to query against a table that exists 57 | # with no error 58 | s.execute("SELECT * FROM ncfiles") 59 | 60 | # but not a non-existent table 61 | with pytest.raises(sa.exc.OperationalError, match="no such table"): 62 | s.execute("SELECT * FROM ncfiles_notfound") 63 | 64 | 65 | def test_reopen(tmp_path): 66 | """Test that we can reopen a database of the correct version.""" 67 | 68 | db = tmp_path / "test.db" 69 | s = database.create_session(str(db)) 70 | 71 | s.close() 72 | s = database.create_session(str(db)) 73 | s.close() 74 | 75 | 76 | def test_outdated(tmp_path): 77 | """Test that we can't use an outdated database""" 78 | 79 | db = tmp_path / "test.db" 80 | s = database.create_session(str(db)) 81 | 82 | # check that the current version matches that defined in the module 83 | ver = s.execute("PRAGMA user_version").fetchone()[0] 84 | assert ver == database.__DB_VERSION__ 85 | 86 | # reset version to one prior 87 | s.execute("PRAGMA user_version={}".format(database.__DB_VERSION__ - 1)) 88 | s.close() 89 | 90 | # recreate the session 91 | with pytest.raises(Exception, match="Incompatible database versions"): 92 | s = database.create_session(str(db)) 93 | 94 | 95 | def test_outdated_notmodified(tmp_path): 96 | """Test that we don't try to modify an outdated database. 97 | This includes adding tables that don't yet exist because 98 | it's a previous version. 99 | """ 100 | 101 | # set up an empty database with a previous version 102 | db = tmp_path / "test.db" 103 | conn = sa.create_engine("sqlite:///" + str(db)).connect() 104 | conn.execute("PRAGMA user_version={}".format(database.__DB_VERSION__ - 1)) 105 | conn.close() 106 | 107 | # try to create the session 108 | # this should fail and not modify the existing database 109 | with pytest.raises(Exception): 110 | s = database.create_session(str(db)) 111 | 112 | # reopen the connection and ensure tables weren't created 113 | conn = sa.create_engine("sqlite:///" + str(db)).connect() 114 | with pytest.raises(sa.exc.OperationalError, match="no such table"): 115 | conn.execute("SELECT * FROM ncfiles") 116 | 117 | 118 | def test_delete_experiment(session_db): 119 | """Test that we can completely delete an experiment 120 | and its associated data. 
121 | """ 122 | 123 | session, db = session_db 124 | database.build_index("test/data/indexing/longnames", session) 125 | 126 | # make sure we actually did index something 127 | expt = ( 128 | session.query(database.NCExperiment) 129 | .filter(database.NCExperiment.experiment == "longnames") 130 | .one_or_none() 131 | ) 132 | assert expt is not None 133 | 134 | database.delete_experiment("longnames", session) 135 | expt = ( 136 | session.query(database.NCExperiment) 137 | .filter(database.NCExperiment.experiment == "longnames") 138 | .one_or_none() 139 | ) 140 | assert expt is None 141 | 142 | # check that all files are removed 143 | files = session.query(sa.func.count(database.NCFile.id)).scalar() 144 | assert files == 0 145 | 146 | # make sure all ncvars are removed 147 | vars = session.query(sa.func.count(database.NCVar.id)).scalar() 148 | assert vars == 0 149 | -------------------------------------------------------------------------------- /test/test_dates.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Copyright 2017 ARC Centre of Excellence for Climate Systems Science 5 | author: Aidan Heerdegen 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | from __future__ import print_function 18 | 19 | import pytest 20 | import sys, os, time, glob 21 | import shutil 22 | import pdb # Add pdb.set_trace() to set breakpoints 23 | import xarray as xr 24 | import numpy as np 25 | import cftime 26 | from datetime import datetime, timedelta 27 | 28 | from cosima_cookbook.date_utils import ( 29 | rebase_times, 30 | rebase_dataset, 31 | rebase_variable, 32 | rebase_shift_attr, 33 | format_datetime, 34 | parse_datetime, 35 | ) 36 | 37 | from xarray.testing import assert_equal 38 | 39 | verbose = True 40 | 41 | times = [] 42 | 43 | 44 | def setup_module(module): 45 | if verbose: 46 | print("setup_module module:%s" % module.__name__) 47 | if verbose: 48 | print("Python version: {}".format(sys.version)) 49 | # Put any setup code in here, like making temporary files 50 | # Make 5 years of a noleap calendar on the first of each month 51 | global times 52 | for y in range(1, 6): 53 | for m in range(1, 13): 54 | times.append( 55 | np.round( 56 | cftime.date2num( 57 | cftime.datetime(y, m, 1), 58 | units="days since 01-01-01", 59 | calendar="noleap", 60 | ), 61 | 8, 62 | ) 63 | ) 64 | times = np.array(times) 65 | 66 | 67 | def teardown_module(module): 68 | if verbose: 69 | print("teardown_module module:%s" % module.__name__) 70 | # Put any teardown code in here, like deleting temporary files 71 | 72 | 73 | def test_format_parse_datetime(): 74 | dates = [ 75 | cftime.num2date(t, units="days since 01-01-01", calendar="noleap") 76 | for t in times 77 | ] 78 | assert format_datetime(dates[0]) == "0001-01-01 00:00:00" 79 | assert format_datetime(dates[-1]) == "0005-12-01 00:00:00" 80 | 81 | for d in dates: 82 | assert parse_datetime(format_datetime(d), "noleap") == d 83 | 84 | dates = [ 85 | cftime.num2date(t, units="days 
since 01-01-01", calendar="proleptic_gregorian") 86 | for t in times 87 | ] 88 | assert format_datetime(dates[0]) == "0001-01-01 00:00:00" 89 | assert format_datetime(dates[-1]) == "0005-11-30 00:00:00" 90 | 91 | for d in dates: 92 | assert parse_datetime(format_datetime(d), "proleptic_gregorian") == d 93 | 94 | 95 | def test_rebase_times(): 96 | # Should be a 10 year offset between original times and rebased times 97 | assert not np.any( 98 | (times + 365 * 10) 99 | - rebase_times( 100 | times, "days since 1980-01-01", "noleap", "days since 1970-01-01" 101 | ) 102 | ) 103 | 104 | # Should be a -10 year offset between original times and rebased times 105 | assert not np.any( 106 | (times - 365 * 10) 107 | - rebase_times( 108 | times, "days since 1980-01-01", "noleap", "days since 1990-01-01" 109 | ) 110 | ) 111 | 112 | 113 | def test_rebase_variable(): 114 | timesvar = xr.DataArray( 115 | times, attrs={"units": "days since 1980-01-01", "calendar": "noleap"} 116 | ) 117 | 118 | print("att:", timesvar.attrs) 119 | 120 | # Test we can rebase with and without explicitly setting a calendar 121 | timesvar_rebased = rebase_variable(timesvar, target_units="days since 1970-01-01") 122 | assert timesvar_rebased.equals( 123 | rebase_variable(timesvar, "noleap", target_units="days since 1970-01-01") 124 | ) 125 | 126 | assert not timesvar.equals(timesvar_rebased) 127 | 128 | # Should be a 10 year offset between original times and rebased times 129 | assert not np.any((times + 365 * 10) - timesvar_rebased.values) 130 | # assert(not np.any((times + 365*10) - rebase_variable(timesvar, 'noleap', target_units='days since 1970-01-01').values)) 131 | 132 | with pytest.raises(ValueError): 133 | timesvar_rebased = rebase_variable( 134 | timesvar, "noleap", target_units="days since 1990-01-01" 135 | ) 136 | 137 | # Rebase with an offset otherwise would have negative dates 138 | timesvar_rebased = rebase_variable( 139 | timesvar, "noleap", target_units="days since 1990-01-01", offset=365 * 10 140 | ) 141 | 142 | # Values should be the same 143 | assert not np.any(times - timesvar_rebased.values) 144 | 145 | # But the rebase_shift_attr should be set to 10 years 146 | assert timesvar_rebased.attrs[rebase_shift_attr] == 365 * 10 147 | 148 | # Check we get back timesvar if rebased again with no arguments (rebases to previous 149 | # units and applies offset if required in this instance) 150 | assert timesvar.equals(rebase_variable(timesvar_rebased)) 151 | 152 | 153 | def test_matching_time_units(): 154 | testfile = "test/data/ocean_sealevel.nc" 155 | 156 | ds = xr.open_dataset(testfile, decode_times=False) 157 | target_units = "days since 1800-01-01" 158 | 159 | ds1 = rebase_dataset(ds, target_units) 160 | # s1.to_netcdf('tmp.nc') 161 | 162 | ds2 = rebase_dataset(ds1) 163 | # ds2.to_netcdf('tmp2.nc') 164 | 165 | # Rebasing again without target_units specified should 166 | # un-do previous rebase 167 | assert ds.equals(ds2) 168 | 169 | # An offset is required as the target units are ahead of the data in time 170 | target_units = "days since 2000-01-01" 171 | 172 | # Offset can be automatically generated as difference between target and src units 173 | ds1 = rebase_dataset(ds, target_units, offset="auto") 174 | ds2 = rebase_dataset(ds1) 175 | 176 | assert ds.equals(ds2) 177 | 178 | # Offset can be an integer, but need to know what units are being used, days, hours etc 179 | ds1 = rebase_dataset(ds, target_units, offset=100 * 365) 180 | ds2 = rebase_dataset(ds1) 181 | 182 | assert ds.equals(ds2) 183 | 184 | # Offset 
can be a datetime.timedelta object, but this would need some knowledge of 185 | # the calendar 186 | ds1 = rebase_dataset(ds, target_units, offset=timedelta(days=100 * 365)) 187 | ds2 = rebase_dataset(ds1) 188 | 189 | # A different offset will yield a different dataset, but upon rebasing a second time 190 | # should still be the same as the original regardless of offset. 191 | ds3 = rebase_dataset(ds, target_units, offset=timedelta(days=200 * 365)) 192 | ds4 = rebase_dataset(ds3) 193 | 194 | assert ds.equals(ds4) 195 | assert not ds1.equals(ds3) 196 | 197 | # Test graceful recovery if time_bounds missing. 198 | del ds["time_bounds"] 199 | ds3 = rebase_dataset(ds, target_units, offset=timedelta(days=200 * 365)) 200 | ds4 = rebase_dataset(ds3) 201 | 202 | assert ds.equals(ds4) 203 | assert not ds1.equals(ds3) 204 | 205 | ds = xr.open_dataset(testfile, decode_times=False)[["sea_level"]] 206 | target_units = "days since 1800-01-01" 207 | 208 | ds1 = rebase_dataset(ds, target_units) 209 | 210 | 211 | def test_chunking(): 212 | # An offset is required as the target units are ahead of the data in time 213 | target_units = "days since 2000-01-01" 214 | 215 | testfile = "test/data/ocean_sealevel.nc" 216 | 217 | ds = xr.open_dataset(testfile, decode_times=False, chunks={"time": 10}) 218 | target_units = "days since 1800-01-01" 219 | 220 | ds1 = rebase_dataset(ds, target_units) 221 | -------------------------------------------------------------------------------- /test/test_explore.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from datetime import datetime 4 | import os.path 5 | import shutil 6 | 7 | import xarray as xr 8 | import pandas as pd 9 | from pandas.testing import assert_frame_equal, assert_series_equal 10 | 11 | import cosima_cookbook as cc 12 | 13 | from cosima_cookbook.database import NCExperiment, NCFile 14 | 15 | 16 | def metadata_for_experiment( 17 | path, session, metadata_file=None, name="test", commit=True 18 | ): 19 | """Method to read metadata for an experiment without requiring 20 | the rest of the indexing infrastructure. 
21 | """ 22 | 23 | expt = NCExperiment(experiment=name, root_dir=path) 24 | 25 | # look for this experiment in the database 26 | q = ( 27 | session.query(NCExperiment) 28 | .filter(NCExperiment.experiment == expt.experiment) 29 | .filter(NCExperiment.root_dir == expt.root_dir) 30 | ) 31 | r = q.one_or_none() 32 | if r is not None: 33 | expt = r 34 | 35 | cc.database.update_metadata(expt, session, metadata_file) 36 | 37 | if commit: 38 | session.add(expt) 39 | session.commit() 40 | else: 41 | return expt 42 | 43 | 44 | @pytest.fixture(scope="module") 45 | def session(tmp_path_factory): 46 | # index test directory into temp database 47 | d = tmp_path_factory.mktemp("database") 48 | db = d / "test.db" 49 | session = cc.database.create_session(str(db)) 50 | 51 | # build index for entire module 52 | cc.database.build_index( 53 | [ 54 | "test/data/explore/one", 55 | "test/data/explore/two", 56 | "test/data/explore/duplicate/one", 57 | ], 58 | session, 59 | ) 60 | 61 | # force all files to be marked as present, even if they're empty 62 | ncfiles = session.query(cc.database.NCFile).all() 63 | for f in ncfiles: 64 | f.present = True 65 | session.commit() 66 | 67 | return session 68 | 69 | 70 | def test_database_explorer(session): 71 | dbx = cc.explore.DatabaseExplorer(session=session) 72 | 73 | assert dbx.session is session 74 | 75 | # Experiment selector 76 | assert dbx.expt_selector.options == ("one", "two") 77 | 78 | # Keyword filter selector 79 | assert dbx.filter_widget.options == tuple(dbx.keywords) 80 | 81 | in_one = set(cc.querying.get_variables(session, "one").name) 82 | in_two = set(cc.querying.get_variables(session, "two").name) 83 | 84 | # The variable filter box 85 | assert len(dbx.var_filter.selector.variables) == len((in_one | in_two)) 86 | 87 | # Turn off filtering so all variables are present in the filter selector 88 | dbx.var_filter.selector._filter_variables(coords=False, restarts=False, model="") 89 | 90 | truth = { 91 | "age_global": "Age (global) (yr)", 92 | "diff_cbt_t": "total vert diff_cbt(temp) (w/o neutral included) (m^2/s)", 93 | "dzt": "t-cell thickness (m)", 94 | "hi_m": "grid cell mean ice thickness (m)", 95 | "neutral": "neutral density (kg/m^3)", 96 | "neutralrho_edges": "neutral density edges (kg/m^3)", 97 | "nv": "vertex number", 98 | "pot_rho_0": "potential density referenced to 0 dbar (kg/m^3)", 99 | "pot_rho_2": "potential density referenced to 2000 dbar (kg/m^3)", 100 | "salt": "Practical Salinity (psu)", 101 | "st_edges_ocean": "tcell zstar depth edges (meters)", 102 | "st_ocean": "tcell zstar depth (meters)", 103 | } 104 | 105 | for var, label in truth.items(): 106 | assert dbx.var_filter.selector.selector.options[var] == label 107 | 108 | # Add all variables common to both experiments and ensure after filter 109 | # experiment selector still contains both 110 | for var in in_one & in_two: 111 | dbx.var_filter.selector.selector.label = var 112 | dbx.var_filter._add_var_to_selected(None) 113 | 114 | dbx._filter_experiments(None) 115 | assert dbx.expt_selector.options == ("one", "two") 116 | 117 | dbx.var_filter.delete(in_one & in_two) 118 | assert len(dbx.var_filter.var_filter_selected.options) == 0 119 | 120 | # Now all variables only in experiment two and ensure after filter 121 | # experiment selector only contains two 122 | for var in in_two - in_one: 123 | dbx.var_filter.selector.selector.label = var 124 | dbx.var_filter._add_var_to_selected(None) 125 | 126 | dbx._filter_experiments(None) 127 | assert dbx.expt_selector.options == ("two",) 128 | 129 | 
130 | def test_experiment_explorer(session): 131 | ee1 = cc.explore.ExperimentExplorer(session=session) 132 | 133 | # Experiment selector 134 | assert ee1.expt_selector.options == ("one", "two") 135 | 136 | assert len(ee1.var_selector.selector.options) == 24 137 | assert "pot_rho_0" in ee1.var_selector.selector.options 138 | assert "ty_trans_rho" not in ee1.var_selector.selector.options 139 | 140 | # Simulate selecting a different experiment from menu 141 | ee1._load_experiment("two") 142 | assert len(ee1.var_selector.selector.options) == 28 143 | assert "pot_rho_0" in ee1.var_selector.selector.options 144 | assert "ty_trans_rho" in ee1.var_selector.selector.options 145 | 146 | # Check frequency drop down changes when variable selector assigned a value 147 | assert ee1.frequency.options == () 148 | ee1.var_selector.selector.label = "ty_trans" 149 | ee1.var_selector._set_frequency_selector("ty_trans") 150 | assert ee1.frequency.options == ("1 yearly",) 151 | ee1.var_selector._set_cellmethods_selector("ty_trans", "1 yearly") 152 | assert ee1.cellmethods.options == ("time: mean",) 153 | ee1.var_selector._set_daterange_selector("ty_trans", "1 yearly", "time: mean") 154 | assert ee1.frequency.options == ("1 yearly",) 155 | 156 | # Check frequency drop down changes when variable selector assigned a value 157 | ee1.var_selector.selector.label = "tx_trans" 158 | ee1.var_selector._set_frequency_selector("tx_trans") 159 | assert ee1.frequency.options == (None,) 160 | ee1.var_selector._set_cellmethods_selector("tx_trans", None) 161 | assert ee1.cellmethods.options == ("time: mean",) 162 | ee1.var_selector._set_daterange_selector("tx_trans", None, "time: mean") 163 | print(ee1.daterange) 164 | 165 | ee2 = cc.explore.ExperimentExplorer(session=session) 166 | assert id(ee1.var_selector) != id(ee2.var_selector) 167 | 168 | 169 | def test_get_data(session): 170 | ee = cc.explore.ExperimentExplorer(session=session) 171 | 172 | assert ee.data is None 173 | 174 | ee._load_experiment("one") 175 | ee.var_selector.selector.label = "ty_trans" 176 | ee.var_selector._set_frequency_selector("ty_trans") 177 | ee.var_selector._set_cellmethods_selector("ty_trans", "1 yearly") 178 | ee.var_selector._set_daterange_selector("ty_trans", "1 yearly", "time: mean") 179 | ee._load_data(None) 180 | 181 | assert ee.frequency.options == ("1 yearly",) 182 | assert ee.daterange.options[0][0] == "0166/12/31" 183 | assert ee.daterange.options[1][0] == "0167/12/31" 184 | 185 | assert ee.data is not None 186 | assert ee.data.shape == (2, 1, 1, 1) 187 | 188 | 189 | def test_model_property(session): 190 | # Grab all variables and ensure the SQL classification matches the python version 191 | # May be some holes, as not ensured all cases covered 192 | for expt in cc.querying.get_experiments(session, all=True).experiment: 193 | for index, row in cc.querying.get_variables( 194 | session, experiment=expt, inferred=True 195 | ).iterrows(): 196 | ncfile = NCFile( 197 | index_time=datetime.now(), 198 | ncfile=row.ncfile, 199 | present=True, 200 | ) 201 | assert ncfile.model == row.model 202 | -------------------------------------------------------------------------------- /test/test_indexing.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pytest 4 | import shutil 5 | import time 6 | import xarray as xr 7 | from pathlib import Path 8 | from cosima_cookbook import database 9 | from sqlalchemy import func, inspect 10 | 11 | LOGGER = logging.getLogger(__name__) 12 | 
13 | 14 | def rm_tree(pth): 15 | pth = Path(pth) 16 | for child in pth.glob("*"): 17 | if child.is_file(): 18 | child.unlink() 19 | else: 20 | rm_tree(child) 21 | pth.rmdir() 22 | 23 | 24 | def assert_dictionaries_same(expected, actual): 25 | for key in expected.keys(): 26 | if key not in actual or expected[key] != actual[key]: 27 | return False 28 | 29 | return True 30 | 31 | 32 | @pytest.fixture 33 | def unreadable_dir(tmp_path): 34 | expt_path = tmp_path / "expt_dir" 35 | expt_path.mkdir() 36 | idx_dir = expt_path / "unreadable" 37 | idx_dir.mkdir() 38 | idx_dir.chmod(0o300) 39 | 40 | yield idx_dir 41 | 42 | idx_dir.chmod(0o700) 43 | rm_tree(expt_path) 44 | 45 | 46 | def test_find_files(): 47 | files = database.find_files("test/data/indexing/") 48 | assert len(files) == 17 49 | 50 | for f in files: 51 | assert Path(f).suffix == ".nc" 52 | 53 | # No python source files in data subdirectory 54 | assert len(database.find_files("test/data/indexing/", "*.py")) == 0 55 | 56 | # Test works with alternative suffix 57 | files = database.find_files("test/", "*.py") 58 | assert len(files) == 9 59 | 60 | for f in files: 61 | assert Path(f).suffix == ".py" 62 | 63 | 64 | def test_find_experiment(session_db): 65 | session, db = session_db 66 | 67 | directory = Path("test/data/indexing/broken_file") 68 | 69 | assert None == database.find_experiment(session, directory) 70 | 71 | expt = database.NCExperiment( 72 | experiment=str(directory.name), root_dir=str(directory.resolve()) 73 | ) 74 | session.add(expt) 75 | session.flush() 76 | 77 | assert expt == database.find_experiment(session, directory) 78 | 79 | 80 | def test_index_experiment(session_db): 81 | session, db = session_db 82 | 83 | directory = Path("test/data/indexing/longnames") 84 | expt = database.NCExperiment( 85 | experiment=str(directory.name), root_dir=str(directory.resolve()) 86 | ) 87 | 88 | files = database.find_files(directory) 89 | 90 | # Index just one file 91 | database.index_experiment(set(list(files)[:1]), session, expt) 92 | session.flush() 93 | 94 | assert expt == database.find_experiment(session, directory) 95 | assert len(database.find_experiment(session, directory).ncfiles) == 1 96 | 97 | # Index the other file 98 | database.index_experiment(set(list(files)[1:]), session, expt) 99 | 100 | assert expt == database.find_experiment(session, directory) 101 | assert len(database.find_experiment(session, directory).ncfiles) == 2 102 | 103 | 104 | def test_unreadable(session_db, unreadable_dir): 105 | session, db = session_db 106 | 107 | with pytest.warns(UserWarning, match="Some files or directories could not be read"): 108 | indexed = database.build_index(str(unreadable_dir), session) 109 | 110 | 111 | def test_broken(session_db): 112 | session, db = session_db 113 | indexed = database.build_index("test/data/indexing/broken_file", session) 114 | 115 | # make sure the database was created 116 | assert db.exists() 117 | 118 | # we indexed a single file 119 | assert indexed == 1 120 | 121 | # query ncfiles table -- should have a single file, marked as empty 122 | q = session.query(database.NCFile) 123 | r = q.all() 124 | assert len(r) == 1 125 | assert not r[0].present 126 | 127 | # query ncvars table -- should be empty 128 | q = session.query(func.count(database.NCVar.id)) 129 | assert q.scalar() == 0 130 | 131 | 132 | def test_empty_file(session_db): 133 | session, db = session_db 134 | indexed = database.build_index("test/data/indexing/empty_file", session) 135 | 136 | # as with test_broken, we should have seen a single file, 137 
| # but it should be marked as empty 138 | assert db.exists() 139 | assert indexed == 1 140 | q = session.query(database.NCFile) 141 | r = q.all() 142 | assert len(r) == 1 143 | assert not r[0].present 144 | 145 | # but there should be a valid variable 146 | q = session.query(func.count(database.NCVar.id)).filter( 147 | database.NCVar.varname == "ty_trans_rho" 148 | ) 149 | assert q.scalar() == 1 150 | 151 | 152 | def test_update_nonew(session_db): 153 | session, db = session_db 154 | database.build_index("test/data/indexing/broken_file", session) 155 | assert db.exists() 156 | 157 | # re-run the index, make sure we don't re-index anything 158 | reindexed = database.build_index( 159 | "test/data/indexing/broken_file", session, prune="flag" 160 | ) 161 | assert reindexed == 0 162 | 163 | 164 | def test_reindex_force(session_db): 165 | session, db = session_db 166 | database.build_index("test/data/indexing/broken_file", session) 167 | assert db.exists() 168 | 169 | # re-run the index, make sure re-index 170 | reindexed = database.build_index( 171 | "test/data/indexing/broken_file", session, force=True 172 | ) 173 | assert reindexed == 1 174 | 175 | 176 | def test_update_newfile(session_db, tmp_path): 177 | session, db = session_db 178 | shutil.copy( 179 | "test/data/indexing/longnames/output000/test1.nc", str(tmp_path / "test1.nc") 180 | ) 181 | database.build_index(str(tmp_path), session) 182 | 183 | # add another file 184 | shutil.copy( 185 | "test/data/indexing/longnames/output000/test2.nc", str(tmp_path / "test2.nc") 186 | ) 187 | reindexed = database.build_index(str(tmp_path), session) 188 | assert reindexed == 1 189 | 190 | 191 | def test_updated_file(session_db, tmp_path, caplog): 192 | session, db = session_db 193 | 194 | # Make tmp_path a concrete path otherwise filesystem ops won't work 195 | tmp_path = Path(tmp_path) 196 | 197 | ncfile = "test1.nc" 198 | ncpath = Path("test/data/indexing/longnames/output000/") / ncfile 199 | shutil.copy(str(ncpath), str(tmp_path / ncfile)) 200 | indexed = database.build_index(str(tmp_path), session) 201 | assert indexed == 1 202 | 203 | # Should not reindex 204 | reindexed = database.build_index(str(tmp_path), session) 205 | assert reindexed == 0 206 | 207 | # Should reindex as file is updated 208 | time.sleep(1) 209 | (tmp_path / ncfile).touch() 210 | reindexed = database.build_index(str(tmp_path), session) 211 | assert reindexed == 1 212 | 213 | # Should not reindex as flagging as missing will not remove 214 | # file from the database, so will not be reindexed 215 | time.sleep(1) 216 | (tmp_path / ncfile).touch() 217 | with caplog.at_level(logging.WARNING): 218 | reindexed = database.build_index(str(tmp_path), session, prune="flag") 219 | assert reindexed == 0 220 | assert "Set prune to 'delete' to reindex updated files" in caplog.text 221 | 222 | 223 | def test_single_broken(session_db): 224 | session, db = session_db 225 | database.build_index("test/data/indexing/single_broken_file", session) 226 | 227 | # query ncfiles table -- should have two entries 228 | q = session.query(func.count(database.NCFile.id)) 229 | assert q.scalar() == 2 230 | 231 | # query ncvars table -- should have a single entry 232 | q = session.query(func.count(database.NCVar.id)) 233 | assert q.scalar() == 1 234 | 235 | 236 | def test_longnames(session_db): 237 | session, db = session_db 238 | database.build_index("test/data/indexing/longnames", session) 239 | 240 | # query ncvars table -- should have two entries 241 | q = session.query(func.count(database.NCVar.id)) 242 
| assert q.scalar() == 2 243 | 244 | # query generic table -- should only be a single variable 245 | q = session.query(database.CFVariable) 246 | r = q.all() 247 | assert len(r) == 1 248 | assert r[0].long_name == "Test Variable" 249 | 250 | 251 | def test_multiple_experiments(session_db): 252 | session, db = session_db 253 | # index multiple experiments, which have duplicate data and therefore push 254 | # against some unique constraints 255 | database.build_index( 256 | [ 257 | "test/data/indexing/multiple/experiment_a", 258 | "test/data/indexing/multiple/experiment_b", 259 | ], 260 | session, 261 | ) 262 | 263 | q = session.query(database.NCExperiment) 264 | assert q.count() == 2 265 | 266 | 267 | def test_same_expt_name(session_db): 268 | session, db = session_db 269 | # index multiple experiments with different root directories, but the same 270 | # final path component (experiment name) 271 | database.build_index( 272 | [ 273 | "test/data/indexing/multiple/experiment_a", 274 | "test/data/indexing/alternate/experiment_a", 275 | ], 276 | session, 277 | ) 278 | 279 | # the indexing shouldn't fail, and we should have two distinct experiments 280 | # with the same name 281 | 282 | q = session.query(database.NCExperiment).filter( 283 | database.NCExperiment.experiment == "experiment_a" 284 | ) 285 | r = q.all() 286 | assert len(r) == 2 287 | assert r[0].root_dir != r[1].root_dir 288 | 289 | 290 | def test_following_symlinks(session_db): 291 | session, db = session_db 292 | 293 | # Indexing symlinked experiment should fail with default arguments 294 | database.build_index("test/data/indexing/symlinked/experiment_a", session) 295 | 296 | q = session.query(database.NCExperiment) 297 | assert q.count() == 0 298 | 299 | # Now specify to follow symlinks 300 | database.build_index( 301 | "test/data/indexing/symlinked/experiment_a", session, followsymlinks=True 302 | ) 303 | 304 | q = session.query(database.NCExperiment) 305 | assert q.count() == 1 306 | 307 | 308 | def test_broken_metadata(session_db): 309 | session, db = session_db 310 | indexed = database.build_index("test/data/indexing/broken_metadata", session) 311 | 312 | assert indexed == 1 313 | 314 | 315 | def test_time_dimension(session_db): 316 | session, db = session_db 317 | database.build_index("test/data/indexing/time", session) 318 | 319 | q = session.query(database.NCFile.time_start, database.NCFile.time_end) 320 | assert q.count() == 5 # should pick up 5 files 321 | 322 | q = q.filter( 323 | (database.NCFile.time_start.is_(None)) | (database.NCFile.time_end.is_(None)) 324 | ) 325 | assert q.count() == 0 # but all of them should have times populated 326 | 327 | # there should be 5 separate time variables 328 | q = session.query(database.CFVariable) 329 | assert q.count() == 5 330 | 331 | # each file should have exactly one time dimension 332 | q = ( 333 | session.query(func.count(database.NCFile.ncvars)) 334 | .join(database.NCFile.ncvars) 335 | .group_by(database.NCFile.id) 336 | ) 337 | for r in q.all(): 338 | assert r[0] == 1 339 | 340 | 341 | def test_missing_time_bounds(session_db): 342 | session, db = session_db 343 | database.build_index("test/data/indexing/time_bounds", session) 344 | 345 | # Should have one experiment 346 | q = session.query(database.NCExperiment) 347 | assert q.count() == 1 348 | 349 | # And one correctly indexed (present) file 350 | q = session.query(database.NCFile) 351 | r = q.all() 352 | assert len(r) == 1 353 | assert r[0].present 354 | 355 | 356 | def test_index_attributes(session_db): 357 | session, 
db = session_db 358 | database.build_index("test/data/querying", session) 359 | 360 | inspector = inspect(session.get_bind()) 361 | assert assert_dictionaries_same( 362 | { 363 | "name": "ix_ncattributes_ncvar_id", 364 | "column_names": ["ncvar_id"], 365 | "unique": 0, 366 | }, 367 | inspector.get_indexes("ncattributes")[0], 368 | ) 369 | 370 | ncfile = "output000/ocean.nc" 371 | 372 | # check that we have the right attributes for a file (just use a subset) 373 | f = session.query(database.NCFile).filter(database.NCFile.ncfile == ncfile).one() 374 | 375 | file_attrs = { 376 | "filename": "ocean.nc", 377 | "title": "MOM5", 378 | "grid_type": "mosaic", 379 | "grid_tile": "1", 380 | } 381 | for attr, attr_val in file_attrs.items(): 382 | assert attr in f.attrs and f.attrs[attr] == attr_val 383 | 384 | # and check a particular variable 385 | v = ( 386 | session.query(database.NCVar) 387 | .join(database.NCFile) 388 | .filter(database.NCFile.ncfile == ncfile) 389 | .filter(database.NCVar.varname == "temp") 390 | .one() 391 | ) 392 | var_attrs = { 393 | "long_name": "Potential temperature", 394 | "cell_methods": "time: mean", 395 | "coordinates": "geolon_t geolat_t", 396 | } 397 | for attr, attr_val in var_attrs.items(): 398 | assert attr in v.attrs and v.attrs[attr] == attr_val 399 | 400 | 401 | def test_prune_broken(session_db): 402 | session, db = session_db 403 | database.build_index("test/data/indexing/broken_file", session) 404 | 405 | assert db.exists() 406 | 407 | # check that we have one file 408 | q = session.query(database.NCFile) 409 | r = q.all() 410 | assert len(r) == 1 411 | 412 | # prune experiment 413 | database.prune_experiment("broken_file", session) 414 | 415 | # now the database should be empty 416 | q = session.query(database.NCFile) 417 | r = q.all() 418 | assert len(r) == 0 419 | 420 | 421 | def test_prune_missing_experiment(session_db): 422 | session, db = session_db 423 | database.build_index("test/data/indexing/broken_file", session) 424 | 425 | assert db.exists() 426 | 427 | # check that we have one file 428 | q = session.query(database.NCFile) 429 | r = q.all() 430 | assert len(r) == 1 431 | 432 | # prune experiment 433 | experiment = "incorrect_experiment" 434 | with pytest.raises(RuntimeError, match="No such experiment: {}".format(experiment)): 435 | database.prune_experiment(experiment, session) 436 | 437 | 438 | def test_prune_nodelete(session_db, tmp_path): 439 | session, db = session_db 440 | expt_dir = tmp_path / "expt" 441 | expt_dir.mkdir() 442 | 443 | # copy the file to a new experiment directory and index 444 | shutil.copy( 445 | "test/data/indexing/longnames/output000/test1.nc", str(expt_dir / "test1.nc") 446 | ) 447 | database.build_index(str(expt_dir), session) 448 | 449 | # check that we have a valid file 450 | q = session.query(database.NCFile).filter(database.NCFile.present) 451 | r = q.all() 452 | assert len(r) == 1 453 | 454 | # remove the file and prune 455 | os.remove(expt_dir / "test1.nc") 456 | database.prune_experiment("expt", session, delete=False) 457 | 458 | # now we should still have one file, but now not present 459 | q = session.query(database.NCFile) 460 | r = q.one_or_none() 461 | assert r is not None 462 | assert not r.present 463 | 464 | 465 | def test_prune_delete(session_db, tmp_path): 466 | session, db = session_db 467 | expt_dir = tmp_path / "expt" 468 | expt_dir.mkdir() 469 | 470 | # copy the file to a new experiment directory and index 471 | shutil.copy( 472 | "test/data/indexing/longnames/output000/test1.nc", str(expt_dir / 
"test1.nc") 473 | ) 474 | database.build_index(str(expt_dir), session) 475 | 476 | # check that we have a valid file 477 | q = session.query(database.NCFile).filter(database.NCFile.present) 478 | r = q.all() 479 | assert len(r) == 1 480 | 481 | # remove the file and prune 482 | os.remove(expt_dir / "test1.nc") 483 | database.prune_experiment("expt", session) 484 | 485 | # now we should still have no files 486 | q = session.query(database.NCFile) 487 | r = q.one_or_none() 488 | assert r is None 489 | 490 | 491 | def test_index_with_prune_nodelete(session_db, tmp_path): 492 | session, db = session_db 493 | expt_dir = tmp_path / "expt" 494 | expt_dir.mkdir() 495 | 496 | # copy the file to a new experiment directory and index 497 | shutil.copy( 498 | "test/data/indexing/longnames/output000/test1.nc", str(expt_dir / "test1.nc") 499 | ) 500 | database.build_index(str(expt_dir), session) 501 | 502 | # check that we have a valid file 503 | q = session.query(database.NCFile).filter(database.NCFile.present) 504 | r = q.all() 505 | assert len(r) == 1 506 | 507 | # remove the file and build with pruning 508 | os.remove(expt_dir / "test1.nc") 509 | database.build_index(str(expt_dir), session, prune="flag") 510 | 511 | # now we should still have one file, but now not present 512 | q = session.query(database.NCFile) 513 | r = q.one_or_none() 514 | assert r is not None 515 | assert not r.present 516 | 517 | 518 | def test_index_with_prune_delete(session_db, tmp_path): 519 | session, db = session_db 520 | expt_dir = tmp_path / "expt" 521 | expt_dir.mkdir() 522 | 523 | # copy the file to a new experiment directory and index 524 | shutil.copy( 525 | "test/data/indexing/longnames/output000/test1.nc", str(expt_dir / "test1.nc") 526 | ) 527 | database.build_index(str(expt_dir), session) 528 | 529 | # check that we have a valid file 530 | q = session.query(database.NCFile).filter(database.NCFile.present) 531 | r = q.all() 532 | assert len(r) == 1 533 | 534 | # remove the file and build with pruning 535 | os.remove(expt_dir / "test1.nc") 536 | database.build_index(str(expt_dir), session, prune="delete") 537 | 538 | # now we should still have no files 539 | q = session.query(database.NCFile) 540 | r = q.one_or_none() 541 | assert r is None 542 | -------------------------------------------------------------------------------- /test/test_metadata.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from datetime import datetime 3 | 4 | import pandas as pd 5 | from pandas.testing import assert_frame_equal 6 | 7 | from cosima_cookbook import database, querying 8 | 9 | 10 | def metadata_for_experiment(path, session, name="test", commit=True): 11 | """Method to read metadata for an experiment without requiring 12 | the rest of the indexing infrastructure. 
13 | """ 14 | 15 | expt = database.NCExperiment(experiment=name, root_dir=path) 16 | database.update_metadata(expt, session) 17 | 18 | if commit: 19 | session.add(expt) 20 | session.commit() 21 | else: 22 | return expt 23 | 24 | 25 | def test_metadata(session_db): 26 | """Test that metadata.yaml is read for an experiment during indexing""" 27 | 28 | session, db = session_db 29 | database.build_index("test/data/indexing/metadata", session) 30 | 31 | # query metadata 32 | q = session.query( 33 | database.NCExperiment.contact, 34 | database.NCExperiment.created, 35 | database.NCExperiment.description, 36 | ) 37 | r = q.one() 38 | assert r[0] == "The ACCESS Oracle" 39 | assert r[1] == "2018-01-01" 40 | assert len(r[2]) > 0 41 | 42 | 43 | def test_get_experiments_metadata(session_db): 44 | """Test that get_experiments returns metadata correctly""" 45 | 46 | session, db = session_db 47 | database.build_index("test/data/indexing/metadata", session) 48 | 49 | r = querying.get_experiments(session, contact=True) 50 | df = pd.DataFrame.from_dict( 51 | {"experiment": ["metadata"], "contact": ["The ACCESS Oracle"], "ncfiles": [1]} 52 | ) 53 | assert_frame_equal(r, df) 54 | 55 | r = querying.get_experiments(session, email=True) 56 | df = pd.DataFrame.from_dict( 57 | {"experiment": ["metadata"], "email": ["oracle@example.com"], "ncfiles": [1]} 58 | ) 59 | assert_frame_equal(r, df) 60 | 61 | r = querying.get_experiments(session, url=True) 62 | df = pd.DataFrame.from_dict( 63 | { 64 | "experiment": ["metadata"], 65 | "url": ["https://github.com/COSIMA/oracle"], 66 | "ncfiles": [1], 67 | } 68 | ) 69 | assert_frame_equal(r, df) 70 | 71 | r = querying.get_experiments(session, description=True) 72 | df = pd.DataFrame.from_dict( 73 | { 74 | "experiment": ["metadata"], 75 | "description": [ 76 | ( 77 | "Attempted spinup, using salt flux fix " 78 | "https://arccss.slack.com/archives/C6PP0GU9Y/p1515460656000124 " 79 | "and https://github.com/mom-ocean/MOM5/pull/208/commits/9f4ee6f8b72b76c96a25bf26f3f6cdf773b424d2 " 80 | "from the start. Used mushy ice from July year 1 onwards to avoid vertical thermo error in cice " 81 | "https://arccss.slack.com/archives/C6PP0GU9Y/p1515842016000079" 82 | ) 83 | ], 84 | "ncfiles": [1], 85 | } 86 | ) 87 | assert_frame_equal(r, df) 88 | 89 | r = querying.get_experiments(session, notes=True) 90 | df = pd.DataFrame.from_dict( 91 | { 92 | "experiment": ["metadata"], 93 | "notes": [ 94 | ( 95 | "Stripy salt restoring: " 96 | "https://github.com/OceansAus/access-om2/issues/74 tripole seam bug: " 97 | "https://github.com/OceansAus/access-om2/issues/86 requires dt=300s " 98 | "in May, dt=240s in Aug to maintain CFL in CICE near tripoles (storms " 99 | "in those months in 8485RYF); all other months work with dt=400s" 100 | ) 101 | ], 102 | "ncfiles": [1], 103 | } 104 | ) 105 | assert_frame_equal(r, df) 106 | 107 | r = querying.get_experiments(session, created=True) 108 | df = pd.DataFrame.from_dict( 109 | {"experiment": ["metadata"], "created": ["2018-01-01"], "ncfiles": [1]} 110 | ) 111 | assert_frame_equal(r, df) 112 | 113 | r = querying.get_experiments(session, root_dir=True) 114 | # Won't try and match a path that can change on different platforms 115 | # assert_frame_equal(r, df) 116 | assert r.shape == (1, 3) 117 | 118 | r = querying.get_experiments(session, all=True) 119 | # Won't try and match everything, just check dimensions are correct 120 | assert r.shape == (1, 9) 121 | 122 | # Test turning off returning experiment (bit dumb, but hey ...) 
123 | r = querying.get_experiments(session, experiment=False) 124 | df = pd.DataFrame.from_dict({"ncfiles": [1]}) 125 | assert_frame_equal(r, df) 126 | 127 | 128 | def test_keywords(session_db): 129 | """Test that keywords are read for an experiment""" 130 | 131 | session, db = session_db 132 | metadata_for_experiment("test/data/metadata/keywords", session) 133 | 134 | q = session.query(database.NCExperiment).filter( 135 | database.NCExperiment.experiment == "test" 136 | ) 137 | r = q.one() 138 | assert len(r.keywords) == 3 139 | assert "cosima" in r.keywords 140 | assert "not-a-keyword" not in r.keywords 141 | 142 | 143 | def test_duplicate_keywords_commit(session_db): 144 | """Test that the uniqueness constraint works across experiments. 145 | This simulates separate index calls, where the session is committed in between. 146 | """ 147 | 148 | session, db = session_db 149 | metadata_for_experiment("test/data/metadata/keywords", session, name="e1") 150 | metadata_for_experiment("test/data/metadata/keywords2", session, name="e2") 151 | 152 | q = session.query(database.Keyword) 153 | r = q.all() 154 | assert len(r) == 4 155 | 156 | 157 | def test_duplicate_keywords_nocommit(session_db): 158 | """Test that the uniqueness constraint works across experiments. 159 | This simulates multiple experiments being added in a single call. 160 | """ 161 | 162 | session, db = session_db 163 | e1 = metadata_for_experiment( 164 | "test/data/metadata/keywords", session, name="e1", commit=False 165 | ) 166 | e2 = metadata_for_experiment( 167 | "test/data/metadata/keywords2", session, name="e2", commit=False 168 | ) 169 | session.add_all([e1, e2]) 170 | session.commit() 171 | 172 | q = session.query(database.Keyword) 173 | r = q.all() 174 | assert len(r) == 4 175 | 176 | 177 | def test_keyword_upcast(session_db): 178 | """Test that a string keyword is added correctly.""" 179 | 180 | session, db = session_db 181 | metadata_for_experiment("test/data/metadata/string_keyword", session) 182 | 183 | q = session.query(database.NCExperiment).filter( 184 | database.NCExperiment.experiment == "test" 185 | ) 186 | r = q.one() 187 | assert "cosima" in r.keywords 188 | assert "c" not in r.keywords # make sure it wasn't added as a string 189 | 190 | 191 | def test_keyword_case_sensitivity(session_db): 192 | """Test that keywords are treated in a case-insensitive manner, 193 | both for metadata retrieval and querying. 
194 | """ 195 | 196 | session, db = session_db 197 | metadata_for_experiment("test/data/metadata/keywords", session, name="e1") 198 | metadata_for_experiment("test/data/metadata/upcase", session, name="e2") 199 | 200 | # we should be able to find the keyword in lowercase 201 | q = session.query(database.Keyword).filter(database.Keyword.keyword == "cosima") 202 | k1 = q.one_or_none() 203 | assert k1 is not None 204 | 205 | # and in uppercase 206 | q = session.query(database.Keyword).filter(database.Keyword.keyword == "COSIMA") 207 | k2 = q.one_or_none() 208 | assert k2 is not None 209 | 210 | # but they should resolve to the same keyword 211 | assert k1 is k2 212 | 213 | # finally, the set of keywords should all be lowercase 214 | q = session.query(database.NCExperiment).filter( 215 | database.NCExperiment.experiment == "e2" 216 | ) 217 | r = q.one() 218 | for kw in r.keywords: 219 | assert kw == kw.lower() 220 | 221 | 222 | def test_get_keywords(session_db): 223 | """Test retrieval of keywords""" 224 | 225 | session, db = session_db 226 | metadata_for_experiment("test/data/metadata/keywords", session, name="e1") 227 | metadata_for_experiment("test/data/metadata/keywords2", session, name="e2") 228 | 229 | # Grab keywords for individual experiments 230 | r = querying.get_keywords(session, "e1") 231 | assert r == {"access-om2-01", "ryf9091", "cosima"} 232 | 233 | r = querying.get_keywords(session, "e2") 234 | assert r == {"another-keyword", "cosima"} 235 | 236 | # Test retrieving all keywords 237 | r = querying.get_keywords(session) 238 | assert r == {"access-om2-01", "ryf9091", "another-keyword", "cosima"} 239 | 240 | 241 | def test_get_experiments_with_keywords(session_db): 242 | """Test retrieval of experiments with keyword filtering""" 243 | session, db = session_db 244 | database.build_index("test/data/metadata/keywords", session) 245 | database.build_index("test/data/metadata/keywords2", session) 246 | 247 | # Test keyword common to both experiments 248 | r = querying.get_experiments(session, keywords="cosima") 249 | df = pd.DataFrame.from_dict( 250 | {"experiment": ["keywords", "keywords2"], "ncfiles": [1, 1]} 251 | ) 252 | assert_frame_equal(r, df) 253 | 254 | # Test keyword common to both experiments using wildcard 255 | r = querying.get_experiments(session, keywords="cos%") 256 | df = pd.DataFrame.from_dict( 257 | {"experiment": ["keywords", "keywords2"], "ncfiles": [1, 1]} 258 | ) 259 | assert_frame_equal(r, df) 260 | 261 | r = querying.get_experiments(session, keywords="%-%") 262 | df = pd.DataFrame.from_dict( 263 | {"experiment": ["keywords", "keywords2"], "ncfiles": [1, 1]} 264 | ) 265 | assert_frame_equal(r, df) 266 | 267 | r = querying.get_experiments(session, keywords="access-om2%") 268 | df = pd.DataFrame.from_dict({"experiment": ["keywords"], "ncfiles": [1]}) 269 | assert_frame_equal(r, df) 270 | 271 | # Test keyword in only one experiment 272 | r = querying.get_experiments(session, keywords="another-keyword") 273 | df = pd.DataFrame.from_dict({"experiment": ["keywords2"], "ncfiles": [1]}) 274 | assert_frame_equal(r, df) 275 | 276 | r = querying.get_experiments(session, keywords="ryf9091") 277 | df = pd.DataFrame.from_dict({"experiment": ["keywords"], "ncfiles": [1]}) 278 | assert_frame_equal(r, df) 279 | 280 | # Test passing an array of keywords that match only one experiment 281 | r = querying.get_experiments(session, keywords=["cosima", "another-keyword"]) 282 | df = pd.DataFrame.from_dict({"experiment": ["keywords2"], "ncfiles": [1]}) 283 | assert_frame_equal(r, df) 284 
| 285 | # Test passing an array of keywords that will not match any one experiment 286 | r = querying.get_experiments(session, keywords=["another-keyword", "ryf9091"]) 287 | df = pd.DataFrame(columns=["experiment", "ncfiles"]) 288 | assert_frame_equal(r, df) 289 | 290 | # Test passing a non-existent keyword along with one present. Should return 291 | # nothing as no experiment contains it 292 | r = querying.get_experiments(session, keywords=["ryf9091", "not-a-keyword"]) 293 | df = pd.DataFrame(columns=["experiment", "ncfiles"]) 294 | assert_frame_equal(r, df) 295 | 296 | # Test passing only a non-existent keyword 297 | r = querying.get_experiments(session, keywords=["not-a-keyword"]) 298 | df = pd.DataFrame(columns=["experiment", "ncfiles"]) 299 | assert_frame_equal(r, df) 300 | 301 | # Test passing only a non-existent wildcard keyword 302 | r = querying.get_experiments(session, keywords=["z%"]) 303 | df = pd.DataFrame(columns=["experiment", "ncfiles"]) 304 | assert_frame_equal(r, df) 305 | 306 | 307 | def test_getvar_with_metadata(session_db): 308 | session, db = session_db 309 | database.build_index("test/data/indexing/metadata", session) 310 | 311 | with querying.getvar("metadata", "test", session, decode_times=False) as v: 312 | assert v.attrs["long_name"] == "Test Variable" 313 | assert v.attrs["contact"] == "The ACCESS Oracle" 314 | assert v.attrs["email"] == "oracle@example.com" 315 | assert v.attrs["created"] == "2018-01-01" 316 | assert "description" in v.attrs 317 | -------------------------------------------------------------------------------- /test/test_querying.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from datetime import datetime 4 | 5 | import pytest 6 | 7 | import xarray as xr 8 | import pandas as pd 9 | from pandas.testing import assert_frame_equal, assert_series_equal 10 | import numpy as np 11 | 12 | import cosima_cookbook as cc 13 | from cosima_cookbook.querying import QueryWarning 14 | from cosima_cookbook.database import NCFile, CFVariable 15 | 16 | 17 | @pytest.fixture(scope="module") 18 | def session(tmp_path_factory): 19 | # index test directory into temp database 20 | d = tmp_path_factory.mktemp("database") 21 | db = d / "test.db" 22 | session = cc.database.create_session(str(db)) 23 | 24 | # build index for entire module 25 | cc.database.build_index( 26 | ["test/data/querying", "test/data/querying_disambiguation"], session 27 | ) 28 | 29 | # force all files to be marked as present, even though they're empty 30 | ncfiles = session.query(cc.database.NCFile).all() 31 | for f in ncfiles: 32 | f.present = True 33 | session.commit() 34 | 35 | return session 36 | 37 | 38 | def test_valid_query(session): 39 | with cc.querying.getvar("querying", "temp", session, decode_times=False) as v: 40 | assert isinstance(v, xr.DataArray) 41 | assert len(v.attrs["ncfiles"]) == 1 42 | assert v.attrs["ncfiles"][0].endswith("test/data/querying/output000/ocean.nc") 43 | # Make sure other fields aren't included in attributes 44 | assert "index" not in v.attrs 45 | assert "root_dir" not in v.attrs 46 | # Make sure empty metadata fields haven't been included as attributes 47 | assert "contact" not in v.attrs 48 | assert "notes" not in v.attrs 49 | assert "description" not in v.attrs 50 | assert "email" not in v.attrs 51 | 52 | 53 | def test_invalid_query(session): 54 | with pytest.raises(cc.querying.VariableNotFoundError): 55 | cc.querying.getvar("querying", "notfound", session, decode_times=False) 56 | 57 | 58 | def 
test_warning_on_ambiguous_attr(session): 59 | with pytest.warns(QueryWarning) as record: 60 | cc.querying._ncfiles_for_variable( 61 | "querying_disambiguation", 62 | "v", 63 | session, 64 | attrs_unique={"cell_methods": "bar"}, 65 | ) 66 | 67 | assert len(record) == 1 68 | assert ( 69 | record[0] 70 | .message.args[0] 71 | .startswith( 72 | "Your query returns variables from files with different cell_methods" 73 | ) 74 | ) 75 | 76 | with pytest.warns(QueryWarning) as record: 77 | files = cc.querying._ncfiles_for_variable( 78 | "querying_disambiguation", 79 | "u", 80 | session, 81 | attrs_unique={"cell_methods": "time: no_valid"}, 82 | ) 83 | 84 | assert len(files) == 2 85 | assert len(record) == 1 86 | assert ( 87 | record[0] 88 | .message.args[0] 89 | .startswith( 90 | "Your query returns variables from files with different cell_methods" 91 | ) 92 | ) 93 | 94 | # Raise an exception if QueryWarning set to error 95 | warnings.simplefilter("error", QueryWarning) 96 | with pytest.raises(QueryWarning) as record: 97 | cc.querying._ncfiles_for_variable( 98 | "querying_disambiguation", 99 | "v", 100 | session, 101 | attrs_unique={"cell_methods": "bar"}, 102 | ) 103 | 104 | with warnings.catch_warnings(record=True) as record: 105 | # Turn off warnings, will run without exception 106 | # and record will be empty 107 | warnings.simplefilter("ignore", QueryWarning) 108 | 109 | cc.querying._ncfiles_for_variable( 110 | "querying_disambiguation", 111 | "v", 112 | session, 113 | attrs_unique={"cell_methods": "bar"}, 114 | ) 115 | 116 | assert len(record) == 0 117 | 118 | 119 | def test_disambiguation_on_default_attr(session): 120 | files = cc.querying._ncfiles_for_variable( 121 | "querying_disambiguation", 122 | "v", 123 | session, 124 | attrs_unique={"cell_methods": "mean_pow(02)"}, 125 | ) 126 | 127 | assert len(files) == 1 128 | assert files[0].NCVar.attrs["cell_methods"] == "mean_pow(02)" 129 | 130 | files = cc.querying._ncfiles_for_variable( 131 | "querying_disambiguation", 132 | "v", 133 | session, 134 | attrs_unique={"cell_methods": "time: mean"}, 135 | ) 136 | 137 | assert len(files) == 1 138 | assert files[0].NCVar.attrs["cell_methods"] == "time: mean" 139 | 140 | # One file has no cell_methods attribute 141 | files = cc.querying._ncfiles_for_variable( 142 | "querying_disambiguation", 143 | "u", 144 | session, 145 | attrs_unique={"cell_methods": "time: mean"}, 146 | ) 147 | 148 | assert len(files) == 1 149 | assert files[0].NCVar.attrs["cell_methods"] == "time: mean" 150 | 151 | # Add another unique attribute not present (should be ignored) 152 | files = cc.querying._ncfiles_for_variable( 153 | "querying_disambiguation", 154 | "v", 155 | session, 156 | attrs_unique={"cell_methods": "time: mean", "foo": "bar"}, 157 | ) 158 | 159 | assert len(files) == 1 160 | assert files[0].NCVar.attrs["cell_methods"] == "time: mean" 161 | 162 | 163 | def test_query_times(session): 164 | with cc.querying.getvar("querying", "ty_trans", session) as v: 165 | assert isinstance(v, xr.DataArray) 166 | 167 | 168 | def test_chunk_parsing_chunked(session): 169 | var = ( 170 | session.query(cc.database.NCVar) 171 | .filter(cc.database.NCVar.varname == "salt") 172 | .first() 173 | ) 174 | 175 | chunk_dict = { 176 | "time": 1, 177 | "st_ocean": 15, 178 | "yt_ocean": 216, 179 | "xt_ocean": 288, 180 | } 181 | 182 | assert cc.querying._parse_chunks(var) == chunk_dict 183 | 184 | 185 | def test_chunk_parsing_contiguous(session): 186 | var = ( 187 | session.query(cc.database.NCVar) 188 | .filter(cc.database.NCVar.varname == 
"potrho") 189 | .first() 190 | ) 191 | 192 | assert var.chunking == "contiguous" 193 | assert cc.querying._parse_chunks(var) is None 194 | 195 | 196 | def test_chunk_parsing_unchunked(session): 197 | var = ( 198 | session.query(cc.database.NCVar) 199 | .filter(cc.database.NCVar.varname == "hi_m") 200 | .first() 201 | ) 202 | 203 | assert var.chunking == "None" 204 | assert cc.querying._parse_chunks(var) is None 205 | 206 | 207 | def test_get_experiments(session): 208 | r = cc.querying.get_experiments(session) 209 | 210 | df = pd.DataFrame.from_dict( 211 | {"experiment": ["querying", "querying_disambiguation"], "ncfiles": [3, 2]} 212 | ) 213 | assert_frame_equal(r, df) 214 | 215 | metadata_keys = [ 216 | "root_dir", 217 | "contact", 218 | "email", 219 | "created", 220 | "url", 221 | "description", 222 | "notes", 223 | ] 224 | 225 | # Won't try and match everything, there is not much useful metadata, just 226 | # check dimensions are correct. Metadata correctness checked in test_metadata 227 | for k in metadata_keys: 228 | r = cc.querying.get_experiments(session, **{k: True}) 229 | assert k == r.columns[1] 230 | assert r.shape == (2, 3) 231 | 232 | # Test all = True to select all available metadata 233 | r = cc.querying.get_experiments(session, all=True) 234 | assert r.shape == (2, 9) 235 | 236 | # Functionally equivalent to above 237 | r = cc.querying.get_experiments(session, **{k: True for k in metadata_keys}) 238 | assert r.shape == (2, 9) 239 | 240 | # Functionally equivalent to above 241 | r = cc.querying.get_experiments( 242 | session, experiment=False, exptname="querying", all=True 243 | ) 244 | assert r.shape == (1, 8) 245 | assert "experiment" not in r 246 | 247 | # Test for filtering by variables 248 | in_both = {"potrho_edges", "age_global", "tx_trans_rho"} 249 | only_in_querying = {"hi_m", "ty_trans"} 250 | 251 | r = cc.querying.get_experiments(session, variables=in_both) 252 | assert r.shape == (2, 2) 253 | 254 | r = cc.querying.get_experiments(session, variables=(in_both | only_in_querying)) 255 | assert r.shape == (1, 2) 256 | 257 | r = cc.querying.get_experiments( 258 | session, variables=(in_both | only_in_querying | {"none"}) 259 | ) 260 | assert r.shape == (0, 2) 261 | 262 | 263 | def test_get_ncfiles(session): 264 | r = cc.querying.get_ncfiles(session, "querying") 265 | 266 | df = pd.DataFrame.from_dict( 267 | { 268 | "ncfile": [ 269 | "output000/hi_m.nc", 270 | "output000/ocean.nc", 271 | "restart000/ty_trans.nc", 272 | ], 273 | "index_time": [ 274 | pd.Timestamp("2019-08-09 21:51:12.090930"), 275 | pd.Timestamp("2019-08-09 21:51:12.143794"), 276 | pd.Timestamp("2019-08-09 21:51:12.148942"), 277 | ], 278 | } 279 | ) 280 | 281 | # The Timestamps will not be the same so check only that the ncfiles are correct 282 | assert_series_equal(r["ncfile"], df["ncfile"]) 283 | 284 | 285 | def test_get_variables(session): 286 | r = cc.querying.get_variables(session, "querying", "1 monthly") 287 | 288 | df = pd.DataFrame.from_dict( 289 | { 290 | "name": ["TLAT", "TLON", "hi_m", "tarea", "time", "time_bounds"], 291 | "long_name": [ 292 | "T grid center latitude", 293 | "T grid center longitude", 294 | "grid cell mean ice thickness", 295 | "area of T grid cells", 296 | "model time", 297 | "boundaries for time-averaging interval", 298 | ], 299 | "units": [ 300 | "degrees_north", 301 | "degrees_east", 302 | "m", 303 | "m^2", 304 | "days since 1900-01-01 00:00:00", 305 | "days since 1900-01-01 00:00:00", 306 | ], 307 | "frequency": ["1 monthly"] * 6, 308 | "ncfile": ["output000/hi_m.nc"] * 
6, 309 | "cell_methods": [None, None, "time: mean", None, None, None], 310 | "# ncfiles": [1] * 6, 311 | "time_start": ["1900-01-01 00:00:00"] * 6, 312 | "time_end": ["1900-02-01 00:00:00"] * 6, 313 | } 314 | ) 315 | 316 | assert_frame_equal(r, df) 317 | 318 | r = cc.querying.get_variables(session, "querying", search="temp") 319 | 320 | df = pd.DataFrame.from_dict( 321 | { 322 | "name": ["diff_cbt_t", "temp", "temp_xflux_adv", "temp_yflux_adv"], 323 | "long_name": [ 324 | "total vert diff_cbt(temp) (w/o neutral included)", 325 | "Potential temperature", 326 | "cp*rho*dzt*dyt*u*temp", 327 | "cp*rho*dzt*dxt*v*temp", 328 | ], 329 | "units": ["m^2/s", "degrees K", "Watts", "Watts"], 330 | "frequency": [None] * 4, 331 | "ncfile": ["output000/ocean.nc"] * 4, 332 | "cell_methods": ["time: mean"] * 4, 333 | "# ncfiles": [1] * 4, 334 | "time_start": [None] * 4, 335 | "time_end": [None] * 4, 336 | } 337 | ) 338 | 339 | assert_frame_equal(r, df) 340 | 341 | r = cc.querying.get_variables(session, search="temp") 342 | 343 | df = pd.DataFrame.from_dict( 344 | { 345 | "name": ["diff_cbt_t", "temp", "temp_xflux_adv", "temp_yflux_adv"], 346 | "long_name": [ 347 | "total vert diff_cbt(temp) (w/o neutral included)", 348 | "Potential temperature", 349 | "cp*rho*dzt*dyt*u*temp", 350 | "cp*rho*dzt*dxt*v*temp", 351 | ], 352 | "units": ["m^2/s", "degrees K", "Watts", "Watts"], 353 | } 354 | ) 355 | 356 | assert_frame_equal(r, df) 357 | 358 | r = cc.querying.get_variables(session, search=("temp", "velocity")) 359 | 360 | df = pd.DataFrame.from_dict( 361 | { 362 | "name": [ 363 | "diff_cbt_t", 364 | "temp", 365 | "temp_xflux_adv", 366 | "temp_yflux_adv", 367 | "u", 368 | "v", 369 | "wt", 370 | ], 371 | "long_name": [ 372 | "total vert diff_cbt(temp) (w/o neutral included)", 373 | "Potential temperature", 374 | "cp*rho*dzt*dyt*u*temp", 375 | "cp*rho*dzt*dxt*v*temp", 376 | "i-current", 377 | "j-current", 378 | "dia-surface velocity T-points", 379 | ], 380 | "units": [ 381 | "m^2/s", 382 | "degrees K", 383 | "Watts", 384 | "Watts", 385 | "m/sec", 386 | "m/sec", 387 | "m/sec", 388 | ], 389 | } 390 | ) 391 | 392 | r = cc.querying.get_variables(session, search=("temp", "velocity")) 393 | 394 | df = pd.DataFrame.from_dict( 395 | { 396 | "name": [ 397 | "diff_cbt_t", 398 | "temp", 399 | "temp_xflux_adv", 400 | "temp_yflux_adv", 401 | "u", 402 | "v", 403 | "wt", 404 | ], 405 | "long_name": [ 406 | "total vert diff_cbt(temp) (w/o neutral included)", 407 | "Potential temperature", 408 | "cp*rho*dzt*dyt*u*temp", 409 | "cp*rho*dzt*dxt*v*temp", 410 | "i-current", 411 | "j-current", 412 | "dia-surface velocity T-points", 413 | ], 414 | "units": [ 415 | "m^2/s", 416 | "degrees K", 417 | "Watts", 418 | "Watts", 419 | "m/sec", 420 | "m/sec", 421 | "m/sec", 422 | ], 423 | "frequency": [None] * 7, 424 | "ncfile": ["output000/ocean.nc"] * 7, 425 | "# ncfiles": [1] * 7, 426 | "time_start": [None] * 7, 427 | "time_end": [None] * 7, 428 | } 429 | ) 430 | 431 | 432 | def test_model_property(session): 433 | filename_map = { 434 | "ocean": ( 435 | "output/ocean/ice.nc", 436 | "output/ocn/land.nc", 437 | "output/ocean/atmos.nc", 438 | "ocean/ocean_daily.nc", 439 | "output/ocean/ocean_daily.nc.0000", 440 | "ocean/atmos.nc", 441 | ), 442 | "atmosphere": ( 443 | "output/atm/fire.nc", 444 | "output/atmos/ice.nc", 445 | "output/atmosphere/ice.nc", 446 | "atmosphere/ice.nc", 447 | "atmos/ice.nc", 448 | ), 449 | "land": ( 450 | "output/land/fire.nc", 451 | "output/lnd/ice.nc", 452 | "land/fire.nc", 453 | "lnd/ice.nc", 454 | ), 455 | "ice": ( 456 | 
"output/ice/fire.nc", 457 | "output/ice/in/here/land.nc", 458 | "ice/fire.nc", 459 | "ice/in/here/land.nc", 460 | ), 461 | "none": ( 462 | "output/ocean.nc", # only a model if part of path, not filename 463 | "someotherpath/ocean_daily.nc", 464 | "lala/land_daily.nc.0000", 465 | "output/atmosphere_ice.nc", 466 | "output/noice/in/here/land.nc", 467 | ), 468 | } 469 | for model in filename_map: 470 | for fpath in filename_map[model]: 471 | ncfile = NCFile( 472 | index_time=datetime.now(), 473 | ncfile=fpath, 474 | present=True, 475 | ) 476 | assert ncfile.model == model 477 | 478 | 479 | def test_is_restart_property(session): 480 | filename_map = { 481 | True: ( 482 | "output/restart/ice.nc", 483 | "output/restart000/land.nc", 484 | "restart/land.nc", 485 | ), 486 | False: ( 487 | "output/restartice.nc", 488 | "output/lastrestart/land.nc", 489 | ), 490 | } 491 | for isrestart in filename_map: 492 | for fpath in filename_map[isrestart]: 493 | ncfile = NCFile( 494 | index_time=datetime.now(), 495 | ncfile=fpath, 496 | present=True, 497 | ) 498 | assert ncfile.is_restart == isrestart 499 | 500 | # Grab all variables and ensure the SQL classification matches the python version 501 | # May be some holes, as not ensured all cases covered 502 | for index, row in cc.querying.get_variables( 503 | session, "querying", inferred=True 504 | ).iterrows(): 505 | ncfile = NCFile( 506 | index_time=datetime.now(), 507 | ncfile=row.ncfile, 508 | present=True, 509 | ) 510 | assert ncfile.is_restart == row.restart 511 | 512 | 513 | def test_is_coordinate_property(session): 514 | units_map = { 515 | True: ( 516 | "degrees_", 517 | "degrees_E", 518 | "degrees_N", 519 | "degrees_east", 520 | "hours since a long time ago", 521 | "radians", 522 | "days", 523 | "days since a while ago", 524 | ), 525 | False: ("degrees K",), 526 | } 527 | 528 | for iscoord in units_map: 529 | for units in units_map[iscoord]: 530 | assert CFVariable(name="bogus", units=units).is_coordinate == iscoord 531 | 532 | # Grab all variables and ensure the SQL classification matches the python version 533 | # May be some holes, as not ensured all cases covered 534 | for index, row in cc.querying.get_variables(session, inferred=True).iterrows(): 535 | assert ( 536 | CFVariable(name=row["name"], units=row.units).is_coordinate 537 | == row.coordinate 538 | ) 539 | 540 | 541 | def test_get_frequencies(session): 542 | r = cc.querying.get_frequencies(session, "querying") 543 | 544 | df = pd.DataFrame.from_dict({"frequency": [None, "1 monthly", "1 yearly"]}) 545 | 546 | assert_frame_equal(r, df) 547 | 548 | 549 | def test_disambiguation_by_frequency(session): 550 | with pytest.warns(UserWarning) as record: 551 | assert len(cc.querying._ncfiles_for_variable("querying", "time", session)) == 3 552 | 553 | if len(record) != 1: 554 | raise ValueError("|".join([r.message.args[0] for r in record])) 555 | 556 | assert len(record) == 1 557 | assert ( 558 | record[0] 559 | .message.args[0] 560 | .startswith("Your query returns files with differing frequencies:") 561 | ) 562 | 563 | assert ( 564 | len( 565 | cc.querying._ncfiles_for_variable( 566 | "querying", "time", session, frequency="1 monthly" 567 | ) 568 | ) 569 | == 1 570 | ) 571 | assert ( 572 | len( 573 | cc.querying._ncfiles_for_variable( 574 | "querying", "time", session, frequency="1 yearly" 575 | ) 576 | ) 577 | == 1 578 | ) 579 | 580 | # Both of these select a single file and successfully return an xarray object 581 | assert cc.querying.getvar( 582 | "querying", "time", session, frequency="1 
monthly" 583 | ).shape == (1,) 584 | assert cc.querying.getvar( 585 | "querying", "time", session, frequency="1 yearly" 586 | ).shape == (2,) 587 | 588 | 589 | def test_time_bounds_on_dataarray(session): 590 | var_salt = cc.querying.getvar( 591 | "querying", "salt", session, decode_times=False, return_dataset=True 592 | ) 593 | 594 | # we should have added time_bounds into the DataArray's attributes 595 | assert "time_bounds" in var_salt 596 | 597 | # and time_bounds should itself be a DataArray 598 | assert isinstance(var_salt["time_bounds"], xr.DataArray) 599 | 600 | 601 | def test_query_with_attrs(session): 602 | attrs = { 603 | "long_name": "Practical Salinity", 604 | "units": "psu", 605 | } 606 | 607 | # a valid set of attributes 608 | var_salt = cc.querying.getvar( 609 | "querying", "salt", session, decode_times=False, attrs=attrs 610 | ) 611 | 612 | for attr, val in attrs.items(): 613 | assert var_salt.attrs[attr] == val 614 | 615 | # make sure that this is actually applied as an additional filter 616 | # by making failing queries 617 | # first: incorrect attribute value 618 | with pytest.raises(cc.querying.VariableNotFoundError): 619 | cc.querying.getvar( 620 | "querying", 621 | "salt", 622 | session, 623 | decode_times=False, 624 | attrs={"units": "degrees K"}, 625 | ) 626 | 627 | # second: non-present attribute name 628 | with pytest.raises(cc.querying.VariableNotFoundError): 629 | cc.querying.getvar( 630 | "querying", "salt", session, decode_times=False, attrs={"not_found": "psu"} 631 | ) 632 | 633 | 634 | def test_query_chunks(session, caplog): 635 | with cc.querying.getvar( 636 | "querying", "ty_trans", session, chunks={"invalid": 99} 637 | ) as v: 638 | assert "chunking along dimensions {'invalid'} is not possible" in caplog.text 639 | -------------------------------------------------------------------------------- /test/test_sqa14.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from cosima_cookbook.database import * 3 | 4 | 5 | def test_empty_file(session_db): 6 | session, db = session_db 7 | 8 | exp = NCExperiment(experiment="a", root_dir="b") 9 | file = NCFile() 10 | 11 | file.experiment = exp 12 | 13 | session.add(exp) 14 | session.commit() 15 | 16 | assert session.query(NCFile).count() == 1 17 | assert session.query(NCExperiment).count() == 1 18 | 19 | 20 | def test_file_one_var(session_db): 21 | session, db = session_db 22 | 23 | exp = NCExperiment(experiment="a", root_dir="b") 24 | file = NCFile() 25 | cfvar = CFVariable(name="c") 26 | var = NCVar() 27 | 28 | file.experiment = exp 29 | var.ncfile = file 30 | var.variable = cfvar 31 | 32 | session.add(exp) 33 | session.commit() 34 | 35 | assert session.query(NCFile).count() == 1 36 | assert session.query(NCVar).count() == 1 37 | 38 | 39 | def test_file_attr(session_db): 40 | session, db = session_db 41 | 42 | exp = NCExperiment(experiment="a", root_dir="b") 43 | file = NCFile() 44 | cfvar = CFVariable(name="c") 45 | var = NCVar() 46 | 47 | file.experiment = exp 48 | file.attrs["x"] = "y" 49 | 50 | session.add(exp) 51 | session.commit() 52 | 53 | assert session.query(NCFile).count() == 1 54 | assert session.query(NCAttribute).count() == 1 55 | assert session.query(NCAttributeString).count() == 2 56 | 57 | # Add another attribute with duplicate string 58 | file.attrs["z"] = "y" 59 | 60 | session.add(exp) 61 | session.commit() 62 | 63 | assert session.query(NCFile).count() == 1 64 | assert session.query(NCAttribute).count() == 2 65 | assert 
session.query(NCAttributeString).count() == 3 66 | 67 | 68 | def test_var_attr(session_db): 69 | session, db = session_db 70 | 71 | exp = NCExperiment(experiment="a", root_dir="b") 72 | file = NCFile() 73 | cfvar = CFVariable(name="c") 74 | var = NCVar() 75 | 76 | file.experiment = exp 77 | var.ncfile = file 78 | var.variable = cfvar 79 | var.attrs["x"] = "y" 80 | 81 | session.add(exp) 82 | session.commit() 83 | 84 | assert session.query(NCFile).count() == 1 85 | assert session.query(NCAttribute).count() == 1 86 | assert session.query(NCAttributeString).count() == 2 87 | 88 | # Add another attribute with duplicate string 89 | var.attrs["z"] = "y" 90 | 91 | session.add(exp) 92 | session.commit() 93 | 94 | assert session.query(NCAttribute).count() == 2 95 | assert session.query(NCAttributeString).count() == 3 96 | 97 | # Add an attribute to the file 98 | file.attrs["y"] = "x" 99 | 100 | session.add(exp) 101 | session.commit() 102 | 103 | assert session.query(NCAttribute).count() == 3 104 | assert session.query(NCAttributeString).count() == 3 105 | 106 | 107 | def test_index_file(session_db): 108 | session, db = session_db 109 | 110 | exp = NCExperiment(experiment="a", root_dir="test/data/querying") 111 | 112 | file = index_file("output000/ocean.nc", exp, session) 113 | 114 | session.add(exp) 115 | session.commit() 116 | 117 | assert session.query(NCFile).count() == 1 118 | assert session.query(CFVariable).count() == 38 119 | assert session.query(NCVar).count() == 38 120 | assert session.query(NCAttribute).count() == 243 - 18 121 | 122 | var = session.query(NCVar).filter(NCVar.varname == "temp").one() 123 | assert var.attrs["long_name"] == "Potential temperature" 124 | 125 | 126 | def test_file_delete(session_db): 127 | session, db = session_db 128 | 129 | exp = NCExperiment(experiment="a", root_dir="test/data/querying") 130 | 131 | file = index_file("output000/ocean.nc", exp, session) 132 | 133 | session.add(exp) 134 | session.commit() 135 | 136 | assert session.query(NCFile).count() == 1 137 | 138 | session.delete(file) 139 | session.commit() 140 | 141 | assert session.query(NCExperiment).count() == 1 142 | assert session.query(NCFile).count() == 0 143 | assert session.query(CFVariable).count() == 38 # Not cascaded 144 | assert session.query(NCVar).count() == 0 145 | assert session.query(NCAttribute).count() == 0 146 | assert session.query(NCAttributeString).count() == 114 # Not cascaded 147 | -------------------------------------------------------------------------------- /test/test_update.py: -------------------------------------------------------------------------------- 1 | import shlex 2 | from cosima_cookbook import database_update 3 | 4 | 5 | def test_database_update(tmp_path): 6 | args = shlex.split( 7 | "-db {db} test/data/update/experiment_a test/data/update/experiment_b".format( 8 | db=tmp_path / "test.db" 9 | ) 10 | ) 11 | 12 | database_update.main(args) 13 | --------------------------------------------------------------------------------
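Note on the session_db fixture: the tests in test/test_metadata.py and test/test_sqa14.py unpack a session_db fixture that is defined in test/conftest.py (listed in the tree but not reproduced here). A minimal sketch of a compatible fixture, assuming only the cosima_cookbook.database.create_session call already used by the module-scoped fixture in test/test_querying.py, might look like the following; it is an illustration, not the repository's actual conftest.py.

import pytest
import cosima_cookbook as cc


@pytest.fixture
def session_db(tmp_path):
    # Create an isolated SQLite database for each test and return both the
    # session and the database path, matching the "session, db = session_db"
    # unpacking used throughout the tests above.
    db = tmp_path / "test.db"
    session = cc.database.create_session(str(db))
    yield session, db
    session.close()

With a per-test database under pytest's tmp_path, commits made in one test (for example the keyword uniqueness tests) cannot leak experiments or keywords into another.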