├── .circleci
└── config.yml
├── .gitattributes
├── .github
└── workflows
│ └── tests.yml
├── .gitignore
├── LICENSE
├── README.md
├── alembic.ini
├── conda
├── environment.yml
└── meta.yaml
├── cosima_cookbook
├── __init__.py
├── database.py
├── database_update.py
├── database_utils.py
├── date_utils.py
├── diagnostics
│ ├── __init__.py
│ ├── mean_tau_x.py
│ ├── overturning.py
│ └── simple.py
├── distributed.py
├── explore.py
├── memory.py
├── netcdf_index.py
├── netcdf_utils.py
├── plots
│ ├── __init__.py
│ ├── lineplots.py
│ ├── maps.py
│ ├── overturning.py
│ └── scalar.py
├── querying.py
└── summary
│ ├── __init__.py
│ ├── nml_diff.py
│ └── nml_summary.py
├── docs
├── Makefile
├── make.bat
├── requirements.txt
└── source
│ ├── conf.py
│ ├── cosima_cookbook.rst
│ ├── getting_started.rst
│ ├── index.rst
│ ├── modules.rst
│ └── related_projects.rst
├── readthedocs.yml
├── requirements.txt
├── sandbox
├── alembic
│ ├── README
│ ├── env.py
│ ├── script.py.mako
│ └── versions
│ │ └── 16223b92479e_add_keywords.py
└── diag-vis.py
├── setup.py
└── test
├── conftest.py
├── data
├── explore
│ ├── duplicate
│ │ └── one
│ │ │ ├── metadata.yaml
│ │ │ └── ocean
│ │ │ └── ocean_age.nc
│ ├── one
│ │ ├── atmosphere
│ │ │ └── ty_trans.nc
│ │ ├── ice
│ │ │ └── hi_m.nc
│ │ ├── metadata.yaml
│ │ ├── ocean
│ │ │ └── ocean.nc
│ │ └── restart
│ │ │ └── ocean_velocity_advection.res.nc
│ └── two
│ │ ├── atm
│ │ └── hi_m.nc
│ │ ├── metadata.yaml
│ │ ├── nomodel
│ │ └── ty_trans.nc
│ │ ├── ocn
│ │ ├── ocean.nc
│ │ └── ocean_month.nc
│ │ └── restart
│ │ └── ocean_velocity_advection.res.nc
├── indexing
│ ├── alternate
│ │ └── experiment_a
│ │ │ └── test2.nc
│ ├── broken_file
│ │ └── output000
│ │ │ └── test.nc
│ ├── broken_metadata
│ │ ├── metadata.yaml
│ │ └── test1.nc
│ ├── empty_file
│ │ └── output000
│ │ │ └── empty.nc
│ ├── longnames
│ │ └── output000
│ │ │ ├── test1.nc
│ │ │ └── test2.nc
│ ├── metadata
│ │ ├── metadata.yaml
│ │ └── test1.nc
│ ├── multiple
│ │ ├── experiment_a
│ │ │ └── test1.nc
│ │ └── experiment_b
│ │ │ └── test1.nc
│ ├── single_broken_file
│ │ └── output000
│ │ │ ├── broken.nc
│ │ │ └── test.nc
│ ├── symlinked
│ │ └── experiment_a
│ ├── time
│ │ ├── t1.nc
│ │ ├── t2.nc
│ │ ├── t3.nc
│ │ ├── t4.nc
│ │ └── t5.nc
│ └── time_bounds
│ │ └── file001.nc
├── metadata
│ ├── keywords
│ │ ├── metadata.yaml
│ │ └── test1.nc
│ ├── keywords2
│ │ ├── metadata.yaml
│ │ └── test1.nc
│ ├── string_keyword
│ │ └── metadata.yaml
│ └── upcase
│ │ └── metadata.yaml
├── ocean_sealevel.nc
├── querying
│ ├── output000
│ │ ├── hi_m.nc
│ │ └── ocean.nc
│ └── restart000
│ │ └── ty_trans.nc
├── querying_disambiguation
│ └── output000
│ │ ├── ocean.nc
│ │ └── ocean_month.nc
└── update
│ ├── experiment_a
│ └── test1.nc
│ └── experiment_b
│ └── test2.nc
├── test_database.py
├── test_dates.py
├── test_explore.py
├── test_indexing.py
├── test_metadata.py
├── test_querying.py
├── test_sqa14.py
└── test_update.py
/.circleci/config.yml:
--------------------------------------------------------------------------------
1 | version: 2.0
2 | jobs:
3 | py36:
4 | working_directory: ~/circleci
5 | docker:
6 | - image: circleci/python
7 | environment:
8 | PYTHON_VER: 3.6
9 | resource_class: medium+
10 | steps:
11 | - checkout
12 |
13 | - run: |
14 | wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O conda.sh
15 | bash conda.sh -b -p ~/conda
16 | ~/conda/bin/conda config --system --add channels conda-forge
17 | ~/conda/bin/conda config --system --add channels coecms
18 | ~/conda/bin/conda update conda
19 | ~/conda/bin/conda install --yes conda-build conda-verify
20 |
21 | - run: |
22 | ~/conda/bin/conda build -c coecms conda --python=${PYTHON_VER}
23 |
24 | - run: |
25 | mkdir ~/artefacts
26 | cp $(~/conda/bin/conda build conda --python=${PYTHON_VER} --output) ~/artefacts
27 |
28 | - persist_to_workspace:
29 | root: ~/artefacts
30 | paths: '*'
31 |
32 | publish:
33 | working_directory: /circleci
34 | docker:
35 | - image: scottwales/conda-build
36 | resource_class: medium+
37 | steps:
38 | - attach_workspace:
39 | at: /artefacts
40 |
41 | - run:
42 | anaconda --token "${ANACONDA_TOKEN}" upload --user "${ANACONDA_USER}" /artefacts/*.tar.bz2
43 |
44 | workflows:
45 | version: 2
46 | build_and_publish:
47 | jobs:
48 | - py36:
49 | filters:
50 | tags:
51 | only: /.*/
52 |
53 | - publish:
54 | requires:
55 | - py36
56 | filters:
57 | tags:
58 | only: /.*/
59 | branches:
60 | ignore: /.*/
61 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 |
2 | *.ipynb diff=jupyternotebook
3 |
4 | *.ipynb merge=jupyternotebook
5 |
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | name: Testing
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 | pull_request:
8 | branches:
9 | - master
10 |
11 | jobs:
12 | formatting:
13 | runs-on: ubuntu-latest
14 |
15 | steps:
16 | - uses: actions/checkout@v2
17 | - name: Code formatting
18 | uses: lgeiger/black-action@v1.0.1
19 | with:
20 | args: "--check --verbose cosima_cookbook test"
21 |
22 | testing:
23 | needs: formatting
24 | runs-on: ubuntu-latest
25 | strategy:
26 | matrix:
27 | python-version: ['3.8', '3.9', '3.10']
28 | sqa-version: ['<1.4', '==1.4.*']
29 |
30 | steps:
31 | - uses: actions/checkout@v2
32 | - name: Set up Python ${{ matrix.python-version }}
33 | uses: actions/setup-python@v2
34 | with:
35 | python-version: ${{ matrix.python-version }}
36 | - name: Install dependencies
37 | run: |
38 | python -m pip install '.[build]'
39 | python -m pip install "sqlalchemy${{ matrix.sqa-version}}"
40 | - name: Unit tests
41 | run: |
42 | python -m pytest --cov cosima_cookbook test
43 | - name: Upload coverage reports to Codecov with GitHub Action
44 | uses: codecov/codecov-action@v3
45 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 |
48 | # Translations
49 | *.mo
50 | *.pot
51 |
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 |
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 |
60 | # Scrapy stuff:
61 | .scrapy
62 |
63 | # Sphinx documentation
64 | docs/_build/
65 |
66 | # PyBuilder
67 | target/
68 |
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 |
72 | # pyenv
73 | .python-version
74 |
75 | # celery beat schedule file
76 | celerybeat-schedule
77 |
78 | # dotenv
79 | .env
80 |
81 | # virtualenv
82 | venv/
83 | ENV/
84 |
85 | # Spyder project settings
86 | .spyderproject
87 |
88 | # Rope project settings
89 | .ropeproject
90 |
91 | doc/_build
92 | doc/diagnostics
93 | doc/configurations
94 | doc/notebooks
95 | doc/gen_modules
96 |
97 | dask-worker-space
98 |
99 | # from https://github.com/github/gitignore/blob/master/Global/macOS.gitignore
100 |
101 | # General
102 | .DS_Store
103 | .AppleDouble
104 | .LSOverride
105 |
106 | # Icon must end with two \r
107 | Icon
108 |
109 |
110 | # Thumbnails
111 | ._*
112 |
113 | # Files that might appear in the root of a volume
114 | .DocumentRevisions-V100
115 | .fseventsd
116 | .Spotlight-V100
117 | .TemporaryItems
118 | .Trashes
119 | .VolumeIcon.icns
120 | .com.apple.timemachine.donotpresent
121 |
122 | # Directories potentially created on remote AFP share
123 | .AppleDB
124 | .AppleDesktop
125 | Network Trash Folder
126 | Temporary Items
127 | .apdisk
128 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | # cosima-cookbook package
9 |
10 | This repository hosts `cosima_cookbook`, a [Python package](https://anaconda.org/coecms/cosima-cookbook) for managing a database of ocean model output and loading that output via xarray.
11 |
12 | ⚠️ **The `cosima_cookbook` Python package is deprecated and no longer being developed!** ⚠️
13 |
14 | Use the [ACCESS-NRI Intake catalog](https://cosima-recipes.readthedocs.io/en/latest/Tutorials/ACCESS-NRI_Intake_Catalog.html) instead.
15 |
16 | ## What now? Where should I go?
17 |
18 | We refer users to the [COSIMA Recipes repository](https://github.com/COSIMA/cosima-recipes), where they will find tutorials and 'recipes' (that is, examples) of various analyses that one can do using ocean and sea-ice model output.
19 |
20 | [Documentation](https://cosima-cookbook.readthedocs.org/en/latest)
21 |
--------------------------------------------------------------------------------
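For orientation, here is a minimal sketch of how the (now deprecated) package was typically used. It relies on `cosima_cookbook.database.create_session`, which also appears in `database_update.py` below; the argument order of `querying.getvar` shown here is an assumption, and the paths and experiment name are placeholders.

```python
# Minimal usage sketch (assumes an already-indexed database; names are placeholders).
import cosima_cookbook as cc

# Open (or create) a session against a cookbook database file
session = cc.database.create_session("/path/to/cosima_master.db")

# Load one variable from an experiment as an xarray object
# (getvar's argument order here is indicative, not authoritative)
darray = cc.querying.getvar("my_experiment", "temp", session, n=1)
print(darray)
```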
/alembic.ini:
--------------------------------------------------------------------------------
1 | # A generic, single database configuration.
2 |
3 | [alembic]
4 | # path to migration scripts
5 | script_location = sandbox/alembic
6 |
7 | # template used to generate migration files
8 | # file_template = %%(rev)s_%%(slug)s
9 |
10 | # timezone to use when rendering the date
11 | # within the migration file as well as the filename.
12 | # string value is passed to dateutil.tz.gettz()
13 | # leave blank for localtime
14 | # timezone =
15 |
16 | # max length of characters to apply to the
17 | # "slug" field
18 | # truncate_slug_length = 40
19 |
20 | # set to 'true' to run the environment during
21 | # the 'revision' command, regardless of autogenerate
22 | # revision_environment = false
23 |
24 | # set to 'true' to allow .pyc and .pyo files without
25 | # a source .py file to be detected as revisions in the
26 | # versions/ directory
27 | # sourceless = false
28 |
29 | # version location specification; this defaults
30 | # to sandbox/alembic/versions. When using multiple version
31 | # directories, initial revisions must be specified with --version-path
32 | # version_locations = %(here)s/bar %(here)s/bat sandbox/alembic/versions
33 |
34 | # the output encoding used when revision files
35 | # are written from script.py.mako
36 | # output_encoding = utf-8
37 |
38 | sqlalchemy.url = sqlite:///path-to-db.db
39 |
40 |
41 | [post_write_hooks]
42 | # post_write_hooks defines scripts or Python functions that are run
43 | # on newly generated revision scripts. See the documentation for further
44 | # detail and examples
45 |
46 | # format using "black" - use the console_scripts runner, against the "black" entrypoint
47 | # hooks=black
48 | # black.type=console_scripts
49 | # black.entrypoint=black
50 | # black.options=-l 79
51 |
52 | # Logging configuration
53 | [loggers]
54 | keys = root,sqlalchemy,alembic
55 |
56 | [handlers]
57 | keys = console
58 |
59 | [formatters]
60 | keys = generic
61 |
62 | [logger_root]
63 | level = WARN
64 | handlers = console
65 | qualname =
66 |
67 | [logger_sqlalchemy]
68 | level = WARN
69 | handlers =
70 | qualname = sqlalchemy.engine
71 |
72 | [logger_alembic]
73 | level = INFO
74 | handlers =
75 | qualname = alembic
76 |
77 | [handler_console]
78 | class = StreamHandler
79 | args = (sys.stderr,)
80 | level = NOTSET
81 | formatter = generic
82 |
83 | [formatter_generic]
84 | format = %(levelname)-5.5s [%(name)s] %(message)s
85 | datefmt = %H:%M:%S
86 |
--------------------------------------------------------------------------------
/conda/environment.yml:
--------------------------------------------------------------------------------
1 | name: cosima-cookbook
2 |
3 | channels:
4 | - coecms
5 | - conda-forge
6 | - defaults
7 |
8 | dependencies:
9 | - python
10 | - numpy
11 | - dask
12 | - distributed
13 | - xarray
14 | - netcdf4
15 | - joblib
16 | - tqdm
17 | - sqlalchemy
18 |
19 |
--------------------------------------------------------------------------------
/conda/meta.yaml:
--------------------------------------------------------------------------------
1 | package:
2 | name: cosima-cookbook
3 | version: {{ GIT_DESCRIBE_TAG }}
4 |
5 | source:
6 | git_rev: master
7 | git_url: ../
8 |
9 | build:
10 | noarch: python
11 | number: {{ GIT_DESCRIBE_NUMBER }}
12 | script: python setup.py install --single-version-externally-managed --record=record.txt
13 |
14 | requirements:
15 | build:
16 | - python>=3.6
17 | - setuptools
18 | - setuptools_scm
19 | - pbr
20 | run:
21 | - python>=3.6
22 | - numpy
23 | - dask
24 | - distributed
25 | - xarray
26 | - netcdf4
27 | - joblib
28 | - tqdm
29 | - sqlalchemy<2.0
30 | - ipywidgets
31 | - cftime>1.2.1
32 | - lxml
33 |
34 | about:
35 | home: http://cosima-cookbook.readthedocs.io
36 | license: Apache License 2.0
37 |
38 |
--------------------------------------------------------------------------------
/cosima_cookbook/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Common tools for working with COSIMA model output
4 | """
5 |
6 | from . import database
7 | from . import querying
8 | from . import explore
9 |
10 | from importlib.metadata import version, PackageNotFoundError
11 |
12 | try:
13 | __version__ = version("cosima-cookbook")
14 | except PackageNotFoundError:
15 | pass
16 |
--------------------------------------------------------------------------------
/cosima_cookbook/database_update.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import pathlib
3 |
4 | import cosima_cookbook as cc
5 |
6 |
7 | def main(argv=None):
8 | parser = argparse.ArgumentParser(description="Update COSIMA cookbook database.")
9 | parser.add_argument(
10 | "dirs", type=pathlib.Path, nargs="+", help="Directories to index."
11 | )
12 | parser.add_argument(
13 | "-db",
14 | "--database",
15 | dest="db",
16 | action="store",
17 | default="cosima_master.db",
18 | help="Database to update.",
19 | )
20 | args = parser.parse_args(argv)
21 |
22 | print(cc)
23 |
24 | print("Establishing a DB connection to: {}".format(args.db))
25 | session = cc.database.create_session(args.db, timeout=30)
26 |
27 | for dir in args.dirs:
28 | print("Indexing: {}".format(dir))
29 | cc.database.build_index(
30 | dir, session, prune="delete", force=False, followsymlinks=True, nfiles=1000
31 | )
32 |
--------------------------------------------------------------------------------
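Since `database_update.main()` accepts an optional `argv` list, the updater can be exercised directly from Python as well as from the command line; a small sketch with placeholder paths:

```python
# Sketch: drive the updater above programmatically (paths are placeholders).
from cosima_cookbook.database_update import main

main(["--database", "/scratch/me/cosima_test.db", "/scratch/me/experiment_output"])
```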
/cosima_cookbook/database_utils.py:
--------------------------------------------------------------------------------
1 | # enforce unique ORM objects: https://github.com/sqlalchemy/sqlalchemy/wiki/UniqueObject
2 |
3 |
4 | def _unique(session, cls, hashfunc, queryfunc, constructor, arg, kw):
5 | cache = getattr(session, "_unique_cache", None)
6 | if cache is None:
7 | session._unique_cache = cache = {}
8 |
9 | key = (cls, hashfunc(*arg, **kw))
10 | if key in cache:
11 | return cache[key]
12 | else:
13 | with session.no_autoflush:
14 | q = session.query(cls)
15 | q = queryfunc(q, *arg, **kw)
16 | obj = q.first()
17 | if not obj:
18 | obj = constructor(*arg, **kw)
19 | session.add(obj)
20 | cache[key] = obj
21 | return obj
22 |
23 |
24 | class UniqueMixin(object):
25 | @classmethod
26 | def unique_hash(cls, *arg, **kw):
27 | raise NotImplementedError()
28 |
29 | @classmethod
30 | def unique_filter(cls, query, *arg, **kw):
31 | raise NotImplementedError()
32 |
33 | @classmethod
34 | def as_unique(cls, session, *arg, **kw):
35 | return _unique(session, cls, cls.unique_hash, cls.unique_filter, cls, arg, kw)
36 |
--------------------------------------------------------------------------------
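To make the unique-object recipe above concrete, a hypothetical ORM model might implement the two classmethods like this (the `Keyword` class, table and column are invented for illustration and are not the cookbook's actual schema):

```python
# Hypothetical model showing how UniqueMixin is intended to be subclassed.
from sqlalchemy import Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base

from cosima_cookbook.database_utils import UniqueMixin

Base = declarative_base()


class Keyword(UniqueMixin, Base):
    __tablename__ = "keywords"

    id = Column(Integer, primary_key=True)
    keyword = Column(String, nullable=False, unique=True)

    @classmethod
    def unique_hash(cls, keyword):
        # key for the per-session cache
        return keyword

    @classmethod
    def unique_filter(cls, query, keyword):
        # query used to look up an existing row with the same value
        return query.filter(Keyword.keyword == keyword)


# Usage: repeated calls with the same value return the same object per session
# kw = Keyword.as_unique(session, keyword="cosima")
```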
/cosima_cookbook/date_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright 2018 ARC Centre of Excellence for Climate Systems Science
3 | author: Aidan Heerdegen
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 | http://www.apache.org/licenses/LICENSE-2.0
8 | Unless required by applicable law or agreed to in writing, software
9 | distributed under the License is distributed on an "AS IS" BASIS,
10 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | See the License for the specific language governing permissions and
12 | limitations under the License.
13 | """
14 |
15 | from __future__ import print_function
16 |
17 | import datetime
18 |
19 | import cftime
20 | from cftime import num2date, date2num
21 | import numpy as np
22 | import xarray as xr
23 | from xarray.coding.cftime_offsets import to_cftime_datetime
24 |
25 | rebase_attr = "_rebased_units"
26 | rebase_shift_attr = "_rebased_shift"
27 | bounds = "bounds"
28 | boundsvar = "bounds_var"
29 |
30 | datetimeformat = "%Y-%m-%d %H:%M:%S"
31 |
32 | # Code adapted from https://github.com/spencerahill/aospy/issues/212
33 |
34 |
35 | def date2num_round(dates, units, calendar):
36 | return np.round(date2num(dates, units, calendar), 8)
37 |
38 |
39 | def rebase_times(values, input_units, calendar, output_units):
40 | dates = num2date(values, input_units, calendar)
41 | return date2num_round(dates, output_units, calendar)
42 |
43 |
44 | def is_bounds(var):
45 | """
46 | Return True if the xarray variable has been flagged as a bounds
47 | variable (has a bounds_var attribute)
48 | """
49 | return boundsvar in var.attrs
50 |
51 |
52 | def set_bounds(var, varname):
53 | """
54 | Set the bounds_var attribute to the name of the dimension for which
55 | it is the bounds
56 | """
57 | var.attrs[boundsvar] = varname
58 |
59 |
60 | def flag_bounds(ds):
61 | """
62 | Cycle through all the variables in a dataset and mark variables which
63 | are bounds as such by adding a bounds_var attribute
64 | """
65 | for name in ds.variables:
66 | if is_bounds(ds[name]):
67 | # This is a bounds variable and has been flagged as such
68 | continue
69 | if bounds in ds[name].attrs:
70 | # Flag bounds variable as such
71 | try:
72 | set_bounds(ds[ds[name].attrs[bounds]], name)
73 | except KeyError:
74 | # Ignore if bounds variable not present
75 | pass
76 |
77 |
78 | def unflag_bounds(ds):
79 | """
80 | Cycle through all the variables in a dataset and unflag variables which
81 | are bounds by deleting any bounds_var
82 | """
83 | for name in ds.variables:
84 | try:
85 | del ds[name].attrs[boundsvar]
86 | except KeyError:
87 | pass
88 |
89 |
90 | def rebase_variable(var, calendar=None, target_units=None, src_units=None, offset=None):
91 | """
92 | Create rebased time variable
93 | """
94 | attributes = var.attrs
95 |
96 | # If no calendar is specified, check the variable's attributes and then its
97 | # encoding for one; raise an error if neither contains a calendar
98 | if calendar is None:
99 | try:
100 | calendar = var.attrs["calendar"]
101 | except KeyError:
102 | try:
103 | calendar = var.encoding["calendar"]
104 | except KeyError:
105 | raise AttributeError("No calendar attribute found and none specified")
106 |
107 | # Default to src_units being the units for the variable (bounds variables
108 | # may not have correct units so in this case it has to be specified)
109 | if src_units is None:
110 | src_units = attributes["units"]
111 |
112 | # If no target_units are specified check if the variable has been previously
113 | # rebased and use this as the target, which will undo the previous rebasing
114 | if target_units is None:
115 | try:
116 | target_units = attributes[rebase_attr]
117 | except KeyError:
118 | raise AttributeError(
119 | "No existing rebase found and target_units not specified"
120 | )
121 | finally:
122 | del attributes[rebase_attr]
123 | else:
124 | attributes[rebase_attr] = src_units
125 |
126 | # Rebase
127 | newvar = xr.apply_ufunc(
128 | rebase_times, var, src_units, calendar, target_units, dask="allowed"
129 | )
130 |
131 | if rebase_shift_attr in attributes:
132 | newvar = newvar - attributes[rebase_shift_attr]
133 | del attributes[rebase_shift_attr]
134 | else:
135 | if offset is not None:
136 | # Offset can be an integer, 'auto', or datetime.delta
137 |
138 | if offset == "auto":
139 | # Generate a timedelta offset based on the calendars of src
140 | # and target
141 | offset = num2date(0, target_units, calendar) - num2date(
142 | 0, src_units, calendar
143 | )
144 |
145 | if isinstance(offset, datetime.timedelta):
146 | # Add delta to src calendar origin and convert to integer offset
147 | offset = date2num_round(
148 | num2date(0, src_units, calendar) + offset, src_units, calendar
149 | )
150 |
151 | newvar = newvar + offset
152 | attributes[rebase_shift_attr] = offset
153 |
154 | if newvar.min() < 0:
155 | raise ValueError(
156 | "Rebase creates negative dates, specify offset=auto to shift dates appropriately"
157 | )
158 |
159 | # Save the values back into the variable, put back the attributes and update
160 | # the units
161 | newvar.attrs = attributes
162 | newvar.attrs["units"] = target_units
163 |
164 | return newvar
165 |
166 |
167 | def rebase_dataset(ds, target_units=None, timevar="time", offset=None):
168 | """
169 | Rebase the time dimension variable in a dataset to a different start date.
170 | This is useful to overcome limitations in pandas datetime indices used in
171 | xarray, and to place two datasets with different date indices onto a common
172 | date index
173 | """
174 |
175 | # The units are defined as the units used by timevar
176 | units = ds[timevar].attrs["units"]
177 | calendar = ds[timevar].attrs["calendar"]
178 |
179 | newds = ds.copy()
180 |
181 | # Cycle through all variables, setting a flag if they are a bounds variable
182 | flag_bounds(newds)
183 |
184 | for name in newds.variables:
185 | if is_bounds(newds[name]):
186 | # This is a bounds variable and has been flagged as such so ignore
187 | # as it will be processed by the variable for which it is the bounds
188 | continue
189 | if newds[name].attrs["units"] == units:
190 | newds[name] = rebase_variable(
191 | newds[name], calendar, target_units, offset=offset
192 | )
193 | if bounds in newds[name].attrs:
194 | # Must make the same adjustment to the bounds variable
195 | bvarname = newds[name].attrs[bounds]
196 | try:
197 | newds[bvarname] = rebase_variable(
198 | newds[bvarname],
199 | calendar,
200 | target_units,
201 | src_units=units,
202 | offset=offset,
203 | )
204 | except KeyError:
205 | # Ignore if bounds_var missing
206 | pass
207 |
208 | # Unset bounds flags
209 | unflag_bounds(newds)
210 |
211 | # newds = xr.decode_cf(newds, decode_coords=False, decode_times=True)
212 |
213 | return newds
214 |
215 |
216 | def shift_time(ds):
217 | """
218 | Apply a time shift to an un-decoded time axis to align datasets (not yet implemented)
219 | """
220 | pass
221 |
222 |
223 | def format_datetime(datetime, format=datetimeformat):
224 | """
225 | Standard method to convert cftime.datetime objects to strings for
226 | storage in SQL database. Hard code the length as some datetime
227 | objects don't space pad when formatted!
228 | """
229 | return "{:0>19}".format(datetime.strftime(format).lstrip())
230 |
231 |
232 | def parse_datetime(datetimestring, calendar="proleptic_gregorian"):
233 | """
234 | Standard method to convert datetime objects stored as strings in the SQL database
235 | back into cftime.datetime objects
236 | """
237 | # xarray supports parsing dates strings to cftime.datetime objects, but
238 | # requires ISO-8601 format (https://en.wikipedia.org/wiki/ISO_8601).
239 | # Convert string to ISO-8601 before parsing by adding separator
240 | # between date and time elements
241 | datetimestring = datetimestring[:10] + "T" + datetimestring[11:]
242 |
243 | # Note: uses non-public xarray method that may change or be deleted
244 | # in the future
245 | return to_cftime_datetime(datetimestring, calendar)
246 |
--------------------------------------------------------------------------------
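As a concrete illustration of the rebasing machinery above, a self-contained sketch of `rebase_variable` applied to a synthetic, un-decoded time coordinate (the units strings and values are arbitrary examples):

```python
# Sketch: rebase an un-decoded time variable onto a different reference date.
import numpy as np
import xarray as xr

from cosima_cookbook.date_utils import rebase_variable

# Five daily time values counted from an arbitrary origin
time = xr.DataArray(
    np.arange(5.0),
    dims="time",
    attrs={"units": "days since 2000-01-01"},
)

# Express the same instants relative to a 1900 origin
rebased = rebase_variable(
    time, calendar="noleap", target_units="days since 1900-01-01"
)

print(rebased.values)          # offsets from 1900-01-01 (36500 ... 36504 in a noleap calendar)
print(rebased.attrs["units"])  # "days since 1900-01-01"
```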
/cosima_cookbook/diagnostics/__init__.py:
--------------------------------------------------------------------------------
1 | from .mean_tau_x import mean_tau_x
2 | from .simple import *
3 | from .overturning import *
4 |
5 | __all__ = [
6 | "mean_tau_x",
7 | "annual_scalar",
8 | "drake_passage",
9 | "sea_surface_temperature",
10 | "sea_surface_salinity",
11 | "psi_avg",
12 | "zonal_mean",
13 | "mixed_layer_depth",
14 | ]
15 |
--------------------------------------------------------------------------------
/cosima_cookbook/diagnostics/mean_tau_x.py:
--------------------------------------------------------------------------------
1 | from ..memory import memory
2 | from ..netcdf_index import get_nc_variable  # assumed source of the legacy loader used below
3 |
4 |
5 | @memory.cache
6 | def mean_tau_x(expt):
7 | """
8 | 10-year zonal average of horizontal wind stress.
9 | """
10 | tau_x = get_nc_variable(
11 | expt, "ocean_month.nc", "tau_x", time_units="days since 1900-01-01", n=10
12 | )
13 |
14 | mean_tau_x = tau_x.mean("xu_ocean").mean("time")
15 | mean_tau_x = mean_tau_x.compute()
16 | mean_tau_x.name = "mean_tau_x"
17 |
18 | return mean_tau_x
19 |
--------------------------------------------------------------------------------
/cosima_cookbook/diagnostics/overturning.py:
--------------------------------------------------------------------------------
1 | from ..querying import get_variables
2 | from ..memory import memory
3 | from ..netcdf_index import get_nc_variable  # assumed source of the legacy loader used below
4 |
5 | @memory.cache
6 | def psi_avg(expt, n=10):
7 | def op(p):
8 | summed_p = p.sum("grid_xt_ocean")
9 | # summed_p.attrs['units'] = p.units
10 | return summed_p
11 |
12 | psi = get_nc_variable(
13 | expt,
14 | "ocean.nc",
15 | "ty_trans_rho",
16 | # op=op,
17 | chunks={"potrho": None},
18 | n=n,
19 | time_units="days since 1900-01-01",
20 | )
21 | psi = psi.sum("grid_xt_ocean")
22 |
23 | varlist = get_variables(expt, "ocean.nc")
24 | if "ty_trans_rho_gm" in varlist:
25 | GM = True
26 | psiGM = get_nc_variable(
27 | expt,
28 | "ocean.nc",
29 | "ty_trans_rho_gm",
30 | # op=op,
31 | chunks={"potrho": None},
32 | n=n,
33 | time_units="days since 1900-01-01",
34 | )
35 | psiGM = psiGM.sum("grid_xt_ocean")
36 | else:
37 | GM = False
38 |
39 | # if psi.units == 'kg/s':
40 | # print('WARNING: Changing units for ', expt)
41 | # assume units of kg/s, convert to Sv.
42 | psi = psi * 1.0e-9
43 | if GM:
44 | psiGM = psiGM * 1.0e-9
45 |
46 | psi_avg = psi.cumsum("potrho").mean("time") - psi.sum("potrho").mean("time")
47 | if GM:
48 | psi_avg = psi_avg + psiGM.mean("time")
49 |
50 | psi_avg.load()
51 |
52 | return psi_avg
53 |
54 |
55 | @memory.cache
56 | def calc_aabw(expt):
57 | print("Calculating {} timeseries of AABW transport at 55S ".format(expt))
58 |
59 | def op(p):
60 | summed_p = p.sum("grid_xt_ocean")
61 | # summed_p.attrs['units'] = p.units
62 | return summed_p
63 |
64 | psi = get_nc_variable(
65 | expt,
66 | "ocean.nc",
67 | "ty_trans_rho",
68 | # op=op,
69 | chunks={"potrho": None},
70 | time_units="days since 1900-01-01",
71 | )
72 | psi = psi.sum("grid_xt_ocean")
73 |
74 | varlist = get_variables(expt, "ocean.nc")
75 | if "ty_trans_rho_gm" in varlist:
76 | GM = True
77 | psiGM = get_nc_variable(
78 | expt,
79 | "ocean.nc",
80 | "ty_trans_rho_gm",
81 | # op=op,
82 | chunks={"potrho": None},
83 | time_units="days since 1900-01-01",
84 | )
85 | psiGM = psiGM.sum("grid_xt_ocean")
86 | else:
87 | GM = False
88 |
89 | # if psi.units == 'kg/s':
90 | # print('WARNING: Changing units for ', expt)
91 | # assume units of kg/s, convert to Sv.
92 |
93 | psi = psi * 1.0e-9
94 | if GM:
95 | psiGM = psiGM * 1.0e-9
96 |
97 | psi_sum = psi.cumsum("potrho") - psi.sum("potrho")
98 | if GM:
99 | psi_sum = psi_sum + psiGM
100 |
101 | psi_aabw = (
102 | psi_sum.sel(method="Nearest", grid_yu_ocean=-40)
103 | .sel(potrho=slice(1036, None))
104 | .min("potrho")
105 | .resample("3A", dim="time")
106 | )
107 | psi_aabw = psi_aabw.compute()
108 |
109 | return psi_aabw
110 |
111 |
112 | @memory.cache
113 | def calc_amoc(expt):
114 | print("Calculating {} timeseries of AMOC transport at 26N ".format(expt))
115 |
116 | def op(p):
117 | summed_p = p.sum("grid_xt_ocean")
118 | # summed_p.attrs['units'] = p.units
119 | return summed_p
120 |
121 | psi = get_nc_variable(
122 | expt,
123 | "ocean.nc",
124 | "ty_trans_rho",
125 | # op=op,
126 | chunks={"potrho": None},
127 | time_units="days since 1900-01-01",
128 | )
129 | psi = psi.sum("grid_xt_ocean")
130 |
131 | varlist = get_variables(expt, "ocean.nc")
132 | if "ty_trans_rho_gm" in varlist:
133 | GM = True
134 | psiGM = get_nc_variable(
135 | expt,
136 | "ocean.nc",
137 | "ty_trans_rho_gm",
138 | # op=op,
139 | chunks={"potrho": None},
140 | time_units="days since 1900-01-01",
141 | )
142 | psiGM = psiGM.sum("grid_xt_ocean")
143 | else:
144 | GM = False
145 |
146 | # if psi.units == 'kg/s':
147 | # print('WARNING: Changing units for ', expt)
148 | # assume units of kg/s, convert to Sv.
149 |
150 | psi = psi * 1.0e-9
151 | if GM:
152 | psiGM = psiGM * 1.0e-9
153 |
154 | psi_sum = psi.cumsum("potrho") - psi.sum("potrho")
155 | if GM:
156 | psi_sum = psi_sum + psiGM
157 |
158 | psi_amoc = (
159 | psi_sum.sel(method="Nearest", grid_yu_ocean=26)
160 | .sel(potrho=slice(1035.5, None))
161 | .max("potrho")
162 | .resample("3A", dim="time")
163 | )
164 | psi_amoc = psi_amoc.compute()
165 |
166 | return psi_amoc
167 |
168 |
169 | @memory.cache
170 | def calc_amoc_south(expt):
171 | print("Calculating {} timeseries of AMOC transport at 35S ".format(expt))
172 |
173 | def op(p):
174 | summed_p = p.sum("grid_xt_ocean")
175 | # summed_p.attrs['units'] = p.units
176 | return summed_p
177 |
178 | psi = get_nc_variable(
179 | expt,
180 | "ocean.nc",
181 | "ty_trans_rho",
182 | # op=op,
183 | chunks={"potrho": None},
184 | time_units="days since 1900-01-01",
185 | )
186 | psi = psi.sum("grid_xt_ocean")
187 |
188 | varlist = get_variables(expt, "ocean.nc")
189 | if "ty_trans_rho_gm" in varlist:
190 | GM = True
191 | psiGM = get_nc_variable(
192 | expt,
193 | "ocean.nc",
194 | "ty_trans_rho_gm",
195 | # op=op,
196 | chunks={"potrho": None},
197 | time_units="days since 1900-01-01",
198 | )
199 | psiGM = psiGM.sum("grid_xt_ocean")
200 | else:
201 | GM = False
202 |
203 | # if psi.units == 'kg/s':
204 | # print('WARNING: Changing units for ', expt)
205 | # assume units of kg/s, convert to Sv.
206 |
207 | psi = psi * 1.0e-9
208 | if GM:
209 | psiGM = psiGM * 1.0e-9
210 |
211 | psi_sum = psi.cumsum("potrho") - psi.sum("potrho")
212 | if GM:
213 | psi_sum = psi_sum + psiGM
214 |
215 | psi_amoc_south = (
216 | psi_sum.sel(method="Nearest", grid_yu_ocean=-35)
217 | .sel(potrho=slice(1035.5, None))
218 | .max("potrho")
219 | .resample("3A", dim="time")
220 | )
221 | psi_amoc_south = psi_amoc_south.compute()
222 |
223 | return psi_amoc_south
224 |
225 |
226 | @memory.cache
227 | def zonal_mean(expt, variable, n=10, resolution=1):
228 | zonal_var = get_nc_variable(
229 | expt,
230 | "ocean.nc",
231 | variable,
232 | chunks={"st_ocean": None},
233 | n=n,
234 | time_units="days since 1900-01-01",
235 | )
236 |
237 | # Annual Average WOA13 long-term climatology.
238 | if resolution == 1:
239 | zonal_WOA13 = (
240 | get_nc_variable("woa13/10", "woa13_ts_??_mom10.nc", variable)
241 | .mean("GRID_X_T")
242 | .mean("time")
243 | )
244 | elif resolution == 0.25:
245 | zonal_WOA13 = (
246 | get_nc_variable("woa13/025", "woa13_ts_??_mom025.nc", variable)
247 | .mean("GRID_X_T")
248 | .mean("time")
249 | )
250 | elif resolution == 0.1:
251 | zonal_WOA13 = (
252 | get_nc_variable("woa13/01", "woa13_ts_??_mom01.nc", variable)
253 | .mean("GRID_X_T")
254 | .mean("time")
255 | )
256 | else:
257 | print("WARNING: Sorry, we dont seem to recognise resolution ", resolution)
258 |
259 | zonal_WOA13.compute()
260 | if variable == "temp":
261 | zonal_WOA13 = zonal_WOA13 + 273.15
262 |
263 | zonal_mean = zonal_var.mean("xt_ocean").mean("time")
264 | zonal_mean.compute()
265 | zonal_diff = zonal_mean - zonal_WOA13.values
266 |
267 | return zonal_mean, zonal_diff
268 |
--------------------------------------------------------------------------------
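The streamfunction construction repeated in the functions above (a cumulative sum of transport in density space, referenced so that it vanishes at the densest class, with a kg/s to Sv conversion) can be seen in isolation on synthetic data; the numbers below are arbitrary:

```python
# Sketch: the streamfunction construction used in psi_avg, on synthetic data.
import numpy as np
import xarray as xr

# transport binned into 4 density classes at 3 latitudes (arbitrary values, kg/s)
psi = xr.DataArray(
    np.arange(12.0).reshape(4, 3),
    dims=("potrho", "grid_yu_ocean"),
)

psi = psi * 1.0e-9  # kg/s -> Sv, as in the functions above

# cumulative transport referenced to the densest class (zero at the bottom)
streamfunction = psi.cumsum("potrho") - psi.sum("potrho")
```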
/cosima_cookbook/diagnostics/simple.py:
--------------------------------------------------------------------------------
1 | from ..querying import get_variables
2 | from ..memory import memory
3 | from ..netcdf_index import get_nc_variable  # assumed source of the legacy loader used below
4 | import logging
5 |
6 |
7 | @memory.cache
8 | def annual_scalar(expt, variables):
9 | """ """
10 |
11 | logging.debug("Building dataset")
12 | darray = get_nc_variable(
13 | expt,
14 | "ocean_scalar.nc",
15 | variables,
16 | time_units="days since 1900-01-01",
17 | use_bag=True,
18 | )
19 |
20 | logging.debug("Resampling in time")
21 | annual_average = darray.resample(time="A").mean("time")
22 |
23 | for v in annual_average.data_vars:
24 | avar = annual_average.variables[v]
25 | dvar = darray.variables[v]
26 | avar.attrs["long_name"] = dvar.attrs["long_name"] + " (annual average)"
27 | avar.attrs["units"] = dvar.attrs["units"]
28 |
29 | return annual_average
30 |
31 |
32 | @memory.cache
33 | def drake_passage(expt):
34 | "Calculate transport through Drake Passage"
35 |
36 | tx = get_nc_variable(
37 | expt,
38 | "ocean_month.nc",
39 | "tx_trans_int_z",
40 | chunks={"yt_ocean": 200},
41 | time_units="days since 1900-01-01",
42 | use_bag=False,
43 | )
44 |
45 | tx_trans = tx.sel(xu_ocean=-69, method="nearest").sel(yt_ocean=slice(-72, -52))
46 |
47 | if tx_trans.units == "Sv (10^9 kg/s)":
48 | transport = tx_trans.sum("yt_ocean").resample(time="A").mean("time")
49 | else:
50 | # print('WARNING: Changing units for ', expt)
51 | transport = tx_trans.sum("yt_ocean").resample(time="A").mean("time") * 1.0e-9
52 |
53 | transport.load()
54 |
55 | return transport
56 |
57 |
58 | @memory.cache
59 | def bering_strait(expt):
60 | ty = get_nc_variable(
61 | expt,
62 | "ocean_month.nc",
63 | "ty_trans_int_z",
64 | chunks={"yu_ocean": 200},
65 | time_units="days since 1900-01-01",
66 | )
67 | ty_trans = ty.sel(yu_ocean=67, method="nearest").sel(xt_ocean=slice(-171, -167))
68 | if ty_trans.units == "Sv (10^9 kg/s)":
69 | transport = ty_trans.sum("xt_ocean").resample(time="A").mean("time")
70 | else:
71 | # print('WARNING: Changing units for ', expt)
72 | transport = ty_trans.sum("xt_ocean").resample(time="A").mean("time") * 1.0e-9
73 |
74 | transport.load()
75 |
76 | return transport
77 |
78 |
79 | @memory.cache
80 | def sea_surface_temperature(expt, resolution=1):
81 | ## Load SST from expt
82 | varlist = get_variables(expt, "ocean_month.nc")
83 | if "surface_temp" in varlist:
84 | SST = get_nc_variable(
85 | expt,
86 | "ocean_month.nc",
87 | "surface_temp",
88 | n=10,
89 | time_units="days since 1900-01-01",
90 | )
91 | else:
92 | SST = get_nc_variable(
93 | expt, "ocean.nc", "temp", n=10, time_units="days since 1900-01-01"
94 | ).isel(st_ocean=0)
95 |
96 | if SST.units == "degrees K":
97 | SST = SST - 273.15
98 |
99 | # Annual Average WOA13 long-term climatology.
100 | if resolution == 1:
101 | SST_WOA13 = get_nc_variable("woa13/10", "woa13_ts_??_mom10.nc", "temp").isel(
102 | ZT=0
103 | )
104 | elif resolution == 0.25:
105 | SST_WOA13 = get_nc_variable("woa13/025", "woa13_ts_??_mom025.nc", "temp").isel(
106 | ZT=0
107 | )
108 | elif resolution == 0.1:
109 | SST_WOA13 = get_nc_variable("woa13/01", "woa13_ts_??_mom01.nc", "temp").isel(
110 | ZT=0
111 | )
112 | else:
113 | print("WARNING: Sorry, we dont seem to recognise resolution ", resolution)
114 |
115 | # Average
116 | SST = SST.mean("time")
117 | SSTdiff = SST - SST_WOA13.mean("time").values
118 |
119 | return SST, SSTdiff
120 |
121 |
122 | @memory.cache
123 | def sea_surface_salinity(expt, resolution=1):
124 | ## Load SSS from expt
125 | varlist = get_variables(expt, "ocean_month.nc")
126 | if "surface_salt" in varlist:
127 | SSS = get_nc_variable(expt, "ocean_month.nc", "surface_salt", n=10)
128 | else:
129 | SSS = get_nc_variable(expt, "ocean.nc", "salt", n=10).isel(st_ocean=0)
130 |
131 | # Annual Average WOA13 long-term climatology.
132 | if resolution == 1:
133 | SSS_WOA13 = get_nc_variable("woa13/10", "woa13_ts_??_mom10.nc", "salt").isel(
134 | ZT=0
135 | )
136 | elif resolution == 0.25:
137 | SSS_WOA13 = get_nc_variable("woa13/025", "woa13_ts_??_mom025.nc", "salt").isel(
138 | ZT=0
139 | )
140 | elif resolution == 0.1:
141 | SSS_WOA13 = get_nc_variable("woa13/01", "woa13_ts_??_mom01.nc", "salt").isel(
142 | ZT=0
143 | )
144 | else:
145 | print("WARNING: Sorry, we dont seem to recognise resolution ", resolution)
146 |
147 | # Average over last 10 time slices - prefer to do this by year.
148 | SSS = SSS.mean("time")
149 | SSSdiff = SSS - SSS_WOA13.mean("time").values
150 |
151 | return SSS, SSSdiff
152 |
153 |
154 | @memory.cache
155 | def mixed_layer_depth(expt):
156 | ## Load MLD from expt
157 | varlist = get_variables(expt, "ocean_month.nc")
158 | if "mld" in varlist:
159 | MLD = get_nc_variable(expt, "ocean_month.nc", "mld", n=10)
160 |
161 | # Average over last 10 time slices - prefer to do this by year.
162 | MLD = MLD.mean("time")
163 |
164 | return MLD
165 |
--------------------------------------------------------------------------------
/cosima_cookbook/distributed.py:
--------------------------------------------------------------------------------
1 | import os, socket, getpass
2 | from distributed import Client, LocalCluster
3 |
4 | from itertools import product
5 | import numpy as np
6 | import xarray as xr
7 |
8 | from tqdm import tqdm_notebook
9 |
10 |
11 | def start_cluster(diagnostics_port=0):
12 | "Set up a LocalCluster for distributed"
13 |
14 | hostname = socket.gethostname()
15 | n_workers = os.cpu_count() // 2
16 | cluster = LocalCluster(
17 | ip="localhost",
18 | n_workers=n_workers,
19 | diagnostics_port=diagnostics_port,
20 | memory_limit=6e9,
21 | )
22 | client = Client(cluster)
23 |
24 | params = {
25 | "bokeh_port": cluster.scheduler.services["bokeh"].port,
26 | "user": getpass.getuser(),
27 | "scheduler_ip": cluster.scheduler.ip,
28 | "hostname": hostname,
29 | }
30 |
31 | print(
32 | "If the link to the dashboard below doesn't work, run this command on a local terminal to set up a SSH tunnel:"
33 | )
34 | print()
35 | print(
36 | " ssh -N -L {bokeh_port}:{scheduler_ip}:{bokeh_port} {hostname}.nci.org.au -l {user}".format(
37 | **params
38 | )
39 | )
40 |
41 | return client
42 |
43 |
44 | def compute_by_block(dsx):
45 | """ """
46 |
47 | # determine index key for each chunk
48 | slices = []
49 | for chunks in dsx.chunks:
50 | L = [
51 | 0,
52 | ] + list(np.cumsum(chunks))
53 | slices.append([slice(a, b) for a, b in (zip(L[:-1], L[1:]))])
54 | indexes = list(product(*slices))
55 |
56 | # allocate memory to receive result
57 | if isinstance(dsx, xr.DataArray):
58 | result = xr.zeros_like(dsx).load()
59 | else:
60 | result = np.zeros(dsx.shape)
61 |
62 | # evaluate each chunk one at a time
63 | for index in tqdm_notebook(indexes, leave=False):
64 | block = dsx.__getitem__(index).compute()
65 | result.__setitem__(index, block)
66 |
67 | return result
68 |
--------------------------------------------------------------------------------
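A sketch of how `compute_by_block` is meant to be driven: it expects a dask-backed xarray object and reports progress via `tqdm_notebook`, so it is intended for notebook sessions.

```python
# Sketch: evaluate a chunked DataArray one block at a time to bound peak memory.
import numpy as np
import xarray as xr

from cosima_cookbook.distributed import compute_by_block

da = xr.DataArray(np.random.rand(200, 200), dims=("y", "x"))
da_chunked = da.chunk({"y": 50, "x": 100})

result = compute_by_block(da_chunked)  # same values as da, computed chunk by chunk
```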
/cosima_cookbook/memory.py:
--------------------------------------------------------------------------------
1 | """
2 | Caching
3 |
4 | The memory object lives in this module.
5 | Other components of the cookbook access it via
6 |
7 | from ..memory import memory
8 | """
9 |
10 | from joblib import Memory
11 |
12 | import os, getpass, tempfile
13 |
14 | username = getpass.getuser()
15 |
16 |
17 | # pick up cachedir from an environment variable?
18 | # Append username to prevent clashes with others users
19 | cachedir = os.path.join(tempfile.gettempdir(), username)
20 | memory = Memory(cachedir=cachedir, verbose=0)
21 |
--------------------------------------------------------------------------------
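The shared `memory` object is used like any joblib `Memory` cache, as the diagnostics modules above demonstrate; a minimal sketch with an invented function (this assumes a joblib version that still accepts the `cachedir` argument used above):

```python
# Sketch: cache an expensive, deterministic computation on disk via joblib.
from cosima_cookbook.memory import memory


@memory.cache
def expensive_sum(n):
    # stands in for a slow diagnostic; recomputed only for unseen arguments
    return sum(range(n))


print(expensive_sum(10_000_000))  # computed on the first call
print(expensive_sum(10_000_000))  # served from the on-disk cache thereafter
```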
/cosima_cookbook/netcdf_utils.py:
--------------------------------------------------------------------------------
1 | def find_record_dimension(d):
2 | """Find the record dimension (i.e. time) in a netCDF4 Dataset."""
3 |
4 | for dim in d.dimensions:
5 | if d.dimensions[dim].isunlimited():
6 | return dim
7 |
8 | return None
9 |
10 |
11 | def find_dimension_with_attribute(d, attribute, value):
12 | """Find a matching dimension with attribute=value, or None."""
13 |
14 | for dim in d.dimensions:
15 | if dim not in d.variables:
16 | continue
17 |
18 | if getattr(d.variables[dim], attribute, None) == value:
19 | return dim
20 |
21 | return None
22 |
23 |
24 | def find_time_dimension(d):
25 | """Find a time dimension in a netCDF4 Dataset."""
26 |
27 | # this is a bit heuristic, but we cascade through some checks, guided by
28 | # the CF conventions
29 |
30 | dim = find_dimension_with_attribute(d, "standard_name", "time")
31 | if dim is not None:
32 | return dim
33 |
34 | dim = find_dimension_with_attribute(d, "axis", "T")
35 | if dim is not None:
36 | return dim
37 |
38 | dim = find_record_dimension(d)
39 | if dim is not None:
40 | return dim
41 |
42 | for dim in d.dimensions:
43 | if dim.lower() == "time":
44 | return dim
45 |
46 | # CF conventions also suggests the units attribute,
47 | # but time_bounds may have the same units, and a false positive
48 | # here could be very confusing...
49 | return None
50 |
--------------------------------------------------------------------------------
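To illustrate the cascade of heuristics in `find_time_dimension`, a small self-contained sketch using an in-memory (diskless) netCDF4 dataset:

```python
# Sketch: the time dimension is found via standard_name, axis=T, or record status.
import netCDF4

from cosima_cookbook.netcdf_utils import find_time_dimension

ds = netCDF4.Dataset("example.nc", "w", diskless=True)
ds.createDimension("time", None)  # unlimited (record) dimension
ds.createDimension("lat", 4)

t = ds.createVariable("time", "f8", ("time",))
t.standard_name = "time"          # matched by the first heuristic

print(find_time_dimension(ds))    # -> "time"
ds.close()
```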
/cosima_cookbook/plots/__init__.py:
--------------------------------------------------------------------------------
1 | from .lineplots import *
2 | from .overturning import *
3 | from .maps import sea_surface_temperature, sea_surface_salinity, mixed_layer_depth
4 |
5 | # __all__ = ['wind_stress']
6 |
--------------------------------------------------------------------------------
/cosima_cookbook/plots/lineplots.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import cosima_cookbook as cc
3 | from tqdm import tqdm_notebook
4 | import IPython.display
5 |
6 |
7 | def wind_stress(expts=[]):
8 | """
9 | Plot zonally averaged wind stress.
10 |
11 | Parameters
12 | ----------
13 | expts : str or list of str
14 | Experiment name(s).
15 | """
16 |
17 | if not isinstance(expts, list):
18 | expts = [expts]
19 |
20 | # computing
21 | results = []
22 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"):
23 | result = {"mean_tau_x": cc.diagnostics.mean_tau_x(expt), "expt": expt}
24 | results.append(result)
25 |
26 | IPython.display.clear_output()
27 |
28 | plt.figure(figsize=(12, 6))
29 |
30 | # plotting
31 | for result in results:
32 | mean_tau_x = result["mean_tau_x"]
33 | expt = result["expt"]
34 | plt.plot(mean_tau_x, mean_tau_x.yu_ocean, linewidth=2, label=expt)
35 | plt.ylim([-70, 65])
36 | plt.xlim([-0.08, 0.20])
37 | plt.ylabel(r"Latitude ($^\circ$N)")
38 | plt.xlabel("Stress (N m$^{-2}$)")
39 | plt.legend(fontsize=10, loc="best")
40 |
41 |
42 | def annual_scalar(expts=[], variables=[]):
43 | """
44 | Calculate and plot annual average of variable(s) for experiment(s).
45 |
46 | Parameters
47 | ----------
48 | expts : str or list of str
49 | Experiment name(s).
50 | variable : str or list of str
51 | Variable name(s).
52 | """
53 |
54 | if not isinstance(expts, list):
55 | expts = [expts]
56 |
57 | if not isinstance(variables, list):
58 | variables = [variables]
59 |
60 | # computing
61 | results = []
62 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"):
63 | annual_average = cc.diagnostics.annual_scalar(expt, variables)
64 |
65 | result = {"annual_average": annual_average, "expt": expt}
66 | results.append(result)
67 |
68 | IPython.display.clear_output()
69 |
70 | # plotting each variable in a separate plot
71 | for variable in variables:
72 | plt.figure(figsize=(12, 6))
73 |
74 | for result in results:
75 | annual_average = result["annual_average"]
76 | expt = result["expt"]
77 |
78 | annual_average[variable].plot(label=expt)
79 |
80 | plt.title(annual_average[variable].long_name)
81 | plt.legend(fontsize=10, bbox_to_anchor=(1, 1), loc="best", borderaxespad=0.0)
82 |
83 | plt.xlabel("Time")
84 |
85 |
86 | def drake_passage(expts=[]):
87 | """
88 | Plot Drake Passage transport.
89 |
90 | Parameters
91 | ----------
92 | expts : str or list of str
93 | Experiment name(s).
94 | """
95 |
96 | plt.figure(figsize=(12, 6))
97 |
98 | if not isinstance(expts, list):
99 | expts = [expts]
100 |
101 | # computing
102 | results = []
103 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"):
104 | transport = cc.diagnostics.drake_passage(expt)
105 |
106 | result = {"transport": transport, "expt": expt}
107 | results.append(result)
108 |
109 | IPython.display.clear_output()
110 |
111 | # plotting
112 | for result in results:
113 | transport = result["transport"]
114 | expt = result["expt"]
115 | transport.plot(label=expt)
116 |
117 | plt.title("Drake Passage Transport")
118 | plt.xlabel("Time")
119 | plt.ylabel("Transport (Sv)")
120 | plt.legend(fontsize=10, loc="best")
121 |
122 |
123 | def bering_strait(expts=[]):
124 | """
125 | Plot Bering Strait transport.
126 |
127 | Parameters
128 | ----------
129 | expts : str or list of str
130 | Experiment name(s).
131 | """
132 |
133 | plt.figure(figsize=(12, 6))
134 |
135 | if not isinstance(expts, list):
136 | expts = [expts]
137 |
138 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"):
139 | transport = cc.diagnostics.bering_strait(expt)
140 | transport.plot(label=expt)
141 |
142 | IPython.display.clear_output()
143 |
144 | plt.title("Bering Strait Transport")
145 | plt.xlabel("Time")
146 | plt.ylabel("Transport (Sv)")
147 | plt.legend(fontsize=10, loc="best")
148 |
149 |
150 | def aabw(expts=[]):
151 | """
152 | Plot timeseries of AABW transport measured at 55S.
153 |
154 | Parameters
155 | ----------
156 | expts : str or list of str
157 | Experiment name(s).
158 | """
159 |
160 | plt.figure(figsize=(12, 6))
161 |
162 | if not isinstance(expts, list):
163 | expts = [expts]
164 |
165 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"):
166 | psi_aabw = cc.diagnostics.calc_aabw(expt)
167 | psi_aabw.plot(label=expt)
168 |
169 | IPython.display.clear_output()
170 |
171 | plt.title("AABW Transport at 40S")
172 | plt.xlabel("Time")
173 | plt.ylabel("Transport (Sv)")
174 | plt.legend(fontsize=10, loc="best")
175 |
176 |
177 | def amoc(expts=[]):
178 | """
179 | Plot timeseries of AMOC transport measured at 26N.
180 |
181 | Parameters
182 | ----------
183 | expts : str or list of str
184 | Experiment name(s).
185 | """
186 |
187 | plt.figure(figsize=(12, 6))
188 |
189 | if not isinstance(expts, list):
190 | expts = [expts]
191 |
192 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"):
193 | psi_amoc = cc.diagnostics.calc_amoc(expt)
194 | psi_amoc.plot(label=expt)
195 |
196 | IPython.display.clear_output()
197 |
198 | plt.title("AMOC Transport at 26N")
199 | plt.xlabel("Time")
200 | plt.ylabel("Transport (Sv)")
201 | plt.legend(fontsize=10, loc="best")
202 |
203 |
204 | def amoc_south(expts=[]):
205 | """
206 | Plot timeseries of AMOC transport measured at 35S.
207 |
208 | Parameters
209 | ----------
210 | expts : str or list of str
211 | Experiment name(s).
212 | """
213 |
214 | plt.figure(figsize=(12, 6))
215 |
216 | if not isinstance(expts, list):
217 | expts = [expts]
218 |
219 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"):
220 | psi_amoc_south = cc.diagnostics.calc_amoc_south(expt)
221 | psi_amoc_south.plot(label=expt)
222 |
223 | IPython.display.clear_output()
224 |
225 | plt.title("AMOC Transport at 35S")
226 | plt.xlabel("Time")
227 | plt.ylabel("Transport (Sv)")
228 | plt.legend(fontsize=10, loc="best")
229 |
--------------------------------------------------------------------------------
/cosima_cookbook/plots/maps.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import cosima_cookbook as cc
3 | from tqdm import tqdm_notebook
4 |
5 | import IPython.display
6 |
7 |
8 | def sea_surface_temperature(expts=[], resolution=1):
9 | """
10 | Plot a map of SST from the last decade of the run.
11 | """
12 |
13 | if not isinstance(expts, list):
14 | expts = [expts]
15 |
16 | # computing
17 | results = []
18 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"):
19 | SST, SSTdiff = cc.diagnostics.sea_surface_temperature(expt, resolution)
20 |
21 | result = {"SST": SST, "SSTdiff": SSTdiff, "expt": expt}
22 | results.append(result)
23 |
24 | IPython.display.clear_output()
25 |
26 | # plotting
27 | for result in results:
28 | SST = result["SST"]
29 | SSTdiff = result["SSTdiff"]
30 | expt = result["expt"]
31 |
32 | plt.figure(figsize=(12, 4))
33 | plt.subplot(121)
34 | SST.plot()
35 | plt.title(expt)
36 | plt.subplot(122)
37 | SSTdiff.plot(robust=True)
38 | plt.title(expt)
39 |
40 |
41 | def sea_surface_salinity(expts=[], resolution=1):
42 | """
43 | Plot a map of SSS from the last decade of the run.
44 | """
45 |
46 | if not isinstance(expts, list):
47 | expts = [expts]
48 |
49 | # computing
50 | results = []
51 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"):
52 | SSS, SSSdiff = cc.diagnostics.sea_surface_salinity(expt, resolution)
53 |
54 | result = {"SSS": SSS, "SSSdiff": SSSdiff, "expt": expt}
55 | results.append(result)
56 |
57 | IPython.display.clear_output()
58 |
59 | # plotting
60 | for result in results:
61 | SSS = result["SSS"]
62 | SSSdiff = result["SSSdiff"]
63 | expt = result["expt"]
64 |
65 | plt.figure(figsize=(12, 4))
66 | plt.subplot(121)
67 | SSS.plot()
68 | plt.title(expt)
69 | plt.subplot(122)
70 | SSSdiff.plot(robust=True)
71 | plt.title(expt)
72 |
73 |
74 | def mixed_layer_depth(expts=[]):
75 | """
76 | Plot a map of MLD from the last decade of the run.
77 | """
78 |
79 | if not isinstance(expts, list):
80 | expts = [expts]
81 |
82 | # computing
83 | results = []
84 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"):
85 | MLD = cc.diagnostics.mixed_layer_depth(expt)
86 |
87 | result = {"MLD": MLD, "expt": expt}
88 | results.append(result)
89 |
90 | IPython.display.clear_output()
91 |
92 | # plotting
93 | for result in results:
94 | MLD = result["MLD"]
95 | expt = result["expt"]
96 |
97 | plt.figure(figsize=(6, 4))
98 | MLD.plot()
99 | plt.title(expt)
100 |
--------------------------------------------------------------------------------
/cosima_cookbook/plots/overturning.py:
--------------------------------------------------------------------------------
1 | import cosima_cookbook as cc
2 | import matplotlib.pyplot as plt
3 | import numpy as np
4 | from tqdm import tqdm_notebook
5 |
6 | import IPython.display
7 |
8 |
9 | def psi_avg(expts, n=10, clev=np.arange(-20, 20, 2)):
10 | if not isinstance(expts, list):
11 | expts = [expts]
12 |
13 | # computing
14 | results = []
15 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"):
16 | psi_avg = cc.diagnostics.psi_avg(expt, n)
17 |
18 | result = {"psi_avg": psi_avg, "expt": expt}
19 | results.append(result)
20 |
21 | IPython.display.clear_output()
22 |
23 | # plotting
24 | for result in results:
25 | psi_avg = result["psi_avg"]
26 | expt = result["expt"]
27 |
28 | plt.figure(figsize=(10, 5))
29 | plt.contourf(
30 | psi_avg.grid_yu_ocean,
31 | psi_avg.potrho,
32 | psi_avg,
33 | cmap=plt.cm.PiYG,
34 | levels=clev,
35 | extend="both",
36 | )
37 | cb = plt.colorbar(orientation="vertical", shrink=0.7)
38 |
39 | cb.ax.set_xlabel("Sv")
40 | plt.contour(
41 | psi_avg.grid_yu_ocean,
42 | psi_avg.potrho,
43 | psi_avg,
44 | levels=clev,
45 | colors="k",
46 | linewidths=0.25,
47 | )
48 | plt.contour(
49 | psi_avg.grid_yu_ocean,
50 | psi_avg.potrho,
51 | psi_avg,
52 | levels=[
53 | 0.0,
54 | ],
55 | colors="k",
56 | linewidths=0.5,
57 | )
58 | plt.gca().invert_yaxis()
59 |
60 | plt.ylim((1037.5, 1034))
61 | plt.ylabel("Potential Density (kg m$^{-3}$)")
62 | plt.xlabel("Latitude ($^\circ$N)")
63 | plt.xlim([-75, 85])
64 | plt.title("Overturning in %s" % expt)
65 |
66 |
67 | def zonal_mean(expts, variable, n=10, resolution=1):
68 | if not isinstance(expts, list):
69 | expts = [expts]
70 |
71 | # computing
72 | results = []
73 | for expt in tqdm_notebook(expts, leave=False, desc="experiments"):
74 | zonal_mean, zonal_diff = cc.diagnostics.zonal_mean(
75 | expt, variable, n, resolution
76 | )
77 |
78 | result = {"zonal_mean": zonal_mean, "zonal_diff": zonal_diff, "expt": expt}
79 | results.append(result)
80 |
81 | IPython.display.clear_output()
82 |
83 | # plotting
84 | for result in results:
85 | zonal_mean = result["zonal_mean"]
86 | zonal_diff = result["zonal_diff"]
87 | expt = result["expt"]
88 |
89 | plt.figure(figsize=(12, 5))
90 | plt.subplot(121)
91 | zonal_mean.plot()
92 | plt.title(expt)
93 | plt.gca().invert_yaxis()
94 | plt.title("{}: Zonal Mean {}".format(expt, variable))
95 | plt.subplot(122)
96 | zonal_diff.plot()
97 | plt.title(expt)
98 | plt.gca().invert_yaxis()
99 | plt.title("{}: Zonal Mean {} Change".format(expt, variable))
100 |
--------------------------------------------------------------------------------
/cosima_cookbook/plots/scalar.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/cosima_cookbook/plots/scalar.py
--------------------------------------------------------------------------------
/cosima_cookbook/querying.py:
--------------------------------------------------------------------------------
1 | """querying.py
2 |
3 | Functions for data discovery.
4 |
5 | """
6 |
7 | import logging
8 | import os.path
9 | import pandas as pd
10 | from sqlalchemy import func, distinct, or_
11 | from sqlalchemy.orm import aliased
12 | from sqlalchemy.sql.selectable import subquery
13 | import warnings
14 | import xarray as xr
15 |
16 | from . import database
17 | from .database import NCExperiment, NCFile, CFVariable, NCVar, Keyword
18 | from .database import NCAttribute, NCAttributeString
19 |
20 |
21 | class VariableNotFoundError(Exception):
22 | pass
23 |
24 |
25 | class QueryWarning(UserWarning):
26 | pass
27 |
28 |
29 | # By default all ambiguous queries will raise an exception
30 | warnings.simplefilter("error", category=QueryWarning, lineno=0, append=False)
31 |
32 |
33 | def get_experiments(
34 | session,
35 | experiment=True,
36 | keywords=None,
37 | variables=None,
38 | all=False,
39 | exptname=None,
40 | **kwargs,
41 | ):
42 | """
43 | Returns a DataFrame of all experiments and the number of netCDF4 files contained
44 | within each experiment.
45 |
46 | Optionally one or more keywords can be specified, and only experiments with all the
47 | specified keywords will be returned. The keyword strings can utilise SQL wildcard
48 | characters, "%" and "_", to match multiple keywords.
49 |
50 | Optionally variables can also be specified, and only experiments containing all those
51 | variables will be returned.
52 |
53 | All metadata fields will be returned if all=True, or individual metadata fields
54 | can be selected by passing field=True, where available fields are:
55 | contact, email, created, description, notes, url and root_dir
56 | """
57 |
58 | # Determine which attributes to return. Special case experiment
59 | # as this is the only one that defaults to True
60 | columns = []
61 | if experiment:
62 | columns.append(NCExperiment.experiment)
63 |
64 | for f in NCExperiment.metadata_keys + ["root_dir"]:
65 | # Explicitly don't support returning keyword metadata
66 | if f == "keywords":
67 | continue
68 | if kwargs.get(f, all):
69 | columns.append(getattr(NCExperiment, f))
70 |
71 | q = (
72 | session.query(*columns, func.count(NCFile.experiment_id).label("ncfiles"))
73 | .join(NCFile.experiment)
74 | .group_by(NCFile.experiment_id)
75 | )
76 |
77 | if keywords is not None:
78 | if isinstance(keywords, str):
79 | keywords = [keywords]
80 | q = q.filter(*(NCExperiment.keywords.like(k) for k in keywords))
81 |
82 | if variables is not None:
83 | if isinstance(variables, str):
84 | variables = [variables]
85 |
86 | expt_query = (
87 | session.query(NCExperiment.id)
88 | .join(NCFile.experiment)
89 | .join(NCFile.ncvars)
90 | .join(NCVar.variable)
91 | .group_by(NCExperiment.experiment)
92 | .having(func.count(distinct(CFVariable.name)) == len(variables))
93 | .filter(CFVariable.name.in_(variables))
94 | )
95 |
96 | q = q.filter(NCExperiment.id.in_(expt_query))
97 |
98 | if exptname is not None:
99 | q = q.filter(NCExperiment.experiment == exptname)
100 |
101 | return pd.DataFrame(q, columns=[c["name"] for c in q.column_descriptions])
102 |
103 |
104 | def get_ncfiles(session, experiment):
105 | """
106 | Returns a DataFrame of all netcdf files for a given experiment.
107 | """
108 |
109 | q = (
110 | session.query(NCFile.ncfile, NCFile.index_time)
111 | .join(NCFile.experiment)
112 | .filter(NCExperiment.experiment == experiment)
113 | .order_by(NCFile.ncfile)
114 | )
115 |
116 | return pd.DataFrame(q, columns=[c["name"] for c in q.column_descriptions])
117 |
118 |
119 | def get_keywords(session, experiment=None):
120 | """
121 | Returns a set of all keywords, and optionally only for a given experiment
122 | """
123 |
124 | if experiment is not None:
125 | q = session.query(NCExperiment).filter(NCExperiment.experiment == experiment)
126 | return q.scalar().keywords
127 | else:
128 | q = session.query(Keyword)
129 | return {r.keyword for r in q}
130 |
131 |
132 | def get_variables(
133 | session,
134 | experiment=None,
135 | frequency=None,
136 | cellmethods=None,
137 | inferred=False,
138 | search=None,
139 | ):
140 | """
141 | Returns a DataFrame of variables for a given experiment if experiment
142 | name is specified, and optionally a given diagnostic frequency.
143 | If inferred is True, some experiment specific properties inferred from other
144 | fields are also returned: coordinate, model and restart.
145 | - coordinate: True if coordinate, False otherwise
146 | - model: model from which the variable was output; possible values are ocean,
147 | atmosphere, land, ice, or none if the model can't be identified
148 | - restart: True if variable from a restart file, False otherwise
149 | If experiment is not specified all variables for all experiments are returned,
150 | without experiment specific data.
151 | Specifying an array of search strings will limit variables returned to any
152 | containing any of the search terms in variable name, long name, or standard name.
153 | """
154 |
155 | # Default columns
156 | columns = [
157 | CFVariable.name,
158 | CFVariable.long_name,
159 | CFVariable.units,
160 | ]
161 |
162 | if experiment:
163 | # Create aliases so as to be able to join to the NCAttribute table
164 | # twice, for the name and value
165 | ncas1 = aliased(NCAttributeString)
166 | ncas2 = aliased(NCAttributeString)
167 | subq = (
168 | session.query(
169 | NCAttribute.ncvar_id.label("ncvar_id"),
170 | ncas2.value.label("value"),
171 | )
172 | .join(ncas1, NCAttribute.name_id == ncas1.id)
173 | .join(ncas2, NCAttribute.value_id == ncas2.id)
174 | .filter(ncas1.value == "cell_methods")
175 | ).subquery(name="attrs")
176 |
177 | columns.extend(
178 | [
179 | NCFile.frequency,
180 | NCFile.ncfile,
181 | subq.c.value.label("cell_methods"),
182 | func.count(NCFile.ncfile).label("# ncfiles"),
183 | func.min(NCFile.time_start).label("time_start"),
184 | func.max(NCFile.time_end).label("time_end"),
185 | ]
186 | )
187 |
188 | if inferred:
189 | # Return inferred information
190 | columns.extend(
191 | [
192 | CFVariable.is_coordinate.label("coordinate"),
193 | NCFile.model,
194 | NCFile.is_restart.label("restart"),
195 | ]
196 | )
197 |
198 | # Base query
199 | q = (
200 | session.query(*columns)
201 | .join(NCFile.experiment)
202 | .join(NCFile.ncvars)
203 | .join(NCVar.variable)
204 | )
205 |
206 | if experiment is not None:
207 | # Join against the NCAttribute table above. Outer join ensures
208 | # variables without cell_methods attribute still appear with NULL
209 | q = q.outerjoin(subq, subq.c.ncvar_id == NCVar.id)
210 |
211 | q = q.order_by(NCFile.frequency, CFVariable.name, NCFile.time_start, NCFile.ncfile)
212 | q = q.group_by(CFVariable, NCFile.frequency)
213 |
214 | if experiment is not None:
215 | q = q.group_by(subq.c.value)
216 | q = q.filter(NCExperiment.experiment == experiment)
217 |
218 | # Filtering on frequency only makes sense if experiment is specified
219 | if frequency is not None:
220 | q = q.filter(NCFile.frequency == frequency)
221 |
222 | # Filtering on cell methods only makes sense if experiment is specified
223 | if cellmethods is not None:
224 | q = q.filter(subq.c.value == cellmethods)
225 |
226 | if search is not None:
227 | # Filter based on search term appearing in name, long_name or standard_name
228 | if isinstance(search, str):
229 | search = [
230 | search,
231 | ]
232 | q = q.filter(
233 | or_(
234 | column.contains(word)
235 | for word in search
236 | for column in (
237 | CFVariable.name,
238 | CFVariable.long_name,
239 | CFVariable.standard_name,
240 | )
241 | )
242 | )
243 |
244 | default_dtypes = {
245 | "# ncfiles": "int64",
246 | "coordinate": "boolean",
247 | "model": "category",
248 | "restart": "boolean",
249 | }
250 |
251 | df = pd.DataFrame(q, columns=[c["name"] for c in q.column_descriptions])
252 |
253 | return df.astype({k: v for k, v in default_dtypes.items() if k in df.columns})
254 |
255 |
256 | def get_frequencies(session, experiment=None):
257 | """
258 | Returns a DataFrame with all diagnostics frequencies and optionally
259 | for a given experiment.
260 | """
261 |
262 | if experiment is None:
263 | q = session.query(NCFile.frequency).group_by(NCFile.frequency)
264 | else:
265 | q = (
266 | session.query(NCFile.frequency)
267 | .join(NCFile.experiment)
268 | .filter(NCExperiment.experiment == experiment)
269 | .group_by(NCFile.frequency)
270 | )
271 |
272 | return pd.DataFrame(q, columns=[c["name"] for c in q.column_descriptions])
273 |
274 |
275 | def getvar(
276 | expt,
277 | variable,
278 | session,
279 | ncfile=None,
280 | start_time=None,
281 | end_time=None,
282 | n=None,
283 | frequency=None,
284 | attrs=None,
285 | attrs_unique=None,
286 | return_dataset=False,
287 | **kwargs,
288 | ):
289 | """For a given experiment, return an xarray DataArray containing the
290 | specified variable.
291 |
292 | expt - text string indicating the name of the experiment
293 | variable - text string indicating the name of the variable to load
294 | session - a database session created by cc.database.create_session()
295 | ncfile - an optional text string indicating the pattern for filenames
296 | to load. All filenames containing this string will match, so
297 | be specific. '/' can be used to match the start of the
298 | filename, and '%' is a wildcard character.
299 | start_time - only load data after this date. specify as a text string,
300 | e.g. '1900-01-01'
301 | end_time - only load data before this date. specify as a text string,
302 | e.g. '1900-01-01'
303 | n - after all other queries, restrict the total number of files to the
304 | first n. pass a negative value to restrict to the last n
305 | frequency - specify frequency to disambiguate identical variables saved
306 | at different temporal resolution
307 | attrs - a dictionary of attribute names and their values that must be
308 | present on the returned variables
309 | attrs_unique - a dictionary of attribute names and their values that
310 | must be unique on the returned variables. Defaults to
311 | {'cell_methods': 'time: mean'} and should not generally be
312 | changed.
313 | return_dataset - if True, return xarray.Dataset, containing the
314 | requested variable, along with its time_bounds,
315 | if present. Otherwise (default), return
316 | xarray.DataArray containing only the variable
317 |
318 | Note that if start_time and/or end_time are used, the time range
319 | of the resulting dataset may not be bounded exactly on those
320 | values, depending on where the underlying files start/end. Use
321 | dataset.sel() to exactly select times from the dataset.
322 |
323 | Other kwargs are passed through to xarray.open_mfdataset, including:
324 |
325 | chunks - Override any chunking by passing a chunks dictionary.
326 | decode_times - Time decoding can be disabled by passing decode_times=False
327 |
328 | """
329 |
330 | if attrs_unique is None:
331 | attrs_unique = {"cell_methods": "time: mean"}
332 |
333 | ncfiles = _ncfiles_for_variable(
334 | expt,
335 | variable,
336 | session,
337 | ncfile,
338 | start_time,
339 | end_time,
340 | n,
341 | frequency,
342 | attrs,
343 | attrs_unique,
344 | )
345 |
346 | variables = [variable]
347 | if return_dataset:
348 | # we know at least one variable was returned, so we can index ncfiles
349 | # ask for the extra variables associated with cell_methods, etc.
350 | variables += _bounds_vars_for_variable(*ncfiles[0])
351 |
352 | # chunking -- use first row/file and assume it's the same across the whole dataset
353 | xr_kwargs = {"chunks": _parse_chunks(ncfiles[0].NCVar)}
354 | xr_kwargs.update(kwargs)
355 |
356 | def _preprocess(d):
357 | if variable in d.coords:
358 | # just return coordinate data
359 | return d
360 |
361 | # otherwise, figure out if we need any ancillary data
362 | # like time_bounds
363 | return d[variables]
364 |
365 | ncfiles = list(str(f.NCFile.ncfile_path) for f in ncfiles)
366 |
367 | ds = xr.open_mfdataset(
368 | ncfiles,
369 | parallel=True,
370 | combine="by_coords",
371 | preprocess=_preprocess,
372 | **xr_kwargs,
373 | )
374 |
375 | if return_dataset:
376 | da = ds
377 | else:
378 | # if we want a dataarray, we'll strip off the extra info
379 | da = ds[variable]
380 |
381 | # Check the chunks given were actually in the data
382 | chunks = xr_kwargs.get("chunks", None)
383 | if chunks is not None:
384 | missing_chunk_dims = set(chunks.keys()) - set(da.dims)
385 | if len(missing_chunk_dims) > 0:
386 | logging.warning(
387 | f"chunking along dimensions {missing_chunk_dims} is not possible. Available dimensions for chunking are {set(da.dims)}"
388 | )
389 |
390 | da.attrs["ncfiles"] = ncfiles
391 |
392 | # Get experiment metadata, delete extraneous fields and add
393 | # to attributes
394 | metadata = get_experiments(
395 | session, experiment=False, exptname=expt, all=True
396 | ).to_dict(orient="records")[0]
397 |
398 | metadata = {
399 | k: v
400 | for k, v in metadata.items()
401 | if k not in ["ncfiles", "index", "root_dir"]
402 | and (v is not None and v != "None" and v != "")
403 | }
404 |
405 | da.attrs.update(metadata)
406 |
407 | return da
408 |
409 |
410 | def _bounds_vars_for_variable(ncfile, ncvar):
411 | """Return a list of names for a variable and its bounds"""
412 |
413 | variables = []
414 |
415 | if "cell_methods" not in ncvar.attrs:
416 | # no cell methods, so no need to look for bounds
417 | return variables
418 |
419 | # [cell methods] is a string attribute comprising a list of
420 | # blank-separated words of the form "name: method"
421 | cell_methods = iter(ncvar.attrs["cell_methods"].split())
422 |
423 | # for the moment, we're only looking for a time mean
424 | for dim, method in zip(cell_methods, cell_methods):
425 | if not (dim[:-1] == "time" and method == "mean"):
426 | continue
427 |
428 | bounds_var = ncfile.ncvars["time"].attrs.get("bounds")
429 | if bounds_var is not None:
430 | variables.append(bounds_var)
431 |
432 | return variables
433 |
434 |
435 | def _ncfiles_for_variable(
436 | expt,
437 | variable,
438 | session,
439 | ncfile=None,
440 | start_time=None,
441 | end_time=None,
442 | n=None,
443 | frequency=None,
444 | attrs=None,
445 | attrs_unique=None,
446 | ):
447 | """Return a list of (NCFile, NCVar) pairs corresponding to the
448 | database objects for a given variable.
449 |
450 | Optionally, pass ncfile, start_time, end_time, frequency, attrs,
451 | attrs_unique, or n for additional disambiguation (see getvar
452 | documentation for their semantics).
453 | """
454 |
455 | if attrs is None:
456 | attrs = {}
457 |
458 | if attrs_unique is None:
459 | attrs_unique = {}
460 |
461 | f, v = database.NCFile, database.NCVar
462 | q = (
463 | session.query(f, v)
464 | .join(f.ncvars)
465 | .join(f.experiment)
466 | .filter(v.varname == variable)
467 | .filter(database.NCExperiment.experiment == expt)
468 | .filter(f.present)
469 | .order_by(f.time_start)
470 | )
471 |
472 | # additional disambiguation
473 | if ncfile is not None:
474 | q = q.filter(f.ncfile.like("%" + ncfile))
475 | if start_time is not None:
476 | q = q.filter(f.time_end >= start_time)
477 | if end_time is not None:
478 | q = q.filter(f.time_start <= end_time)
479 | if frequency is not None:
480 | q = q.filter(f.frequency == frequency)
481 |
482 | # Attributes that are required to be unique to ensure disambiguation
483 | for attr, val in attrs_unique.items():
484 | # If default attribute present and not currently in filter
485 | # add to attributes filter
486 | if attr not in attrs:
487 | if q.filter(v.ncvar_attrs.any(name=attr, value=val)).first():
488 | attrs.update({attr: val})
489 |
490 | # requested specific attribute values
491 | for attr, val in attrs.items():
492 | q = q.filter(v.ncvar_attrs.any(name=attr, value=val))
493 |
494 | ncfiles = q.all()
495 |
496 | if n is not None:
497 | if n > 0:
498 | ncfiles = ncfiles[:n]
499 | else:
500 | ncfiles = ncfiles[n:]
501 |
502 | # ensure we actually got a result
503 | if not ncfiles:
504 | raise VariableNotFoundError(
505 | "No files were found containing '{}' in the '{}' experiment".format(
506 | variable, expt
507 | )
508 | )
509 |
510 | # check whether the results are unique
511 | for attr in attrs_unique:
512 | unique_attributes = set()
513 | for f in ncfiles:
514 | if attr in f.NCVar.attrs:
515 | unique_attributes.add(str(f.NCVar.attrs[attr]))
516 | else:
517 | unique_attributes.add(None)
518 | if len(unique_attributes) > 1:
519 | warnings.warn(
520 | f"Your query returns variables from files with different {attr}: {unique_attributes}. "
521 | "This could lead to unexpected behaviour! Disambiguate by passing "
522 | f"attrs={{'{attr}':''}} to getvar, specifying the desired attribute value.",
523 | QueryWarning,
524 | )
525 |
526 | unique_freqs = set(f.NCFile.frequency for f in ncfiles)
527 | if len(unique_freqs) > 1:
528 | warnings.warn(
529 | f"Your query returns files with differing frequencies: {unique_freqs}. "
530 | "This could lead to unexpected behaviour! Disambiguate by passing "
531 | "frequency= to getvar, specifying the desired frequency.",
532 | QueryWarning,
533 | )
534 |
535 | return ncfiles
536 |
537 |
538 | def _parse_chunks(ncvar):
539 | """Parse an NCVar, returning a dictionary mapping dimensions to chunking along that dimension."""
540 |
541 | try:
542 | # this should give either a list, or 'None' (other values will raise an exception)
543 | var_chunks = eval(ncvar.chunking)
544 | if var_chunks is not None:
545 | return dict(zip(eval(ncvar.dimensions), var_chunks))
546 |
547 | return None
548 |
549 | except NameError:
550 | # chunking could be 'contiguous', which doesn't evaluate
551 | return None
552 |
--------------------------------------------------------------------------------
/cosima_cookbook/summary/__init__.py:
--------------------------------------------------------------------------------
1 | from .nml_diff import *
2 | from .nml_summary import *
3 |
4 | # __all__ = []
5 |
--------------------------------------------------------------------------------
/cosima_cookbook/summary/nml_diff.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Tools to read a set of namelist files and find their superset and difference.
4 | # The functions are general-purpose (i.e. no ACCESS-OM2-related assumptions).
5 | # Andrew Kiss https://github.com/aekiss
6 |
7 |
8 | import f90nml # from http://f90nml.readthedocs.io/en/latest/
9 | import os
10 |
11 |
12 | def nmldict(nmlfnames):
13 | """Return dict of the groups/group members of multiple
14 | FORTRAN namelist files.
15 |
16 | Input: tuple of any number of namelist file path strings
17 | (non-existent files are silently ignored)
18 | Output: dict with key:value pairs where
19 | key is filename path string
20 | value is complete Namelist from filename
21 | """
22 | nmlfnames = set(nmlfnames) # remove any duplicates from nmlfnames
23 |
24 | nmlall = {} # dict keys are nml paths, values are Namelist dicts
25 | for nml in nmlfnames:
26 | if os.path.exists(nml):
27 | nmlall[nml] = f90nml.read(nml)
28 | return nmlall
29 |
30 |
31 | def superset(nmlall):
32 | """Return dict of groups/group members present in any of the input Namelists.
33 |
34 | Input: dict with key:value pairs where
35 | key is arbitrary (typically a filename string)
36 | value is Namelist (typically from filename)
37 | Output: dict with key:value pairs where
38 | key is group name (including all groups present in any input Namelist)
39 | value is Namelist for group (containing every group member present in any input Namelist)
40 | """
41 | nmlsuperset = {}
42 | for nml in nmlall:
43 | nmlsuperset.update(nmlall[nml])
44 | # nmlsuperset now contains all groups that were in any nml
45 | for group in nmlsuperset:
46 | # to avoid the next bit changing the original groups
47 | nmlsuperset[group] = nmlsuperset[group].copy()
48 | for nml in nmlall:
49 | if group in nmlall[nml]:
50 | nmlsuperset[group].update(nmlall[nml][group])
51 | # nmlsuperset groups now contain all keys that were in any nml
52 | return nmlsuperset
53 |
54 |
55 | def nmldiff(nmlall):
56 | """Remove every group/group member that is the same in all file Namelists.
57 |
58 | Parameter
59 | ---------
60 | Input : dict
61 | (e.g. returned by nmldict) with key:value pairs where
62 | key is filename path string
63 | value is complete Namelist from filename
64 | Output : dict
65 | modified input dict with key:value pairs where
66 | key is filename path string
67 | value is Namelist from filename, with any group/group member
68 | common to all other files removed
69 | """
70 |
71 | # Create diff by removing common groups/members from nmlall.
72 | # This is complicated by the fact group names / member names may differ
73 | # or be absent across different nml files.
74 |
75 | # First make a superset that has all group names and group members that
76 | # appear in any nml file
77 | nmlsuperset = superset(nmlall)
78 |
79 | # now go through nmlall and remove any groups / members from nmlall that
80 | # are identical to superset in all nmls
81 | # first delete any group members that are common to all nmls, then delete
82 | # any empty groups common to all nmls
83 | for group in nmlsuperset:
84 | # init: whether group is present and identical in all namelist files
85 | deletegroup = True
86 | for nml in nmlall:
87 | deletegroup = deletegroup and (group in nmlall[nml])
88 | if deletegroup: # group present in all namelist files
89 | for mem in nmlsuperset[group]:
90 | # init: whether group member is present and identical
91 | # in all namelist files
92 | deletemem = True
93 | for nml in nmlall:
94 | deletemem = deletemem and (mem in nmlall[nml][group])
95 | if deletemem: # group member is present in all namelist files
96 | for nml in nmlall:
97 | # ... now check if values match in all namelist files
98 | deletemem = deletemem and (
99 | nmlall[nml][group][mem] == nmlsuperset[group][mem]
100 | )
101 | if deletemem:
102 | for nml in nmlall:
103 | # delete mem from this group in all nmls
104 | del nmlall[nml][group][mem]
105 | for nml in nmlall:
106 | deletegroup = deletegroup and (len(nmlall[nml][group]) == 0)
107 | if deletegroup:
108 | # group is common to all nmls and now empty so delete
109 | for nml in nmlall:
110 | del nmlall[nml][group]
111 | return nmlall
112 |
--------------------------------------------------------------------------------
/cosima_cookbook/summary/nml_summary.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Create tabulated summary of namelists for a set of files.
4 | # These functions assume we are dealing with ACCESS-OM2 data.
5 | # Andrew Kiss https://github.com/aekiss
6 |
7 |
8 | import cosima_cookbook as cc
9 | from IPython.display import display, Markdown
10 | import os
11 |
12 |
13 | def summary_md(
14 | configuration,
15 | expts,
16 | path="/g/data3/hh5/tmp/cosima/",
17 | search="https://github.com/OceansAus/access-om2/search?&q=",
18 | nmls=[
19 | "atmosphere/input_atm.nml",
20 | "ice/cice_in.nml",
21 | "ice/input_ice.nml",
22 | "ice/input_ice_gfdl.nml",
23 | "ice/input_ice_monin.nml",
24 | "ocean/input.nml",
25 | ],
26 | ):
27 | for nml in nmls:
28 | epaths = []
29 | for e in expts:
30 | # NB: only look at output000
31 | epaths.append(os.path.join(path, configuration, e, "output000", nml))
32 | nmld = cc.nmldiff(cc.nmldict(tuple(epaths)))
33 | epaths = list(nmld.keys()) # redefine to handle missing paths
34 | epaths.sort()
35 | nmldss = cc.superset(nmld)
36 | display(Markdown("### " + nml + " namelist differences"))
37 | if len(nmldss) == 0:
38 | display(Markdown("no differences"))
39 | else:
40 | mdstr = "| group | variable | "
41 | for e in epaths:
42 | mdstr = mdstr + e.replace("/", "/ ") + " | "
43 | mdstr = mdstr + "\n|---|:--|" + ":-:|" * len(epaths)
44 | for group in sorted(nmldss):
45 | for mem in sorted(nmldss[group]):
46 | mdstr = mdstr + "\n| " + "&" + group + " | " + mem + " | "
47 | # search doesn't work on github submodules or forks
48 | # '[' + group + '](' + search + group + ')' + ' | ' + \
49 | # '[' + mem + '](' + search + mem + ')' + ' | '
50 | for e in epaths:
51 | if group in nmld[e]:
52 | if mem in nmld[e][group]:
53 | mdstr = mdstr + repr(nmld[e][group][mem])
54 | mdstr = mdstr + " | "
55 | display(Markdown(mdstr))
56 | return
57 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx==2
2 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # http://www.sphinx-doc.org/en/master/config
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | #import os
14 | #import sys
15 | #sys.path.insert(0, os.path.abspath('../../cosima-cookbook'))
16 |
17 |
18 | # -- Project information -----------------------------------------------------
19 |
20 | project = 'cosima-cookbook'
21 | copyright = '2019, James Munroe'
22 | author = 'James Munroe'
23 |
24 | # The full version, including alpha/beta/rc tags
25 | # release = '0.3.1'
26 |
27 |
28 | # -- General configuration ---------------------------------------------------
29 |
30 | # Add any Sphinx extension module names here, as strings. They can be
31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
32 | # ones.
33 | extensions = [
34 | 'sphinx.ext.autodoc',
35 | 'sphinx.ext.napoleon',
36 | ]
37 |
38 | napoleon_google_docstring = False
39 | napoleon_use_param = False
40 | napoleon_use_ivar = True
41 |
42 | # Add any paths that contain templates here, relative to this directory.
43 | templates_path = ['_templates']
44 |
45 | # List of patterns, relative to source directory, that match files and
46 | # directories to ignore when looking for source files.
47 | # This pattern also affects html_static_path and html_extra_path.
48 | exclude_patterns = []
49 |
50 |
51 | # -- Options for HTML output -------------------------------------------------
52 |
53 | # The theme to use for HTML and HTML Help pages. See the documentation for
54 | # a list of builtin themes.
55 | #
56 | html_theme = 'sphinx_rtd_theme'
57 |
58 | # Add any paths that contain custom static files (such as style sheets) here,
59 | # relative to this directory. They are copied after the builtin static files,
60 | # so a file named "default.css" will overwrite the builtin "default.css".
61 | html_static_path = ['_static']
62 |
--------------------------------------------------------------------------------
/docs/source/cosima_cookbook.rst:
--------------------------------------------------------------------------------
1 | cosima\_cookbook package
2 | ========================
3 |
4 | Subpackages
5 | -----------
6 |
7 | .. toctree::
8 |
9 | cosima_cookbook.diagnostics
10 | cosima_cookbook.plots
11 | cosima_cookbook.summary
12 |
13 | Submodules
14 | ----------
15 |
16 | cosima\_cookbook.database module
17 | --------------------------------
18 |
19 | .. automodule:: cosima_cookbook.database
20 | :members:
21 | :undoc-members:
22 | :show-inheritance:
23 |
24 | cosima\_cookbook.database\_utils module
25 | ---------------------------------------
26 |
27 | .. automodule:: cosima_cookbook.database_utils
28 | :members:
29 | :undoc-members:
30 | :show-inheritance:
31 |
32 | cosima\_cookbook.date\_utils module
33 | -----------------------------------
34 |
35 | .. automodule:: cosima_cookbook.date_utils
36 | :members:
37 | :undoc-members:
38 | :show-inheritance:
39 |
40 | cosima\_cookbook.distributed module
41 | -----------------------------------
42 |
43 | .. automodule:: cosima_cookbook.distributed
44 | :members:
45 | :undoc-members:
46 | :show-inheritance:
47 |
48 | cosima\_cookbook.memory module
49 | ------------------------------
50 |
51 | .. automodule:: cosima_cookbook.memory
52 | :members:
53 | :undoc-members:
54 | :show-inheritance:
55 |
56 | cosima\_cookbook.netcdf\_index module
57 | -------------------------------------
58 |
59 | .. automodule:: cosima_cookbook.netcdf_index
60 | :members:
61 | :undoc-members:
62 | :show-inheritance:
63 |
64 | cosima\_cookbook.netcdf\_utils module
65 | -------------------------------------
66 |
67 | .. automodule:: cosima_cookbook.netcdf_utils
68 | :members:
69 | :undoc-members:
70 | :show-inheritance:
71 |
72 | cosima\_cookbook.querying module
73 | --------------------------------
74 |
75 | .. automodule:: cosima_cookbook.querying
76 | :members:
77 | :undoc-members:
78 | :show-inheritance:
79 |
80 |
81 | Module contents
82 | ---------------
83 |
84 | .. automodule:: cosima_cookbook
85 | :members:
86 | :undoc-members:
87 | :show-inheritance:
88 |
--------------------------------------------------------------------------------
/docs/source/getting_started.rst:
--------------------------------------------------------------------------------
1 | ===============
2 | Getting Started
3 | ===============
4 |
5 | The cookbook consists of a Python 3 package that contains infrastructure
6 | for indexing COSIMA model output and convenient methods for searching for
7 | and loading the data into `xarray `_ data structures.
8 |
9 | Some users may find it sufficient to browse through the examples and tutorials
10 | in the `COSIMA recipes `_ repository.
11 | The Jupyter notebooks that can be downloaded from COSIMA recipes need this package
12 | (called cosima_cookbook) to be installed.
13 |
14 | Choosing your platform
15 | ======================
16 |
17 | COSIMA ocean and ice models are typically run on `NCI `_, an HPC
18 | computing centre in Australia. The output data is very large and it is
19 | assumed that this data resides on an NCI storage system.
20 |
21 | The cookbook is supported on two NCI systems
22 |
23 | #. `Virtual Desktop Infrastructure (VDI) `_
24 | #. `gadi (gadi.nci.org.au) `_
25 |
26 | Connecting
27 | ==========
28 |
29 | For both VDI and gadi, scripts are used to start a `jupyter notebook `_
30 | or `jupyter lab `_ session on the chosen system
31 | and automatically create an `ssh tunnel `_
32 | so that the jupyter session can be opened in your local browser using a url
33 | that appears to be on your own local machine.
34 |
35 | Scripts for this purpose are provided by the CLEX CMS team in this repository
36 |
37 | https://github.com/coecms/nci_scripts
38 |
39 | Clone the repository to your local computer. There are instructions in the repository
40 | on the requirements for each script and how to use them.
41 |
42 | Alternatively, if you are using the VDI Strudel environment and accessing the VDI
43 | through a virtual desktop, you can load the same python conda environment that is
44 | used in the scripts above and start a jupyter notebook session like so:
45 | ::
46 |
47 | module use /g/data3/hh5/public/modules
48 | module load conda/analysis3
49 |
50 | jupyter notebook
51 |
52 | Finding data
53 | ============
54 |
55 | Most of the infrastructure the COSIMA Cookbook provides revolves around indexing
56 | data output from COSIMA models and providing a python based API to access the
57 | data in a convenient and straightforward way.
58 |
59 | There are graphical user interface (GUI) tools to help with data discovery and
60 | exploration. There is a
61 | `tutorial `_
62 | in the COSIMA recipes repository which demonstrates the available tools.
63 |
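As a minimal sketch of this workflow, the example below creates a database
session, lists what has been indexed, and loads a variable into an xarray
DataArray. The experiment name ``my_experiment`` and the variable name ``temp``
are placeholders, and ``create_session()`` may need to be passed an explicit
database path depending on your setup:
::

    import cosima_cookbook as cc

    # open a session on the index database
    session = cc.database.create_session()

    # discover what has been indexed
    experiments = cc.querying.get_experiments(session)
    variables = cc.querying.get_variables(session, experiment='my_experiment')

    # load a variable as an xarray DataArray, optionally restricted in time
    temp = cc.querying.getvar('my_experiment', 'temp', session,
                              start_time='1900-01-01', end_time='1909-12-31')
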
64 | Tutorials and examples
65 | ======================
66 |
67 | COSIMA recipes provides `tutorials `_
68 | and `documented examples `_
69 | which can be used to learn how to use the Cookbook and for ideas and inspiration for your own analysis.
70 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. cosima-cookbook documentation master file, created by
2 | sphinx-quickstart on Mon Aug 12 20:35:06 2019.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | COSIMA Cookbook
7 | ===============
8 |
9 | Ocean and ice models are evaluated and compared by using diagnostics.
10 | `COSIMA recipes `_ is a collection of
11 | diagnostics by the `COSIMA `_ community.
12 |
13 | The recipes rely on infrastructure that is provided by the COSIMA Cookbook.
14 | The cookbook is under active development on
15 | Github: `COSIMA-Cookbook `_
16 |
17 | .. toctree::
18 | :maxdepth: 2
19 | :caption: Contents:
20 |
21 | getting_started
22 | related_projects
23 |
--------------------------------------------------------------------------------
/docs/source/modules.rst:
--------------------------------------------------------------------------------
1 | cosima_cookbook
2 | ===============
3 |
4 | .. toctree::
5 | :maxdepth: 4
6 |
7 | cosima_cookbook
8 |
--------------------------------------------------------------------------------
/docs/source/related_projects.rst:
--------------------------------------------------------------------------------
1 | Related Projects
2 | ================
3 |
4 | COSIMA-Cookbook is a solution for efficient calculation of diagnostics from the
5 | output of high resolution ocean and ice models. It is targeted
6 | at the COSIMA community where models (e.g. MOM5) are run on NCI
7 | resources such as raijin.
8 |
9 | The problem of dealing with increasingly large output from atmosphere,
10 | climate, and ocean models is being addressed by several groups.
11 | Active development is currently underway as part of
12 | the PangeoData_ initiative. COSIMA-Cookbook logically sits on top
13 | of that project.
14 |
15 |
16 | Underlying Python technologies
17 | ------------------------------
18 |
19 | Dask
20 |
21 | xarray
22 |
23 | Jupyter
24 |
25 |
26 | .. _PangeoData: https://pangeo-data.github.io
27 |
--------------------------------------------------------------------------------
/readthedocs.yml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yml
2 | # Read the Docs configuration file
3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4 |
5 | # Required
6 | version: 2
7 |
8 | # Build documentation in the docs/ directory with Sphinx
9 | sphinx:
10 | configuration: docs/source/conf.py
11 |
12 | # Optionally set the version of Python and requirements required to build your docs
13 | python:
14 | version: 3.7
15 | install:
16 | - requirements: docs/requirements.txt
17 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # install dependencies from setup.py, and the cookbook in editable mode
2 | -e .[build]
3 |
--------------------------------------------------------------------------------
/sandbox/alembic/README:
--------------------------------------------------------------------------------
1 | Generic single-database configuration.
2 |
3 | Make sure to edit sqlalchemy.url in alembic.ini!
--------------------------------------------------------------------------------
/sandbox/alembic/env.py:
--------------------------------------------------------------------------------
1 | from logging.config import fileConfig
2 |
3 | from sqlalchemy import engine_from_config
4 | from sqlalchemy import pool
5 |
6 | from alembic import context
7 |
8 | # this is the Alembic Config object, which provides
9 | # access to the values within the .ini file in use.
10 | config = context.config
11 |
12 | # Interpret the config file for Python logging.
13 | # This line sets up loggers basically.
14 | fileConfig(config.config_file_name)
15 |
16 | # add your model's MetaData object here
17 | # for 'autogenerate' support
18 | # from myapp import mymodel
19 | # target_metadata = mymodel.Base.metadata
20 | import cosima_cookbook as cc
21 | target_metadata = cc.database.Base.metadata
22 |
23 | # other values from the config, defined by the needs of env.py,
24 | # can be acquired:
25 | # my_important_option = config.get_main_option("my_important_option")
26 | # ... etc.
27 |
28 |
29 | def run_migrations_offline():
30 | """Run migrations in 'offline' mode.
31 |
32 | This configures the context with just a URL
33 | and not an Engine, though an Engine is acceptable
34 | here as well. By skipping the Engine creation
35 | we don't even need a DBAPI to be available.
36 |
37 | Calls to context.execute() here emit the given string to the
38 | script output.
39 |
40 | """
41 | url = config.get_main_option("sqlalchemy.url")
42 | context.configure(
43 | url=url,
44 | target_metadata=target_metadata,
45 | literal_binds=True,
46 | dialect_opts={"paramstyle": "named"},
47 | )
48 |
49 | with context.begin_transaction():
50 | context.run_migrations()
51 |
52 |
53 | def run_migrations_online():
54 | """Run migrations in 'online' mode.
55 |
56 | In this scenario we need to create an Engine
57 | and associate a connection with the context.
58 |
59 | """
60 | connectable = engine_from_config(
61 | config.get_section(config.config_ini_section),
62 | prefix="sqlalchemy.",
63 | poolclass=pool.NullPool,
64 | )
65 |
66 | with connectable.connect() as connection:
67 | context.configure(
68 | connection=connection, target_metadata=target_metadata
69 | )
70 |
71 | with context.begin_transaction():
72 | context.run_migrations()
73 |
74 |
75 | if context.is_offline_mode():
76 | run_migrations_offline()
77 | else:
78 | run_migrations_online()
79 |
--------------------------------------------------------------------------------
/sandbox/alembic/script.py.mako:
--------------------------------------------------------------------------------
1 | """${message}
2 |
3 | Revision ID: ${up_revision}
4 | Revises: ${down_revision | comma,n}
5 | Create Date: ${create_date}
6 |
7 | """
8 | from alembic import op
9 | import sqlalchemy as sa
10 | ${imports if imports else ""}
11 |
12 | # revision identifiers, used by Alembic.
13 | revision = ${repr(up_revision)}
14 | down_revision = ${repr(down_revision)}
15 | branch_labels = ${repr(branch_labels)}
16 | depends_on = ${repr(depends_on)}
17 |
18 |
19 | def upgrade():
20 | ${upgrades if upgrades else "pass"}
21 |
22 |
23 | def downgrade():
24 | ${downgrades if downgrades else "pass"}
25 |
--------------------------------------------------------------------------------
/sandbox/alembic/versions/16223b92479e_add_keywords.py:
--------------------------------------------------------------------------------
1 | """add keywords
2 |
3 | Revision ID: 16223b92479e
4 | Revises:
5 | Create Date: 2020-06-30 13:22:36.407339
6 |
7 | """
8 | from alembic import op
9 | import sqlalchemy as sa
10 | from sqlalchemy import orm
11 |
12 | import cosima_cookbook as cc
13 |
14 | # revision identifiers, used by Alembic.
15 | revision = '16223b92479e'
16 | down_revision = None
17 | branch_labels = None
18 | depends_on = None
19 |
20 |
21 | def upgrade():
22 | bind = op.get_bind()
23 | session = orm.Session(bind=bind)
24 |
25 | # ### commands auto generated by Alembic - please adjust! ###
26 | op.create_table('keywords',
27 | sa.Column('id', sa.Integer(), nullable=False),
28 | sa.Column('_keyword', sa.String(collation='NOCASE'), nullable=False),
29 | sa.PrimaryKeyConstraint('id')
30 | )
31 | op.create_index(op.f('ix_keywords__keyword'), 'keywords', ['_keyword'], unique=True)
32 | op.create_table('keyword_assoc',
33 | sa.Column('expt_id', sa.Integer(), nullable=True),
34 | sa.Column('keyword_id', sa.Integer(), nullable=True),
35 | sa.ForeignKeyConstraint(['expt_id'], ['experiments.id'], ),
36 | sa.ForeignKeyConstraint(['keyword_id'], ['keywords.id'], )
37 | )
38 | # ### end Alembic commands ###
39 | op.execute('PRAGMA user_version=3')
40 |
41 | # reindex metadata for experiments
42 | for expt in session.query(cc.database.NCExperiment):
43 | cc.database.update_metadata(expt, session)
44 | session.commit()
45 |
46 | def downgrade():
47 | # ### commands auto generated by Alembic - please adjust! ###
48 | op.drop_table('keyword_assoc')
49 | op.drop_index(op.f('ix_keywords__keyword'), table_name='keywords')
50 | op.drop_table('keywords')
51 | # ### end Alembic commands ###
52 | op.execute('PRAGMA user_version=2')
53 |
--------------------------------------------------------------------------------
/sandbox/diag-vis.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | from sys import argv
3 |
4 | import cosima_cookbook as cc
5 | import pandas as pd
6 | from sqlalchemy import select, distinct, bindparam
7 |
8 | from bokeh.io import curdoc
9 | from bokeh.layouts import column
10 | from bokeh.models.callbacks import CustomJS
11 | from bokeh.models.sources import ColumnDataSource
12 | from bokeh.models.tools import BoxSelectTool, HoverTool, TapTool
13 | from bokeh.models.widgets import Select, Button, Div
14 | import bokeh.palettes
15 | from bokeh.plotting import figure
16 | from bokeh.transform import factor_cmap
17 |
18 | if len(argv) < 2:
19 | raise Exception('Usage: bokeh serve diag-vis.py --args <database>')
20 | db = argv[1]
21 |
22 | conn, tables = cc.database.create_database(db)
23 |
24 | expt_query = select([distinct(tables['ncfiles'].c.experiment)])
25 | vars_query = select([distinct(tables['ncvars'].c.variable)]) \
26 | .select_from(tables['ncvars'].join(tables['ncfiles'])) \
27 | .where(tables['ncfiles'].c.experiment == bindparam('expt'))
28 | data_query = select([tables['ncfiles'].c.ncfile, tables['ncfiles'].c.run, tables['ncvars'].c.variable,
29 | tables['ncfiles'].c.time_start, tables['ncfiles'].c.time_end, tables['ncfiles'].c.frequency]) \
30 | .select_from(tables['ncfiles'].join(tables['ncvars'])) \
31 | .where(tables['ncfiles'].c.experiment == bindparam('expt')) \
32 | .where(tables['ncfiles'].c.time_start.isnot(None)) \
33 | .where(tables['ncfiles'].c.frequency != 'static') \
34 | .order_by(tables['ncvars'].c.variable, tables['ncfiles'].c.time_start)
35 |
36 | expts = [e[0] for e in conn.execute(expt_query)]
37 |
38 | def get_data(expt):
39 | data = conn.execute(data_query, expt=expt).fetchall()
40 | df = pd.DataFrame(data, columns=['ncfile', 'run', 'variable', 'time_start', 'time_end', 'frequency'])
41 | df[['time_start', 'time_end']] = df[['time_start', 'time_end']].applymap(
42 | lambda s: datetime.strptime(s, '%Y-%m-%d %H:%M:%S'))
43 |
44 | return df
45 |
46 | def print_selected(div):
47 | return CustomJS(args=dict(div=div), code="""
48 | var source = cb_obj;
49 | var unique_vars = {};
50 | for (var i of source.selected['1d'].indices) {
51 | var v = source.data['variable'][i];
52 | if (v in unique_vars) {
53 | unique_vars[v]['time_start'] = Math.min(unique_vars[v]['time_start'], source.data['time_start'][i]);
54 | unique_vars[v]['time_end'] = Math.max(unique_vars[v]['time_end'], source.data['time_end'][i]);
55 | } else {
56 | unique_vars[v] = { time_start: source.data['time_start'][i],
57 | time_end: source.data['time_end'][i] };
58 | }
59 | }
60 |
61 | var text = '<table><tr><th>Name</th><th>Start</th><th>End</th></tr>';
62 | for (var p in unique_vars) {
63 | var ts = new Date(unique_vars[p]['time_start']);
64 | var te = new Date(unique_vars[p]['time_end']);
65 | text = text.concat('<tr><td>'+p+'</td><td>'+ts.toISOString().substr(0,10)+'</td><td>'+te.toISOString().substr(0,10)+'</td></tr>');
66 | }
67 | text = text.concat('</table>')
68 | div.text = text;
69 | """)
70 |
71 |
72 | # create widgets
73 | expt_select = Select(title='Experiment:', options=expts, value=expts[0])
74 | refresh = Button(label='Update')
75 | div = Div(width=1000)
76 |
77 | # hover tools
78 | hover = HoverTool(tooltips=[
79 | ('variable', '@variable'), ('start', '@time_start{%F}'),
80 | ('end', '@time_end{%F}'), ('run', '@run'), ('file', '@ncfile')],
81 | formatters={
82 | 'time_start': 'datetime',
83 | 'time_end': 'datetime'
84 | })
85 | tap = TapTool()
86 | box_select = BoxSelectTool()
87 | tools = [hover, box_select, tap, 'pan', 'box_zoom', 'wheel_zoom', 'reset']
88 |
89 | df = get_data(expt_select.value)
90 | freqs = df.frequency.unique()
91 | cmap = factor_cmap('frequency', palette=bokeh.palettes.Category10[10], factors=freqs)
92 | cds = ColumnDataSource(df, callback=print_selected(div))
93 |
94 | p = figure(y_range=df.variable.unique(), x_range=(df.iloc[0].time_start, df.iloc[-1].time_end),
95 | title=expt_select.value, tools=tools)
96 | cmap = factor_cmap('frequency', palette=bokeh.palettes.Category10[10], factors=freqs)
97 | hb = p.hbar(y='variable', left='time_start', right='time_end', height=0.4, source=cds,
98 | fill_color=cmap, legend='frequency')
99 |
100 | # callback routines to repopulate list of variables
101 | def get_vars(expt):
102 | return [e[0] for e in conn.execute(vars_query, expt=expt)]
103 |
104 | def refresh_output():
105 | # get new data
106 | df = get_data(expt_select.value)
107 | freqs = df.frequency.unique()
108 | cmap = factor_cmap('frequency', palette=bokeh.palettes.Category10[10], factors=freqs)
109 |
110 | # update figure itself
111 | p.y_range.factors = list(df.variable.unique())
112 | (p.x_range.start, p.x_range.end) = (df.iloc[0].time_start, df.iloc[-1].time_end)
113 | p.title.text = expt_select.value
114 |
115 | # update data source for plot
116 | hb.data_source.data = hb.data_source.from_df(df)
117 | # update colourmap if necessary
118 | hb.glyph.fill_color = cmap
119 |
120 | refresh.on_click(refresh_output)
121 |
122 | # layout and show
123 | layout = column(expt_select, refresh, p, div)
124 | curdoc().add_root(layout)
125 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 | setup(
4 | name='cosima_cookbook',
5 | description='Diagnostics for COSIMA: Consortium for Ocean-Sea Ice Modelling in Australia',
6 | url='https://github.com/COSIMA/cosima-cookbook',
7 | author='COSIMA',
8 | license='Apache License 2.0',
9 | use_scm_version=True,
10 | packages=find_packages(),
11 | setup_requires=["setuptools_scm"],
12 |
13 | install_requires=[
14 | 'dask',
15 | 'xarray',
16 | 'numpy',
17 | 'matplotlib',
18 | 'bokeh',
19 | 'netcdf4',
20 | 'tqdm',
21 | 'sqlalchemy<2.0',
22 | 'cftime',
23 | 'f90nml',
24 | 'joblib',
25 | 'ipywidgets',
26 | 'lxml',
27 | ],
28 | entry_points={
29 | 'console_scripts': [
30 | 'cosima_cookbook-update_db = cosima_cookbook.database_update:main',
31 | ]
32 | },
33 | extras_require = {
34 | 'build': ['distributed', 'pytest', 'pytest-cov']
35 | }
36 | )
37 |
--------------------------------------------------------------------------------
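The console_scripts entry point above wires the cosima_cookbook-update_db command to cosima_cookbook.database_update:main. A hedged sketch of what that dispatch amounts to once the package is installed (the argument handling inside main() is an assumption, as its body is not shown in this dump):

# Roughly what the generated cosima_cookbook-update_db wrapper does:
from cosima_cookbook import database_update

if __name__ == "__main__":
    database_update.main()  # assumed to parse its own command-line arguments
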
/test/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from dask.distributed import Client
3 |
4 | from cosima_cookbook import database
5 |
6 |
7 | @pytest.fixture(scope="module")
8 | def client():
9 | client = Client(processes=False, dashboard_address=None)
10 | yield client
11 | client.close()
12 |
13 |
14 | @pytest.fixture(scope="function")
15 | def session_db(tmp_path):
16 | db = tmp_path / "test.db"
17 | s = database.create_session(str(db))
18 | yield s, db
19 |
20 | s.close()
21 |
--------------------------------------------------------------------------------
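A minimal sketch, not a file in the repository, of how a test consumes the session_db fixture defined above: pytest injects the (session, database path) pair, and the test indexes one of the bundled data directories before asserting against the database file.

from cosima_cookbook import database

def test_uses_session_db(session_db):
    # session is a live SQLAlchemy session; db is the temporary database path
    session, db = session_db
    database.build_index("test/data/indexing/longnames", session)
    assert db.exists()
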
/test/data/explore/duplicate/one/metadata.yaml:
--------------------------------------------------------------------------------
1 | contact: The ACCESS Oracle
2 | email: oracle@example.com
3 | created: 2018-01-01
4 | description: Description
5 | notes: Notes
6 | keywords:
7 | - cosima
8 | - ACCESS-OM2-01
9 | - ryf9091
10 |
--------------------------------------------------------------------------------
/test/data/explore/duplicate/one/ocean/ocean_age.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/duplicate/one/ocean/ocean_age.nc
--------------------------------------------------------------------------------
/test/data/explore/one/atmosphere/ty_trans.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/one/atmosphere/ty_trans.nc
--------------------------------------------------------------------------------
/test/data/explore/one/ice/hi_m.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/one/ice/hi_m.nc
--------------------------------------------------------------------------------
/test/data/explore/one/metadata.yaml:
--------------------------------------------------------------------------------
1 | contact: The ACCESS Oracle
2 | email: oracle@example.com
3 | created: 2018-01-01
4 | description: Description
5 | notes: Notes
6 | keywords:
7 | - cosima
8 | - ACCESS-OM2-01
9 | - ryf9091
10 |
--------------------------------------------------------------------------------
/test/data/explore/one/ocean/ocean.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/one/ocean/ocean.nc
--------------------------------------------------------------------------------
/test/data/explore/one/restart/ocean_velocity_advection.res.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/one/restart/ocean_velocity_advection.res.nc
--------------------------------------------------------------------------------
/test/data/explore/two/atm/hi_m.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/two/atm/hi_m.nc
--------------------------------------------------------------------------------
/test/data/explore/two/metadata.yaml:
--------------------------------------------------------------------------------
1 | contact: The ACCESS Oracle
2 | email: oracle@example.com
3 | created: 2020-01-01
4 | description: Description again!
5 | notes: Notes
6 | keywords:
7 | - cosima
8 | - another-keyword
9 |
--------------------------------------------------------------------------------
/test/data/explore/two/nomodel/ty_trans.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/two/nomodel/ty_trans.nc
--------------------------------------------------------------------------------
/test/data/explore/two/ocn/ocean.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/two/ocn/ocean.nc
--------------------------------------------------------------------------------
/test/data/explore/two/ocn/ocean_month.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/two/ocn/ocean_month.nc
--------------------------------------------------------------------------------
/test/data/explore/two/restart/ocean_velocity_advection.res.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/explore/two/restart/ocean_velocity_advection.res.nc
--------------------------------------------------------------------------------
/test/data/indexing/alternate/experiment_a/test2.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/alternate/experiment_a/test2.nc
--------------------------------------------------------------------------------
/test/data/indexing/broken_file/output000/test.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/broken_file/output000/test.nc
--------------------------------------------------------------------------------
/test/data/indexing/broken_metadata/metadata.yaml:
--------------------------------------------------------------------------------
1 | this: is: broken!
2 |
--------------------------------------------------------------------------------
/test/data/indexing/broken_metadata/test1.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/broken_metadata/test1.nc
--------------------------------------------------------------------------------
/test/data/indexing/empty_file/output000/empty.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/empty_file/output000/empty.nc
--------------------------------------------------------------------------------
/test/data/indexing/longnames/output000/test1.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/longnames/output000/test1.nc
--------------------------------------------------------------------------------
/test/data/indexing/longnames/output000/test2.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/longnames/output000/test2.nc
--------------------------------------------------------------------------------
/test/data/indexing/metadata/metadata.yaml:
--------------------------------------------------------------------------------
1 | contact: The ACCESS Oracle
2 | email: oracle@example.com
3 | created: 2018-01-01
4 | url: https://github.com/COSIMA/oracle
5 | description: >-
6 | Attempted spinup, using salt flux fix
7 | https://arccss.slack.com/archives/C6PP0GU9Y/p1515460656000124 and
8 | https://github.com/mom-ocean/MOM5/pull/208/commits/9f4ee6f8b72b76c96a25bf26f3f6cdf773b424d2
9 | from the start. Used mushy ice from July year 1 onwards to avoid
10 | vertical thermo error in cice
11 | https://arccss.slack.com/archives/C6PP0GU9Y/p1515842016000079
12 | notes: >-
13 | Stripy salt restoring:
14 | https://github.com/OceansAus/access-om2/issues/74 tripole seam bug:
15 | https://github.com/OceansAus/access-om2/issues/86 requires dt=300s
16 | in May, dt=240s in Aug to maintain CFL in CICE near tripoles (storms
17 | in those months in 8485RYF); all other months work with dt=400s
18 |
--------------------------------------------------------------------------------
/test/data/indexing/metadata/test1.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/metadata/test1.nc
--------------------------------------------------------------------------------
/test/data/indexing/multiple/experiment_a/test1.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/multiple/experiment_a/test1.nc
--------------------------------------------------------------------------------
/test/data/indexing/multiple/experiment_b/test1.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/multiple/experiment_b/test1.nc
--------------------------------------------------------------------------------
/test/data/indexing/single_broken_file/output000/broken.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/single_broken_file/output000/broken.nc
--------------------------------------------------------------------------------
/test/data/indexing/single_broken_file/output000/test.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/single_broken_file/output000/test.nc
--------------------------------------------------------------------------------
/test/data/indexing/symlinked/experiment_a:
--------------------------------------------------------------------------------
1 | ../multiple/experiment_a
--------------------------------------------------------------------------------
/test/data/indexing/time/t1.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/time/t1.nc
--------------------------------------------------------------------------------
/test/data/indexing/time/t2.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/time/t2.nc
--------------------------------------------------------------------------------
/test/data/indexing/time/t3.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/time/t3.nc
--------------------------------------------------------------------------------
/test/data/indexing/time/t4.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/time/t4.nc
--------------------------------------------------------------------------------
/test/data/indexing/time/t5.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/time/t5.nc
--------------------------------------------------------------------------------
/test/data/indexing/time_bounds/file001.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/indexing/time_bounds/file001.nc
--------------------------------------------------------------------------------
/test/data/metadata/keywords/metadata.yaml:
--------------------------------------------------------------------------------
1 | contact: The ACCESS Oracle
2 | email: oracle@example.com
3 | created: 2018-01-01
4 | description: Description
5 | notes: Notes
6 | keywords:
7 | - cosima
8 | - ACCESS-OM2-01
9 | - ryf9091
10 |
--------------------------------------------------------------------------------
/test/data/metadata/keywords/test1.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/metadata/keywords/test1.nc
--------------------------------------------------------------------------------
/test/data/metadata/keywords2/metadata.yaml:
--------------------------------------------------------------------------------
1 | contact: The ACCESS Oracle
2 | email: oracle@example.com
3 | created: 2020-01-01
4 | description: Description again!
5 | notes: Notes
6 | keywords:
7 | - cosima
8 | - another-keyword
9 |
--------------------------------------------------------------------------------
/test/data/metadata/keywords2/test1.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/metadata/keywords2/test1.nc
--------------------------------------------------------------------------------
/test/data/metadata/string_keyword/metadata.yaml:
--------------------------------------------------------------------------------
1 | contact: The ACCESS Oracle
2 | email: oracle@example.com
3 | created: 2020-01-02
4 | description: String keywords
5 | notes: Notes
6 | keywords: cosima
7 |
--------------------------------------------------------------------------------
/test/data/metadata/upcase/metadata.yaml:
--------------------------------------------------------------------------------
1 | contact: The ACCESS Oracle
2 | email: oracle@example.com
3 | created: 2018-01-01
4 | description: Description (with uppercase keywords!)
5 | notes: Notes
6 | keywords:
7 | - COSIMA
8 | - access-om2-01
9 | - RYF9091
10 |
--------------------------------------------------------------------------------
/test/data/ocean_sealevel.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/ocean_sealevel.nc
--------------------------------------------------------------------------------
/test/data/querying/output000/hi_m.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/querying/output000/hi_m.nc
--------------------------------------------------------------------------------
/test/data/querying/output000/ocean.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/querying/output000/ocean.nc
--------------------------------------------------------------------------------
/test/data/querying/restart000/ty_trans.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/querying/restart000/ty_trans.nc
--------------------------------------------------------------------------------
/test/data/querying_disambiguation/output000/ocean.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/querying_disambiguation/output000/ocean.nc
--------------------------------------------------------------------------------
/test/data/querying_disambiguation/output000/ocean_month.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/querying_disambiguation/output000/ocean_month.nc
--------------------------------------------------------------------------------
/test/data/update/experiment_a/test1.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/update/experiment_a/test1.nc
--------------------------------------------------------------------------------
/test/data/update/experiment_b/test2.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/COSIMA/cosima-cookbook/c945ef9405bd8ad350f66aa82c44a68da43606aa/test/data/update/experiment_b/test2.nc
--------------------------------------------------------------------------------
/test/test_database.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import os
3 | import sqlalchemy as sa
4 | from cosima_cookbook import database
5 |
6 |
7 | @pytest.fixture
8 | def db_env(tmp_path):
9 | old_db = os.getenv("COSIMA_COOKBOOK_DB")
10 | db = tmp_path / "test.db"
11 | os.environ["COSIMA_COOKBOOK_DB"] = str(db)
12 |
13 | yield db
14 |
15 | # clean up by resetting the env var
16 | if old_db:
17 | os.environ["COSIMA_COOKBOOK_DB"] = old_db
18 | else:
19 | del os.environ["COSIMA_COOKBOOK_DB"]
20 |
21 |
22 | def test_default(tmp_path):
23 | db = tmp_path / "test.db"
24 | # override the NCI-specific default
25 | database.__DEFAULT_DB__ = str(db)
26 |
27 | s = database.create_session()
28 |
29 | assert db.exists()
30 |
31 |
32 | def test_env_var(db_env):
33 | # make sure we use the environment variable
34 | # override with no arguments supplied
35 | s = database.create_session()
36 | assert db_env.exists()
37 |
38 |
39 | def test_arg_override(tmp_path, db_env):
40 | # check that if we supply an argument, that
41 | # is used rather than the environment variable
42 | db = tmp_path / "test_other.db"
43 | s = database.create_session(str(db))
44 |
45 | assert not db_env.exists()
46 | assert db.exists()
47 |
48 |
49 | def test_creation(session_db):
50 | """Test that a database file is created with a session
51 | when the session file doesn't exist."""
52 |
53 | s, db = session_db
54 | assert db.exists()
55 |
56 | # we should be able to query against a table that exists
57 | # with no error
58 | s.execute("SELECT * FROM ncfiles")
59 |
60 | # but not a non-existent table
61 | with pytest.raises(sa.exc.OperationalError, match="no such table"):
62 | s.execute("SELECT * FROM ncfiles_notfound")
63 |
64 |
65 | def test_reopen(tmp_path):
66 | """Test that we can reopen a database of the correct version."""
67 |
68 | db = tmp_path / "test.db"
69 | s = database.create_session(str(db))
70 |
71 | s.close()
72 | s = database.create_session(str(db))
73 | s.close()
74 |
75 |
76 | def test_outdated(tmp_path):
77 | """Test that we can't use an outdated database"""
78 |
79 | db = tmp_path / "test.db"
80 | s = database.create_session(str(db))
81 |
82 | # check that the current version matches that defined in the module
83 | ver = s.execute("PRAGMA user_version").fetchone()[0]
84 | assert ver == database.__DB_VERSION__
85 |
86 | # reset version to one prior
87 | s.execute("PRAGMA user_version={}".format(database.__DB_VERSION__ - 1))
88 | s.close()
89 |
90 | # recreate the session
91 | with pytest.raises(Exception, match="Incompatible database versions"):
92 | s = database.create_session(str(db))
93 |
94 |
95 | def test_outdated_notmodified(tmp_path):
96 | """Test that we don't try to modify an outdated database.
97 | This includes adding tables that don't yet exist because
98 | it's a previous version.
99 | """
100 |
101 | # set up an empty database with a previous version
102 | db = tmp_path / "test.db"
103 | conn = sa.create_engine("sqlite:///" + str(db)).connect()
104 | conn.execute("PRAGMA user_version={}".format(database.__DB_VERSION__ - 1))
105 | conn.close()
106 |
107 | # try to create the session
108 | # this should fail and not modify the existing database
109 | with pytest.raises(Exception):
110 | s = database.create_session(str(db))
111 |
112 | # reopen the connection and ensure tables weren't created
113 | conn = sa.create_engine("sqlite:///" + str(db)).connect()
114 | with pytest.raises(sa.exc.OperationalError, match="no such table"):
115 | conn.execute("SELECT * FROM ncfiles")
116 |
117 |
118 | def test_delete_experiment(session_db):
119 | """Test that we can completely delete an experiment
120 | and its associated data.
121 | """
122 |
123 | session, db = session_db
124 | database.build_index("test/data/indexing/longnames", session)
125 |
126 | # make sure we actually did index something
127 | expt = (
128 | session.query(database.NCExperiment)
129 | .filter(database.NCExperiment.experiment == "longnames")
130 | .one_or_none()
131 | )
132 | assert expt is not None
133 |
134 | database.delete_experiment("longnames", session)
135 | expt = (
136 | session.query(database.NCExperiment)
137 | .filter(database.NCExperiment.experiment == "longnames")
138 | .one_or_none()
139 | )
140 | assert expt is None
141 |
142 | # check that all files are removed
143 | files = session.query(sa.func.count(database.NCFile.id)).scalar()
144 | assert files == 0
145 |
146 | # make sure all ncvars are removed
147 | vars = session.query(sa.func.count(database.NCVar.id)).scalar()
148 | assert vars == 0
149 |
--------------------------------------------------------------------------------
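The version handling exercised by test_outdated above relies on SQLite's user_version pragma matching the module constant. A short sketch of that check, with an illustrative database path:

from cosima_cookbook import database

session = database.create_session("/tmp/example.db")  # illustrative path
ver = session.execute("PRAGMA user_version").fetchone()[0]
# create_session stamps new databases with the module's schema version
assert ver == database.__DB_VERSION__
session.close()
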
/test/test_dates.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | Copyright 2017 ARC Centre of Excellence for Climate Systems Science
5 | author: Aidan Heerdegen
6 | Licensed under the Apache License, Version 2.0 (the "License");
7 | you may not use this file except in compliance with the License.
8 | You may obtain a copy of the License at
9 | http://www.apache.org/licenses/LICENSE-2.0
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | """
16 |
17 | from __future__ import print_function
18 |
19 | import pytest
20 | import sys, os, time, glob
21 | import shutil
22 | import pdb # Add pdb.set_trace() to set breakpoints
23 | import xarray as xr
24 | import numpy as np
25 | import cftime
26 | from datetime import datetime, timedelta
27 |
28 | from cosima_cookbook.date_utils import (
29 | rebase_times,
30 | rebase_dataset,
31 | rebase_variable,
32 | rebase_shift_attr,
33 | format_datetime,
34 | parse_datetime,
35 | )
36 |
37 | from xarray.testing import assert_equal
38 |
39 | verbose = True
40 |
41 | times = []
42 |
43 |
44 | def setup_module(module):
45 | if verbose:
46 | print("setup_module module:%s" % module.__name__)
47 | if verbose:
48 | print("Python version: {}".format(sys.version))
49 | # Put any setup code in here, like making temporary files
50 | # Make 5 years of a noleap calendar on the first of each month
51 | global times
52 | for y in range(1, 6):
53 | for m in range(1, 13):
54 | times.append(
55 | np.round(
56 | cftime.date2num(
57 | cftime.datetime(y, m, 1),
58 | units="days since 01-01-01",
59 | calendar="noleap",
60 | ),
61 | 8,
62 | )
63 | )
64 | times = np.array(times)
65 |
66 |
67 | def teardown_module(module):
68 | if verbose:
69 | print("teardown_module module:%s" % module.__name__)
70 |     # Put any teardown code in here, like deleting temporary files
71 |
72 |
73 | def test_format_parse_datetime():
74 | dates = [
75 | cftime.num2date(t, units="days since 01-01-01", calendar="noleap")
76 | for t in times
77 | ]
78 | assert format_datetime(dates[0]) == "0001-01-01 00:00:00"
79 | assert format_datetime(dates[-1]) == "0005-12-01 00:00:00"
80 |
81 | for d in dates:
82 | assert parse_datetime(format_datetime(d), "noleap") == d
83 |
84 | dates = [
85 | cftime.num2date(t, units="days since 01-01-01", calendar="proleptic_gregorian")
86 | for t in times
87 | ]
88 | assert format_datetime(dates[0]) == "0001-01-01 00:00:00"
89 | assert format_datetime(dates[-1]) == "0005-11-30 00:00:00"
90 |
91 | for d in dates:
92 | assert parse_datetime(format_datetime(d), "proleptic_gregorian") == d
93 |
94 |
95 | def test_rebase_times():
96 | # Should be a 10 year offset between original times and rebased times
97 | assert not np.any(
98 | (times + 365 * 10)
99 | - rebase_times(
100 | times, "days since 1980-01-01", "noleap", "days since 1970-01-01"
101 | )
102 | )
103 |
104 | # Should be a -10 year offset between original times and rebased times
105 | assert not np.any(
106 | (times - 365 * 10)
107 | - rebase_times(
108 | times, "days since 1980-01-01", "noleap", "days since 1990-01-01"
109 | )
110 | )
111 |
112 |
113 | def test_rebase_variable():
114 | timesvar = xr.DataArray(
115 | times, attrs={"units": "days since 1980-01-01", "calendar": "noleap"}
116 | )
117 |
118 | print("att:", timesvar.attrs)
119 |
120 | # Test we can rebase with and without explicitly setting a calendar
121 | timesvar_rebased = rebase_variable(timesvar, target_units="days since 1970-01-01")
122 | assert timesvar_rebased.equals(
123 | rebase_variable(timesvar, "noleap", target_units="days since 1970-01-01")
124 | )
125 |
126 | assert not timesvar.equals(timesvar_rebased)
127 |
128 | # Should be a 10 year offset between original times and rebased times
129 | assert not np.any((times + 365 * 10) - timesvar_rebased.values)
130 | # assert(not np.any((times + 365*10) - rebase_variable(timesvar, 'noleap', target_units='days since 1970-01-01').values))
131 |
132 | with pytest.raises(ValueError):
133 | timesvar_rebased = rebase_variable(
134 | timesvar, "noleap", target_units="days since 1990-01-01"
135 | )
136 |
137 | # Rebase with an offset otherwise would have negative dates
138 | timesvar_rebased = rebase_variable(
139 | timesvar, "noleap", target_units="days since 1990-01-01", offset=365 * 10
140 | )
141 |
142 | # Values should be the same
143 | assert not np.any(times - timesvar_rebased.values)
144 |
145 | # But the rebase_shift_attr should be set to 10 years
146 | assert timesvar_rebased.attrs[rebase_shift_attr] == 365 * 10
147 |
148 | # Check we get back timesvar if rebased again with no arguments (rebases to previous
149 | # units and applies offset if required in this instance)
150 | assert timesvar.equals(rebase_variable(timesvar_rebased))
151 |
152 |
153 | def test_matching_time_units():
154 | testfile = "test/data/ocean_sealevel.nc"
155 |
156 | ds = xr.open_dataset(testfile, decode_times=False)
157 | target_units = "days since 1800-01-01"
158 |
159 | ds1 = rebase_dataset(ds, target_units)
160 |     # ds1.to_netcdf('tmp.nc')
161 |
162 | ds2 = rebase_dataset(ds1)
163 | # ds2.to_netcdf('tmp2.nc')
164 |
165 | # Rebasing again without target_units specified should
166 | # un-do previous rebase
167 | assert ds.equals(ds2)
168 |
169 | # An offset is required as the target units are ahead of the data in time
170 | target_units = "days since 2000-01-01"
171 |
172 | # Offset can be automatically generated as difference between target and src units
173 | ds1 = rebase_dataset(ds, target_units, offset="auto")
174 | ds2 = rebase_dataset(ds1)
175 |
176 | assert ds.equals(ds2)
177 |
178 | # Offset can be an integer, but need to know what units are being used, days, hours etc
179 | ds1 = rebase_dataset(ds, target_units, offset=100 * 365)
180 | ds2 = rebase_dataset(ds1)
181 |
182 | assert ds.equals(ds2)
183 |
184 | # Offset can be a datetime.timedelta object, but this would need some knowledge of
185 | # the calendar
186 | ds1 = rebase_dataset(ds, target_units, offset=timedelta(days=100 * 365))
187 | ds2 = rebase_dataset(ds1)
188 |
189 | # A different offset will yield a different dataset, but upon rebasing a second time
190 | # should still be the same as the original regardless of offset.
191 | ds3 = rebase_dataset(ds, target_units, offset=timedelta(days=200 * 365))
192 | ds4 = rebase_dataset(ds3)
193 |
194 | assert ds.equals(ds4)
195 | assert not ds1.equals(ds3)
196 |
197 | # Test graceful recovery if time_bounds missing.
198 | del ds["time_bounds"]
199 | ds3 = rebase_dataset(ds, target_units, offset=timedelta(days=200 * 365))
200 | ds4 = rebase_dataset(ds3)
201 |
202 | assert ds.equals(ds4)
203 | assert not ds1.equals(ds3)
204 |
205 | ds = xr.open_dataset(testfile, decode_times=False)[["sea_level"]]
206 | target_units = "days since 1800-01-01"
207 |
208 | ds1 = rebase_dataset(ds, target_units)
209 |
210 |
211 | def test_chunking():
212 | # An offset is required as the target units are ahead of the data in time
213 | target_units = "days since 2000-01-01"
214 |
215 | testfile = "test/data/ocean_sealevel.nc"
216 |
217 | ds = xr.open_dataset(testfile, decode_times=False, chunks={"time": 10})
218 | target_units = "days since 1800-01-01"
219 |
220 | ds1 = rebase_dataset(ds, target_units)
221 |
--------------------------------------------------------------------------------
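test_rebase_times above checks that re-expressing time values against an earlier epoch on a noleap calendar shifts the raw numbers by a whole number of 365-day years. A small sketch of that relationship with illustrative inputs:

import numpy as np
from cosima_cookbook.date_utils import rebase_times

times = np.array([0.0, 31.0, 59.0])  # illustrative day offsets
rebased = rebase_times(
    times, "days since 1980-01-01", "noleap", "days since 1970-01-01"
)
# Moving the epoch back ten noleap years adds 10 * 365 days to every value
assert np.all(rebased == times + 365 * 10)
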
/test/test_explore.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from datetime import datetime
4 | import os.path
5 | import shutil
6 |
7 | import xarray as xr
8 | import pandas as pd
9 | from pandas.testing import assert_frame_equal, assert_series_equal
10 |
11 | import cosima_cookbook as cc
12 |
13 | from cosima_cookbook.database import NCExperiment, NCFile
14 |
15 |
16 | def metadata_for_experiment(
17 | path, session, metadata_file=None, name="test", commit=True
18 | ):
19 | """Method to read metadata for an experiment without requiring
20 | the rest of the indexing infrastructure.
21 | """
22 |
23 | expt = NCExperiment(experiment=name, root_dir=path)
24 |
25 | # look for this experiment in the database
26 | q = (
27 | session.query(NCExperiment)
28 | .filter(NCExperiment.experiment == expt.experiment)
29 | .filter(NCExperiment.root_dir == expt.root_dir)
30 | )
31 | r = q.one_or_none()
32 | if r is not None:
33 | expt = r
34 |
35 | cc.database.update_metadata(expt, session, metadata_file)
36 |
37 | if commit:
38 | session.add(expt)
39 | session.commit()
40 | else:
41 | return expt
42 |
43 |
44 | @pytest.fixture(scope="module")
45 | def session(tmp_path_factory):
46 | # index test directory into temp database
47 | d = tmp_path_factory.mktemp("database")
48 | db = d / "test.db"
49 | session = cc.database.create_session(str(db))
50 |
51 | # build index for entire module
52 | cc.database.build_index(
53 | [
54 | "test/data/explore/one",
55 | "test/data/explore/two",
56 | "test/data/explore/duplicate/one",
57 | ],
58 | session,
59 | )
60 |
61 | # force all files to be marked as present, even if they're empty
62 | ncfiles = session.query(cc.database.NCFile).all()
63 | for f in ncfiles:
64 | f.present = True
65 | session.commit()
66 |
67 | return session
68 |
69 |
70 | def test_database_explorer(session):
71 | dbx = cc.explore.DatabaseExplorer(session=session)
72 |
73 | assert dbx.session is session
74 |
75 | # Experiment selector
76 | assert dbx.expt_selector.options == ("one", "two")
77 |
78 | # Keyword filter selector
79 | assert dbx.filter_widget.options == tuple(dbx.keywords)
80 |
81 | in_one = set(cc.querying.get_variables(session, "one").name)
82 | in_two = set(cc.querying.get_variables(session, "two").name)
83 |
84 | # The variable filter box
85 | assert len(dbx.var_filter.selector.variables) == len((in_one | in_two))
86 |
87 | # Turn off filtering so all variables are present in the filter selector
88 | dbx.var_filter.selector._filter_variables(coords=False, restarts=False, model="")
89 |
90 | truth = {
91 | "age_global": "Age (global) (yr)",
92 | "diff_cbt_t": "total vert diff_cbt(temp) (w/o neutral included) (m^2/s)",
93 | "dzt": "t-cell thickness (m)",
94 | "hi_m": "grid cell mean ice thickness (m)",
95 | "neutral": "neutral density (kg/m^3)",
96 | "neutralrho_edges": "neutral density edges (kg/m^3)",
97 | "nv": "vertex number",
98 | "pot_rho_0": "potential density referenced to 0 dbar (kg/m^3)",
99 | "pot_rho_2": "potential density referenced to 2000 dbar (kg/m^3)",
100 | "salt": "Practical Salinity (psu)",
101 | "st_edges_ocean": "tcell zstar depth edges (meters)",
102 | "st_ocean": "tcell zstar depth (meters)",
103 | }
104 |
105 | for var, label in truth.items():
106 | assert dbx.var_filter.selector.selector.options[var] == label
107 |
108 | # Add all variables common to both experiments and ensure after filter
109 | # experiment selector still contains both
110 | for var in in_one & in_two:
111 | dbx.var_filter.selector.selector.label = var
112 | dbx.var_filter._add_var_to_selected(None)
113 |
114 | dbx._filter_experiments(None)
115 | assert dbx.expt_selector.options == ("one", "two")
116 |
117 | dbx.var_filter.delete(in_one & in_two)
118 | assert len(dbx.var_filter.var_filter_selected.options) == 0
119 |
120 | # Now all variables only in experiment two and ensure after filter
121 | # experiment selector only contains two
122 | for var in in_two - in_one:
123 | dbx.var_filter.selector.selector.label = var
124 | dbx.var_filter._add_var_to_selected(None)
125 |
126 | dbx._filter_experiments(None)
127 | assert dbx.expt_selector.options == ("two",)
128 |
129 |
130 | def test_experiment_explorer(session):
131 | ee1 = cc.explore.ExperimentExplorer(session=session)
132 |
133 | # Experiment selector
134 | assert ee1.expt_selector.options == ("one", "two")
135 |
136 | assert len(ee1.var_selector.selector.options) == 24
137 | assert "pot_rho_0" in ee1.var_selector.selector.options
138 | assert "ty_trans_rho" not in ee1.var_selector.selector.options
139 |
140 | # Simulate selecting a different experiment from menu
141 | ee1._load_experiment("two")
142 | assert len(ee1.var_selector.selector.options) == 28
143 | assert "pot_rho_0" in ee1.var_selector.selector.options
144 | assert "ty_trans_rho" in ee1.var_selector.selector.options
145 |
146 | # Check frequency drop down changes when variable selector assigned a value
147 | assert ee1.frequency.options == ()
148 | ee1.var_selector.selector.label = "ty_trans"
149 | ee1.var_selector._set_frequency_selector("ty_trans")
150 | assert ee1.frequency.options == ("1 yearly",)
151 | ee1.var_selector._set_cellmethods_selector("ty_trans", "1 yearly")
152 | assert ee1.cellmethods.options == ("time: mean",)
153 | ee1.var_selector._set_daterange_selector("ty_trans", "1 yearly", "time: mean")
154 | assert ee1.frequency.options == ("1 yearly",)
155 |
156 | # Check frequency drop down changes when variable selector assigned a value
157 | ee1.var_selector.selector.label = "tx_trans"
158 | ee1.var_selector._set_frequency_selector("tx_trans")
159 | assert ee1.frequency.options == (None,)
160 | ee1.var_selector._set_cellmethods_selector("tx_trans", None)
161 | assert ee1.cellmethods.options == ("time: mean",)
162 | ee1.var_selector._set_daterange_selector("tx_trans", None, "time: mean")
163 | print(ee1.daterange)
164 |
165 | ee2 = cc.explore.ExperimentExplorer(session=session)
166 | assert id(ee1.var_selector) != id(ee2.var_selector)
167 |
168 |
169 | def test_get_data(session):
170 | ee = cc.explore.ExperimentExplorer(session=session)
171 |
172 | assert ee.data is None
173 |
174 | ee._load_experiment("one")
175 | ee.var_selector.selector.label = "ty_trans"
176 | ee.var_selector._set_frequency_selector("ty_trans")
177 | ee.var_selector._set_cellmethods_selector("ty_trans", "1 yearly")
178 | ee.var_selector._set_daterange_selector("ty_trans", "1 yearly", "time: mean")
179 | ee._load_data(None)
180 |
181 | assert ee.frequency.options == ("1 yearly",)
182 | assert ee.daterange.options[0][0] == "0166/12/31"
183 | assert ee.daterange.options[1][0] == "0167/12/31"
184 |
185 | assert ee.data is not None
186 | assert ee.data.shape == (2, 1, 1, 1)
187 |
188 |
189 | def test_model_property(session):
190 | # Grab all variables and ensure the SQL classification matches the python version
191 | # May be some holes, as not ensured all cases covered
192 | for expt in cc.querying.get_experiments(session, all=True).experiment:
193 | for index, row in cc.querying.get_variables(
194 | session, experiment=expt, inferred=True
195 | ).iterrows():
196 | ncfile = NCFile(
197 | index_time=datetime.now(),
198 | ncfile=row.ncfile,
199 | present=True,
200 | )
201 | assert ncfile.model == row.model
202 |
--------------------------------------------------------------------------------
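Outside the test fixture, the explorer widgets are constructed the same way: directly from a session. A brief sketch with an illustrative database path, assuming the database has already been indexed:

import cosima_cookbook as cc

session = cc.database.create_session("/path/to/cookbook.db")  # illustrative path
dbx = cc.explore.DatabaseExplorer(session=session)   # browse all experiments
ee = cc.explore.ExperimentExplorer(session=session)  # drill into one experiment
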
/test/test_indexing.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import pytest
4 | import shutil
5 | import time
6 | import xarray as xr
7 | from pathlib import Path
8 | from cosima_cookbook import database
9 | from sqlalchemy import func, inspect
10 |
11 | LOGGER = logging.getLogger(__name__)
12 |
13 |
14 | def rm_tree(pth):
15 | pth = Path(pth)
16 | for child in pth.glob("*"):
17 | if child.is_file():
18 | child.unlink()
19 | else:
20 | rm_tree(child)
21 | pth.rmdir()
22 |
23 |
24 | def assert_dictionaries_same(expected, actual):
25 | for key in expected.keys():
26 | if key not in actual or expected[key] != actual[key]:
27 | return False
28 |
29 | return True
30 |
31 |
32 | @pytest.fixture
33 | def unreadable_dir(tmp_path):
34 | expt_path = tmp_path / "expt_dir"
35 | expt_path.mkdir()
36 | idx_dir = expt_path / "unreadable"
37 | idx_dir.mkdir()
38 | idx_dir.chmod(0o300)
39 |
40 | yield idx_dir
41 |
42 | idx_dir.chmod(0o700)
43 | rm_tree(expt_path)
44 |
45 |
46 | def test_find_files():
47 | files = database.find_files("test/data/indexing/")
48 | assert len(files) == 17
49 |
50 | for f in files:
51 | assert Path(f).suffix == ".nc"
52 |
53 | # No python source files in data subdirectory
54 | assert len(database.find_files("test/data/indexing/", "*.py")) == 0
55 |
56 | # Test works with alternative suffix
57 | files = database.find_files("test/", "*.py")
58 | assert len(files) == 9
59 |
60 | for f in files:
61 | assert Path(f).suffix == ".py"
62 |
63 |
64 | def test_find_experiment(session_db):
65 | session, db = session_db
66 |
67 | directory = Path("test/data/indexing/broken_file")
68 |
69 | assert None == database.find_experiment(session, directory)
70 |
71 | expt = database.NCExperiment(
72 | experiment=str(directory.name), root_dir=str(directory.resolve())
73 | )
74 | session.add(expt)
75 | session.flush()
76 |
77 | assert expt == database.find_experiment(session, directory)
78 |
79 |
80 | def test_index_experiment(session_db):
81 | session, db = session_db
82 |
83 | directory = Path("test/data/indexing/longnames")
84 | expt = database.NCExperiment(
85 | experiment=str(directory.name), root_dir=str(directory.resolve())
86 | )
87 |
88 | files = database.find_files(directory)
89 |
90 | # Index just one file
91 | database.index_experiment(set(list(files)[:1]), session, expt)
92 | session.flush()
93 |
94 | assert expt == database.find_experiment(session, directory)
95 | assert len(database.find_experiment(session, directory).ncfiles) == 1
96 |
97 | # Index the other file
98 | database.index_experiment(set(list(files)[1:]), session, expt)
99 |
100 | assert expt == database.find_experiment(session, directory)
101 | assert len(database.find_experiment(session, directory).ncfiles) == 2
102 |
103 |
104 | def test_unreadable(session_db, unreadable_dir):
105 | session, db = session_db
106 |
107 | with pytest.warns(UserWarning, match="Some files or directories could not be read"):
108 | indexed = database.build_index(str(unreadable_dir), session)
109 |
110 |
111 | def test_broken(session_db):
112 | session, db = session_db
113 | indexed = database.build_index("test/data/indexing/broken_file", session)
114 |
115 | # make sure the database was created
116 | assert db.exists()
117 |
118 | # we indexed a single file
119 | assert indexed == 1
120 |
121 | # query ncfiles table -- should have a single file, marked as empty
122 | q = session.query(database.NCFile)
123 | r = q.all()
124 | assert len(r) == 1
125 | assert not r[0].present
126 |
127 | # query ncvars table -- should be empty
128 | q = session.query(func.count(database.NCVar.id))
129 | assert q.scalar() == 0
130 |
131 |
132 | def test_empty_file(session_db):
133 | session, db = session_db
134 | indexed = database.build_index("test/data/indexing/empty_file", session)
135 |
136 | # as with test_broken, we should have seen a single file,
137 | # but it should be marked as empty
138 | assert db.exists()
139 | assert indexed == 1
140 | q = session.query(database.NCFile)
141 | r = q.all()
142 | assert len(r) == 1
143 | assert not r[0].present
144 |
145 | # but there should be a valid variable
146 | q = session.query(func.count(database.NCVar.id)).filter(
147 | database.NCVar.varname == "ty_trans_rho"
148 | )
149 | assert q.scalar() == 1
150 |
151 |
152 | def test_update_nonew(session_db):
153 | session, db = session_db
154 | database.build_index("test/data/indexing/broken_file", session)
155 | assert db.exists()
156 |
157 | # re-run the index, make sure we don't re-index anything
158 | reindexed = database.build_index(
159 | "test/data/indexing/broken_file", session, prune="flag"
160 | )
161 | assert reindexed == 0
162 |
163 |
164 | def test_reindex_force(session_db):
165 | session, db = session_db
166 | database.build_index("test/data/indexing/broken_file", session)
167 | assert db.exists()
168 |
169 | # re-run the index, make sure re-index
170 | reindexed = database.build_index(
171 | "test/data/indexing/broken_file", session, force=True
172 | )
173 | assert reindexed == 1
174 |
175 |
176 | def test_update_newfile(session_db, tmp_path):
177 | session, db = session_db
178 | shutil.copy(
179 | "test/data/indexing/longnames/output000/test1.nc", str(tmp_path / "test1.nc")
180 | )
181 | database.build_index(str(tmp_path), session)
182 |
183 | # add another file
184 | shutil.copy(
185 | "test/data/indexing/longnames/output000/test2.nc", str(tmp_path / "test2.nc")
186 | )
187 | reindexed = database.build_index(str(tmp_path), session)
188 | assert reindexed == 1
189 |
190 |
191 | def test_updated_file(session_db, tmp_path, caplog):
192 | session, db = session_db
193 |
194 | # Make tmp_path a concrete path otherwise filesystem ops won't work
195 | tmp_path = Path(tmp_path)
196 |
197 | ncfile = "test1.nc"
198 | ncpath = Path("test/data/indexing/longnames/output000/") / ncfile
199 | shutil.copy(str(ncpath), str(tmp_path / ncfile))
200 | indexed = database.build_index(str(tmp_path), session)
201 | assert indexed == 1
202 |
203 | # Should not reindex
204 | reindexed = database.build_index(str(tmp_path), session)
205 | assert reindexed == 0
206 |
207 | # Should reindex as file is updated
208 | time.sleep(1)
209 | (tmp_path / ncfile).touch()
210 | reindexed = database.build_index(str(tmp_path), session)
211 | assert reindexed == 1
212 |
213 | # Should not reindex as flagging as missing will not remove
214 | # file from the database, so will not be reindexed
215 | time.sleep(1)
216 | (tmp_path / ncfile).touch()
217 | with caplog.at_level(logging.WARNING):
218 | reindexed = database.build_index(str(tmp_path), session, prune="flag")
219 | assert reindexed == 0
220 | assert "Set prune to 'delete' to reindex updated files" in caplog.text
221 |
222 |
223 | def test_single_broken(session_db):
224 | session, db = session_db
225 | database.build_index("test/data/indexing/single_broken_file", session)
226 |
227 | # query ncfiles table -- should have two entries
228 | q = session.query(func.count(database.NCFile.id))
229 | assert q.scalar() == 2
230 |
231 | # query ncvars table -- should have a single entry
232 | q = session.query(func.count(database.NCVar.id))
233 | assert q.scalar() == 1
234 |
235 |
236 | def test_longnames(session_db):
237 | session, db = session_db
238 | database.build_index("test/data/indexing/longnames", session)
239 |
240 | # query ncvars table -- should have two entries
241 | q = session.query(func.count(database.NCVar.id))
242 | assert q.scalar() == 2
243 |
244 | # query generic table -- should only be a single variable
245 | q = session.query(database.CFVariable)
246 | r = q.all()
247 | assert len(r) == 1
248 | assert r[0].long_name == "Test Variable"
249 |
250 |
251 | def test_multiple_experiments(session_db):
252 | session, db = session_db
253 | # index multiple experiments, which have duplicate data and therefore push
254 | # against some unique constraints
255 | database.build_index(
256 | [
257 | "test/data/indexing/multiple/experiment_a",
258 | "test/data/indexing/multiple/experiment_b",
259 | ],
260 | session,
261 | )
262 |
263 | q = session.query(database.NCExperiment)
264 | assert q.count() == 2
265 |
266 |
267 | def test_same_expt_name(session_db):
268 | session, db = session_db
269 | # index multiple experiments with different root directories, but the same
270 | # final path component (experiment name)
271 | database.build_index(
272 | [
273 | "test/data/indexing/multiple/experiment_a",
274 | "test/data/indexing/alternate/experiment_a",
275 | ],
276 | session,
277 | )
278 |
279 | # the indexing shouldn't fail, and we should have two distinct experiments
280 | # with the same name
281 |
282 | q = session.query(database.NCExperiment).filter(
283 | database.NCExperiment.experiment == "experiment_a"
284 | )
285 | r = q.all()
286 | assert len(r) == 2
287 | assert r[0].root_dir != r[1].root_dir
288 |
289 |
290 | def test_following_symlinks(session_db):
291 | session, db = session_db
292 |
293 | # Indexing symlinked experiment should fail with default arguments
294 | database.build_index("test/data/indexing/symlinked/experiment_a", session)
295 |
296 | q = session.query(database.NCExperiment)
297 | assert q.count() == 0
298 |
299 | # Now specify to follow symlinks
300 | database.build_index(
301 | "test/data/indexing/symlinked/experiment_a", session, followsymlinks=True
302 | )
303 |
304 | q = session.query(database.NCExperiment)
305 | assert q.count() == 1
306 |
307 |
308 | def test_broken_metadata(session_db):
309 | session, db = session_db
310 | indexed = database.build_index("test/data/indexing/broken_metadata", session)
311 |
312 | assert indexed == 1
313 |
314 |
315 | def test_time_dimension(session_db):
316 | session, db = session_db
317 | database.build_index("test/data/indexing/time", session)
318 |
319 | q = session.query(database.NCFile.time_start, database.NCFile.time_end)
320 | assert q.count() == 5 # should pick up 5 files
321 |
322 |     q = q.filter(
323 |         (database.NCFile.time_start.is_(None)) | (database.NCFile.time_end.is_(None))
324 |     )
325 | assert q.count() == 0 # but all of them should have times populated
326 |
327 | # there should be 5 separate time variables
328 | q = session.query(database.CFVariable)
329 | assert q.count() == 5
330 |
331 | # each file should have exactly one time dimension
332 | q = (
333 | session.query(func.count(database.NCFile.ncvars))
334 | .join(database.NCFile.ncvars)
335 | .group_by(database.NCFile.id)
336 | )
337 | for r in q.all():
338 | assert r[0] == 1
339 |
340 |
341 | def test_missing_time_bounds(session_db):
342 | session, db = session_db
343 | database.build_index("test/data/indexing/time_bounds", session)
344 |
345 | # Should have one experiment
346 | q = session.query(database.NCExperiment)
347 | assert q.count() == 1
348 |
349 | # And one correctly indexed (present) file
350 | q = session.query(database.NCFile)
351 | r = q.all()
352 | assert len(r) == 1
353 | assert r[0].present
354 |
355 |
356 | def test_index_attributes(session_db):
357 | session, db = session_db
358 | database.build_index("test/data/querying", session)
359 |
360 | inspector = inspect(session.get_bind())
361 | assert assert_dictionaries_same(
362 | {
363 | "name": "ix_ncattributes_ncvar_id",
364 | "column_names": ["ncvar_id"],
365 | "unique": 0,
366 | },
367 | inspector.get_indexes("ncattributes")[0],
368 | )
369 |
370 | ncfile = "output000/ocean.nc"
371 |
372 | # check that we have the right attributes for a file (just use a subset)
373 | f = session.query(database.NCFile).filter(database.NCFile.ncfile == ncfile).one()
374 |
375 | file_attrs = {
376 | "filename": "ocean.nc",
377 | "title": "MOM5",
378 | "grid_type": "mosaic",
379 | "grid_tile": "1",
380 | }
381 | for attr, attr_val in file_attrs.items():
382 | assert attr in f.attrs and f.attrs[attr] == attr_val
383 |
384 | # and check a particular variable
385 | v = (
386 | session.query(database.NCVar)
387 | .join(database.NCFile)
388 | .filter(database.NCFile.ncfile == ncfile)
389 | .filter(database.NCVar.varname == "temp")
390 | .one()
391 | )
392 | var_attrs = {
393 | "long_name": "Potential temperature",
394 | "cell_methods": "time: mean",
395 | "coordinates": "geolon_t geolat_t",
396 | }
397 | for attr, attr_val in var_attrs.items():
398 | assert attr in v.attrs and v.attrs[attr] == attr_val
399 |
400 |
401 | def test_prune_broken(session_db):
402 | session, db = session_db
403 | database.build_index("test/data/indexing/broken_file", session)
404 |
405 | assert db.exists()
406 |
407 | # check that we have one file
408 | q = session.query(database.NCFile)
409 | r = q.all()
410 | assert len(r) == 1
411 |
412 | # prune experiment
413 | database.prune_experiment("broken_file", session)
414 |
415 | # now the database should be empty
416 | q = session.query(database.NCFile)
417 | r = q.all()
418 | assert len(r) == 0
419 |
420 |
421 | def test_prune_missing_experiment(session_db):
422 | session, db = session_db
423 | database.build_index("test/data/indexing/broken_file", session)
424 |
425 | assert db.exists()
426 |
427 | # check that we have one file
428 | q = session.query(database.NCFile)
429 | r = q.all()
430 | assert len(r) == 1
431 |
432 | # prune experiment
433 | experiment = "incorrect_experiment"
434 |     with pytest.raises(RuntimeError, match="No such experiment: {}".format(experiment)):
435 | database.prune_experiment(experiment, session)
436 |
437 |
438 | def test_prune_nodelete(session_db, tmp_path):
439 | session, db = session_db
440 | expt_dir = tmp_path / "expt"
441 | expt_dir.mkdir()
442 |
443 | # copy the file to a new experiment directory and index
444 | shutil.copy(
445 | "test/data/indexing/longnames/output000/test1.nc", str(expt_dir / "test1.nc")
446 | )
447 | database.build_index(str(expt_dir), session)
448 |
449 | # check that we have a valid file
450 | q = session.query(database.NCFile).filter(database.NCFile.present)
451 | r = q.all()
452 | assert len(r) == 1
453 |
454 | # remove the file and prune
455 | os.remove(expt_dir / "test1.nc")
456 | database.prune_experiment("expt", session, delete=False)
457 |
458 | # now we should still have one file, but now not present
459 | q = session.query(database.NCFile)
460 | r = q.one_or_none()
461 | assert r is not None
462 | assert not r.present
463 |
464 |
465 | def test_prune_delete(session_db, tmp_path):
466 | session, db = session_db
467 | expt_dir = tmp_path / "expt"
468 | expt_dir.mkdir()
469 |
470 | # copy the file to a new experiment directory and index
471 | shutil.copy(
472 | "test/data/indexing/longnames/output000/test1.nc", str(expt_dir / "test1.nc")
473 | )
474 | database.build_index(str(expt_dir), session)
475 |
476 | # check that we have a valid file
477 | q = session.query(database.NCFile).filter(database.NCFile.present)
478 | r = q.all()
479 | assert len(r) == 1
480 |
481 | # remove the file and prune
482 | os.remove(expt_dir / "test1.nc")
483 | database.prune_experiment("expt", session)
484 |
485 | # now we should still have no files
486 | q = session.query(database.NCFile)
487 | r = q.one_or_none()
488 | assert r is None
489 |
490 |
491 | def test_index_with_prune_nodelete(session_db, tmp_path):
492 | session, db = session_db
493 | expt_dir = tmp_path / "expt"
494 | expt_dir.mkdir()
495 |
496 | # copy the file to a new experiment directory and index
497 | shutil.copy(
498 | "test/data/indexing/longnames/output000/test1.nc", str(expt_dir / "test1.nc")
499 | )
500 | database.build_index(str(expt_dir), session)
501 |
502 | # check that we have a valid file
503 | q = session.query(database.NCFile).filter(database.NCFile.present)
504 | r = q.all()
505 | assert len(r) == 1
506 |
507 | # remove the file and build with pruning
508 | os.remove(expt_dir / "test1.nc")
509 | database.build_index(str(expt_dir), session, prune="flag")
510 |
511 | # now we should still have one file, but now not present
512 | q = session.query(database.NCFile)
513 | r = q.one_or_none()
514 | assert r is not None
515 | assert not r.present
516 |
517 |
518 | def test_index_with_prune_delete(session_db, tmp_path):
519 | session, db = session_db
520 | expt_dir = tmp_path / "expt"
521 | expt_dir.mkdir()
522 |
523 | # copy the file to a new experiment directory and index
524 | shutil.copy(
525 | "test/data/indexing/longnames/output000/test1.nc", str(expt_dir / "test1.nc")
526 | )
527 | database.build_index(str(expt_dir), session)
528 |
529 | # check that we have a valid file
530 | q = session.query(database.NCFile).filter(database.NCFile.present)
531 | r = q.all()
532 | assert len(r) == 1
533 |
534 | # remove the file and build with pruning
535 | os.remove(expt_dir / "test1.nc")
536 | database.build_index(str(expt_dir), session, prune="delete")
537 |
538 | # now we should still have no files
539 | q = session.query(database.NCFile)
540 | r = q.one_or_none()
541 | assert r is None
542 |
--------------------------------------------------------------------------------
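The pruning behaviour covered by the last few tests comes down to how build_index is re-run after files change on disk. A condensed sketch of that workflow, with illustrative paths:

from cosima_cookbook import database

session = database.create_session("/tmp/index.db")  # illustrative path
database.build_index("/path/to/experiment", session)

# ... files are later removed or touched on disk ...

# prune="flag" keeps the rows but marks missing files as not present;
# prune="delete" removes their rows (and allows updated files to be reindexed)
database.build_index("/path/to/experiment", session, prune="flag")
database.build_index("/path/to/experiment", session, prune="delete")
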
/test/test_metadata.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from datetime import datetime
3 |
4 | import pandas as pd
5 | from pandas.testing import assert_frame_equal
6 |
7 | from cosima_cookbook import database, querying
8 |
9 |
10 | def metadata_for_experiment(path, session, name="test", commit=True):
11 | """Method to read metadata for an experiment without requiring
12 | the rest of the indexing infrastructure.
13 | """
14 |
15 | expt = database.NCExperiment(experiment=name, root_dir=path)
16 | database.update_metadata(expt, session)
17 |
18 | if commit:
19 | session.add(expt)
20 | session.commit()
21 | else:
22 | return expt
23 |
24 |
25 | def test_metadata(session_db):
26 | """Test that metadata.yaml is read for an experiment during indexing"""
27 |
28 | session, db = session_db
29 | database.build_index("test/data/indexing/metadata", session)
30 |
31 | # query metadata
32 | q = session.query(
33 | database.NCExperiment.contact,
34 | database.NCExperiment.created,
35 | database.NCExperiment.description,
36 | )
37 | r = q.one()
38 | assert r[0] == "The ACCESS Oracle"
39 | assert r[1] == "2018-01-01"
40 | assert len(r[2]) > 0
41 |
42 |
43 | def test_get_experiments_metadata(session_db):
44 | """Test that get_experiments returns metadata correctly"""
45 |
46 | session, db = session_db
47 | database.build_index("test/data/indexing/metadata", session)
48 |
49 | r = querying.get_experiments(session, contact=True)
50 | df = pd.DataFrame.from_dict(
51 | {"experiment": ["metadata"], "contact": ["The ACCESS Oracle"], "ncfiles": [1]}
52 | )
53 | assert_frame_equal(r, df)
54 |
55 | r = querying.get_experiments(session, email=True)
56 | df = pd.DataFrame.from_dict(
57 | {"experiment": ["metadata"], "email": ["oracle@example.com"], "ncfiles": [1]}
58 | )
59 | assert_frame_equal(r, df)
60 |
61 | r = querying.get_experiments(session, url=True)
62 | df = pd.DataFrame.from_dict(
63 | {
64 | "experiment": ["metadata"],
65 | "url": ["https://github.com/COSIMA/oracle"],
66 | "ncfiles": [1],
67 | }
68 | )
69 | assert_frame_equal(r, df)
70 |
71 | r = querying.get_experiments(session, description=True)
72 | df = pd.DataFrame.from_dict(
73 | {
74 | "experiment": ["metadata"],
75 | "description": [
76 | (
77 | "Attempted spinup, using salt flux fix "
78 | "https://arccss.slack.com/archives/C6PP0GU9Y/p1515460656000124 "
79 | "and https://github.com/mom-ocean/MOM5/pull/208/commits/9f4ee6f8b72b76c96a25bf26f3f6cdf773b424d2 "
80 | "from the start. Used mushy ice from July year 1 onwards to avoid vertical thermo error in cice "
81 | "https://arccss.slack.com/archives/C6PP0GU9Y/p1515842016000079"
82 | )
83 | ],
84 | "ncfiles": [1],
85 | }
86 | )
87 | assert_frame_equal(r, df)
88 |
89 | r = querying.get_experiments(session, notes=True)
90 | df = pd.DataFrame.from_dict(
91 | {
92 | "experiment": ["metadata"],
93 | "notes": [
94 | (
95 | "Stripy salt restoring: "
96 | "https://github.com/OceansAus/access-om2/issues/74 tripole seam bug: "
97 | "https://github.com/OceansAus/access-om2/issues/86 requires dt=300s "
98 | "in May, dt=240s in Aug to maintain CFL in CICE near tripoles (storms "
99 | "in those months in 8485RYF); all other months work with dt=400s"
100 | )
101 | ],
102 | "ncfiles": [1],
103 | }
104 | )
105 | assert_frame_equal(r, df)
106 |
107 | r = querying.get_experiments(session, created=True)
108 | df = pd.DataFrame.from_dict(
109 | {"experiment": ["metadata"], "created": ["2018-01-01"], "ncfiles": [1]}
110 | )
111 | assert_frame_equal(r, df)
112 |
113 | r = querying.get_experiments(session, root_dir=True)
114 |     # Won't try to match a path that can change on different platforms
115 | # assert_frame_equal(r, df)
116 | assert r.shape == (1, 3)
117 |
118 | r = querying.get_experiments(session, all=True)
119 |     # Won't try to match everything, just check the dimensions are correct
120 | assert r.shape == (1, 9)
121 |
122 |     # Test disabling the experiment column in the returned DataFrame
123 | r = querying.get_experiments(session, experiment=False)
124 | df = pd.DataFrame.from_dict({"ncfiles": [1]})
125 | assert_frame_equal(r, df)
126 |
127 |
128 | def test_keywords(session_db):
129 | """Test that keywords are read for an experiment"""
130 |
131 | session, db = session_db
132 | metadata_for_experiment("test/data/metadata/keywords", session)
133 |
134 | q = session.query(database.NCExperiment).filter(
135 | database.NCExperiment.experiment == "test"
136 | )
137 | r = q.one()
138 | assert len(r.keywords) == 3
139 | assert "cosima" in r.keywords
140 | assert "not-a-keyword" not in r.keywords
141 |
142 |
143 | def test_duplicate_keywords_commit(session_db):
144 | """Test that the uniqueness constraint works across experiments.
145 | This simulates separate index calls, where the session is committed in between.
146 | """
147 |
148 | session, db = session_db
149 | metadata_for_experiment("test/data/metadata/keywords", session, name="e1")
150 | metadata_for_experiment("test/data/metadata/keywords2", session, name="e2")
151 |
152 | q = session.query(database.Keyword)
153 | r = q.all()
154 | assert len(r) == 4
155 |
156 |
157 | def test_duplicate_keywords_nocommit(session_db):
158 | """Test that the uniqueness constraint works across experiments.
159 | This simulates multiple experiments being added in a single call.
160 | """
161 |
162 | session, db = session_db
163 | e1 = metadata_for_experiment(
164 | "test/data/metadata/keywords", session, name="e1", commit=False
165 | )
166 | e2 = metadata_for_experiment(
167 | "test/data/metadata/keywords2", session, name="e2", commit=False
168 | )
169 | session.add_all([e1, e2])
170 | session.commit()
171 |
172 | q = session.query(database.Keyword)
173 | r = q.all()
174 | assert len(r) == 4
175 |
176 |
177 | def test_keyword_upcast(session_db):
178 | """Test that a string keyword is added correctly."""
179 |
180 | session, db = session_db
181 | metadata_for_experiment("test/data/metadata/string_keyword", session)
182 |
183 | q = session.query(database.NCExperiment).filter(
184 | database.NCExperiment.experiment == "test"
185 | )
186 | r = q.one()
187 | assert "cosima" in r.keywords
188 |     assert "c" not in r.keywords  # make sure the string wasn't split into characters
189 |
190 |
191 | def test_keyword_case_sensitivity(session_db):
192 | """Test that keywords are treated in a case-insensitive manner,
193 | both for metadata retrieval and querying.
194 | """
195 |
196 | session, db = session_db
197 | metadata_for_experiment("test/data/metadata/keywords", session, name="e1")
198 | metadata_for_experiment("test/data/metadata/upcase", session, name="e2")
199 |
200 | # we should be able to find the keyword in lowercase
201 | q = session.query(database.Keyword).filter(database.Keyword.keyword == "cosima")
202 | k1 = q.one_or_none()
203 | assert k1 is not None
204 |
205 | # and in uppercase
206 | q = session.query(database.Keyword).filter(database.Keyword.keyword == "COSIMA")
207 | k2 = q.one_or_none()
208 | assert k2 is not None
209 |
210 | # but they should resolve to the same keyword
211 | assert k1 is k2
212 |
213 | # finally, the set of keywords should all be lowercase
214 | q = session.query(database.NCExperiment).filter(
215 | database.NCExperiment.experiment == "e2"
216 | )
217 | r = q.one()
218 | for kw in r.keywords:
219 | assert kw == kw.lower()
220 |
221 |
222 | def test_get_keywords(session_db):
223 | """Test retrieval of keywords"""
224 |
225 | session, db = session_db
226 | metadata_for_experiment("test/data/metadata/keywords", session, name="e1")
227 | metadata_for_experiment("test/data/metadata/keywords2", session, name="e2")
228 |
229 | # Grab keywords for individual experiments
230 | r = querying.get_keywords(session, "e1")
231 | assert r == {"access-om2-01", "ryf9091", "cosima"}
232 |
233 | r = querying.get_keywords(session, "e2")
234 | assert r == {"another-keyword", "cosima"}
235 |
236 | # Test retrieving all keywords
237 | r = querying.get_keywords(session)
238 | assert r == {"access-om2-01", "ryf9091", "another-keyword", "cosima"}
239 |
240 |
241 | def test_get_experiments_with_keywords(session_db):
242 | """Test retrieval of experiments with keyword filtering"""
243 | session, db = session_db
244 | database.build_index("test/data/metadata/keywords", session)
245 | database.build_index("test/data/metadata/keywords2", session)
246 |
247 | # Test keyword common to both experiments
248 | r = querying.get_experiments(session, keywords="cosima")
249 | df = pd.DataFrame.from_dict(
250 | {"experiment": ["keywords", "keywords2"], "ncfiles": [1, 1]}
251 | )
252 | assert_frame_equal(r, df)
253 |
254 | # Test keyword common to both experiments using wildcard
255 | r = querying.get_experiments(session, keywords="cos%")
256 | df = pd.DataFrame.from_dict(
257 | {"experiment": ["keywords", "keywords2"], "ncfiles": [1, 1]}
258 | )
259 | assert_frame_equal(r, df)
260 |
261 | r = querying.get_experiments(session, keywords="%-%")
262 | df = pd.DataFrame.from_dict(
263 | {"experiment": ["keywords", "keywords2"], "ncfiles": [1, 1]}
264 | )
265 | assert_frame_equal(r, df)
266 |
267 | r = querying.get_experiments(session, keywords="access-om2%")
268 | df = pd.DataFrame.from_dict({"experiment": ["keywords"], "ncfiles": [1]})
269 | assert_frame_equal(r, df)
270 |
271 | # Test keyword in only one experiment
272 | r = querying.get_experiments(session, keywords="another-keyword")
273 | df = pd.DataFrame.from_dict({"experiment": ["keywords2"], "ncfiles": [1]})
274 | assert_frame_equal(r, df)
275 |
276 | r = querying.get_experiments(session, keywords="ryf9091")
277 | df = pd.DataFrame.from_dict({"experiment": ["keywords"], "ncfiles": [1]})
278 | assert_frame_equal(r, df)
279 |
280 | # Test passing an array of keywords that match only one experiment
281 | r = querying.get_experiments(session, keywords=["cosima", "another-keyword"])
282 | df = pd.DataFrame.from_dict({"experiment": ["keywords2"], "ncfiles": [1]})
283 | assert_frame_equal(r, df)
284 |
285 | # Test passing an array of keywords that will not match any one experiment
286 | r = querying.get_experiments(session, keywords=["another-keyword", "ryf9091"])
287 | df = pd.DataFrame(columns=["experiment", "ncfiles"])
288 | assert_frame_equal(r, df)
289 |
290 |     # Test passing a non-existent keyword along with one that is present. Should
291 |     # return nothing, as no experiment contains the non-existent keyword
292 | r = querying.get_experiments(session, keywords=["ryf9091", "not-a-keyword"])
293 | df = pd.DataFrame(columns=["experiment", "ncfiles"])
294 | assert_frame_equal(r, df)
295 |
296 | # Test passing only a non-existent keyword
297 | r = querying.get_experiments(session, keywords=["not-a-keyword"])
298 | df = pd.DataFrame(columns=["experiment", "ncfiles"])
299 | assert_frame_equal(r, df)
300 |
301 | # Test passing only a non-existent wildcard keyword
302 | r = querying.get_experiments(session, keywords=["z%"])
303 | df = pd.DataFrame(columns=["experiment", "ncfiles"])
304 | assert_frame_equal(r, df)
305 |
306 |
307 | def test_getvar_with_metadata(session_db):
308 | session, db = session_db
309 | database.build_index("test/data/indexing/metadata", session)
310 |
311 | with querying.getvar("metadata", "test", session, decode_times=False) as v:
312 | assert v.attrs["long_name"] == "Test Variable"
313 | assert v.attrs["contact"] == "The ACCESS Oracle"
314 | assert v.attrs["email"] == "oracle@example.com"
315 | assert v.attrs["created"] == "2018-01-01"
316 | assert "description" in v.attrs
317 |
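
A short usage sketch of the keyword queries covered by the tests above, assuming a previously indexed database; the database path and experiment name are illustrative:

import cosima_cookbook as cc

session = cc.database.create_session("cosima.db")    # hypothetical database file

# all keywords known to the database, or just those of one experiment
all_keywords = cc.querying.get_keywords(session)
expt_keywords = cc.querying.get_keywords(session, "my_experiment")   # hypothetical name

# filter experiments by keyword; SQL-style wildcards and lists are accepted,
# and a list requires an experiment to match every keyword given
cc.querying.get_experiments(session, keywords="cosima")
cc.querying.get_experiments(session, keywords=["access-om2%", "ryf9091"])
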
--------------------------------------------------------------------------------
/test/test_querying.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | from datetime import datetime
4 |
5 | import pytest
6 |
7 | import xarray as xr
8 | import pandas as pd
9 | from pandas.testing import assert_frame_equal, assert_series_equal
10 | import numpy as np
11 |
12 | import cosima_cookbook as cc
13 | from cosima_cookbook.querying import QueryWarning
14 | from cosima_cookbook.database import NCFile, CFVariable
15 |
16 |
17 | @pytest.fixture(scope="module")
18 | def session(tmp_path_factory):
19 | # index test directory into temp database
20 | d = tmp_path_factory.mktemp("database")
21 | db = d / "test.db"
22 | session = cc.database.create_session(str(db))
23 |
24 | # build index for entire module
25 | cc.database.build_index(
26 | ["test/data/querying", "test/data/querying_disambiguation"], session
27 | )
28 |
29 | # force all files to be marked as present, even though they're empty
30 | ncfiles = session.query(cc.database.NCFile).all()
31 | for f in ncfiles:
32 | f.present = True
33 | session.commit()
34 |
35 | return session
36 |
37 |
38 | def test_valid_query(session):
39 | with cc.querying.getvar("querying", "temp", session, decode_times=False) as v:
40 | assert isinstance(v, xr.DataArray)
41 | assert len(v.attrs["ncfiles"]) == 1
42 | assert v.attrs["ncfiles"][0].endswith("test/data/querying/output000/ocean.nc")
43 | # Make sure other fields aren't included in attributes
44 | assert "index" not in v.attrs
45 | assert "root_dir" not in v.attrs
46 | # Make sure empty metadata fields haven't been included as attributes
47 | assert "contact" not in v.attrs
48 | assert "notes" not in v.attrs
49 | assert "description" not in v.attrs
50 | assert "email" not in v.attrs
51 |
52 |
53 | def test_invalid_query(session):
54 | with pytest.raises(cc.querying.VariableNotFoundError):
55 | cc.querying.getvar("querying", "notfound", session, decode_times=False)
56 |
57 |
58 | def test_warning_on_ambiguous_attr(session):
59 | with pytest.warns(QueryWarning) as record:
60 | cc.querying._ncfiles_for_variable(
61 | "querying_disambiguation",
62 | "v",
63 | session,
64 | attrs_unique={"cell_methods": "bar"},
65 | )
66 |
67 | assert len(record) == 1
68 | assert (
69 | record[0]
70 | .message.args[0]
71 | .startswith(
72 | "Your query returns variables from files with different cell_methods"
73 | )
74 | )
75 |
76 | with pytest.warns(QueryWarning) as record:
77 | files = cc.querying._ncfiles_for_variable(
78 | "querying_disambiguation",
79 | "u",
80 | session,
81 | attrs_unique={"cell_methods": "time: no_valid"},
82 | )
83 |
84 | assert len(files) == 2
85 | assert len(record) == 1
86 | assert (
87 | record[0]
88 | .message.args[0]
89 | .startswith(
90 | "Your query returns variables from files with different cell_methods"
91 | )
92 | )
93 |
94 |     # Raise an exception if QueryWarning is set to error
95 | warnings.simplefilter("error", QueryWarning)
96 | with pytest.raises(QueryWarning) as record:
97 | cc.querying._ncfiles_for_variable(
98 | "querying_disambiguation",
99 | "v",
100 | session,
101 | attrs_unique={"cell_methods": "bar"},
102 | )
103 |
104 | with warnings.catch_warnings(record=True) as record:
105 |         # With QueryWarning ignored, the call runs without raising
106 |         # an exception and record stays empty
107 | warnings.simplefilter("ignore", QueryWarning)
108 |
109 | cc.querying._ncfiles_for_variable(
110 | "querying_disambiguation",
111 | "v",
112 | session,
113 | attrs_unique={"cell_methods": "bar"},
114 | )
115 |
116 | assert len(record) == 0
117 |
118 |
119 | def test_disambiguation_on_default_attr(session):
120 | files = cc.querying._ncfiles_for_variable(
121 | "querying_disambiguation",
122 | "v",
123 | session,
124 | attrs_unique={"cell_methods": "mean_pow(02)"},
125 | )
126 |
127 | assert len(files) == 1
128 | assert files[0].NCVar.attrs["cell_methods"] == "mean_pow(02)"
129 |
130 | files = cc.querying._ncfiles_for_variable(
131 | "querying_disambiguation",
132 | "v",
133 | session,
134 | attrs_unique={"cell_methods": "time: mean"},
135 | )
136 |
137 | assert len(files) == 1
138 | assert files[0].NCVar.attrs["cell_methods"] == "time: mean"
139 |
140 | # One file has no cell_methods attribute
141 | files = cc.querying._ncfiles_for_variable(
142 | "querying_disambiguation",
143 | "u",
144 | session,
145 | attrs_unique={"cell_methods": "time: mean"},
146 | )
147 |
148 | assert len(files) == 1
149 | assert files[0].NCVar.attrs["cell_methods"] == "time: mean"
150 |
151 |     # Add another unique attribute that is not present in the files (should be ignored)
152 | files = cc.querying._ncfiles_for_variable(
153 | "querying_disambiguation",
154 | "v",
155 | session,
156 | attrs_unique={"cell_methods": "time: mean", "foo": "bar"},
157 | )
158 |
159 | assert len(files) == 1
160 | assert files[0].NCVar.attrs["cell_methods"] == "time: mean"
161 |
162 |
163 | def test_query_times(session):
164 | with cc.querying.getvar("querying", "ty_trans", session) as v:
165 | assert isinstance(v, xr.DataArray)
166 |
167 |
168 | def test_chunk_parsing_chunked(session):
169 | var = (
170 | session.query(cc.database.NCVar)
171 | .filter(cc.database.NCVar.varname == "salt")
172 | .first()
173 | )
174 |
175 | chunk_dict = {
176 | "time": 1,
177 | "st_ocean": 15,
178 | "yt_ocean": 216,
179 | "xt_ocean": 288,
180 | }
181 |
182 | assert cc.querying._parse_chunks(var) == chunk_dict
183 |
184 |
185 | def test_chunk_parsing_contiguous(session):
186 | var = (
187 | session.query(cc.database.NCVar)
188 | .filter(cc.database.NCVar.varname == "potrho")
189 | .first()
190 | )
191 |
192 | assert var.chunking == "contiguous"
193 | assert cc.querying._parse_chunks(var) is None
194 |
195 |
196 | def test_chunk_parsing_unchunked(session):
197 | var = (
198 | session.query(cc.database.NCVar)
199 | .filter(cc.database.NCVar.varname == "hi_m")
200 | .first()
201 | )
202 |
203 | assert var.chunking == "None"
204 | assert cc.querying._parse_chunks(var) is None
205 |
206 |
207 | def test_get_experiments(session):
208 | r = cc.querying.get_experiments(session)
209 |
210 | df = pd.DataFrame.from_dict(
211 | {"experiment": ["querying", "querying_disambiguation"], "ncfiles": [3, 2]}
212 | )
213 | assert_frame_equal(r, df)
214 |
215 | metadata_keys = [
216 | "root_dir",
217 | "contact",
218 | "email",
219 | "created",
220 | "url",
221 | "description",
222 | "notes",
223 | ]
224 |
225 |     # Won't try to match everything: there is not much useful metadata, so just
226 |     # check the dimensions are correct. Metadata correctness is checked in test_metadata
227 | for k in metadata_keys:
228 | r = cc.querying.get_experiments(session, **{k: True})
229 | assert k == r.columns[1]
230 | assert r.shape == (2, 3)
231 |
232 | # Test all = True to select all available metadata
233 | r = cc.querying.get_experiments(session, all=True)
234 | assert r.shape == (2, 9)
235 |
236 | # Functionally equivalent to above
237 | r = cc.querying.get_experiments(session, **{k: True for k in metadata_keys})
238 | assert r.shape == (2, 9)
239 |
240 |     # Restrict to a single experiment and drop the experiment column
241 | r = cc.querying.get_experiments(
242 | session, experiment=False, exptname="querying", all=True
243 | )
244 | assert r.shape == (1, 8)
245 | assert "experiment" not in r
246 |
247 | # Test for filtering by variables
248 | in_both = {"potrho_edges", "age_global", "tx_trans_rho"}
249 | only_in_querying = {"hi_m", "ty_trans"}
250 |
251 | r = cc.querying.get_experiments(session, variables=in_both)
252 | assert r.shape == (2, 2)
253 |
254 | r = cc.querying.get_experiments(session, variables=(in_both | only_in_querying))
255 | assert r.shape == (1, 2)
256 |
257 | r = cc.querying.get_experiments(
258 | session, variables=(in_both | only_in_querying | {"none"})
259 | )
260 | assert r.shape == (0, 2)
261 |
262 |
263 | def test_get_ncfiles(session):
264 | r = cc.querying.get_ncfiles(session, "querying")
265 |
266 | df = pd.DataFrame.from_dict(
267 | {
268 | "ncfile": [
269 | "output000/hi_m.nc",
270 | "output000/ocean.nc",
271 | "restart000/ty_trans.nc",
272 | ],
273 | "index_time": [
274 | pd.Timestamp("2019-08-09 21:51:12.090930"),
275 | pd.Timestamp("2019-08-09 21:51:12.143794"),
276 | pd.Timestamp("2019-08-09 21:51:12.148942"),
277 | ],
278 | }
279 | )
280 |
281 | # The Timestamps will not be the same so check only that the ncfiles are correct
282 | assert_series_equal(r["ncfile"], df["ncfile"])
283 |
284 |
285 | def test_get_variables(session):
286 | r = cc.querying.get_variables(session, "querying", "1 monthly")
287 |
288 | df = pd.DataFrame.from_dict(
289 | {
290 | "name": ["TLAT", "TLON", "hi_m", "tarea", "time", "time_bounds"],
291 | "long_name": [
292 | "T grid center latitude",
293 | "T grid center longitude",
294 | "grid cell mean ice thickness",
295 | "area of T grid cells",
296 | "model time",
297 | "boundaries for time-averaging interval",
298 | ],
299 | "units": [
300 | "degrees_north",
301 | "degrees_east",
302 | "m",
303 | "m^2",
304 | "days since 1900-01-01 00:00:00",
305 | "days since 1900-01-01 00:00:00",
306 | ],
307 | "frequency": ["1 monthly"] * 6,
308 | "ncfile": ["output000/hi_m.nc"] * 6,
309 | "cell_methods": [None, None, "time: mean", None, None, None],
310 | "# ncfiles": [1] * 6,
311 | "time_start": ["1900-01-01 00:00:00"] * 6,
312 | "time_end": ["1900-02-01 00:00:00"] * 6,
313 | }
314 | )
315 |
316 | assert_frame_equal(r, df)
317 |
318 | r = cc.querying.get_variables(session, "querying", search="temp")
319 |
320 | df = pd.DataFrame.from_dict(
321 | {
322 | "name": ["diff_cbt_t", "temp", "temp_xflux_adv", "temp_yflux_adv"],
323 | "long_name": [
324 | "total vert diff_cbt(temp) (w/o neutral included)",
325 | "Potential temperature",
326 | "cp*rho*dzt*dyt*u*temp",
327 | "cp*rho*dzt*dxt*v*temp",
328 | ],
329 | "units": ["m^2/s", "degrees K", "Watts", "Watts"],
330 | "frequency": [None] * 4,
331 | "ncfile": ["output000/ocean.nc"] * 4,
332 | "cell_methods": ["time: mean"] * 4,
333 | "# ncfiles": [1] * 4,
334 | "time_start": [None] * 4,
335 | "time_end": [None] * 4,
336 | }
337 | )
338 |
339 | assert_frame_equal(r, df)
340 |
341 | r = cc.querying.get_variables(session, search="temp")
342 |
343 | df = pd.DataFrame.from_dict(
344 | {
345 | "name": ["diff_cbt_t", "temp", "temp_xflux_adv", "temp_yflux_adv"],
346 | "long_name": [
347 | "total vert diff_cbt(temp) (w/o neutral included)",
348 | "Potential temperature",
349 | "cp*rho*dzt*dyt*u*temp",
350 | "cp*rho*dzt*dxt*v*temp",
351 | ],
352 | "units": ["m^2/s", "degrees K", "Watts", "Watts"],
353 | }
354 | )
355 |
356 | assert_frame_equal(r, df)
357 |
358 | r = cc.querying.get_variables(session, search=("temp", "velocity"))
359 |
360 | df = pd.DataFrame.from_dict(
361 | {
362 | "name": [
363 | "diff_cbt_t",
364 | "temp",
365 | "temp_xflux_adv",
366 | "temp_yflux_adv",
367 | "u",
368 | "v",
369 | "wt",
370 | ],
371 | "long_name": [
372 | "total vert diff_cbt(temp) (w/o neutral included)",
373 | "Potential temperature",
374 | "cp*rho*dzt*dyt*u*temp",
375 | "cp*rho*dzt*dxt*v*temp",
376 | "i-current",
377 | "j-current",
378 | "dia-surface velocity T-points",
379 | ],
380 | "units": [
381 | "m^2/s",
382 | "degrees K",
383 | "Watts",
384 | "Watts",
385 | "m/sec",
386 | "m/sec",
387 | "m/sec",
388 | ],
389 | }
390 | )
391 |
392 | r = cc.querying.get_variables(session, search=("temp", "velocity"))
393 |
394 | df = pd.DataFrame.from_dict(
395 | {
396 | "name": [
397 | "diff_cbt_t",
398 | "temp",
399 | "temp_xflux_adv",
400 | "temp_yflux_adv",
401 | "u",
402 | "v",
403 | "wt",
404 | ],
405 | "long_name": [
406 | "total vert diff_cbt(temp) (w/o neutral included)",
407 | "Potential temperature",
408 | "cp*rho*dzt*dyt*u*temp",
409 | "cp*rho*dzt*dxt*v*temp",
410 | "i-current",
411 | "j-current",
412 | "dia-surface velocity T-points",
413 | ],
414 | "units": [
415 | "m^2/s",
416 | "degrees K",
417 | "Watts",
418 | "Watts",
419 | "m/sec",
420 | "m/sec",
421 | "m/sec",
422 | ],
423 | "frequency": [None] * 7,
424 | "ncfile": ["output000/ocean.nc"] * 7,
425 | "# ncfiles": [1] * 7,
426 | "time_start": [None] * 7,
427 | "time_end": [None] * 7,
428 | }
429 | )
430 |
431 |
432 | def test_model_property(session):
433 | filename_map = {
434 | "ocean": (
435 | "output/ocean/ice.nc",
436 | "output/ocn/land.nc",
437 | "output/ocean/atmos.nc",
438 | "ocean/ocean_daily.nc",
439 | "output/ocean/ocean_daily.nc.0000",
440 | "ocean/atmos.nc",
441 | ),
442 | "atmosphere": (
443 | "output/atm/fire.nc",
444 | "output/atmos/ice.nc",
445 | "output/atmosphere/ice.nc",
446 | "atmosphere/ice.nc",
447 | "atmos/ice.nc",
448 | ),
449 | "land": (
450 | "output/land/fire.nc",
451 | "output/lnd/ice.nc",
452 | "land/fire.nc",
453 | "lnd/ice.nc",
454 | ),
455 | "ice": (
456 | "output/ice/fire.nc",
457 | "output/ice/in/here/land.nc",
458 | "ice/fire.nc",
459 | "ice/in/here/land.nc",
460 | ),
461 | "none": (
462 | "output/ocean.nc", # only a model if part of path, not filename
463 | "someotherpath/ocean_daily.nc",
464 | "lala/land_daily.nc.0000",
465 | "output/atmosphere_ice.nc",
466 | "output/noice/in/here/land.nc",
467 | ),
468 | }
469 | for model in filename_map:
470 | for fpath in filename_map[model]:
471 | ncfile = NCFile(
472 | index_time=datetime.now(),
473 | ncfile=fpath,
474 | present=True,
475 | )
476 | assert ncfile.model == model
477 |
478 |
479 | def test_is_restart_property(session):
480 | filename_map = {
481 | True: (
482 | "output/restart/ice.nc",
483 | "output/restart000/land.nc",
484 | "restart/land.nc",
485 | ),
486 | False: (
487 | "output/restartice.nc",
488 | "output/lastrestart/land.nc",
489 | ),
490 | }
491 | for isrestart in filename_map:
492 | for fpath in filename_map[isrestart]:
493 | ncfile = NCFile(
494 | index_time=datetime.now(),
495 | ncfile=fpath,
496 | present=True,
497 | )
498 | assert ncfile.is_restart == isrestart
499 |
500 |     # Grab all variables and ensure the SQL classification matches the Python version
501 |     # There may be gaps, as not all cases are guaranteed to be covered
502 | for index, row in cc.querying.get_variables(
503 | session, "querying", inferred=True
504 | ).iterrows():
505 | ncfile = NCFile(
506 | index_time=datetime.now(),
507 | ncfile=row.ncfile,
508 | present=True,
509 | )
510 | assert ncfile.is_restart == row.restart
511 |
512 |
513 | def test_is_coordinate_property(session):
514 | units_map = {
515 | True: (
516 | "degrees_",
517 | "degrees_E",
518 | "degrees_N",
519 | "degrees_east",
520 | "hours since a long time ago",
521 | "radians",
522 | "days",
523 | "days since a while ago",
524 | ),
525 | False: ("degrees K",),
526 | }
527 |
528 | for iscoord in units_map:
529 | for units in units_map[iscoord]:
530 | assert CFVariable(name="bogus", units=units).is_coordinate == iscoord
531 |
532 |     # Grab all variables and ensure the SQL classification matches the Python version
533 |     # There may be gaps, as not all cases are guaranteed to be covered
534 | for index, row in cc.querying.get_variables(session, inferred=True).iterrows():
535 | assert (
536 | CFVariable(name=row["name"], units=row.units).is_coordinate
537 | == row.coordinate
538 | )
539 |
540 |
541 | def test_get_frequencies(session):
542 | r = cc.querying.get_frequencies(session, "querying")
543 |
544 | df = pd.DataFrame.from_dict({"frequency": [None, "1 monthly", "1 yearly"]})
545 |
546 | assert_frame_equal(r, df)
547 |
548 |
549 | def test_disambiguation_by_frequency(session):
550 | with pytest.warns(UserWarning) as record:
551 | assert len(cc.querying._ncfiles_for_variable("querying", "time", session)) == 3
552 |
553 | if len(record) != 1:
554 | raise ValueError("|".join([r.message.args[0] for r in record]))
555 |
556 | assert len(record) == 1
557 | assert (
558 | record[0]
559 | .message.args[0]
560 | .startswith("Your query returns files with differing frequencies:")
561 | )
562 |
563 | assert (
564 | len(
565 | cc.querying._ncfiles_for_variable(
566 | "querying", "time", session, frequency="1 monthly"
567 | )
568 | )
569 | == 1
570 | )
571 | assert (
572 | len(
573 | cc.querying._ncfiles_for_variable(
574 | "querying", "time", session, frequency="1 yearly"
575 | )
576 | )
577 | == 1
578 | )
579 |
580 | # Both of these select a single file and successfully return an xarray object
581 | assert cc.querying.getvar(
582 | "querying", "time", session, frequency="1 monthly"
583 | ).shape == (1,)
584 | assert cc.querying.getvar(
585 | "querying", "time", session, frequency="1 yearly"
586 | ).shape == (2,)
587 |
588 |
589 | def test_time_bounds_on_dataarray(session):
590 | var_salt = cc.querying.getvar(
591 | "querying", "salt", session, decode_times=False, return_dataset=True
592 | )
593 |
594 | # we should have added time_bounds into the DataArray's attributes
595 | assert "time_bounds" in var_salt
596 |
597 | # and time_bounds should itself be a DataArray
598 | assert isinstance(var_salt["time_bounds"], xr.DataArray)
599 |
600 |
601 | def test_query_with_attrs(session):
602 | attrs = {
603 | "long_name": "Practical Salinity",
604 | "units": "psu",
605 | }
606 |
607 | # a valid set of attributes
608 | var_salt = cc.querying.getvar(
609 | "querying", "salt", session, decode_times=False, attrs=attrs
610 | )
611 |
612 | for attr, val in attrs.items():
613 | assert var_salt.attrs[attr] == val
614 |
615 | # make sure that this is actually applied as an additional filter
616 | # by making failing queries
617 | # first: incorrect attribute value
618 | with pytest.raises(cc.querying.VariableNotFoundError):
619 | cc.querying.getvar(
620 | "querying",
621 | "salt",
622 | session,
623 | decode_times=False,
624 | attrs={"units": "degrees K"},
625 | )
626 |
627 | # second: non-present attribute name
628 | with pytest.raises(cc.querying.VariableNotFoundError):
629 | cc.querying.getvar(
630 | "querying", "salt", session, decode_times=False, attrs={"not_found": "psu"}
631 | )
632 |
633 |
634 | def test_query_chunks(session, caplog):
635 | with cc.querying.getvar(
636 | "querying", "ty_trans", session, chunks={"invalid": 99}
637 | ) as v:
638 | assert "chunking along dimensions {'invalid'} is not possible" in caplog.text
639 |
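
A short sketch of the getvar options exercised above (frequency disambiguation, attribute filtering, time decoding); the experiment name, variable and database path are illustrative assumptions:

import cosima_cookbook as cc

session = cc.database.create_session("cosima.db")    # hypothetical database file

# select a variable, disambiguating by output frequency and attribute values
salt = cc.querying.getvar(
    "my_experiment",            # hypothetical experiment name
    "salt",
    session,
    frequency="1 monthly",
    attrs={"units": "psu"},
    decode_times=False,
)
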
--------------------------------------------------------------------------------
/test/test_sqa14.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from cosima_cookbook.database import *
3 |
4 |
5 | def test_empty_file(session_db):
6 | session, db = session_db
7 |
8 | exp = NCExperiment(experiment="a", root_dir="b")
9 | file = NCFile()
10 |
11 | file.experiment = exp
12 |
13 | session.add(exp)
14 | session.commit()
15 |
16 | assert session.query(NCFile).count() == 1
17 | assert session.query(NCExperiment).count() == 1
18 |
19 |
20 | def test_file_one_var(session_db):
21 | session, db = session_db
22 |
23 | exp = NCExperiment(experiment="a", root_dir="b")
24 | file = NCFile()
25 | cfvar = CFVariable(name="c")
26 | var = NCVar()
27 |
28 | file.experiment = exp
29 | var.ncfile = file
30 | var.variable = cfvar
31 |
32 | session.add(exp)
33 | session.commit()
34 |
35 | assert session.query(NCFile).count() == 1
36 | assert session.query(NCVar).count() == 1
37 |
38 |
39 | def test_file_attr(session_db):
40 | session, db = session_db
41 |
42 | exp = NCExperiment(experiment="a", root_dir="b")
43 | file = NCFile()
44 | cfvar = CFVariable(name="c")
45 | var = NCVar()
46 |
47 | file.experiment = exp
48 | file.attrs["x"] = "y"
49 |
50 | session.add(exp)
51 | session.commit()
52 |
53 | assert session.query(NCFile).count() == 1
54 | assert session.query(NCAttribute).count() == 1
55 | assert session.query(NCAttributeString).count() == 2
56 |
57 | # Add another attribute with duplicate string
58 | file.attrs["z"] = "y"
59 |
60 | session.add(exp)
61 | session.commit()
62 |
63 | assert session.query(NCFile).count() == 1
64 | assert session.query(NCAttribute).count() == 2
65 | assert session.query(NCAttributeString).count() == 3
66 |
67 |
68 | def test_var_attr(session_db):
69 | session, db = session_db
70 |
71 | exp = NCExperiment(experiment="a", root_dir="b")
72 | file = NCFile()
73 | cfvar = CFVariable(name="c")
74 | var = NCVar()
75 |
76 | file.experiment = exp
77 | var.ncfile = file
78 | var.variable = cfvar
79 | var.attrs["x"] = "y"
80 |
81 | session.add(exp)
82 | session.commit()
83 |
84 | assert session.query(NCFile).count() == 1
85 | assert session.query(NCAttribute).count() == 1
86 | assert session.query(NCAttributeString).count() == 2
87 |
88 | # Add another attribute with duplicate string
89 | var.attrs["z"] = "y"
90 |
91 | session.add(exp)
92 | session.commit()
93 |
94 | assert session.query(NCAttribute).count() == 2
95 | assert session.query(NCAttributeString).count() == 3
96 |
97 | # Add an attribute to the file
98 | file.attrs["y"] = "x"
99 |
100 | session.add(exp)
101 | session.commit()
102 |
103 | assert session.query(NCAttribute).count() == 3
104 | assert session.query(NCAttributeString).count() == 3
105 |
106 |
107 | def test_index_file(session_db):
108 | session, db = session_db
109 |
110 | exp = NCExperiment(experiment="a", root_dir="test/data/querying")
111 |
112 | file = index_file("output000/ocean.nc", exp, session)
113 |
114 | session.add(exp)
115 | session.commit()
116 |
117 | assert session.query(NCFile).count() == 1
118 | assert session.query(CFVariable).count() == 38
119 | assert session.query(NCVar).count() == 38
120 | assert session.query(NCAttribute).count() == 243 - 18
121 |
122 | var = session.query(NCVar).filter(NCVar.varname == "temp").one()
123 | assert var.attrs["long_name"] == "Potential temperature"
124 |
125 |
126 | def test_file_delete(session_db):
127 | session, db = session_db
128 |
129 | exp = NCExperiment(experiment="a", root_dir="test/data/querying")
130 |
131 | file = index_file("output000/ocean.nc", exp, session)
132 |
133 | session.add(exp)
134 | session.commit()
135 |
136 | assert session.query(NCFile).count() == 1
137 |
138 | session.delete(file)
139 | session.commit()
140 |
141 | assert session.query(NCExperiment).count() == 1
142 | assert session.query(NCFile).count() == 0
143 | assert session.query(CFVariable).count() == 38 # Not cascaded
144 | assert session.query(NCVar).count() == 0
145 | assert session.query(NCAttribute).count() == 0
146 | assert session.query(NCAttributeString).count() == 114 # Not cascaded
147 |
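
A minimal sketch of indexing a single file through the ORM layer tested above; the database path, experiment name and root directory are illustrative assumptions:

from cosima_cookbook.database import NCExperiment, create_session, index_file

session = create_session("cosima.db")                    # hypothetical database file

exp = NCExperiment(experiment="demo", root_dir="/path/to/experiment")   # illustrative
ncfile = index_file("output000/ocean.nc", exp, session)  # path relative to root_dir

session.add(exp)
session.commit()
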
--------------------------------------------------------------------------------
/test/test_update.py:
--------------------------------------------------------------------------------
1 | import shlex
2 | from cosima_cookbook import database_update
3 |
4 |
5 | def test_database_update(tmp_path):
6 | args = shlex.split(
7 | "-db {db} test/data/update/experiment_a test/data/update/experiment_b".format(
8 | db=tmp_path / "test.db"
9 | )
10 | )
11 |
12 | database_update.main(args)
13 |
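
The same entry point, called with illustrative arguments outside the test data; the database path and experiment directories are assumptions:

import shlex
from cosima_cookbook import database_update

# build an argument list equivalent to a command-line invocation
args = shlex.split("-db cosima.db /path/to/experiment_a /path/to/experiment_b")
database_update.main(args)
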
--------------------------------------------------------------------------------