├── runtime.txt ├── requirements.txt ├── Procfile ├── notebooks ├── utils │ ├── cime.py │ ├── __init__.py │ ├── config.py │ ├── utils_units.py │ ├── compare_ts_and_hist.py │ ├── utils.py │ ├── PlotTypeClass.py │ ├── Plotting.py │ └── CaseClass.py ├── run_all.py ├── dashboard.yaml ├── run_notebooks.sh ├── plot_suite_maps_0095_1deg.ipynb ├── diag_metadata.yaml ├── compare_ts_and_hist_003.ipynb ├── plot_suite_maps_0001_003.ipynb ├── plot_suite_maps_0001_004.ipynb ├── trend_maps.003.ipynb ├── trend_maps.004.ipynb ├── plot_suite_003.ipynb ├── plot_suite_004.ipynb ├── compare_ts_and_hist_004.ipynb ├── plot_suite_1deg.ipynb └── gen_csv.ipynb ├── .github └── workflows │ ├── verify_pre-commit.yaml │ └── ci.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── environments └── environment.yaml ├── tests ├── test_utils_units.py ├── xr_ds_ex.py └── test_utils.py ├── data_reshaping ├── pop.h_t13.sh ├── cice.h_t13.sh ├── cice.h1_t13.sh ├── pop.h.nyear1_t13.sh ├── pop.h.nday1_t13.sh └── run_all.py └── README.md /runtime.txt: -------------------------------------------------------------------------------- 1 | python-3.9.2 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | hvplot 2 | jupyterlab 3 | fsspec 4 | psutil 5 | aiohttp 6 | git+https://github.com/andersy005/panelify.git 7 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: panel serve --address="0.0.0.0" --port=$PORT notebooks/Interactive_Dashboard.ipynb --allow-websocket-origin=hires-cesm-analysis.dokku.projectpythia.org --stats-log-frequency 100000 --mem-log-frequency 100000 2 | -------------------------------------------------------------------------------- /notebooks/utils/cime.py: -------------------------------------------------------------------------------- 1 | """ 2 | methods specific to CIME, but independent of models/components that are run with CIME 3 | """ 4 | 5 | import subprocess 6 | 7 | 8 | def cime_xmlquery(caseroot, varname): 9 | """run CIME's xmlquery for varname in the directory caseroot, return the value""" 10 | return subprocess.check_output( 11 | ["./xmlquery", "--value", varname], cwd=caseroot 12 | ).decode() 13 | -------------------------------------------------------------------------------- /notebooks/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # make methods available for usage externally and in notebooks 2 | 3 | from .CaseClass import CaseClass 4 | from .Plotting import ( 5 | compare_fields_at_lat_lon, 6 | plot_dict_with_date_keys, 7 | summary_plot_global_ts, 8 | summary_plot_histogram, 9 | summary_plot_maps, 10 | trend_plot, 11 | ) 12 | from .utils import ( 13 | gen_output_roots_from_caseroot, 14 | get_varnames_from_metadata_list, 15 | timeseries_and_history_comparison, 16 | generate_plot_catalog, 17 | ) 18 | -------------------------------------------------------------------------------- /.github/workflows/verify_pre-commit.yaml: -------------------------------------------------------------------------------- 1 | name: Run all pre-commit checks one more time 2 | 3 | on: 4 | push: 5 | branches: "*" 6 | pull_request: 7 | branches: master 8 | 9 | jobs: 10 | pre-commit: 11 | name: pre-commit 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - name: checkout 16 | uses: actions/checkout@v2 17 | 18 | - 
name: set up python 19 | uses: actions/setup-python@v2 20 | with: 21 | python-version: 3.8 22 | 23 | - name: Run pre-commit 24 | uses: pre-commit/action@v2.0.0 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/.ipynb_checkpoints 2 | **/__pycache__ 3 | notebooks/logs 4 | data_reshaping/logs 5 | 6 | # ignore images directory, except for the image catalog 7 | # complicated, but it works... https://stackoverflow.com/a/16318111 8 | notebooks/images/* 9 | !notebooks/images/g.e22.G1850ECO_JRA_HR.TL319_t13.003 10 | !notebooks/images/g.e22.G1850ECO_JRA_HR.TL319_t13.004 11 | !notebooks/images/g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001 12 | notebooks/images/g.e22.G1850ECO_JRA_HR.TL319_t13.003/* 13 | notebooks/images/g.e22.G1850ECO_JRA_HR.TL319_t13.004/* 14 | notebooks/images/g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001/* 15 | !notebooks/images/*/png_catalog.csv 16 | -------------------------------------------------------------------------------- /notebooks/run_all.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | This script is intended for developers to rerun all dask-free notebooks without 4 | launching JupyterHub or a jupyter lab session. 5 | It relies on the run_notebooks function. 6 | """ 7 | 8 | import os 9 | 10 | # For now, plot_suite and trend_maps don't run with nbconvert 11 | # It may be NCAR_jobqueue related... 12 | notebooks = [] 13 | notebooks.append("Sanity\ Check.ipynb") 14 | notebooks.append("Pull\ info\ from\ logs.ipynb") 15 | notebooks.append(f"compare_ts_and_hist_*.ipynb") 16 | notebooks.append(f"plot_suite_maps_*.ipynb") 17 | 18 | cmd = "./run_notebooks.sh " + " ".join(notebooks) 19 | os.system(cmd) 20 | -------------------------------------------------------------------------------- /notebooks/utils/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper functions to find files in the various directories 3 | """ 4 | 5 | import os 6 | 7 | 8 | ################################################################################ 9 | 10 | 11 | def add_first_date_and_reformat(date_list): 12 | new_list = [] 13 | for date in date_list: 14 | year = int(date[:4]) 15 | month = int(date[4:6]) 16 | day = int(date[6:]) 17 | if len(new_list) == 0: 18 | if day > 1: 19 | first_date = f"{year:04}-{month:02}-{(day-1):02}" 20 | else: 21 | first_date = "first" 22 | new_list.append(first_date) 23 | new_list.append(f"{year:04}-{month:02}-{day:02}") 24 | return new_list 25 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v3.2.0 4 | hooks: 5 | - id: trailing-whitespace 6 | - id: end-of-file-fixer 7 | - id: check-yaml 8 | 9 | - repo: https://github.com/ambv/black 10 | rev: 19.10b0 # consistent with conda environment 11 | hooks: 12 | - id: black 13 | args: [] 14 | 15 | - repo: https://github.com/deathbeds/prenotebook 16 | rev: f5bdb72a400f1a56fe88109936c83aa12cc349fa 17 | hooks: 18 | - id: prenotebook 19 | args: 20 | [ 21 | '--keep-output', 22 | '--keep-metadata', 23 | '--keep-execution-count', 24 | '--keep-empty', 25 | ] 26 | -------------------------------------------------------------------------------- /notebooks/dashboard.yaml: 
-------------------------------------------------------------------------------- 1 | summary_map: 2 | keys: 3 | - "casename" 4 | - "varname" 5 | - "date" 6 | - "apply_log10" 7 | - "sel_dict" 8 | column_widget_types: 9 | date: "discrete_slider" 10 | 11 | time_series: 12 | keys: 13 | - "casename" 14 | - "varname" 15 | - "time_period" 16 | - "sel_dict" 17 | 18 | histogram: 19 | keys: 20 | - "casename" 21 | - "varname" 22 | - "time_period" 23 | - "apply_log10" 24 | - "sel_dict" 25 | column_widget_types: 26 | time_period: "discrete_slider" 27 | 28 | trend_hist: 29 | keys: 30 | - "casename" 31 | - "varname" 32 | - "time_period" 33 | - "sel_dict" 34 | 35 | trend_map: 36 | keys: 37 | - "casename" 38 | - "varname" 39 | - "time_period" 40 | - "sel_dict" 41 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: Continuous Integration 2 | on: 3 | push: 4 | branches: 5 | - "*" 6 | pull_request: 7 | branches: 8 | - "master" 9 | 10 | jobs: 11 | build: 12 | name: Test on ubuntu 13 | runs-on: ubuntu-latest 14 | strategy: 15 | fail-fast: false 16 | steps: 17 | - name: checkout 18 | uses: actions/checkout@v2 19 | 20 | - name: Install conda 21 | uses: conda-incubator/setup-miniconda@v2 22 | with: 23 | auto-update-conda: true 24 | activate-environment: hires-marbl 25 | environment-file: environments/environment.yaml 26 | auto-activate-base: false 27 | 28 | - name: Show conda environment 29 | shell: bash -l {0} 30 | run: conda list 31 | 32 | - name: Run Tests 33 | shell: bash -l {0} 34 | run: pytest -v tests/ 35 | -------------------------------------------------------------------------------- /environments/environment.yaml: -------------------------------------------------------------------------------- 1 | name: hires-marbl 2 | channels: 3 | - conda-forge 4 | - nodefaults 5 | dependencies: 6 | - aiohttp 7 | - bokeh 8 | - bottleneck 9 | - cartopy 10 | - cf-units 11 | - cftime 12 | - cmocean 13 | - dask-jobqueue>=0.7.2 14 | - dask-labextension 15 | - dask-mpi 16 | - dask==2021.7.0 17 | - distributed==2021.7.0 18 | - eofs 19 | - esmpy 20 | - fsspec 21 | - hvplot 22 | - intake 23 | - intake-esm 24 | - ipykernel 25 | - ipywidgets 26 | - jupyter-server-proxy 27 | - jupyterlab>=3 28 | - matplotlib==3.4.2 29 | - metpy 30 | - nc-time-axis 31 | - ncar-jobqueue 32 | - netcdf4 33 | - nodejs 34 | - numba 35 | - numpy 36 | - pandas 37 | - pint 38 | - pip 39 | - pop-tools 40 | - pre-commit 41 | - pytest 42 | - python=3.9 43 | - scipy 44 | - seaborn 45 | - seawater 46 | - statsmodels 47 | - toolz 48 | - tqdm 49 | - watermark 50 | - xarray==0.18.2 51 | - xesmf 52 | - xgcm 53 | - xhistogram 54 | - xrft 55 | - zarr 56 | - pip 57 | - pip: 58 | - -r ../requirements.txt 59 | -------------------------------------------------------------------------------- /notebooks/utils/utils_units.py: -------------------------------------------------------------------------------- 1 | """ 2 | utility functions related to units 3 | """ 4 | 5 | import re 6 | 7 | from pint import UnitRegistry 8 | import xarray as xr 9 | 10 | 11 | def conv_units(da, units_out, units_scalef=None): 12 | """ 13 | return a copy of da, with units converted to units_out 14 | """ 15 | # use apply_ufunc to preserve dask-ness of da 16 | func = lambda values: _conv_units_np( 17 | values, da.attrs["units"], units_out, units_scalef 18 | ) 19 | da_out = xr.apply_ufunc( 20 | func, da, keep_attrs=True, dask="parallelized", 
output_dtypes=[da.dtype] 21 | ) 22 | da_out.attrs["units"] = units_out 23 | da_out.encoding = da.encoding 24 | return da_out 25 | 26 | 27 | def _clean_units(units): 28 | """replace some troublesome unit terms with acceptable replacements""" 29 | replacements = { 30 | "kgC": "kg", 31 | "gC": "g", 32 | "gC13": "g", 33 | "gC14": "g", 34 | "gN": "g", 35 | "unitless": "1", 36 | "years": "common_years", 37 | "yr": "common_year", 38 | "meq": "mmol", 39 | "neq": "nmol", 40 | } 41 | units_split = re.split(r"( |\(|\)|\^|\*|/|-[0-9]+|[0-9]+)", units) 42 | units_split_repl = [ 43 | replacements[token] if token in replacements else token for token in units_split 44 | ] 45 | return "".join(units_split_repl) 46 | 47 | 48 | def _conv_units_np(values, units_in, units_out, units_scalef=None): 49 | """ 50 | return a copy of numpy array values, with units converted from units_in to units_out 51 | """ 52 | ureg = UnitRegistry() 53 | values_in_pint = ureg.Quantity(values, ureg(_clean_units(units_in))) 54 | if units_scalef is not None: 55 | values_in_pint *= ureg(_clean_units(units_scalef)) 56 | values_out_pint = values_in_pint.to(_clean_units(units_out)) 57 | return values_out_pint.magnitude 58 | -------------------------------------------------------------------------------- /tests/test_utils_units.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import pytest 6 | import xarray as xr 7 | import numpy as np 8 | 9 | sys.path.append(os.path.abspath(os.path.join("notebooks", "utils"))) 10 | sys.path.append(os.path.abspath("tests")) 11 | from utils_units import _clean_units, conv_units 12 | from xr_ds_ex import xr_ds_ex 13 | 14 | nyrs = 3 15 | var_const = False 16 | 17 | 18 | @pytest.mark.parametrize( 19 | "units_in, units_out", 20 | [ 21 | # basic example, straight from dictionary 22 | ("years", "common_years"), 23 | # ensure 'gC' in 'degC' doesn't get converted 24 | ("degC", "degC"), 25 | # matches within expressions 26 | ("gN leaf/m^2", "g leaf/m^2"), # LNC 27 | ("gC/m^2/s", "g/m^2/s"), # AR 28 | ("meq/m^3", "mmol/m^3"), # ALK 29 | ( 30 | "(centimeter^2)(meq/m^3 cm/s)", 31 | "(centimeter^2)(mmol/m^3 cm/s)", 32 | ), # ALK_RIV_FLUX integral 33 | ("neq/cm3", "nmol/cm3"), # ABIO_ALK_SURF 34 | ("degC*cm/s", "degC*cm/s"), # T_FLUX_EXCH_INTRF 35 | ("days since 0001-01-01 00:00:00", "days since 0001-01-01 00:00:00"), # time 36 | # multiple matches 37 | ("gC/gN", "g/g"), # LEAFCN 38 | ], 39 | ) 40 | def test_clean_units(units_in, units_out): 41 | assert _clean_units(units_in) == units_out 42 | 43 | 44 | @pytest.mark.parametrize("apply_chunk", [True, False]) 45 | @pytest.mark.parametrize("add_encoding", [True, False]) 46 | def test_conv_units(apply_chunk, add_encoding): 47 | da = xr_ds_ex()["var_ex"] 48 | da.attrs["units"] = "kg" 49 | da.attrs["long_name"] = "var_ex" 50 | if apply_chunk: 51 | da = da.chunk({"time": 12}) 52 | if add_encoding: 53 | da.encoding["_FillValue"] = None 54 | 55 | da_out = conv_units(da, "g") 56 | 57 | assert da_out.attrs["units"] == "g" 58 | assert da_out.encoding == da.encoding 59 | assert da_out.chunks == da.chunks 60 | assert np.all(da_out.values == 1000.0 * da.values) 61 | -------------------------------------------------------------------------------- /notebooks/utils/compare_ts_and_hist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | A script to verify that converting from history files to time series worked as 
expected 4 | """ 5 | 6 | import xarray as xr 7 | from . import CaseClass 8 | 9 | 10 | def compare_ts_and_hist( 11 | casename, output_roots, stream, year, exclude_vars=["time_bound", "time_bounds"], 12 | ): 13 | """ 14 | Generate a CaseClass object from a given casename. For a given stream 15 | and year, open the history files from the case. Then loop through the 16 | variables (excluding time_bound in POP and time_bounds in CICE) and 17 | verify that those fields are available in time series. 18 | """ 19 | # Set some defaults to pass to open_mfdataset, then apply kwargs argument 20 | open_mfdataset_kwargs = dict() 21 | # data_vars="minimal", to avoid introducing time dimension to time-invariant fields 22 | open_mfdataset_kwargs["data_vars"] = "minimal" 23 | # compat="override", to skip var consistency checks (for speed) 24 | open_mfdataset_kwargs["compat"] = "override" 25 | # coords="minimal", because coords cannot be default="different" if compat="override" 26 | open_mfdataset_kwargs["coords"] = "minimal" 27 | # parallel=True to open files in parallel 28 | open_mfdataset_kwargs["parallel"] = True 29 | 30 | found_all = True 31 | 32 | case = CaseClass.CaseClass(casename, output_roots) 33 | # Return if no time series is available 34 | if not case.check_for_year_in_timeseries_files(year, stream): 35 | return "no time series" 36 | 37 | # Return if no history files are available 38 | history_filenames = case.get_history_files(year, stream) 39 | if len(history_filenames) == 0: 40 | return "no history" 41 | 42 | # Open history files to build dataset 43 | ds_hist = xr.open_mfdataset(history_filenames, **open_mfdataset_kwargs) 44 | vars_to_check = [ 45 | var 46 | for var in ds_hist.data_vars 47 | if "time" in ds_hist[var].coords and not var in exclude_vars 48 | ] 49 | 50 | # Look for each variable in time series 51 | for var in vars_to_check: 52 | if len(case.get_timeseries_files(year, stream, var)) == 0: 53 | print(f"No time series files for {var} in year {year:04}") 54 | found_all = False 55 | 56 | # Return "same" if all variables were found, otherwise return "datasets differ" 57 | if not found_all: 58 | return "datasets differ" 59 | return "same" 60 | 61 | 62 | ######################## 63 | 64 | if __name__ == "__main__": 65 | print("Feature not implemented yet") 66 | -------------------------------------------------------------------------------- /tests/xr_ds_ex.py: -------------------------------------------------------------------------------- 1 | """function for example xarray.Dataset object""" 2 | 3 | import cftime 4 | import numpy as np 5 | import xarray as xr 6 | 7 | days_1yr = np.array( 8 | [31.0, 28.0, 31.0, 30.0, 31.0, 30.0, 31.0, 31.0, 30.0, 31.0, 30.0, 31.0] 9 | ) 10 | 11 | 12 | def gen_time_bounds_values(nyrs=3): 13 | """return numpy array of values of month boundaries""" 14 | time_edges = np.insert(np.cumsum(np.tile(days_1yr, nyrs)), 0, 0) 15 | return np.stack((time_edges[:-1], time_edges[1:]), axis=1) 16 | 17 | 18 | def xr_ds_ex(decode_times=True, nyrs=3, var_const=True, time_mid=True): 19 | """return an example xarray.Dataset object, useful for testing functions""" 20 | 21 | # set up values for Dataset, nyrs yrs of analytic monthly values 22 | time_bounds_values = gen_time_bounds_values(nyrs) 23 | if time_mid: 24 | time_values = 0.5 * time_bounds_values[:, 0] + 0.5 * time_bounds_values[:, 1] 25 | else: 26 | time_values = 0.25 * time_bounds_values[:, 0] + 0.75 * time_bounds_values[:, 1] 27 | time_values_yr = time_values / 365.0 28 | if var_const: 29 | var_values = 
np.ones_like(time_values_yr) 30 | else: 31 | var_values = np.sin(np.pi * time_values_yr) * np.exp(-0.1 * time_values_yr) 32 | 33 | time_units = "days since 0001-01-01" 34 | calendar = "noleap" 35 | 36 | if decode_times: 37 | time_values = cftime.num2date(time_values, time_units, calendar) 38 | time_bounds_values = cftime.num2date(time_bounds_values, time_units, calendar) 39 | 40 | # create Dataset, including time_bounds 41 | time_var = xr.DataArray( 42 | time_values, 43 | name="time", 44 | dims="time", 45 | coords={"time": time_values}, 46 | attrs={"bounds": "time_bounds"}, 47 | ) 48 | if not decode_times: 49 | time_var.attrs["units"] = time_units 50 | time_var.attrs["calendar"] = calendar 51 | time_bounds = xr.DataArray( 52 | time_bounds_values, 53 | name="time_bounds", 54 | dims=("time", "d2"), 55 | coords={"time": time_var}, 56 | ) 57 | var = xr.DataArray( 58 | var_values, name="var_ex", dims="time", coords={"time": time_var} 59 | ) 60 | ds = var.to_dataset() 61 | days_in_month = xr.DataArray( 62 | np.tile(days_1yr, nyrs).squeeze(), 63 | name="days_in_month", 64 | dims="time", 65 | coords={"time": time_var}, 66 | ) 67 | ds = xr.merge([ds, time_bounds, days_in_month]) 68 | 69 | if decode_times: 70 | ds.time.encoding["units"] = time_units 71 | ds.time.encoding["calendar"] = calendar 72 | 73 | return ds 74 | -------------------------------------------------------------------------------- /notebooks/run_notebooks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage () { 4 | echo "$0 NOTEBOOK [NOTEBOOK2 ... NOTEBOOKN]" 5 | echo "Submit job(s) to run all notebooks on casper node via jupyter nbconvert" 6 | echo "" 7 | echo "For each specified file, the full call is:" 8 | echo "jupyter nbconvert --to notebook --inplace --ExecutePreprocessor.kernel_name=python \\ 9 | --ExecutePreprocessor.timeout=3600 --execute NOTEBOOK" 10 | echo "" 11 | echo "Output from the pbs job is written in the logs/ directory," 12 | echo "which will be created if it does not exist." 13 | } 14 | 15 | ######################### 16 | 17 | # Function that creates a temporary script 18 | # that is submitted via qsub 19 | submit_pbs_script () { 20 | 21 | nbname=`echo ${notebook} | sed -e "s/ /_/g"` 22 | 23 | echo "running ${notebook}.ipynb..." 24 | cat > ${nbname}.sub << EOF 25 | #!/bin/bash 26 | # 27 | #PBS -N ${nbname} 28 | #PBS -A P93300606 29 | #PBS -l select=1:ncpus=1:mem=100G 30 | #PBS -l walltime=6:00:00 31 | #PBS -q casper 32 | #PBS -j oe 33 | #PBS -m ea 34 | 35 | ${set_env} 36 | jupyter nbconvert --to notebook --inplace --ExecutePreprocessor.kernel_name=python \\ 37 | --ExecutePreprocessor.timeout=3600 --execute "${notebook}.ipynb" 38 | EOF 39 | 40 | qsub ${nbname}.sub 41 | rm -f ${nbname}.sub 42 | } 43 | 44 | ######################## 45 | 46 | # Function that creates a temporary script 47 | # that is submitted via sbatch 48 | submit_slurm_script () { 49 | 50 | nbname=`echo ${notebook} | sed -e "s/ /_/g"` 51 | 52 | echo "running ${notebook}.ipynb..." 
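# Build a temporary Slurm batch script from the heredoc below, submit it with sbatch,
# and then remove it. This mirrors submit_pbs_script above; the loop at the bottom of
# this file currently calls submit_pbs_script, so this Slurm variant is only used if
# that call is switched to submit_slurm_script.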
53 | cat > ${nbname}.sub << EOF 54 | #!/bin/bash 55 | # 56 | #SBATCH -n 16 57 | #SBATCH -N 1 58 | #SBATCH --ntasks-per-node=16 59 | #SBATCH -t 6:00:00 60 | #SBATCH -p dav 61 | #SBATCH -J ${nbname} 62 | #SBATCH --account=P93300606 63 | #SBATCH --mem 100G 64 | #SBATCH -e logs/${nbname}.err.%J 65 | #SBATCH -o logs/${nbname}.out.%J 66 | #SBATCH --mail-type=ALL 67 | #SBATCH --mail-user=${USER}@ucar.edu 68 | #SBATCH -m block 69 | 70 | ${set_env} 71 | jupyter nbconvert --to notebook --inplace --ExecutePreprocessor.kernel_name=python \\ 72 | --ExecutePreprocessor.timeout=3600 --execute "${notebook}.ipynb" 73 | EOF 74 | 75 | sbatch ${nbname}.sub 76 | rm -f ${nbname}.sub 77 | } 78 | 79 | ######################### 80 | 81 | if [ $# == 0 ]; then 82 | usage 83 | exit 1 84 | fi 85 | 86 | for args in "$@" 87 | do 88 | if [ "$args" == "-h" ] || [ "$args" == "--help" ]; then 89 | usage 90 | exit 0 91 | fi 92 | done 93 | 94 | # not sure why conda activate doesn't work but source activate does... 95 | set_env="export PATH=/glade/work/${USER}/miniconda3/bin/:$PATH ; source activate hires-marbl || exit -1" 96 | 97 | # make sure log directory exists 98 | mkdir -p logs 99 | 100 | for notebook_full in "$@" 101 | do 102 | if [ ! -f "${notebook_full}" ]; then 103 | echo "WARNING: can not find ${notebook_full}" 104 | continue 105 | fi 106 | notebook=`echo ${notebook_full} | cut -d '.' -f 1` 107 | submit_pbs_script $notebook 108 | done 109 | -------------------------------------------------------------------------------- /data_reshaping/pop.h_t13.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | # 3 | #SBATCH -n 64 4 | #SBATCH -N 4 5 | #SBATCH --ntasks-per-node=16 6 | #SBATCH -t 24:00:00 7 | #SBATCH -p dav 8 | #SBATCH -J Pop.h_t13 9 | #SBATCH --account=P93300606 10 | #SBATCH --mem 100G 11 | #SBATCH -e logs/Pop.h_t13.err.%J 12 | #SBATCH -o logs/Pop.h_t13.out.%J 13 | #SBATCH -m block 14 | # 15 | module purge 16 | conda deactivate || echo "conda not loaded" 17 | # 18 | # PARSE COMMAND LINE ARGUMENTS 19 | CASE=${1} ; export CASE 20 | ARCHIVE_ROOT=${2} 21 | YEAR=${3} 22 | echo "Reshaping year ${YEAR} for ${CASE}..." 23 | # 24 | cd /glade/p/cesm/postprocessing_dav/cesm-env2/bin 25 | . activate 26 | # 27 | module load intel/17.0.1 28 | module load ncarenv 29 | module load ncarcompilers 30 | module load impi 31 | module load netcdf/4.6.1 32 | module load nco/4.7.4 33 | module load ncl/6.4.0 34 | # 35 | HIST=pop.h ; export HIST 36 | # 37 | PATH=/glade/p/cesm/postprocessing_dav/cesm-env2/bin:/usr/local/bin:${PATH} ; export PATH 38 | # 39 | NCKS=`which ncks` ; export NCKS 40 | PROCHOST=`hostname`;export PROCHOST 41 | # 42 | BASEDIR=/glade/u/home/strandwg/CCP_Processing_Suite 43 | LOCALDSK=${ARCHIVE_ROOT}/${CASE} ; export LOCALDSK 44 | PROCBASE=/glade/scratch/$USER/T13/${CASE} ; export PROCBASE 45 | # 46 | HTYP=`echo $HIST | cut -d'.' -f1` ; export HTYP 47 | case "$HTYP" in 48 | cam2 | cam ) 49 | COMP_NAME=atm ;; 50 | cism ) 51 | COMP_NAME=glc ;; 52 | clm2 ) 53 | COMP_NAME=lnd ;; 54 | pop ) 55 | COMP_NAME=ocn ;; 56 | rtm | mosart ) 57 | COMP_NAME=rof ;; 58 | cice | csim ) 59 | COMP_NAME=ice ;; 60 | * ) 61 | echo "Unable to continue because "$HIST" not known." 62 | exit 1 ;; 63 | esac 64 | # 65 | LOCAL_HIST=${LOCALDSK}/${COMP_NAME}/hist ; export LOCAL_HIST 66 | LOCAL_PROC=${PROCBASE}/${HIST}/proc ; export LOCAL_PROC 67 | CACHEDIR=${LOCAL_PROC}/COMPLETED ; export CACHEDIR 68 | # 69 | VERBOSITY=0 ; export VERBOSITY 70 | PREFIX="${CACHEDIR}/${CASE}.${HIST}." 
; export PREFIX 71 | NCFORMAT=netcdf4c ; export NCFORMAT ; export NCFORMAT 72 | # 73 | if [ ! -d $LOCAL_PROC ] ; then 74 | mkdir -p $LOCAL_PROC 75 | fi 76 | if [ ! -d $CACHEDIR ] ; then 77 | mkdir -p $CACHEDIR 78 | fi 79 | # 80 | cd $LOCAL_PROC 81 | ln -s -f $BASEDIR/run_slice2series_dav Transpose_Data 82 | # 83 | rm -f ${CASE}.${HIST}.*nc 84 | if [ ! -f ${LOCAL_PROC}/.DONE.${CASE}.${HIST}.${YEAR} ] ; then 85 | ln -s -f ${LOCAL_HIST}/${CASE}.${HIST}.${YEAR}*nc . 86 | NHISTF=`/bin/ls ${CASE}.${HIST}.${YEAR}*nc | wc -l` 87 | if [ $NHISTF -eq 12 ] ; then 88 | OUTTIME="${YEAR}01-${YEAR}12" 89 | SUFFIX=".${OUTTIME}.nc" ; export SUFFIX 90 | echo -n "TS transpose_data start: " ; date 91 | ./Transpose_Data 92 | if [ $? -ne 0 ] ; then 93 | echo "Transpose_Data failed" 94 | exit 1 95 | fi 96 | echo -n "TS transpose_data end : " ; date 97 | touch ${LOCAL_PROC}/.DONE.${CASE}.${HIST}.${YEAR} 98 | else 99 | echo "File count mismatch on "${CASE}"."${HIST}"."${YEAR}": "${NHISTF}" instead of 12" 100 | fi 101 | fi 102 | # 103 | echo -n "TS COMPLETE: " ; date 104 | # 105 | exit 106 | -------------------------------------------------------------------------------- /data_reshaping/cice.h_t13.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | # 3 | #SBATCH -n 64 4 | #SBATCH -N 4 5 | #SBATCH --ntasks-per-node=16 6 | #SBATCH -t 24:00:00 7 | #SBATCH -p dav 8 | #SBATCH -J Cice.h_t13 9 | #SBATCH --account=P93300606 10 | #SBATCH --mem 100G 11 | #SBATCH -e logs/Cice.h_t13.err.%J 12 | #SBATCH -o logs/Cice.h_t13.out.%J 13 | #SBATCH -m block 14 | # 15 | module purge 16 | conda deactivate || echo "conda not loaded" 17 | # 18 | # PARSE COMMAND LINE ARGUMENTS 19 | CASE=${1} ; export CASE 20 | ARCHIVE_ROOT=${2} 21 | YEAR=${3} 22 | echo "Reshaping year ${YEAR} for ${CASE}..." 23 | # 24 | cd /glade/p/cesm/postprocessing_dav/cesm-env2/bin 25 | . activate 26 | # 27 | module load intel/17.0.1 28 | module load ncarenv 29 | module load ncarcompilers 30 | module load impi 31 | module load netcdf/4.6.1 32 | module load nco/4.7.4 33 | module load ncl/6.4.0 34 | # 35 | HIST=cice.h ; export HIST 36 | # 37 | PATH=/glade/p/cesm/postprocessing_dav/cesm-env2/bin:/usr/local/bin:${PATH} ; export PATH 38 | # 39 | NCKS=`which ncks` ; export NCKS 40 | PROCHOST=`hostname`;export PROCHOST 41 | # 42 | BASEDIR=/glade/u/home/strandwg/CCP_Processing_Suite 43 | LOCALDSK=${ARCHIVE_ROOT}/${CASE} ; export LOCALDSK 44 | PROCBASE=/glade/scratch/$USER/T13/${CASE} ; export PROCBASE 45 | # 46 | HTYP=`echo $HIST | cut -d'.' -f1` ; export HTYP 47 | case "$HTYP" in 48 | cam2 | cam ) 49 | COMP_NAME=atm ;; 50 | cism ) 51 | COMP_NAME=glc ;; 52 | clm2 ) 53 | COMP_NAME=lnd ;; 54 | pop ) 55 | COMP_NAME=ocn ;; 56 | rtm | mosart ) 57 | COMP_NAME=rof ;; 58 | cice | csim ) 59 | COMP_NAME=ice ;; 60 | * ) 61 | echo "Unable to continue because "$HIST" not known." 62 | exit 1 ;; 63 | esac 64 | # 65 | LOCAL_HIST=${LOCALDSK}/${COMP_NAME}/hist ; export LOCAL_HIST 66 | LOCAL_PROC=${PROCBASE}/${HIST}/proc ; export LOCAL_PROC 67 | CACHEDIR=${LOCAL_PROC}/COMPLETED ; export CACHEDIR 68 | # 69 | VERBOSITY=0 ; export VERBOSITY 70 | PREFIX="${CACHEDIR}/${CASE}.${HIST}." ; export PREFIX 71 | NCFORMAT=netcdf4c ; export NCFORMAT ; export NCFORMAT 72 | # 73 | if [ ! -d $LOCAL_PROC ] ; then 74 | mkdir -p $LOCAL_PROC 75 | fi 76 | if [ ! -d $CACHEDIR ] ; then 77 | mkdir -p $CACHEDIR 78 | fi 79 | # 80 | cd $LOCAL_PROC 81 | ln -s -f $BASEDIR/run_slice2series_dav Transpose_Data 82 | # 83 | rm -f ${CASE}.${HIST}.*nc 84 | if [ ! 
-f ${LOCAL_PROC}/.DONE.${CASE}.${HIST}.${YEAR} ] ; then 85 | ln -s -f ${LOCAL_HIST}/${CASE}.${HIST}.${YEAR}*nc . 86 | NHISTF=`/bin/ls ${CASE}.${HIST}.${YEAR}*nc | wc -l` 87 | if [ $NHISTF -eq 12 ] ; then 88 | OUTTIME="${YEAR}01-${YEAR}12" 89 | SUFFIX=".${OUTTIME}.nc" ; export SUFFIX 90 | echo -n "TS transpose_data start: " ; date 91 | ./Transpose_Data 92 | if [ $? -ne 0 ] ; then 93 | echo "Transpose_Data failed" 94 | exit 1 95 | fi 96 | echo -n "TS transpose_data end : " ; date 97 | touch ${LOCAL_PROC}/.DONE.${CASE}.${HIST}.${YEAR} 98 | else 99 | echo "File count mismatch on "${CASE}"."${HIST}"."${YEAR}": "${NHISTF}" instead of 12" 100 | fi 101 | fi 102 | # 103 | echo -n "TS COMPLETE: " ; date 104 | # 105 | exit 106 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | [![GitHub Workflow Status](https://img.shields.io/github/workflow/status/marbl-ecosys/HiRes-CESM-analysis/Continuous%20Integration?label=CI&logo=github&style=for-the-badge)](https://github.com/marbl-ecosys/HiRes-CESM-analysis/actions?query=workflow%3A%22Continuous+Integration%22) 3 | [![GitHub Workflow Status](https://img.shields.io/github/workflow/status/marbl-ecosys/HiRes-CESM-analysis/Run%20all%20pre-commit%20checks%20one%20more%20time?label=code-style&style=for-the-badge)](https://github.com/marbl-ecosys/HiRes-CESM-analysis/actions?query=workflow%3A%22Run+all+pre-commit+checks+one+more+time%22) 4 | 5 | # HiRes-CESM Analysis 6 | 7 | - [HiRes-CESM Analysis](#hires-cesm-analysis) 8 | - [For Developers](#for-developers) 9 | - [Keep your conda environment up to date](#keep-your-conda-environment-up-to-date) 10 | - [Use `pre-commit` to test code before committing](#use-pre-commit-to-test-code-before-committing) 11 | - [Run `pytest` after modifying python in `utils/`](#run-pytest-after-modifying-python-in-utils) 12 | 13 | This repository is building a set of tools for analyzing BGC output in a high-resolution POP run. 14 | 15 | ## For Developers 16 | 17 | A few recommended practices to incorporate in your development sandbox: 18 | 19 | ### Keep your conda environment up to date 20 | 21 | The first time you check out this repository, run 22 | 23 | ``` 24 | $ conda env create -f environments/environment.yaml 25 | ``` 26 | 27 | If you notice the YAML file has changed after you fetch changes from GitHub, 28 | update the environment with 29 | 30 | ``` 31 | $ conda env update -f environments/environment.yaml 32 | ``` 33 | 34 | If the `env update` command fails, you can remove the environment and re-create it: 35 | 36 | ``` 37 | $ conda env remove --name hires-marbl 38 | $ conda env create -f environments/environment.yaml 39 | ``` 40 | 41 | ### Use `pre-commit` to test code before committing 42 | 43 | Please take advantage of the pre-commit package to ensure that `black` is run before committing: 44 | 45 | ``` 46 | $ pre-commit install --install-hooks # set up pre-commit 47 | $ pre-commit run -a # check all the files currently in the repo 48 | ``` 49 | 50 | The pre-commit package is already installed via the `hires-marbl` conda environment. 51 | There is a GitHub action to run these checks on all pull requests, 52 | but running them locally via pre-commit will reduce the number of failed actions. 53 | NOTE: for some reason, to properly install `pre-commit` on the CISL systems, 54 | the above command must be run from `casper` rather than `cheyenne`.
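If only one hook fails, it can be faster to re-run that hook by itself rather than the full suite; for example, to run just the `black` hook (the hook id comes from `.pre-commit-config.yaml`; the file path below is only an illustration):

```
$ pre-commit run black --files notebooks/utils/utils.py
```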
55 | 56 | Note that pre-commit creates a virtual environment using specific tags of each package. 57 | As newer versions of `black` become available on `conda-forge`, we will update the pre-commit environment. 58 | 59 | ### Run `pytest` after modifying python in `utils/` 60 | 61 | To test some of the python code in `notebooks/utils/`, run `pytest`. 62 | These tests can be run from the top level of this repository by running 63 | 64 | ``` 65 | $ pytest tests/ 66 | ``` 67 | 68 | If you add new code to this directory, 69 | consider writing small tests to ensure it is running as expected. 70 | -------------------------------------------------------------------------------- /data_reshaping/cice.h1_t13.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | # 3 | #SBATCH -n 64 4 | #SBATCH -N 4 5 | #SBATCH --ntasks-per-node=16 6 | #SBATCH -t 24:00:00 7 | #SBATCH -p dav 8 | #SBATCH -J Cice.h1_t13 9 | #SBATCH --account=P93300606 10 | #SBATCH --mem 100G 11 | #SBATCH -e logs/Cice.h1_t13.err.%J 12 | #SBATCH -o logs/Cice.h1_t13.out.%J 13 | #SBATCH -m block 14 | # 15 | module purge 16 | conda deactivate || echo "conda not loaded" 17 | # 18 | # PARSE COMMAND LINE ARGUMENTS 19 | CASE=${1} ; export CASE 20 | ARCHIVE_ROOT=${2} 21 | YEAR=${3} 22 | echo "Reshaping year ${YEAR} for ${CASE}..." 23 | # 24 | cd /glade/p/cesm/postprocessing_dav/cesm-env2/bin 25 | . activate 26 | # 27 | module load intel/17.0.1 28 | module load ncarenv 29 | module load ncarcompilers 30 | module load impi 31 | module load netcdf/4.6.1 32 | module load nco/4.7.4 33 | module load ncl/6.4.0 34 | # 35 | HIST=cice.h1 ; export HIST 36 | # 37 | PATH=/glade/p/cesm/postprocessing_dav/cesm-env2/bin:/usr/local/bin:${PATH} ; export PATH 38 | # 39 | NCKS=`which ncks` ; export NCKS 40 | PROCHOST=`hostname`;export PROCHOST 41 | # 42 | BASEDIR=/glade/u/home/strandwg/CCP_Processing_Suite 43 | LOCALDSK=${ARCHIVE_ROOT}/${CASE} ; export LOCALDSK 44 | PROCBASE=/glade/scratch/$USER/T13/${CASE} ; export PROCBASE 45 | # 46 | HTYP=`echo $HIST | cut -d'.' -f1` ; export HTYP 47 | case "$HTYP" in 48 | cam2 | cam ) 49 | COMP_NAME=atm ;; 50 | cism ) 51 | COMP_NAME=glc ;; 52 | clm2 ) 53 | COMP_NAME=lnd ;; 54 | pop ) 55 | COMP_NAME=ocn ;; 56 | rtm | mosart ) 57 | COMP_NAME=rof ;; 58 | cice | csim ) 59 | COMP_NAME=ice ;; 60 | * ) 61 | echo "Unable to continue because "$HIST" not known." 62 | exit 1 ;; 63 | esac 64 | # 65 | LOCAL_HIST=${LOCALDSK}/${COMP_NAME}/hist ; export LOCAL_HIST 66 | LOCAL_PROC=${PROCBASE}/${HIST}/proc ; export LOCAL_PROC 67 | CACHEDIR=${LOCAL_PROC}/COMPLETED ; export CACHEDIR 68 | # 69 | VERBOSITY=0 ; export VERBOSITY 70 | PREFIX="${CACHEDIR}/${CASE}.${HIST}." ; export PREFIX 71 | NCFORMAT=netcdf4c ; export NCFORMAT ; export NCFORMAT 72 | # 73 | if [ ! -d $LOCAL_PROC ] ; then 74 | mkdir -p $LOCAL_PROC 75 | fi 76 | if [ ! -d $CACHEDIR ] ; then 77 | mkdir -p $CACHEDIR 78 | fi 79 | # 80 | cd $LOCAL_PROC 81 | ln -s -f $BASEDIR/run_slice2series_dav Transpose_Data 82 | # 83 | rm -f ${CASE}.${HIST}.*nc 84 | if [ ! -f ${LOCAL_PROC}/.DONE.${CASE}.${HIST}.${YEAR} ] ; then 85 | ln -s -f ${LOCAL_HIST}/${CASE}.${HIST}.${YEAR}*nc . 86 | NHISTF=`/bin/ls ${CASE}.${HIST}.${YEAR}*nc | wc -l` 87 | if [ $NHISTF -eq 365 ] ; then 88 | OUTTIME="${YEAR}0101-${YEAR}1231" 89 | SUFFIX=".${OUTTIME}.nc" ; export SUFFIX 90 | echo -n "TS transpose_data start: " ; date 91 | ./Transpose_Data 92 | if [ $? 
-ne 0 ] ; then 93 | echo "Transpose_Data failed" 94 | exit 1 95 | fi 96 | echo -n "TS transpose_data end : " ; date 97 | touch ${LOCAL_PROC}/.DONE.${CASE}.${HIST}.${YEAR} 98 | else 99 | echo "File count mismatch on "${CASE}"."${HIST}"."${YEAR}": "${NHISTF}" instead of 365" 100 | fi 101 | fi 102 | # 103 | echo -n "TS COMPLETE: " ; date 104 | # 105 | exit 106 | -------------------------------------------------------------------------------- /data_reshaping/pop.h.nyear1_t13.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | # 3 | #SBATCH -n 64 4 | #SBATCH -N 4 5 | #SBATCH --ntasks-per-node=16 6 | #SBATCH -t 24:00:00 7 | #SBATCH -p dav 8 | #SBATCH -J Pop.h.nyear1_t13 9 | #SBATCH --account=P93300606 10 | #SBATCH --mem 100G 11 | #SBATCH -e logs/Pop.h.nyear1_t13.err.%J 12 | #SBATCH -o logs/Pop.h.nyear1_t13.out.%J 13 | #SBATCH -m block 14 | # 15 | module purge 16 | conda deactivate || echo "conda not loaded" 17 | # 18 | # PARSE COMMAND LINE ARGUMENTS 19 | CASE=${1} ; export CASE 20 | ARCHIVE_ROOT=${2} 21 | YEAR=${3} 22 | echo "Reshaping year ${YEAR} for ${CASE}..." 23 | # 24 | cd /glade/p/cesm/postprocessing_dav/cesm-env2/bin 25 | . activate 26 | # 27 | module load intel/17.0.1 28 | module load ncarenv 29 | module load ncarcompilers 30 | module load impi 31 | module load netcdf/4.6.1 32 | module load nco/4.7.4 33 | module load ncl/6.4.0 34 | # 35 | HIST=pop.h.nyear1 ; export HIST 36 | # 37 | PATH=/glade/p/cesm/postprocessing_dav/cesm-env2/bin:/usr/local/bin:${PATH} ; export PATH 38 | # 39 | NCKS=`which ncks` ; export NCKS 40 | PROCHOST=`hostname`;export PROCHOST 41 | # 42 | BASEDIR=/glade/u/home/strandwg/CCP_Processing_Suite 43 | LOCALDSK=${ARCHIVE_ROOT}/${CASE} ; export LOCALDSK 44 | PROCBASE=/glade/scratch/$USER/T13/${CASE} ; export PROCBASE 45 | # 46 | HTYP=`echo $HIST | cut -d'.' -f1` ; export HTYP 47 | case "$HTYP" in 48 | cam2 | cam ) 49 | COMP_NAME=atm ;; 50 | cism ) 51 | COMP_NAME=glc ;; 52 | clm2 ) 53 | COMP_NAME=lnd ;; 54 | pop ) 55 | COMP_NAME=ocn ;; 56 | rtm | mosart ) 57 | COMP_NAME=rof ;; 58 | cice | csim ) 59 | COMP_NAME=ice ;; 60 | * ) 61 | echo "Unable to continue because "$HIST" not known." 62 | exit 1 ;; 63 | esac 64 | # 65 | LOCAL_HIST=${LOCALDSK}/${COMP_NAME}/hist ; export LOCAL_HIST 66 | LOCAL_PROC=${PROCBASE}/${HIST}/proc ; export LOCAL_PROC 67 | CACHEDIR=${LOCAL_PROC}/COMPLETED ; export CACHEDIR 68 | # 69 | VERBOSITY=0 ; export VERBOSITY 70 | PREFIX="${CACHEDIR}/${CASE}.${HIST}." ; export PREFIX 71 | NCFORMAT=netcdf4c ; export NCFORMAT ; export NCFORMAT 72 | # 73 | if [ ! -d $LOCAL_PROC ] ; then 74 | mkdir -p $LOCAL_PROC 75 | fi 76 | if [ ! -d $CACHEDIR ] ; then 77 | mkdir -p $CACHEDIR 78 | fi 79 | # 80 | cd $LOCAL_PROC 81 | ln -s -f $BASEDIR/run_slice2series_dav Transpose_Data 82 | # 83 | rm -f ${CASE}.${HIST}.*nc 84 | if [ ! -f ${LOCAL_PROC}/.DONE.${CASE}.${HIST}.${YEAR} ] ; then 85 | ln -s -f ${LOCAL_HIST}/${CASE}.${HIST}.${YEAR}*nc . 86 | NHISTF=`/bin/ls ${CASE}.${HIST}.${YEAR}*nc | wc -l` 87 | if [ $NHISTF -eq 1 ] ; then 88 | OUTTIME="${YEAR}-${YEAR}" 89 | SUFFIX=".${OUTTIME}.nc" ; export SUFFIX 90 | echo -n "TS transpose_data start: " ; date 91 | ./Transpose_Data 92 | if [ $? 
-ne 0 ] ; then 93 | echo "Transpose_Data failed" 94 | exit 1 95 | fi 96 | echo -n "TS transpose_data end : " ; date 97 | touch ${LOCAL_PROC}/.DONE.${CASE}.${HIST}.${YEAR} 98 | else 99 | echo "File count mismatch on "${CASE}"."${HIST}"."${YEAR}": "${NHISTF}" instead of 1" 100 | fi 101 | fi 102 | # 103 | echo -n "TS COMPLETE: " ; date 104 | # 105 | exit 106 | -------------------------------------------------------------------------------- /data_reshaping/pop.h.nday1_t13.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | # 3 | #SBATCH -n 64 4 | #SBATCH -N 4 5 | #SBATCH --ntasks-per-node=16 6 | #SBATCH -t 24:00:00 7 | #SBATCH -p dav 8 | #SBATCH -J Pop.h.nday1_t13 9 | #SBATCH --account=P93300606 10 | #SBATCH --mem 100G 11 | #SBATCH -e logs/Pop.h.nday1_t13.err.%J 12 | #SBATCH -o logs/Pop.h.nday1_t13.out.%J 13 | #SBATCH -m block 14 | # 15 | module purge 16 | conda deactivate || echo "conda not loaded" 17 | # 18 | # PARSE COMMAND LINE ARGUMENTS 19 | CASE=${1} ; export CASE 20 | ARCHIVE_ROOT=${2} 21 | YEAR=${3} 22 | echo "Reshaping year ${YEAR} for ${CASE}..." 23 | # 24 | cd /glade/p/cesm/postprocessing_dav/cesm-env2/bin 25 | . activate 26 | # 27 | module load intel/17.0.1 28 | module load ncarenv 29 | module load ncarcompilers 30 | module load impi 31 | module load netcdf/4.6.1 32 | module load nco/4.7.4 33 | module load ncl/6.4.0 34 | # 35 | HIST=pop.h.nday1 ; export HIST 36 | # 37 | PATH=/glade/p/cesm/postprocessing_dav/cesm-env2/bin:/usr/local/bin:${PATH} ; export PATH 38 | # 39 | NCKS=`which ncks` ; export NCKS 40 | PROCHOST=`hostname`;export PROCHOST 41 | # 42 | BASEDIR=/glade/u/home/strandwg/CCP_Processing_Suite 43 | LOCALDSK=${ARCHIVE_ROOT}/${CASE} ; export LOCALDSK 44 | PROCBASE=/glade/scratch/$USER/T13/${CASE} ; export PROCBASE 45 | # 46 | HTYP=`echo $HIST | cut -d'.' -f1` ; export HTYP 47 | case "$HTYP" in 48 | cam2 | cam ) 49 | COMP_NAME=atm ;; 50 | cism ) 51 | COMP_NAME=glc ;; 52 | clm2 ) 53 | COMP_NAME=lnd ;; 54 | pop ) 55 | COMP_NAME=ocn ;; 56 | rtm | mosart ) 57 | COMP_NAME=rof ;; 58 | cice | csim ) 59 | COMP_NAME=ice ;; 60 | * ) 61 | echo "Unable to continue because "$HIST" not known." 62 | exit 1 ;; 63 | esac 64 | # 65 | LOCAL_HIST=${LOCALDSK}/${COMP_NAME}/hist ; export LOCAL_HIST 66 | LOCAL_PROC=${PROCBASE}/${HIST}/proc ; export LOCAL_PROC 67 | CACHEDIR=${LOCAL_PROC}/COMPLETED ; export CACHEDIR 68 | # 69 | VERBOSITY=0 ; export VERBOSITY 70 | PREFIX="${CACHEDIR}/${CASE}.${HIST}." ; export PREFIX 71 | NCFORMAT=netcdf4c ; export NCFORMAT ; export NCFORMAT 72 | # 73 | if [ ! -d $LOCAL_PROC ] ; then 74 | mkdir -p $LOCAL_PROC 75 | fi 76 | if [ ! -d $CACHEDIR ] ; then 77 | mkdir -p $CACHEDIR 78 | fi 79 | # 80 | cd $LOCAL_PROC 81 | ln -s -f $BASEDIR/run_slice2series_dav Transpose_Data 82 | # 83 | rm -f ${CASE}.${HIST}.*nc 84 | if [ ! -f ${LOCAL_PROC}/.DONE.${CASE}.${HIST}.${YEAR} ] ; then 85 | ln -s -f ${LOCAL_HIST}/${CASE}.${HIST}.${YEAR}*nc . 86 | NHISTF=`/bin/ls ${CASE}.${HIST}.${YEAR}*nc | wc -l` 87 | if [ $NHISTF -eq 12 ] ; then 88 | OUTTIME="${YEAR}0101-${YEAR}1231" 89 | SUFFIX=".${OUTTIME}.nc" ; export SUFFIX 90 | echo -n "TS transpose_data start: " ; date 91 | ./Transpose_Data 92 | if [ $? 
-ne 0 ] ; then 93 | echo "Transpose_Data failed" 94 | exit 1 95 | fi 96 | echo -n "TS transpose_data end : " ; date 97 | touch ${LOCAL_PROC}/.DONE.${CASE}.${HIST}.${YEAR} 98 | else 99 | echo "File count mismatch on "${CASE}"."${HIST}"."${YEAR}": "${NHISTF}" instead of 12" 100 | fi 101 | fi 102 | # 103 | echo -n "TS COMPLETE: " ; date 104 | # 105 | exit 106 | -------------------------------------------------------------------------------- /data_reshaping/run_all.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | 4 | 5 | def _parse_args(): 6 | """ Parse command line arguments """ 7 | 8 | import argparse 9 | 10 | parser = argparse.ArgumentParser( 11 | description="Submit scripts to reshape highres BGC output", 12 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 13 | ) 14 | 15 | # Required: specify year 16 | parser.add_argument( 17 | "-y", 18 | "--years", 19 | action="store", 20 | dest="years", 21 | type=int, 22 | required=True, 23 | nargs="+", 24 | help="Year of run to convert to time series", 25 | ) 26 | 27 | # Optional: which case to convert 28 | # if this tool is made public, drop the default and require case as well 29 | parser.add_argument( 30 | "-c", 31 | "--case", 32 | action="store", 33 | dest="case", 34 | type=str, 35 | default="g.e22.G1850ECO_JRA_HR.TL319_t13.004", 36 | help="Suffix of case to convert to time series", 37 | ) 38 | 39 | # Optional: location of DOUT_S_ROOT 40 | archive_default = os.path.join( 41 | os.sep, "glade", "scratch", os.environ["USER"], "archive" 42 | ) 43 | parser.add_argument( 44 | "-a", 45 | "--archive-root", 46 | action="store", 47 | dest="archive_root", 48 | type=str, 49 | default=archive_default, 50 | help="base of DOUT_S_ROOT", 51 | ) 52 | 53 | # Optional: specify which scripts to run 54 | parser.add_argument( 55 | "-s", 56 | "--scripts", 57 | action="store", 58 | dest="scripts", 59 | type=str, 60 | nargs="+", 61 | default=[ 62 | "pop.h_t13.sh", 63 | "pop.h.nday1_t13.sh", 64 | "cice.h_t13.sh", 65 | "pop.h.nyear1_t13.sh", 66 | "cice.h1_t13.sh", 67 | ], 68 | help="Scripts to submit to slurm", 69 | ) 70 | 71 | # Optional: is this a dry-run? If so, don't submit anything 72 | parser.add_argument( 73 | "-d", 74 | "--dry-run", 75 | action="store_true", 76 | dest="dryrun", 77 | help="If true, do not actually submit job", 78 | ) 79 | 80 | # Optional: By default, slurm will email users when jobs start and finish 81 | parser.add_argument( 82 | "--no-mail", 83 | action="store_false", 84 | dest="send_mail", 85 | help="If true, send SLURM emails to {user}@ucar.edu", 86 | ) 87 | 88 | return parser.parse_args() 89 | 90 | 91 | ################### 92 | 93 | if __name__ == "__main__": 94 | args = _parse_args() 95 | case = args.case 96 | archive_root = args.archive_root 97 | mail_opt = ( 98 | f"--mail-type=ALL --mail-user={os.environ['USER']}@ucar.edu" 99 | if args.send_mail 100 | else "--mail-type=NONE" 101 | ) 102 | 103 | for yr in args.years: 104 | year = f"{yr:04}" 105 | for script in args.scripts: 106 | print(f"Submitting {script} for year {year} of {case}...") 107 | cmd = f"sbatch {mail_opt} --dependency=singleton {script} {case} {archive_root} {year}" 108 | if not args.dryrun: 109 | # note: the --dependency=singleton option means only one job per job name 110 | # Some jobs had been crashing, and I think it was due to temporary 111 | # files clobbering each other? But only having one pop.h_t13.sh job 112 | # at a time seems to have prevented these issues. 
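# For reference, with the default --case and --archive-root values the composed
# command looks something like (illustrative; <user> is the login name):
#   sbatch --mail-type=ALL --mail-user=<user>@ucar.edu --dependency=singleton pop.h_t13.sh g.e22.G1850ECO_JRA_HR.TL319_t13.004 /glade/scratch/<user>/archive 0001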
113 | os.system(cmd) 114 | else: 115 | print(f"Command to run: {cmd}") 116 | -------------------------------------------------------------------------------- /notebooks/plot_suite_maps_0095_1deg.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import yaml\n", 11 | "\n", 12 | "import utils\n", 13 | "\n", 14 | "%matplotlib inline" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "casename = \"g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001\"\n", 24 | "year = 95\n", 25 | "campaign_root = os.path.join(\n", 26 | " os.path.sep,\n", 27 | " \"glade\",\n", 28 | " \"campaign\",\n", 29 | " \"cesm\",\n", 30 | " \"development\",\n", 31 | " \"bgcwg\",\n", 32 | " \"projects\",\n", 33 | " \"1deg_cocco_JRA\",\n", 34 | " \"cases\",\n", 35 | ")\n", 36 | "\n", 37 | "# Set up CaseClass object\n", 38 | "case = utils.CaseClass(casename, os.path.join(campaign_root, casename))" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "with open(\"diag_metadata.yaml\", mode=\"r\") as fptr:\n", 48 | " diag_metadata_list = yaml.safe_load(fptr)\n", 49 | "\n", 50 | "varnames = utils.get_varnames_from_metadata_list(diag_metadata_list)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 4, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "def summary_plots(ds, diag_metadata, save_pngs=False):\n", 60 | " varname = diag_metadata[\"varname\"]\n", 61 | " print(varname)\n", 62 | " da = ds[varname].isel(diag_metadata.get(\"isel_dict\"))\n", 63 | "\n", 64 | " utils.summary_plot_maps(\n", 65 | " ds,\n", 66 | " da,\n", 67 | " diag_metadata,\n", 68 | " save_pngs=save_pngs,\n", 69 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 70 | " )" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 5, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "name": "stdout", 80 | "output_type": "stream", 81 | "text": [ 82 | "Datasets contain a total of 12 time samples\n", 83 | "Last average written at 0096-01-01 00:00:00\n", 84 | "POC_FLUX_100m\n", 85 | "CaCO3_FLUX_100m\n", 86 | "FG_CO2\n", 87 | "DpCO2\n", 88 | "PH\n", 89 | "spChl\n", 90 | "photoC_sp_zint\n", 91 | "coccoChl\n", 92 | "photoC_cocco_zint\n", 93 | "diatChl\n", 94 | "photoC_diat_zint\n", 95 | "diazChl\n", 96 | "photoC_diaz_zint\n", 97 | "NHx_SURFACE_EMIS\n", 98 | "NH4\n", 99 | "O2_ZMIN\n", 100 | "O2_ZMIN_DEPTH\n", 101 | "O2\n", 102 | "PO4\n", 103 | "PO4\n", 104 | "NO3\n", 105 | "NO3\n", 106 | "SiO3\n", 107 | "SiO3\n" 108 | ] 109 | } 110 | ], 111 | "source": [ 112 | "stream = \"pop.h\"\n", 113 | "ds = case.gen_dataset(varnames, stream, start_year=year, end_year=year)\n", 114 | "for diag_metadata in diag_metadata_list:\n", 115 | " # ds = case.gen_dataset(\n", 116 | " # diag_metadata[\"varname\"], stream, start_year=year, end_year=year\n", 117 | " # )\n", 118 | " summary_plots(ds, diag_metadata, save_pngs=True)" 119 | ] 120 | } 121 | ], 122 | "metadata": { 123 | "kernelspec": { 124 | "display_name": "Python [conda env:hires-marbl]", 125 | "language": "python", 126 | "name": "conda-env-hires-marbl-py" 127 | }, 128 | "language_info": { 129 | "codemirror_mode": { 130 | "name": "ipython", 131 | "version": 3 132 | }, 133 | "file_extension": ".py", 134 | 
"mimetype": "text/x-python", 135 | "name": "python", 136 | "nbconvert_exporter": "python", 137 | "pygments_lexer": "ipython3", 138 | "version": "3.7.8" 139 | } 140 | }, 141 | "nbformat": 4, 142 | "nbformat_minor": 4 143 | } 144 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import pytest 6 | import cftime 7 | import numpy as np 8 | import xarray as xr 9 | 10 | sys.path.append(os.path.abspath(os.path.join("notebooks"))) 11 | sys.path.append(os.path.abspath("tests")) 12 | from utils.utils import time_year_plus_frac, time_set_mid, repl_coord, round_sig 13 | from xr_ds_ex import gen_time_bounds_values, xr_ds_ex 14 | 15 | nyrs = 300 16 | var_const = False 17 | 18 | 19 | @pytest.mark.parametrize("decode_times1", [True, False]) 20 | @pytest.mark.parametrize("decode_times2", [True, False]) 21 | @pytest.mark.parametrize("apply_chunk1", [True, False]) 22 | def test_repl_coord(decode_times1, decode_times2, apply_chunk1): 23 | ds1 = time_set_mid(xr_ds_ex(decode_times1, nyrs=nyrs, var_const=var_const), "time") 24 | if apply_chunk1: 25 | ds1 = ds1.chunk({"time": 12}) 26 | 27 | # change time:bounds attribute variable rename corresponding variable 28 | tb_name_old = ds1["time"].attrs["bounds"] 29 | tb_name_new = tb_name_old + "_new" 30 | ds1["time"].attrs["bounds"] = tb_name_new 31 | ds1 = ds1.rename({tb_name_old: tb_name_new}) 32 | 33 | # verify that repl_coord on xr_ds_ex gives same results as 34 | # 1) executing time_set_mid 35 | # 2) manually changing bounds 36 | ds2 = repl_coord( 37 | "time", ds1, xr_ds_ex(decode_times2, nyrs=nyrs, var_const=var_const) 38 | ) 39 | assert ds2.identical(ds1) 40 | 41 | assert ds2["time"].encoding == ds1["time"].encoding 42 | assert ds2["time"].chunks == ds1["time"].chunks 43 | 44 | 45 | @pytest.mark.parametrize("decode_times", [True, False]) 46 | @pytest.mark.parametrize("deep", [True, False]) 47 | @pytest.mark.parametrize("apply_chunk", [True, False]) 48 | def test_time_set_mid(decode_times, deep, apply_chunk): 49 | ds = xr_ds_ex(decode_times, nyrs=nyrs, var_const=var_const, time_mid=False) 50 | if apply_chunk: 51 | ds = ds.chunk({"time": 12}) 52 | 53 | mid_month_values = gen_time_bounds_values(nyrs).mean(axis=1) 54 | if decode_times: 55 | time_encoding = ds["time"].encoding 56 | expected_values = cftime.num2date( 57 | mid_month_values, time_encoding["units"], time_encoding["calendar"] 58 | ) 59 | else: 60 | expected_values = mid_month_values 61 | 62 | ds_out = time_set_mid(ds, "time", deep) 63 | 64 | assert ds_out.attrs == ds.attrs 65 | assert ds_out.encoding == ds.encoding 66 | assert ds_out.chunks == ds.chunks 67 | 68 | for varname in ds.variables: 69 | assert ds_out[varname].attrs == ds[varname].attrs 70 | assert ds_out[varname].encoding == ds[varname].encoding 71 | assert ds_out[varname].chunks == ds[varname].chunks 72 | if varname == "time": 73 | assert np.all(ds_out[varname].values == expected_values) 74 | else: 75 | assert np.all(ds_out[varname].values == ds[varname].values) 76 | assert (ds_out[varname].data is ds[varname].data) == (not deep) 77 | 78 | # verify that values are independent of ds being chunked in time 79 | ds_chunk = xr_ds_ex( 80 | decode_times, nyrs=nyrs, var_const=var_const, time_mid=False 81 | ).chunk({"time": 6}) 82 | ds_chunk_out = time_set_mid(ds_chunk, "time") 83 | assert ds_chunk_out.identical(ds_out) 84 | 85 | 86 | 
@pytest.mark.parametrize("decode_times", [True, False]) 87 | def test_time_year_plus_frac(decode_times): 88 | ds = xr_ds_ex(decode_times, nyrs=nyrs, var_const=var_const) 89 | 90 | # call time_year_plus_frac to ensure that it doesn't raise an exception 91 | ty = time_year_plus_frac(ds, "time") 92 | 93 | 94 | @pytest.mark.parametrize( 95 | "x, ndigits, expected", 96 | [ 97 | (0.0, 1, 0.0), 98 | (0.0, 2, 0.0), 99 | (1.25, 1, 1.0), 100 | (1.25, 3, 1.25), 101 | (12.5, 1, 10.0), 102 | (12.5, 2, 12.0), # round to even 103 | (12.5, 3, 12.5), 104 | (12.5, 4, 12.5), 105 | (13.5, 1, 10.0), 106 | (13.5, 2, 14.0), # round to even 107 | (13.5, 3, 13.5), 108 | (13.52, 3, 13.5), 109 | (13.48, 3, 13.5), 110 | (13.5, 4, 13.5), 111 | ], 112 | ) 113 | def test_round_sig(x, ndigits, expected): 114 | assert round_sig(x, ndigits) == expected 115 | -------------------------------------------------------------------------------- /notebooks/diag_metadata.yaml: -------------------------------------------------------------------------------- 1 | - varname: POC_FLUX_100m 2 | spatial_op: integrate 3 | integral_unit_conv: (12 g)/(mol) # convert from mol to g C 4 | integral_display_units: Pg / yr 5 | display_units: mol / m^2 / yr 6 | map_vmin: 1.0e-2 7 | map_vmax: 10.0 8 | apply_log10: True 9 | 10 | - varname: CaCO3_FLUX_100m 11 | spatial_op: integrate 12 | integral_unit_conv: (12 g)/(mol) # convert from mol to g C 13 | integral_display_units: Pg / yr 14 | display_units: mol / m^2 / yr 15 | map_vmin: 1.0e-3 16 | map_vmax: 3.0 17 | apply_log10: True 18 | 19 | - varname: FG_CO2 20 | spatial_op: integrate 21 | integral_unit_conv: (12 g)/(mol) # convert from mol to g C 22 | integral_display_units: Pg / yr 23 | display_units: mol / m^2 / yr 24 | map_vmin: -10.0 25 | map_vmax: 10.0 26 | 27 | - varname: DpCO2 28 | spatial_op: average 29 | map_vmin: -75.0 30 | map_vmax: 75.0 31 | 32 | - varname: PH 33 | spatial_op: average 34 | map_vmin: 8.0 35 | map_vmax: 8.4 36 | 37 | - varname: spChl 38 | isel_dict: 39 | z_t_150m: 0 40 | spatial_op: average 41 | map_vmin: 1.0e-3 42 | map_vmax: 1.0e-1 43 | apply_log10: True 44 | 45 | - varname: photoC_sp_zint 46 | spatial_op: integrate 47 | integral_unit_conv: (12 g)/(mol) # convert from mol to g C 48 | integral_display_units: Pg / yr 49 | display_units: mol / m^2 / yr 50 | map_vmin: 1.0e-2 51 | map_vmax: 15.0 52 | apply_log10: True 53 | 54 | - varname: coccoChl 55 | isel_dict: 56 | z_t_150m: 0 57 | spatial_op: average 58 | map_vmin: 1.0e-3 59 | map_vmax: 1.0 60 | apply_log10: True 61 | 62 | - varname: photoC_cocco_zint 63 | spatial_op: integrate 64 | integral_unit_conv: (12 g)/(mol) # convert from mol to g C 65 | integral_display_units: Pg / yr 66 | display_units: mol / m^2 / yr 67 | map_vmin: 1.0e-2 68 | map_vmax: 15.0 69 | apply_log10: True 70 | 71 | - varname: diatChl 72 | isel_dict: 73 | z_t_150m: 0 74 | spatial_op: average 75 | map_vmin: 1.0e-3 76 | map_vmax: 10.0 77 | apply_log10: True 78 | 79 | - varname: photoC_diat_zint 80 | spatial_op: integrate 81 | integral_unit_conv: (12 g)/(mol) # convert from mol to g C 82 | integral_display_units: Pg / yr 83 | display_units: mol / m^2 / yr 84 | map_vmin: 1.0e-2 85 | map_vmax: 15.0 86 | apply_log10: True 87 | 88 | - varname: diazChl 89 | isel_dict: 90 | z_t_150m: 0 91 | spatial_op: average 92 | map_vmin: 1.0e-4 93 | map_vmax: 1.0e-2 94 | apply_log10: True 95 | 96 | - varname: photoC_diaz_zint 97 | spatial_op: integrate 98 | integral_unit_conv: (12 g)/(mol) # convert from mol to g C 99 | integral_display_units: Pg / yr 100 | display_units: mol / 
m^2 / yr 101 | map_vmin: 1.0e-2 102 | map_vmax: 1.0 103 | apply_log10: True 104 | 105 | - varname: NHx_SURFACE_EMIS 106 | spatial_op: integrate 107 | integral_unit_conv: (14 g)/(mol) # convert from mol to g N 108 | integral_display_units: Tg yr^-1 109 | display_units: mol / m^2 / yr 110 | map_vmin: 1.0e-5 111 | map_vmax: 0.01 112 | apply_log10: True 113 | 114 | - varname: NH4 115 | isel_dict: 116 | z_t: 0 117 | spatial_op: average 118 | map_vmin: 1.0e-3 119 | map_vmax: 3.0 120 | apply_log10: True 121 | 122 | - varname: O2_ZMIN 123 | spatial_op: average 124 | map_vmin: -5.0 125 | map_vmax: 50.0 126 | 127 | - varname: O2_ZMIN_DEPTH 128 | spatial_op: average 129 | display_units: m 130 | map_vmin: 0.0 131 | map_vmax: 1000.0 132 | 133 | - varname: O2 134 | isel_dict: 135 | z_t: 28 136 | spatial_op: average 137 | map_vmin: 1.0 138 | map_vmax: 300.0 139 | apply_log10: True 140 | 141 | - varname: PO4 142 | isel_dict: 143 | z_t: 0 144 | spatial_op: average 145 | map_vmin: 1.0e-2 146 | map_vmax: 2.2 147 | apply_log10: True 148 | 149 | - varname: PO4 150 | isel_dict: 151 | z_t: 28 152 | spatial_op: average 153 | map_vmin: 0.5 154 | map_vmax: 3.5 155 | 156 | - varname: NO3 157 | isel_dict: 158 | z_t: 0 159 | spatial_op: average 160 | map_vmin: 1.0e-2 161 | map_vmax: 35.0 162 | apply_log10: True 163 | 164 | - varname: NO3 165 | isel_dict: 166 | z_t: 28 167 | spatial_op: average 168 | map_vmin: 10.0 169 | map_vmax: 35.0 170 | 171 | - varname: SiO3 172 | isel_dict: 173 | z_t: 0 174 | spatial_op: average 175 | map_vmin: 1.0 176 | map_vmax: 75.0 177 | apply_log10: True 178 | 179 | - varname: SiO3 180 | isel_dict: 181 | z_t: 28 182 | spatial_op: average 183 | map_vmin: 0.0 184 | map_vmax: 100.0 185 | -------------------------------------------------------------------------------- /notebooks/compare_ts_and_hist_003.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "\n", 11 | "import utils" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stdout", 21 | "output_type": "stream", 22 | "text": [ 23 | "Checking year 0001...\n", 24 | "... checking stream pop.h.nyear1 ...\n", 25 | "Skipping stream pop.h.nyear1 for year 0001 because there are no history files\n", 26 | "... checking stream pop.h.nday1 ...\n", 27 | "Skipping stream pop.h.nday1 for year 0001 because there are no history files\n", 28 | "... checking stream pop.h ...\n", 29 | "Skipping stream pop.h for year 0001 because there are no history files\n", 30 | "... checking stream cice.h ...\n", 31 | "Skipping stream cice.h for year 0001 because there are no history files\n", 32 | "Could not find time series for all variables in year 0001\n", 33 | "----\n", 34 | "Checking year 0002...\n", 35 | "... checking stream pop.h.nyear1 ...\n", 36 | "Skipping stream pop.h.nyear1 for year 0002 because there are no history files\n", 37 | "... checking stream pop.h.nday1 ...\n", 38 | "Skipping stream pop.h.nday1 for year 0002 because there are no history files\n", 39 | "... checking stream pop.h ...\n", 40 | "Skipping stream pop.h for year 0002 because there are no history files\n", 41 | "... checking stream cice.h1 ...\n", 42 | "Skipping stream cice.h1 for year 0002 because there are no history files\n", 43 | "... 
checking stream cice.h ...\n", 44 | "Skipping stream cice.h for year 0002 because there are no history files\n", 45 | "Could not find time series for all variables in year 0002\n", 46 | "----\n", 47 | "Checking year 0003...\n", 48 | "... checking stream pop.h.nyear1 ...\n", 49 | "Skipping stream pop.h.nyear1 for year 0003 because there are no history files\n", 50 | "... checking stream pop.h.nday1 ...\n", 51 | "Skipping stream pop.h.nday1 for year 0003 because there are no history files\n", 52 | "... checking stream pop.h ...\n", 53 | "Skipping stream pop.h for year 0003 because there are no history files\n", 54 | "... checking stream cice.h1 ...\n", 55 | "Skipping stream cice.h1 for year 0003 because there are no history files\n", 56 | "... checking stream cice.h ...\n", 57 | "Skipping stream cice.h for year 0003 because there are no history files\n", 58 | "Could not find time series for all variables in year 0003\n", 59 | "----\n", 60 | "Checking year 0004...\n", 61 | "... checking stream pop.h.nyear1 ...\n", 62 | "Skipping stream pop.h.nyear1 for year 0004 because there are no history files\n", 63 | "... checking stream pop.h.nday1 ...\n", 64 | "Skipping stream pop.h.nday1 for year 0004 because there are no history files\n", 65 | "... checking stream pop.h ...\n", 66 | "Skipping stream pop.h for year 0004 because there are no history files\n", 67 | "... checking stream cice.h1 ...\n", 68 | "Skipping stream cice.h1 for year 0004 because there are no history files\n", 69 | "... checking stream cice.h ...\n", 70 | "Skipping stream cice.h for year 0004 because there are no history files\n", 71 | "Could not find time series for all variables in year 0004\n", 72 | "----\n", 73 | "Checking year 0005...\n", 74 | "... checking stream pop.h.nyear1 ...\n", 75 | "Could not find time series for year 0005\n", 76 | "CPU times: user 207 ms, sys: 86.5 ms, total: 294 ms\n", 77 | "Wall time: 1.39 s\n" 78 | ] 79 | } 80 | ], 81 | "source": [ 82 | "%%time\n", 83 | "\n", 84 | "casename = \"g.e22.G1850ECO_JRA_HR.TL319_t13.003\"\n", 85 | "\n", 86 | "# Directories to search for netCDF files\n", 87 | "caseroot = os.path.join(os.sep, \"glade\", \"work\", \"mlevy\", \"hi-res_BGC_JRA\", \"cases\")\n", 88 | "campaign_root = os.path.join(os.sep, \"glade\", \"campaign\", \"cesm\", \"development\", \"bgcwg\", \"projects\", \"hi-res_JRA\", \"cases\")\n", 89 | "output_roots = [os.path.join(campaign_root, casename, \"output\")]\n", 90 | "output_roots += utils.gen_output_roots_from_caseroot(os.path.join(caseroot, casename))\n", 91 | "\n", 92 | "utils.timeseries_and_history_comparison(casename, output_roots)" 93 | ] 94 | } 95 | ], 96 | "metadata": { 97 | "kernelspec": { 98 | "display_name": "Python [conda env:hires-marbl]", 99 | "language": "python", 100 | "name": "conda-env-hires-marbl-py" 101 | }, 102 | "language_info": { 103 | "codemirror_mode": { 104 | "name": "ipython", 105 | "version": 3 106 | }, 107 | "file_extension": ".py", 108 | "mimetype": "text/x-python", 109 | "name": "python", 110 | "nbconvert_exporter": "python", 111 | "pygments_lexer": "ipython3", 112 | "version": "3.7.8" 113 | } 114 | }, 115 | "nbformat": 4, 116 | "nbformat_minor": 4 117 | } 118 | -------------------------------------------------------------------------------- /notebooks/plot_suite_maps_0001_003.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "execution": {} 8 | }, 9 | "outputs": [], 10 | "source": [ 11 
| "import os\n", 12 | "import yaml\n", 13 | "\n", 14 | "import utils\n", 15 | "\n", 16 | "%matplotlib inline" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "casename = \"g.e22.G1850ECO_JRA_HR.TL319_t13.003\"\n", 26 | "year = 1\n", 27 | "campaign_root = os.path.join(\n", 28 | " os.sep,\n", 29 | " \"glade\",\n", 30 | " \"campaign\",\n", 31 | " \"cesm\",\n", 32 | " \"development\",\n", 33 | " \"bgcwg\",\n", 34 | " \"projects\",\n", 35 | " \"hi-res_JRA\",\n", 36 | " \"cases\",\n", 37 | ")\n", 38 | "\n", 39 | "# Set up CaseClass object\n", 40 | "case = utils.CaseClass(\n", 41 | " casename, os.path.join(campaign_root, casename, \"output\")\n", 42 | ")" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": { 49 | "execution": {} 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "with open(\"diag_metadata.yaml\", mode=\"r\") as fptr:\n", 54 | " diag_metadata_list = yaml.safe_load(fptr)\n", 55 | "\n", 56 | "# varnames = utils.get_varnames_from_metadata_list(diag_metadata_list)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 4, 62 | "metadata": { 63 | "execution": {} 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "def summary_plots(ds, diag_metadata, save_pngs=False):\n", 68 | " varname = diag_metadata[\"varname\"]\n", 69 | " print(varname)\n", 70 | " da = ds[varname].isel(diag_metadata.get(\"isel_dict\"))\n", 71 | "\n", 72 | " utils.summary_plot_maps(\n", 73 | " ds,\n", 74 | " da,\n", 75 | " diag_metadata,\n", 76 | " save_pngs=save_pngs,\n", 77 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 78 | " )" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 5, 84 | "metadata": { 85 | "execution": {} 86 | }, 87 | "outputs": [ 88 | { 89 | "name": "stdout", 90 | "output_type": "stream", 91 | "text": [ 92 | "Datasets contain a total of 12 time samples\n", 93 | "Last average written at 0002-01-01 00:00:00\n", 94 | "POC_FLUX_100m\n", 95 | "Datasets contain a total of 12 time samples\n", 96 | "Last average written at 0002-01-01 00:00:00\n", 97 | "CaCO3_FLUX_100m\n", 98 | "Datasets contain a total of 12 time samples\n", 99 | "Last average written at 0002-01-01 00:00:00\n", 100 | "FG_CO2\n", 101 | "Datasets contain a total of 12 time samples\n", 102 | "Last average written at 0002-01-01 00:00:00\n", 103 | "DpCO2\n", 104 | "Datasets contain a total of 12 time samples\n", 105 | "Last average written at 0002-01-01 00:00:00\n", 106 | "PH\n", 107 | "Datasets contain a total of 12 time samples\n", 108 | "Last average written at 0002-01-01 00:00:00\n", 109 | "spChl\n", 110 | "Datasets contain a total of 12 time samples\n", 111 | "Last average written at 0002-01-01 00:00:00\n", 112 | "photoC_sp_zint\n", 113 | "Datasets contain a total of 12 time samples\n", 114 | "Last average written at 0002-01-01 00:00:00\n", 115 | "coccoChl\n", 116 | "Datasets contain a total of 12 time samples\n", 117 | "Last average written at 0002-01-01 00:00:00\n", 118 | "photoC_cocco_zint\n", 119 | "Datasets contain a total of 12 time samples\n", 120 | "Last average written at 0002-01-01 00:00:00\n", 121 | "diatChl\n", 122 | "Datasets contain a total of 12 time samples\n", 123 | "Last average written at 0002-01-01 00:00:00\n", 124 | "photoC_diat_zint\n", 125 | "Datasets contain a total of 12 time samples\n", 126 | "Last average written at 0002-01-01 00:00:00\n", 127 | "diazChl\n", 128 | "Datasets contain a total of 12 time samples\n", 129 | 
"Last average written at 0002-01-01 00:00:00\n", 130 | "photoC_diaz_zint\n", 131 | "Datasets contain a total of 12 time samples\n", 132 | "Last average written at 0002-01-01 00:00:00\n", 133 | "NHx_SURFACE_EMIS\n", 134 | "Datasets contain a total of 12 time samples\n", 135 | "Last average written at 0002-01-01 00:00:00\n", 136 | "NH4\n", 137 | "Datasets contain a total of 12 time samples\n", 138 | "Last average written at 0002-01-01 00:00:00\n", 139 | "O2_ZMIN\n", 140 | "Datasets contain a total of 12 time samples\n", 141 | "Last average written at 0002-01-01 00:00:00\n", 142 | "O2_ZMIN_DEPTH\n", 143 | "Datasets contain a total of 12 time samples\n", 144 | "Last average written at 0002-01-01 00:00:00\n", 145 | "O2\n", 146 | "Datasets contain a total of 12 time samples\n", 147 | "Last average written at 0002-01-01 00:00:00\n", 148 | "PO4\n", 149 | "Datasets contain a total of 12 time samples\n", 150 | "Last average written at 0002-01-01 00:00:00\n", 151 | "PO4\n", 152 | "Datasets contain a total of 12 time samples\n", 153 | "Last average written at 0002-01-01 00:00:00\n", 154 | "NO3\n", 155 | "Datasets contain a total of 12 time samples\n", 156 | "Last average written at 0002-01-01 00:00:00\n", 157 | "NO3\n", 158 | "Datasets contain a total of 12 time samples\n", 159 | "Last average written at 0002-01-01 00:00:00\n", 160 | "SiO3\n", 161 | "Datasets contain a total of 12 time samples\n", 162 | "Last average written at 0002-01-01 00:00:00\n", 163 | "SiO3\n" 164 | ] 165 | } 166 | ], 167 | "source": [ 168 | "stream = \"pop.h\"\n", 169 | "# ds = case.gen_dataset(varnames, stream, start_year=year, end_year=year)\n", 170 | "for diag_metadata in diag_metadata_list:\n", 171 | " ds = case.gen_dataset(\n", 172 | " diag_metadata[\"varname\"], stream, start_year=year, end_year=year\n", 173 | " )\n", 174 | " summary_plots(ds, diag_metadata, save_pngs=True)" 175 | ] 176 | } 177 | ], 178 | "metadata": { 179 | "kernelspec": { 180 | "display_name": "Python [conda env:hires-marbl]", 181 | "language": "python", 182 | "name": "conda-env-hires-marbl-py" 183 | }, 184 | "language_info": { 185 | "codemirror_mode": { 186 | "name": "ipython", 187 | "version": 3 188 | }, 189 | "file_extension": ".py", 190 | "mimetype": "text/x-python", 191 | "name": "python", 192 | "nbconvert_exporter": "python", 193 | "pygments_lexer": "ipython3", 194 | "version": "3.7.8" 195 | } 196 | }, 197 | "nbformat": 4, 198 | "nbformat_minor": 4 199 | } 200 | -------------------------------------------------------------------------------- /notebooks/plot_suite_maps_0001_004.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "execution": {} 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import os\n", 12 | "import yaml\n", 13 | "\n", 14 | "import utils\n", 15 | "\n", 16 | "%matplotlib inline" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "casename = \"g.e22.G1850ECO_JRA_HR.TL319_t13.004\"\n", 26 | "year = 1\n", 27 | "campaign_root = os.path.join(\n", 28 | " os.sep,\n", 29 | " \"glade\",\n", 30 | " \"campaign\",\n", 31 | " \"cesm\",\n", 32 | " \"development\",\n", 33 | " \"bgcwg\",\n", 34 | " \"projects\",\n", 35 | " \"hi-res_JRA\",\n", 36 | " \"cases\",\n", 37 | ")\n", 38 | "\n", 39 | "# Set up CaseClass object\n", 40 | "case = utils.CaseClass(\n", 41 | " casename, os.path.join(campaign_root, casename, \"output\")\n", 
42 | ")" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": { 49 | "execution": {} 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "with open(\"diag_metadata.yaml\", mode=\"r\") as fptr:\n", 54 | " diag_metadata_list = yaml.safe_load(fptr)\n", 55 | "\n", 56 | "# varnames = utils.get_varnames_from_metadata_list(diag_metadata_list)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 4, 62 | "metadata": { 63 | "execution": {} 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "def summary_plots(ds, diag_metadata, save_pngs=False):\n", 68 | " varname = diag_metadata[\"varname\"]\n", 69 | " print(varname)\n", 70 | " da = ds[varname].isel(diag_metadata.get(\"isel_dict\"))\n", 71 | "\n", 72 | " utils.summary_plot_maps(\n", 73 | " ds,\n", 74 | " da,\n", 75 | " diag_metadata,\n", 76 | " save_pngs=save_pngs,\n", 77 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 78 | " )" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 5, 84 | "metadata": { 85 | "execution": {} 86 | }, 87 | "outputs": [ 88 | { 89 | "name": "stdout", 90 | "output_type": "stream", 91 | "text": [ 92 | "Datasets contain a total of 12 time samples\n", 93 | "Last average written at 0002-01-01 00:00:00\n", 94 | "POC_FLUX_100m\n", 95 | "Datasets contain a total of 12 time samples\n", 96 | "Last average written at 0002-01-01 00:00:00\n", 97 | "CaCO3_FLUX_100m\n", 98 | "Datasets contain a total of 12 time samples\n", 99 | "Last average written at 0002-01-01 00:00:00\n", 100 | "FG_CO2\n", 101 | "Datasets contain a total of 12 time samples\n", 102 | "Last average written at 0002-01-01 00:00:00\n", 103 | "DpCO2\n", 104 | "Datasets contain a total of 12 time samples\n", 105 | "Last average written at 0002-01-01 00:00:00\n", 106 | "PH\n", 107 | "Datasets contain a total of 12 time samples\n", 108 | "Last average written at 0002-01-01 00:00:00\n", 109 | "spChl\n", 110 | "Datasets contain a total of 12 time samples\n", 111 | "Last average written at 0002-01-01 00:00:00\n", 112 | "photoC_sp_zint\n", 113 | "Datasets contain a total of 12 time samples\n", 114 | "Last average written at 0002-01-01 00:00:00\n", 115 | "coccoChl\n", 116 | "Datasets contain a total of 12 time samples\n", 117 | "Last average written at 0002-01-01 00:00:00\n", 118 | "photoC_cocco_zint\n", 119 | "Datasets contain a total of 12 time samples\n", 120 | "Last average written at 0002-01-01 00:00:00\n", 121 | "diatChl\n", 122 | "Datasets contain a total of 12 time samples\n", 123 | "Last average written at 0002-01-01 00:00:00\n", 124 | "photoC_diat_zint\n", 125 | "Datasets contain a total of 12 time samples\n", 126 | "Last average written at 0002-01-01 00:00:00\n", 127 | "diazChl\n", 128 | "Datasets contain a total of 12 time samples\n", 129 | "Last average written at 0002-01-01 00:00:00\n", 130 | "photoC_diaz_zint\n", 131 | "Datasets contain a total of 12 time samples\n", 132 | "Last average written at 0002-01-01 00:00:00\n", 133 | "NHx_SURFACE_EMIS\n", 134 | "Datasets contain a total of 12 time samples\n", 135 | "Last average written at 0002-01-01 00:00:00\n", 136 | "NH4\n", 137 | "Datasets contain a total of 12 time samples\n", 138 | "Last average written at 0002-01-01 00:00:00\n", 139 | "O2_ZMIN\n", 140 | "Datasets contain a total of 12 time samples\n", 141 | "Last average written at 0002-01-01 00:00:00\n", 142 | "O2_ZMIN_DEPTH\n", 143 | "Datasets contain a total of 12 time samples\n", 144 | "Last average written at 0002-01-01 00:00:00\n", 145 | "O2\n", 146 | 
"Datasets contain a total of 12 time samples\n", 147 | "Last average written at 0002-01-01 00:00:00\n", 148 | "PO4\n", 149 | "Datasets contain a total of 12 time samples\n", 150 | "Last average written at 0002-01-01 00:00:00\n", 151 | "PO4\n", 152 | "Datasets contain a total of 12 time samples\n", 153 | "Last average written at 0002-01-01 00:00:00\n", 154 | "NO3\n", 155 | "Datasets contain a total of 12 time samples\n", 156 | "Last average written at 0002-01-01 00:00:00\n", 157 | "NO3\n", 158 | "Datasets contain a total of 12 time samples\n", 159 | "Last average written at 0002-01-01 00:00:00\n", 160 | "SiO3\n", 161 | "Datasets contain a total of 12 time samples\n", 162 | "Last average written at 0002-01-01 00:00:00\n", 163 | "SiO3\n" 164 | ] 165 | } 166 | ], 167 | "source": [ 168 | "stream = \"pop.h\"\n", 169 | "# ds = case.gen_dataset(varnames, stream, start_year=year, end_year=year)\n", 170 | "for diag_metadata in diag_metadata_list:\n", 171 | " ds = case.gen_dataset(\n", 172 | " diag_metadata[\"varname\"], stream, start_year=year, end_year=year\n", 173 | " )\n", 174 | " summary_plots(ds, diag_metadata, save_pngs=True)" 175 | ] 176 | } 177 | ], 178 | "metadata": { 179 | "kernelspec": { 180 | "display_name": "Python [conda env:hires-marbl]", 181 | "language": "python", 182 | "name": "conda-env-hires-marbl-py" 183 | }, 184 | "language_info": { 185 | "codemirror_mode": { 186 | "name": "ipython", 187 | "version": 3 188 | }, 189 | "file_extension": ".py", 190 | "mimetype": "text/x-python", 191 | "name": "python", 192 | "nbconvert_exporter": "python", 193 | "pygments_lexer": "ipython3", 194 | "version": "3.7.8" 195 | } 196 | }, 197 | "nbformat": 4, 198 | "nbformat_minor": 4 199 | } 200 | -------------------------------------------------------------------------------- /notebooks/trend_maps.003.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import glob\n", 10 | "import os\n", 11 | "\n", 12 | "import dask.distributed\n", 13 | "import matplotlib.pyplot as plt\n", 14 | "import ncar_jobqueue\n", 15 | "import xarray as xr\n", 16 | "\n", 17 | "import utils\n", 18 | "from utils.utils import time_set_mid\n", 19 | "\n", 20 | "%matplotlib inline\n", 21 | "%load_ext autoreload\n", 22 | "%autoreload 2\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "casename = \"g.e22.G1850ECO_JRA_HR.TL319_t13.003\"\n", 32 | "campaign_root = os.path.join(\n", 33 | " os.sep,\n", 34 | " \"glade\",\n", 35 | " \"campaign\",\n", 36 | " \"cesm\",\n", 37 | " \"development\",\n", 38 | " \"bgcwg\",\n", 39 | " \"projects\",\n", 40 | " \"hi-res_JRA\",\n", 41 | " \"cases\",\n", 42 | ")\n", 43 | "\n", 44 | "# Set up CaseClass object\n", 45 | "case = utils.CaseClass(\n", 46 | " casename, os.path.join(campaign_root, casename, \"output\")\n", 47 | ")" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 3, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "dashboard_link=https://jupyterhub.hpc.ucar.edu/stable/user/mlevy/proxy/8787/status\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "cluster = ncar_jobqueue.NCARCluster(\n", 65 | " cores=2, memory=\"64 GB\", processes=2, walltime=\"6:00:00\"\n", 66 | ")\n", 67 | "cluster.scale(n=8) # n = number of workers\n", 68 | 
"print(f\"dashboard_link={cluster.dashboard_link}\")\n", 69 | "client = dask.distributed.Client(cluster)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "Datasets contain a total of 36 time samples\n", 82 | "Last average written at 0005-01-01 00:00:00\n" 83 | ] 84 | } 85 | ], 86 | "source": [ 87 | "stream = \"pop.h\"\n", 88 | "varnames = [\"PO4\", \"NO3\", \"SiO3\", \"O2\", \"DIC\", \"ALK\"]\n", 89 | "ds_4d = case.gen_dataset(\n", 90 | " varnames,\n", 91 | " stream,\n", 92 | " start_year=2,\n", 93 | " end_year=4,\n", 94 | ")\n", 95 | "\n", 96 | "ds_3d = ds_4d.isel(z_t=28).chunk({\"time\": 36, \"nlat\": 300, \"nlon\": 900})" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 5, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "za_dir = f\"/glade/campaign/cesm/development/bgcwg/projects/hi-res_JRA/cases/{casename}/output/ocn/proc/za\"\n", 106 | "ds_list = []\n", 107 | "for var in varnames:\n", 108 | " filename_pattern = f\"{za_dir}/za_{casename}.pop.h.{var}.??????-??????.nc\"\n", 109 | " za_filenames = glob.glob(filename_pattern)\n", 110 | " za_filenames.sort()\n", 111 | " ds_tmp = xr.open_mfdataset(\n", 112 | " za_filenames,\n", 113 | " data_vars=\"minimal\",\n", 114 | " compat=\"override\",\n", 115 | " coords=\"minimal\",\n", 116 | " ).chunk({\"basins\": 1, \"time\": 36, \"z_t\": 62, \"lat_t\": 2400})\n", 117 | " ds_list.append(\n", 118 | " time_set_mid(ds_tmp, \"time\").assign_coords(\n", 119 | " {\"basins\": [\"Global\", \"Pacific\", \"Indian\", \"Atlantic\"]}\n", 120 | " )\n", 121 | " )\n", 122 | "ds_za = xr.merge(ds_list, compat=\"override\", join=\"left\")\n", 123 | "ds_za.attrs = ds_3d.attrs" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 6, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "def trend_plots(varname, map_vminmax=None, za_vminmax=None, save_pngs=False):\n", 133 | " da = ds_3d[varname]\n", 134 | " utils.trend_plot(\n", 135 | " ds_3d,\n", 136 | " da,\n", 137 | " vmin=-map_vminmax,\n", 138 | " vmax=map_vminmax,\n", 139 | " save_pngs=save_pngs,\n", 140 | " isel_dict={\"z_t\": 0},\n", 141 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 142 | " )\n", 143 | "\n", 144 | " for basin_ind in range(ds_za.dims[\"basins\"]):\n", 145 | " da = ds_za[varname].isel(basins=basin_ind)\n", 146 | " utils.trend_plot(\n", 147 | " ds_za,\n", 148 | " da,\n", 149 | " vmin=-za_vminmax,\n", 150 | " vmax=za_vminmax,\n", 151 | " invert_yaxis=True,\n", 152 | " save_pngs=save_pngs,\n", 153 | " isel_dict={\"basins\": 0},\n", 154 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 155 | " )" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 7, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "trend_plots(\"PO4\", map_vminmax=0.1, za_vminmax=0.05, save_pngs=True)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 8, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "trend_plots(\"NO3\", map_vminmax=1.0, za_vminmax=0.5, save_pngs=True)" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 9, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "trend_plots(\"SiO3\", map_vminmax=5.0, za_vminmax=2.0, save_pngs=True)" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 10, 188 | 
"metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "trend_plots(\"O2\", map_vminmax=5.0, za_vminmax=2.0, save_pngs=True)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 11, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "trend_plots(\"DIC\", map_vminmax=10.0, za_vminmax=5.0, save_pngs=True)" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 12, 206 | "metadata": {}, 207 | "outputs": [], 208 | "source": [ 209 | "trend_plots(\"ALK\", map_vminmax=10.0, za_vminmax=5.0, save_pngs=True)" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 13, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "client.close()\n", 219 | "cluster.close()" 220 | ] 221 | } 222 | ], 223 | "metadata": { 224 | "kernelspec": { 225 | "display_name": "Python [conda env:miniconda3-hires-marbl]", 226 | "language": "python", 227 | "name": "conda-env-miniconda3-hires-marbl-py" 228 | }, 229 | "language_info": { 230 | "codemirror_mode": { 231 | "name": "ipython", 232 | "version": 3 233 | }, 234 | "file_extension": ".py", 235 | "mimetype": "text/x-python", 236 | "name": "python", 237 | "nbconvert_exporter": "python", 238 | "pygments_lexer": "ipython3", 239 | "version": "3.7.8" 240 | } 241 | }, 242 | "nbformat": 4, 243 | "nbformat_minor": 4 244 | } 245 | -------------------------------------------------------------------------------- /notebooks/trend_maps.004.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import glob\n", 11 | "\n", 12 | "import dask.distributed\n", 13 | "import matplotlib.pyplot as plt\n", 14 | "import ncar_jobqueue\n", 15 | "import xarray as xr\n", 16 | "\n", 17 | "import utils\n", 18 | "from utils.utils import time_set_mid\n", 19 | "\n", 20 | "%matplotlib inline\n", 21 | "%load_ext autoreload\n", 22 | "%autoreload 2\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "casename = \"g.e22.G1850ECO_JRA_HR.TL319_t13.004\"\n", 32 | "campaign_root = os.path.join(\n", 33 | " os.sep,\n", 34 | " \"glade\",\n", 35 | " \"campaign\",\n", 36 | " \"cesm\",\n", 37 | " \"development\",\n", 38 | " \"bgcwg\",\n", 39 | " \"projects\",\n", 40 | " \"hi-res_JRA\",\n", 41 | " \"cases\",\n", 42 | ")\n", 43 | "\n", 44 | "# Set up CaseClass object\n", 45 | "case = utils.CaseClass(\n", 46 | " casename, os.path.join(campaign_root, casename, \"output\")\n", 47 | ")" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 3, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "dashboard_link=https://jupyterhub.hpc.ucar.edu/stable/user/mlevy/proxy/8787/status\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "cluster = ncar_jobqueue.NCARCluster(\n", 65 | " cores=2, memory=\"64 GB\", processes=2, walltime=\"6:00:00\"\n", 66 | ")\n", 67 | "cluster.scale(n=8) # n = number of workers\n", 68 | "print(f\"dashboard_link={cluster.dashboard_link}\")\n", 69 | "client = dask.distributed.Client(cluster)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "Datasets contain a total of 36 time samples\n", 82 | "Last average 
written at 0005-01-01 00:00:00\n" 83 | ] 84 | } 85 | ], 86 | "source": [ 87 | "stream = \"pop.h\"\n", 88 | "varnames = [\"PO4\", \"NO3\", \"SiO3\", \"O2\", \"DIC\", \"ALK\"]\n", 89 | "ds_4d = case.gen_dataset(\n", 90 | " varnames,\n", 91 | " stream,\n", 92 | " start_year=2,\n", 93 | " end_year=4,\n", 94 | ")\n", 95 | "\n", 96 | "ds_3d = ds_4d.isel(z_t=28).chunk({\"time\": 36, \"nlat\": 300, \"nlon\": 900})" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 5, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "za_dir = f\"/glade/campaign/cesm/development/bgcwg/projects/hi-res_JRA/cases/{casename}/output/ocn/proc/za\"\n", 106 | "ds_list = []\n", 107 | "for var in varnames:\n", 108 | " filename_pattern = f\"{za_dir}/za_{casename}.pop.h.{var}.??????-??????.nc\"\n", 109 | " za_filenames = glob.glob(filename_pattern)\n", 110 | " za_filenames.sort()\n", 111 | " ds_tmp = xr.open_mfdataset(\n", 112 | " za_filenames,\n", 113 | " data_vars=\"minimal\",\n", 114 | " compat=\"override\",\n", 115 | " coords=\"minimal\",\n", 116 | " ).chunk({\"basins\": 1, \"time\": 36, \"z_t\": 62, \"lat_t\": 2400})\n", 117 | " ds_list.append(\n", 118 | " time_set_mid(ds_tmp, \"time\").assign_coords(\n", 119 | " {\"basins\": [\"Global\", \"Pacific\", \"Indian\", \"Atlantic\"]}\n", 120 | " )\n", 121 | " )\n", 122 | "ds_za = xr.merge(ds_list, compat=\"override\", join=\"left\")\n", 123 | "ds_za.attrs = ds_3d.attrs" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 6, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "def trend_plots(varname, map_vminmax=None, za_vminmax=None, save_pngs=False):\n", 133 | " # da = ds_4d[varname][:, 28, :, :].chunk({\"time\": 36, \"nlat\": 300, \"nlon\": 900})\n", 134 | " da = ds_3d[varname]\n", 135 | " utils.trend_plot(\n", 136 | " ds_3d,\n", 137 | " da,\n", 138 | " vmin=-map_vminmax,\n", 139 | " vmax=map_vminmax,\n", 140 | " save_pngs=save_pngs,\n", 141 | " isel_dict={\"z_t\": 0},\n", 142 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 143 | " )\n", 144 | "\n", 145 | " for basin_ind in range(ds_za.dims[\"basins\"]):\n", 146 | " da = ds_za[varname].isel(basins=basin_ind)\n", 147 | " da = da\n", 148 | " utils.trend_plot(\n", 149 | " ds_za,\n", 150 | " da,\n", 151 | " vmin=-za_vminmax,\n", 152 | " vmax=za_vminmax,\n", 153 | " invert_yaxis=True,\n", 154 | " save_pngs=save_pngs,\n", 155 | " isel_dict={\"basins\": 0},\n", 156 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 157 | " )" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 7, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "trend_plots(\"PO4\", map_vminmax=0.1, za_vminmax=0.05, save_pngs=True)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 8, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "trend_plots(\"NO3\", map_vminmax=1.0, za_vminmax=0.5, save_pngs=True)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 9, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "trend_plots(\"SiO3\", map_vminmax=5.0, za_vminmax=2.0, save_pngs=True)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 10, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "trend_plots(\"O2\", map_vminmax=5.0, za_vminmax=2.0, save_pngs=True)" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 11, 199 | "metadata": {}, 200 | "outputs": 
[], 201 | "source": [ 202 | "trend_plots(\"DIC\", map_vminmax=10.0, za_vminmax=5.0, save_pngs=True)" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 12, 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "trend_plots(\"ALK\", map_vminmax=10.0, za_vminmax=5.0, save_pngs=True)" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 13, 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [ 220 | "client.close()\n", 221 | "cluster.close()" 222 | ] 223 | } 224 | ], 225 | "metadata": { 226 | "kernelspec": { 227 | "display_name": "Python [conda env:miniconda3-hires-marbl]", 228 | "language": "python", 229 | "name": "conda-env-miniconda3-hires-marbl-py" 230 | }, 231 | "language_info": { 232 | "codemirror_mode": { 233 | "name": "ipython", 234 | "version": 3 235 | }, 236 | "file_extension": ".py", 237 | "mimetype": "text/x-python", 238 | "name": "python", 239 | "nbconvert_exporter": "python", 240 | "pygments_lexer": "ipython3", 241 | "version": "3.7.8" 242 | } 243 | }, 244 | "nbformat": 4, 245 | "nbformat_minor": 4 246 | } 247 | -------------------------------------------------------------------------------- /notebooks/plot_suite_003.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import warnings\n", 11 | "\n", 12 | "import dask\n", 13 | "import ncar_jobqueue\n", 14 | "import yaml\n", 15 | "\n", 16 | "import utils\n", 17 | "\n", 18 | "%matplotlib inline" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "with open(\"diag_metadata.yaml\", mode=\"r\") as fptr:\n", 28 | " diag_metadata_list = yaml.safe_load(fptr)\n", 29 | "\n", 30 | "# varnames = utils.get_varnames_from_metadata_list(diag_metadata_list)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "def summary_plots(ds, diag_metadata, save_pngs=False):\n", 40 | " varname = diag_metadata[\"varname\"]\n", 41 | " print(varname)\n", 42 | " da = ds[varname].isel(diag_metadata.get(\"isel_dict\"))\n", 43 | "\n", 44 | " utils.summary_plot_global_ts(\n", 45 | " ds,\n", 46 | " da,\n", 47 | " diag_metadata,\n", 48 | " time_coarsen_len=12,\n", 49 | " save_pngs=save_pngs,\n", 50 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 51 | " )\n", 52 | "\n", 53 | " utils.summary_plot_histogram(\n", 54 | " ds,\n", 55 | " da,\n", 56 | " diag_metadata,\n", 57 | " save_pngs=save_pngs,\n", 58 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 59 | " )" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 4, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "name": "stdout", 69 | "output_type": "stream", 70 | "text": [ 71 | "dashboard_link=https://jupyterhub.hpc.ucar.edu/stable/user/mlevy/proxy/8787/status\n" 72 | ] 73 | } 74 | ], 75 | "source": [ 76 | "cluster = ncar_jobqueue.NCARCluster(\n", 77 | " cores=2, memory=\"64 GB\", processes=2, walltime=\"6:00:00\"\n", 78 | ")\n", 79 | "cluster.scale(n=8) # n = number of workers\n", 80 | "print(f\"dashboard_link={cluster.dashboard_link}\")" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 5, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "casename = 
\"g.e22.G1850ECO_JRA_HR.TL319_t13.003\"\n", 90 | "campaign_root = os.path.join(\n", 91 | " os.sep,\n", 92 | " \"glade\",\n", 93 | " \"campaign\",\n", 94 | " \"cesm\",\n", 95 | " \"development\",\n", 96 | " \"bgcwg\",\n", 97 | " \"projects\",\n", 98 | " \"hi-res_JRA\",\n", 99 | " \"cases\",\n", 100 | ")\n", 101 | "\n", 102 | "# Set up CaseClass object\n", 103 | "case = utils.CaseClass(\n", 104 | " casename, os.path.join(campaign_root, casename, \"output\")\n", 105 | ")" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 6, 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "name": "stdout", 115 | "output_type": "stream", 116 | "text": [ 117 | "Datasets contain a total of 48 time samples\n", 118 | "Last average written at 0005-01-01 00:00:00\n", 119 | "POC_FLUX_100m\n", 120 | "Datasets contain a total of 48 time samples\n", 121 | "Last average written at 0005-01-01 00:00:00\n", 122 | "CaCO3_FLUX_100m\n", 123 | "Datasets contain a total of 48 time samples\n", 124 | "Last average written at 0005-01-01 00:00:00\n", 125 | "FG_CO2\n", 126 | "Datasets contain a total of 48 time samples\n", 127 | "Last average written at 0005-01-01 00:00:00\n", 128 | "DpCO2\n", 129 | "Datasets contain a total of 48 time samples\n", 130 | "Last average written at 0005-01-01 00:00:00\n", 131 | "PH\n", 132 | "Datasets contain a total of 48 time samples\n", 133 | "Last average written at 0005-01-01 00:00:00\n", 134 | "spChl\n", 135 | "Datasets contain a total of 48 time samples\n", 136 | "Last average written at 0005-01-01 00:00:00\n", 137 | "photoC_sp_zint\n", 138 | "Datasets contain a total of 48 time samples\n", 139 | "Last average written at 0005-01-01 00:00:00\n", 140 | "coccoChl\n", 141 | "Datasets contain a total of 48 time samples\n", 142 | "Last average written at 0005-01-01 00:00:00\n", 143 | "photoC_cocco_zint\n", 144 | "Datasets contain a total of 48 time samples\n", 145 | "Last average written at 0005-01-01 00:00:00\n", 146 | "diatChl\n", 147 | "Datasets contain a total of 48 time samples\n", 148 | "Last average written at 0005-01-01 00:00:00\n", 149 | "photoC_diat_zint\n", 150 | "Datasets contain a total of 48 time samples\n", 151 | "Last average written at 0005-01-01 00:00:00\n", 152 | "diazChl\n", 153 | "Datasets contain a total of 48 time samples\n", 154 | "Last average written at 0005-01-01 00:00:00\n", 155 | "photoC_diaz_zint\n", 156 | "Datasets contain a total of 48 time samples\n", 157 | "Last average written at 0005-01-01 00:00:00\n", 158 | "NHx_SURFACE_EMIS\n", 159 | "Datasets contain a total of 48 time samples\n", 160 | "Last average written at 0005-01-01 00:00:00\n", 161 | "NH4\n", 162 | "Datasets contain a total of 48 time samples\n", 163 | "Last average written at 0005-01-01 00:00:00\n", 164 | "O2_ZMIN\n", 165 | "Datasets contain a total of 48 time samples\n", 166 | "Last average written at 0005-01-01 00:00:00\n", 167 | "O2_ZMIN_DEPTH\n", 168 | "Datasets contain a total of 48 time samples\n", 169 | "Last average written at 0005-01-01 00:00:00\n", 170 | "O2\n", 171 | "Datasets contain a total of 48 time samples\n", 172 | "Last average written at 0005-01-01 00:00:00\n", 173 | "PO4\n", 174 | "Datasets contain a total of 48 time samples\n", 175 | "Last average written at 0005-01-01 00:00:00\n", 176 | "PO4\n", 177 | "Datasets contain a total of 48 time samples\n", 178 | "Last average written at 0005-01-01 00:00:00\n", 179 | "NO3\n", 180 | "Datasets contain a total of 48 time samples\n", 181 | "Last average written at 0005-01-01 00:00:00\n", 182 | "NO3\n", 183 | 
"Datasets contain a total of 48 time samples\n", 184 | "Last average written at 0005-01-01 00:00:00\n", 185 | "SiO3\n", 186 | "Datasets contain a total of 48 time samples\n", 187 | "Last average written at 0005-01-01 00:00:00\n", 188 | "SiO3\n" 189 | ] 190 | } 191 | ], 192 | "source": [ 193 | "with dask.distributed.Client(cluster) as client:\n", 194 | " stream = \"pop.h\"\n", 195 | " for diag_metadata in diag_metadata_list:\n", 196 | " ds = case.gen_dataset(diag_metadata[\"varname\"], stream)\n", 197 | " summary_plots(ds, diag_metadata, save_pngs=True)" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 7, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "cluster.close()" 207 | ] 208 | } 209 | ], 210 | "metadata": { 211 | "kernelspec": { 212 | "display_name": "Python [conda env:miniconda3-hires-marbl]", 213 | "language": "python", 214 | "name": "conda-env-miniconda3-hires-marbl-py" 215 | }, 216 | "language_info": { 217 | "codemirror_mode": { 218 | "name": "ipython", 219 | "version": 3 220 | }, 221 | "file_extension": ".py", 222 | "mimetype": "text/x-python", 223 | "name": "python", 224 | "nbconvert_exporter": "python", 225 | "pygments_lexer": "ipython3", 226 | "version": "3.7.8" 227 | } 228 | }, 229 | "nbformat": 4, 230 | "nbformat_minor": 4 231 | } 232 | -------------------------------------------------------------------------------- /notebooks/plot_suite_004.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import warnings\n", 11 | "\n", 12 | "import dask\n", 13 | "import ncar_jobqueue\n", 14 | "import yaml\n", 15 | "\n", 16 | "import utils\n", 17 | "\n", 18 | "%matplotlib inline" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "with open(\"diag_metadata.yaml\", mode=\"r\") as fptr:\n", 28 | " diag_metadata_list = yaml.safe_load(fptr)\n", 29 | "\n", 30 | "# varnames = utils.get_varnames_from_metadata_list(diag_metadata_list)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "def summary_plots(ds, diag_metadata, save_pngs=False):\n", 40 | " varname = diag_metadata[\"varname\"]\n", 41 | " print(varname)\n", 42 | " da = ds[varname].isel(diag_metadata.get(\"isel_dict\"))\n", 43 | "\n", 44 | " utils.summary_plot_global_ts(\n", 45 | " ds,\n", 46 | " da,\n", 47 | " diag_metadata,\n", 48 | " time_coarsen_len=12,\n", 49 | " save_pngs=save_pngs,\n", 50 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 51 | " )\n", 52 | "\n", 53 | " utils.summary_plot_histogram(\n", 54 | " ds,\n", 55 | " da,\n", 56 | " diag_metadata,\n", 57 | " save_pngs=save_pngs,\n", 58 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 59 | " )" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 4, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "name": "stdout", 69 | "output_type": "stream", 70 | "text": [ 71 | "dashboard_link=https://jupyterhub.hpc.ucar.edu/stable/user/mlevy/proxy/8787/status\n" 72 | ] 73 | } 74 | ], 75 | "source": [ 76 | "cluster = ncar_jobqueue.NCARCluster(\n", 77 | " cores=2, memory=\"64 GB\", processes=2, walltime=\"6:00:00\"\n", 78 | ")\n", 79 | "cluster.scale(n=8) # n = number of workers\n", 80 | 
"print(f\"dashboard_link={cluster.dashboard_link}\")" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 5, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "casename = \"g.e22.G1850ECO_JRA_HR.TL319_t13.004\"\n", 90 | "campaign_root = os.path.join(\n", 91 | " os.sep,\n", 92 | " \"glade\",\n", 93 | " \"campaign\",\n", 94 | " \"cesm\",\n", 95 | " \"development\",\n", 96 | " \"bgcwg\",\n", 97 | " \"projects\",\n", 98 | " \"hi-res_JRA\",\n", 99 | " \"cases\",\n", 100 | ")\n", 101 | "\n", 102 | "# Set up CaseClass object\n", 103 | "case = utils.CaseClass(\n", 104 | " casename, os.path.join(campaign_root, casename, \"output\")\n", 105 | ")" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 6, 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "name": "stdout", 115 | "output_type": "stream", 116 | "text": [ 117 | "Datasets contain a total of 204 time samples\n", 118 | "Last average written at 0018-01-01 00:00:00\n", 119 | "POC_FLUX_100m\n", 120 | "Datasets contain a total of 204 time samples\n", 121 | "Last average written at 0018-01-01 00:00:00\n", 122 | "CaCO3_FLUX_100m\n", 123 | "Datasets contain a total of 204 time samples\n", 124 | "Last average written at 0018-01-01 00:00:00\n", 125 | "FG_CO2\n", 126 | "Datasets contain a total of 204 time samples\n", 127 | "Last average written at 0018-01-01 00:00:00\n", 128 | "DpCO2\n", 129 | "Datasets contain a total of 204 time samples\n", 130 | "Last average written at 0018-01-01 00:00:00\n", 131 | "PH\n", 132 | "Datasets contain a total of 204 time samples\n", 133 | "Last average written at 0018-01-01 00:00:00\n", 134 | "spChl\n", 135 | "Datasets contain a total of 204 time samples\n", 136 | "Last average written at 0018-01-01 00:00:00\n", 137 | "photoC_sp_zint\n", 138 | "Datasets contain a total of 204 time samples\n", 139 | "Last average written at 0018-01-01 00:00:00\n", 140 | "coccoChl\n", 141 | "Datasets contain a total of 204 time samples\n", 142 | "Last average written at 0018-01-01 00:00:00\n", 143 | "photoC_cocco_zint\n", 144 | "Datasets contain a total of 204 time samples\n", 145 | "Last average written at 0018-01-01 00:00:00\n", 146 | "diatChl\n", 147 | "Datasets contain a total of 204 time samples\n", 148 | "Last average written at 0018-01-01 00:00:00\n", 149 | "photoC_diat_zint\n", 150 | "Datasets contain a total of 204 time samples\n", 151 | "Last average written at 0018-01-01 00:00:00\n", 152 | "diazChl\n", 153 | "Datasets contain a total of 204 time samples\n", 154 | "Last average written at 0018-01-01 00:00:00\n", 155 | "photoC_diaz_zint\n", 156 | "Datasets contain a total of 204 time samples\n", 157 | "Last average written at 0018-01-01 00:00:00\n", 158 | "NHx_SURFACE_EMIS\n", 159 | "Datasets contain a total of 204 time samples\n", 160 | "Last average written at 0018-01-01 00:00:00\n", 161 | "NH4\n", 162 | "Datasets contain a total of 204 time samples\n", 163 | "Last average written at 0018-01-01 00:00:00\n", 164 | "O2_ZMIN\n", 165 | "Datasets contain a total of 204 time samples\n", 166 | "Last average written at 0018-01-01 00:00:00\n", 167 | "O2_ZMIN_DEPTH\n", 168 | "Datasets contain a total of 204 time samples\n", 169 | "Last average written at 0018-01-01 00:00:00\n", 170 | "O2\n", 171 | "Datasets contain a total of 204 time samples\n", 172 | "Last average written at 0018-01-01 00:00:00\n", 173 | "PO4\n", 174 | "Datasets contain a total of 204 time samples\n", 175 | "Last average written at 0018-01-01 00:00:00\n", 176 | "PO4\n", 177 | "Datasets contain a total of 204 
time samples\n", 178 | "Last average written at 0018-01-01 00:00:00\n", 179 | "NO3\n", 180 | "Datasets contain a total of 204 time samples\n", 181 | "Last average written at 0018-01-01 00:00:00\n", 182 | "NO3\n", 183 | "Datasets contain a total of 204 time samples\n", 184 | "Last average written at 0018-01-01 00:00:00\n", 185 | "SiO3\n", 186 | "Datasets contain a total of 204 time samples\n", 187 | "Last average written at 0018-01-01 00:00:00\n", 188 | "SiO3\n" 189 | ] 190 | } 191 | ], 192 | "source": [ 193 | "with dask.distributed.Client(cluster) as client:\n", 194 | " stream = \"pop.h\"\n", 195 | " # ds = case.gen_dataset(varnames, stream)\n", 196 | " for diag_metadata in diag_metadata_list:\n", 197 | " ds = case.gen_dataset(diag_metadata[\"varname\"], stream, end_year=17)\n", 198 | " summary_plots(ds, diag_metadata, save_pngs=True)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 7, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "cluster.close()" 208 | ] 209 | } 210 | ], 211 | "metadata": { 212 | "kernelspec": { 213 | "display_name": "Python [conda env:miniconda3-hires-marbl]", 214 | "language": "python", 215 | "name": "conda-env-miniconda3-hires-marbl-py" 216 | }, 217 | "language_info": { 218 | "codemirror_mode": { 219 | "name": "ipython", 220 | "version": 3 221 | }, 222 | "file_extension": ".py", 223 | "mimetype": "text/x-python", 224 | "name": "python", 225 | "nbconvert_exporter": "python", 226 | "pygments_lexer": "ipython3", 227 | "version": "3.7.8" 228 | } 229 | }, 230 | "nbformat": 4, 231 | "nbformat_minor": 4 232 | } 233 | -------------------------------------------------------------------------------- /notebooks/compare_ts_and_hist_004.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "\n", 11 | "import utils" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stdout", 21 | "output_type": "stream", 22 | "text": [ 23 | "Checking year 0001...\n", 24 | "... checking stream pop.h.nyear1 ...\n", 25 | "... checking stream pop.h.nday1 ...\n", 26 | "... checking stream pop.h ...\n", 27 | "... checking stream cice.h ...\n", 28 | "All variables available in time series for year 0001\n", 29 | "----\n", 30 | "Checking year 0002...\n", 31 | "... checking stream pop.h.nyear1 ...\n", 32 | "... checking stream pop.h.nday1 ...\n", 33 | "... checking stream pop.h ...\n", 34 | "... checking stream cice.h1 ...\n", 35 | "... checking stream cice.h ...\n", 36 | "All variables available in time series for year 0002\n", 37 | "----\n", 38 | "Checking year 0003...\n", 39 | "... checking stream pop.h.nyear1 ...\n", 40 | "... checking stream pop.h.nday1 ...\n", 41 | "... checking stream pop.h ...\n", 42 | "... checking stream cice.h1 ...\n", 43 | "... checking stream cice.h ...\n", 44 | "All variables available in time series for year 0003\n", 45 | "----\n", 46 | "Checking year 0004...\n", 47 | "... checking stream pop.h.nyear1 ...\n", 48 | "... checking stream pop.h.nday1 ...\n", 49 | "... checking stream pop.h ...\n", 50 | "... checking stream cice.h1 ...\n", 51 | "... checking stream cice.h ...\n", 52 | "All variables available in time series for year 0004\n", 53 | "----\n", 54 | "Checking year 0005...\n", 55 | "... checking stream pop.h.nyear1 ...\n", 56 | "... 
checking stream pop.h.nday1 ...\n", 57 | "... checking stream pop.h ...\n", 58 | "... checking stream cice.h1 ...\n", 59 | "... checking stream cice.h ...\n", 60 | "All variables available in time series for year 0005\n", 61 | "----\n", 62 | "Checking year 0006...\n", 63 | "... checking stream pop.h.nyear1 ...\n", 64 | "... checking stream pop.h.nday1 ...\n", 65 | "... checking stream pop.h ...\n", 66 | "... checking stream cice.h1 ...\n", 67 | "... checking stream cice.h ...\n", 68 | "All variables available in time series for year 0006\n", 69 | "----\n", 70 | "Checking year 0007...\n", 71 | "... checking stream pop.h.nyear1 ...\n", 72 | "... checking stream pop.h.nday1 ...\n", 73 | "... checking stream pop.h ...\n", 74 | "... checking stream cice.h1 ...\n", 75 | "... checking stream cice.h ...\n", 76 | "All variables available in time series for year 0007\n", 77 | "----\n", 78 | "Checking year 0008...\n", 79 | "... checking stream pop.h.nyear1 ...\n", 80 | "... checking stream pop.h.nday1 ...\n", 81 | "... checking stream pop.h ...\n", 82 | "... checking stream cice.h1 ...\n", 83 | "... checking stream cice.h ...\n", 84 | "All variables available in time series for year 0008\n", 85 | "----\n", 86 | "Checking year 0009...\n", 87 | "... checking stream pop.h.nyear1 ...\n", 88 | "... checking stream pop.h.nday1 ...\n", 89 | "... checking stream pop.h ...\n", 90 | "... checking stream cice.h1 ...\n", 91 | "... checking stream cice.h ...\n", 92 | "All variables available in time series for year 0009\n", 93 | "----\n", 94 | "Checking year 0010...\n", 95 | "... checking stream pop.h.nyear1 ...\n", 96 | "... checking stream pop.h.nday1 ...\n", 97 | "... checking stream pop.h ...\n", 98 | "... checking stream cice.h1 ...\n", 99 | "... checking stream cice.h ...\n", 100 | "All variables available in time series for year 0010\n", 101 | "----\n", 102 | "Checking year 0011...\n", 103 | "... checking stream pop.h.nyear1 ...\n", 104 | "... checking stream pop.h.nday1 ...\n", 105 | "... checking stream pop.h ...\n", 106 | "... checking stream cice.h1 ...\n", 107 | "... checking stream cice.h ...\n", 108 | "All variables available in time series for year 0011\n", 109 | "----\n", 110 | "Checking year 0012...\n", 111 | "... checking stream pop.h.nyear1 ...\n", 112 | "... checking stream pop.h.nday1 ...\n", 113 | "... checking stream pop.h ...\n", 114 | "... checking stream cice.h1 ...\n", 115 | "... checking stream cice.h ...\n", 116 | "All variables available in time series for year 0012\n", 117 | "----\n", 118 | "Checking year 0013...\n", 119 | "... checking stream pop.h.nyear1 ...\n", 120 | "... checking stream pop.h.nday1 ...\n", 121 | "... checking stream pop.h ...\n", 122 | "... checking stream cice.h1 ...\n", 123 | "... checking stream cice.h ...\n", 124 | "All variables available in time series for year 0013\n", 125 | "----\n", 126 | "Checking year 0014...\n", 127 | "... checking stream pop.h.nyear1 ...\n", 128 | "... checking stream pop.h.nday1 ...\n", 129 | "... checking stream pop.h ...\n", 130 | "... checking stream cice.h1 ...\n", 131 | "... checking stream cice.h ...\n", 132 | "All variables available in time series for year 0014\n", 133 | "----\n", 134 | "Checking year 0015...\n", 135 | "... checking stream pop.h.nyear1 ...\n", 136 | "... checking stream pop.h.nday1 ...\n", 137 | "... checking stream pop.h ...\n", 138 | "... checking stream cice.h1 ...\n", 139 | "... 
checking stream cice.h ...\n", 140 | "All variables available in time series for year 0015\n", 141 | "----\n", 142 | "Checking year 0016...\n", 143 | "... checking stream pop.h.nyear1 ...\n", 144 | "... checking stream pop.h.nday1 ...\n", 145 | "... checking stream pop.h ...\n", 146 | "... checking stream cice.h1 ...\n", 147 | "... checking stream cice.h ...\n", 148 | "All variables available in time series for year 0016\n", 149 | "----\n", 150 | "Checking year 0017...\n", 151 | "... checking stream pop.h.nyear1 ...\n", 152 | "... checking stream pop.h.nday1 ...\n", 153 | "... checking stream pop.h ...\n", 154 | "... checking stream cice.h1 ...\n", 155 | "... checking stream cice.h ...\n", 156 | "All variables available in time series for year 0017\n", 157 | "----\n", 158 | "Checking year 0018...\n", 159 | "... checking stream pop.h.nyear1 ...\n", 160 | "Could not find time series for year 0018\n", 161 | "CPU times: user 4min 46s, sys: 3min 16s, total: 8min 3s\n", 162 | "Wall time: 11min 3s\n" 163 | ] 164 | } 165 | ], 166 | "source": [ 167 | "%%time\n", 168 | "\n", 169 | "casename = \"g.e22.G1850ECO_JRA_HR.TL319_t13.004\"\n", 170 | "\n", 171 | "# Directories to search for netCDF files\n", 172 | "caseroot = os.path.join(os.sep, \"glade\", \"work\", \"mlevy\", \"hi-res_BGC_JRA\", \"cases\")\n", 173 | "campaign_root = os.path.join(os.sep, \"glade\", \"campaign\", \"cesm\", \"development\", \"bgcwg\", \"projects\", \"hi-res_JRA\", \"cases\")\n", 174 | "output_roots = [os.path.join(campaign_root, casename, \"output\")]\n", 175 | "output_roots += utils.gen_output_roots_from_caseroot(os.path.join(caseroot, casename))\n", 176 | "\n", 177 | "utils.timeseries_and_history_comparison(casename, output_roots)" 178 | ] 179 | } 180 | ], 181 | "metadata": { 182 | "kernelspec": { 183 | "display_name": "Python [conda env:hires-marbl]", 184 | "language": "python", 185 | "name": "conda-env-hires-marbl-py" 186 | }, 187 | "language_info": { 188 | "codemirror_mode": { 189 | "name": "ipython", 190 | "version": 3 191 | }, 192 | "file_extension": ".py", 193 | "mimetype": "text/x-python", 194 | "name": "python", 195 | "nbconvert_exporter": "python", 196 | "pygments_lexer": "ipython3", 197 | "version": "3.7.8" 198 | } 199 | }, 200 | "nbformat": 4, 201 | "nbformat_minor": 4 202 | } 203 | -------------------------------------------------------------------------------- /notebooks/plot_suite_1deg.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import warnings\n", 11 | "\n", 12 | "import dask\n", 13 | "import ncar_jobqueue\n", 14 | "import yaml\n", 15 | "\n", 16 | "import utils\n", 17 | "\n", 18 | "%matplotlib inline" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "with open(\"diag_metadata.yaml\", mode=\"r\") as fptr:\n", 28 | " diag_metadata_list = yaml.safe_load(fptr)\n", 29 | "\n", 30 | "# varnames = utils.get_varnames_from_metadata_list(diag_metadata_list)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "def summary_plots(ds, diag_metadata, save_pngs=False):\n", 40 | " varname = diag_metadata[\"varname\"]\n", 41 | " print(varname)\n", 42 | " da = ds[varname].isel(diag_metadata.get(\"isel_dict\"))\n", 43 | "\n", 44 | " 
utils.summary_plot_global_ts(\n", 45 | " ds,\n", 46 | " da,\n", 47 | " diag_metadata,\n", 48 | " time_coarsen_len=12,\n", 49 | " save_pngs=save_pngs,\n", 50 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 51 | " )\n", 52 | "\n", 53 | " utils.summary_plot_histogram(\n", 54 | " ds,\n", 55 | " da,\n", 56 | " diag_metadata,\n", 57 | " save_pngs=save_pngs,\n", 58 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 59 | " )" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 4, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "name": "stdout", 69 | "output_type": "stream", 70 | "text": [ 71 | "dashboard_link=https://jupyterhub.hpc.ucar.edu/stable/user/mlevy/proxy/36124/status\n" 72 | ] 73 | }, 74 | { 75 | "name": "stderr", 76 | "output_type": "stream", 77 | "text": [ 78 | "/glade/work/mlevy/miniconda3/envs/hires-marbl/lib/python3.7/site-packages/distributed/node.py:155: UserWarning: Port 8787 is already in use.\n", 79 | "Perhaps you already have a cluster running?\n", 80 | "Hosting the HTTP server on port 36124 instead\n", 81 | " http_address[\"port\"], self.http_server.port\n" 82 | ] 83 | } 84 | ], 85 | "source": [ 86 | "cluster = ncar_jobqueue.NCARCluster(\n", 87 | " cores=2, memory=\"64 GB\", processes=2, walltime=\"6:00:00\"\n", 88 | ")\n", 89 | "cluster.scale(n=8) # n = number of workers\n", 90 | "print(f\"dashboard_link={cluster.dashboard_link}\")" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 5, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "casename = \"g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001\"\n", 100 | "year = 95\n", 101 | "campaign_root = os.path.join(\n", 102 | " os.path.sep,\n", 103 | " \"glade\",\n", 104 | " \"campaign\",\n", 105 | " \"cesm\",\n", 106 | " \"development\",\n", 107 | " \"bgcwg\",\n", 108 | " \"projects\",\n", 109 | " \"1deg_cocco_JRA\",\n", 110 | " \"cases\",\n", 111 | ")\n", 112 | "\n", 113 | "# Set up CaseClass object\n", 114 | "case = utils.CaseClass(casename, os.path.join(campaign_root, casename))" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 6, 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "name": "stdout", 124 | "output_type": "stream", 125 | "text": [ 126 | "Datasets contain a total of 120 time samples\n", 127 | "Last average written at 0105-01-01 00:00:00\n", 128 | "POC_FLUX_100m\n", 129 | "Datasets contain a total of 120 time samples\n", 130 | "Last average written at 0105-01-01 00:00:00\n", 131 | "CaCO3_FLUX_100m\n", 132 | "Datasets contain a total of 120 time samples\n", 133 | "Last average written at 0105-01-01 00:00:00\n", 134 | "FG_CO2\n", 135 | "Datasets contain a total of 120 time samples\n", 136 | "Last average written at 0105-01-01 00:00:00\n", 137 | "DpCO2\n", 138 | "Datasets contain a total of 120 time samples\n", 139 | "Last average written at 0105-01-01 00:00:00\n", 140 | "PH\n", 141 | "Datasets contain a total of 120 time samples\n", 142 | "Last average written at 0105-01-01 00:00:00\n", 143 | "spChl\n", 144 | "Datasets contain a total of 120 time samples\n", 145 | "Last average written at 0105-01-01 00:00:00\n", 146 | "photoC_sp_zint\n", 147 | "Datasets contain a total of 120 time samples\n", 148 | "Last average written at 0105-01-01 00:00:00\n", 149 | "coccoChl\n", 150 | "Datasets contain a total of 120 time samples\n", 151 | "Last average written at 0105-01-01 00:00:00\n", 152 | "photoC_cocco_zint\n", 153 | "Datasets contain a total of 120 time samples\n", 154 | "Last average written 
at 0105-01-01 00:00:00\n", 155 | "diatChl\n", 156 | "Datasets contain a total of 120 time samples\n", 157 | "Last average written at 0105-01-01 00:00:00\n", 158 | "photoC_diat_zint\n", 159 | "Datasets contain a total of 120 time samples\n", 160 | "Last average written at 0105-01-01 00:00:00\n", 161 | "diazChl\n", 162 | "Datasets contain a total of 120 time samples\n", 163 | "Last average written at 0105-01-01 00:00:00\n", 164 | "photoC_diaz_zint\n", 165 | "Datasets contain a total of 120 time samples\n", 166 | "Last average written at 0105-01-01 00:00:00\n", 167 | "NHx_SURFACE_EMIS\n", 168 | "Datasets contain a total of 120 time samples\n", 169 | "Last average written at 0105-01-01 00:00:00\n", 170 | "NH4\n", 171 | "Datasets contain a total of 120 time samples\n", 172 | "Last average written at 0105-01-01 00:00:00\n", 173 | "O2_ZMIN\n", 174 | "Datasets contain a total of 120 time samples\n", 175 | "Last average written at 0105-01-01 00:00:00\n", 176 | "O2_ZMIN_DEPTH\n", 177 | "Datasets contain a total of 120 time samples\n", 178 | "Last average written at 0105-01-01 00:00:00\n", 179 | "O2\n", 180 | "Datasets contain a total of 120 time samples\n", 181 | "Last average written at 0105-01-01 00:00:00\n", 182 | "PO4\n", 183 | "Datasets contain a total of 120 time samples\n", 184 | "Last average written at 0105-01-01 00:00:00\n", 185 | "PO4\n", 186 | "Datasets contain a total of 120 time samples\n", 187 | "Last average written at 0105-01-01 00:00:00\n", 188 | "NO3\n", 189 | "Datasets contain a total of 120 time samples\n", 190 | "Last average written at 0105-01-01 00:00:00\n", 191 | "NO3\n", 192 | "Datasets contain a total of 120 time samples\n", 193 | "Last average written at 0105-01-01 00:00:00\n", 194 | "SiO3\n", 195 | "Datasets contain a total of 120 time samples\n", 196 | "Last average written at 0105-01-01 00:00:00\n", 197 | "SiO3\n" 198 | ] 199 | } 200 | ], 201 | "source": [ 202 | "with dask.distributed.Client(cluster) as client:\n", 203 | " stream = \"pop.h\"\n", 204 | " # ds = case.gen_dataset(varnames, stream)\n", 205 | " for diag_metadata in diag_metadata_list:\n", 206 | " ds = case.gen_dataset(\n", 207 | " diag_metadata[\"varname\"], stream, start_year=95, end_year=104\n", 208 | " )\n", 209 | " summary_plots(ds, diag_metadata, save_pngs=True)" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 7, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "cluster.close()" 219 | ] 220 | } 221 | ], 222 | "metadata": { 223 | "kernelspec": { 224 | "display_name": "Python [conda env:miniconda3-hires-marbl]", 225 | "language": "python", 226 | "name": "conda-env-miniconda3-hires-marbl-py" 227 | }, 228 | "language_info": { 229 | "codemirror_mode": { 230 | "name": "ipython", 231 | "version": 3 232 | }, 233 | "file_extension": ".py", 234 | "mimetype": "text/x-python", 235 | "name": "python", 236 | "nbconvert_exporter": "python", 237 | "pygments_lexer": "ipython3", 238 | "version": "3.7.8" 239 | } 240 | }, 241 | "nbformat": 4, 242 | "nbformat_minor": 4 243 | } 244 | -------------------------------------------------------------------------------- /notebooks/utils/utils.py: -------------------------------------------------------------------------------- 1 | """utility functions""" 2 | 3 | import math 4 | 5 | import cftime 6 | import numpy as np 7 | import xarray as xr 8 | import pathlib 9 | import pandas as pd 10 | import json 11 | 12 | from .compare_ts_and_hist import compare_ts_and_hist 13 | from .cime import cime_xmlquery 14 | 15 | 
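# Usage sketch for the time helpers defined below (illustrative only; the file
# name here is hypothetical, not one of the cases analyzed in these notebooks):
#
#     ds = xr.open_dataset("some_monthly_history_file.nc")  # hypothetical file
#     ds = time_set_mid(ds, "time")             # midpoints of time bounds (no-op if "time" has no bounds attr)
#     t_frac = time_year_plus_frac(ds, "time")  # numpy array of year + fraction-of-year values
#     print(round_sig(float(t_frac[-1]), 4))    # keep 4 significant digits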
################################################################################ 16 | 17 | 18 | def repl_coord(coordname, ds1, ds2): 19 | """ 20 | Return copy of d2 with coordinate coordname replaced, using coordname from ds1. 21 | Drop ds2.coordname.attrs['bounds'] in result, if ds2.coordname has bounds attribute. 22 | Add ds1.coordname.attrs['bounds'] to result, if ds1.coordname has bounds attribute. 23 | Except for coordname, the returned Dataset is a non-deep copy of ds2. 24 | """ 25 | if "bounds" in ds2[coordname].attrs: 26 | tb_name = ds2[coordname].attrs["bounds"] 27 | ds_out = ds2.drop(tb_name).assign_coords({coordname: ds1[coordname]}) 28 | else: 29 | ds_out = ds2.assign_coords({coordname: ds1[coordname]}) 30 | if "bounds" in ds1[coordname].attrs: 31 | tb_name = ds1[coordname].attrs["bounds"] 32 | ds_out = xr.merge([ds_out, ds1[tb_name]]) 33 | return ds_out 34 | 35 | 36 | ################################################################################ 37 | 38 | 39 | def time_set_mid(ds, time_name, deep=False): 40 | """ 41 | Return copy of ds with values of ds[time_name] replaced with midpoints of 42 | ds[time_name].attrs['bounds'], if bounds attribute exists. 43 | Except for time_name, the returned Dataset is a copy of ds2. 44 | The copy is deep or not depending on the argument deep. 45 | """ 46 | 47 | ds_out = ds.copy(deep=deep) 48 | 49 | if "bounds" not in ds[time_name].attrs: 50 | return ds_out 51 | 52 | tb_name = ds[time_name].attrs["bounds"] 53 | tb = ds[tb_name] 54 | bounds_dim = next(dim for dim in tb.dims if dim != time_name) 55 | 56 | # Use da = da.copy(data=...), in order to preserve attributes and encoding. 57 | 58 | # If tb is an array of datetime objects then encode time before averaging. 59 | # Do this because computing the mean on datetime objects with xarray fails 60 | # if the time span is 293 or more years. 
61 | # https://github.com/klindsay28/CESM2_coup_carb_cycle_JAMES/issues/7 62 | if tb.dtype == np.dtype("O"): 63 | units = "days since 0001-01-01" 64 | calendar = "noleap" 65 | tb_vals = cftime.date2num(ds[tb_name].values, units=units, calendar=calendar) 66 | tb_mid_decode = cftime.num2date( 67 | tb_vals.mean(axis=1), units=units, calendar=calendar 68 | ) 69 | ds_out[time_name] = ds[time_name].copy(data=tb_mid_decode) 70 | else: 71 | ds_out[time_name] = ds[time_name].copy(data=tb.mean(bounds_dim)) 72 | 73 | return ds_out 74 | 75 | 76 | ################################################################################ 77 | 78 | 79 | def time_year_plus_frac(ds, time_name): 80 | """return time variable, as numpy array of year plus fraction of year values""" 81 | 82 | # this is straightforward if time has units='days since 0000-01-01' and calendar='noleap' 83 | # so convert specification of time to that representation 84 | 85 | # get time values as an np.ndarray of cftime objects 86 | if np.dtype(ds[time_name]) == np.dtype("O"): 87 | tvals_cftime = ds[time_name].values 88 | else: 89 | tvals_cftime = cftime.num2date( 90 | ds[time_name].values, 91 | ds[time_name].attrs["units"], 92 | ds[time_name].attrs["calendar"], 93 | ) 94 | 95 | # convert cftime objects to representation mentioned above 96 | tvals_days = cftime.date2num( 97 | tvals_cftime, "days since 0000-01-01", calendar="noleap" 98 | ) 99 | 100 | return tvals_days / 365.0 101 | 102 | 103 | ################################################################################ 104 | 105 | 106 | def round_sig(x, ndigits): 107 | """round x to ndigits precision""" 108 | if x == 0: 109 | return x 110 | ndigits_offset = math.floor(math.log10(abs(x))) 111 | return round(x, ndigits - 1 - ndigits_offset) 112 | 113 | 114 | ################################################################################ 115 | 116 | 117 | def get_varnames_from_metadata_list(diag_metadata_list): 118 | varnames = [] 119 | for diag_metadata in diag_metadata_list: 120 | if diag_metadata["varname"] not in varnames: 121 | varnames.append(diag_metadata["varname"]) 122 | return varnames 123 | 124 | 125 | ################################################################################ 126 | 127 | 128 | def gen_output_roots_from_caseroot(caseroot): 129 | if type(caseroot) == str: 130 | caseroot = [caseroot] 131 | if type(caseroot) != list: 132 | raise TypeError("caseroot must be a str or list, {caseroot} is not acceptable") 133 | 134 | output_roots = [] 135 | for single_root in caseroot: 136 | vars_to_check = ["RUNDIR"] 137 | if cime_xmlquery(single_root, "DOUT_S") == "TRUE": 138 | vars_to_check.append("DOUT_S_ROOT") 139 | for xml_var_to_query in vars_to_check: 140 | output_roots.append(cime_xmlquery(single_root, xml_var_to_query)) 141 | 142 | return output_roots 143 | 144 | 145 | ################################################################################ 146 | 147 | 148 | def timeseries_and_history_comparison(casename, output_roots): 149 | for year in range(1, 62): 150 | has_ts = True 151 | found_all = True 152 | print(f"Checking year {year:04}...") 153 | for stream in ["pop.h.nyear1", "pop.h.nday1", "pop.h", "cice.h1", "cice.h"]: 154 | has_hist = True 155 | # There is no cice.h1 time series for 0001 so skip check 156 | if stream == "cice.h1" and year == 1: 157 | continue 158 | # Run test 159 | print(f"... 
checking stream {stream} ...") 160 | comp_test = compare_ts_and_hist(casename, output_roots, stream, year) 161 | # Check ends when there are no history files for comparison 162 | if comp_test == "no time series": 163 | has_ts = False 164 | break 165 | 166 | # Skip years when there are no history files 167 | # (Assume those years were already checked prior to deleting history files) 168 | if comp_test == "no history": 169 | print( 170 | f"Skipping stream {stream} for year {year:04} because there are no history files" 171 | ) 172 | has_hist = False 173 | continue 174 | 175 | found_all = found_all and (comp_test == "same") 176 | 177 | if not has_ts: 178 | print(f"Could not find time series for year {year:04}") 179 | break 180 | if has_hist and found_all: 181 | print(f"All variables available in time series for year {year:04}") 182 | else: 183 | print(f"Could not find time series for all variables in year {year:04}") 184 | print("----") 185 | 186 | 187 | ################################################################################ 188 | 189 | 190 | def dict_copy_vals(src, dst, keys, abort_on_mismatch=True): 191 | for key in keys if type(keys) == list else [keys]: 192 | if key in src: 193 | if key in dst and abort_on_mismatch: 194 | if dst[key] != src[key]: 195 | raise ValueError( 196 | f"{key} exists in dst and src and dst values mismatch" 197 | ) 198 | else: 199 | dst[key] = src[key] 200 | 201 | 202 | ################################################################################ 203 | 204 | 205 | def print_key_metadata(ds, msg=None): 206 | print(64 * "*") 207 | if msg is not None: 208 | print(msg) 209 | print(64 * "*") 210 | for attr_name in ["chunks", "attrs", "encoding"]: 211 | print("ds." + attr_name) 212 | print(getattr(ds, attr_name)) 213 | print(32 * "*") 214 | for attr_name in ["chunks", "attrs", "encoding"]: 215 | print("ds['time']." + attr_name) 216 | print(getattr(ds["time"], attr_name)) 217 | print(32 * "*") 218 | 219 | 220 | ################################################################################ 221 | 222 | 223 | def generate_plot_catalog( 224 | root_dir, image_dir_name="images", extension=".json", use_full_path=True 225 | ): 226 | """ 227 | Generate a single dataframe from plot attributes saved in json files. 228 | Parameters 229 | ---------- 230 | root_dir : str, pathlib.Path 231 | The root directory 232 | extension : str, default `.json.` 233 | file extension to look for. 
234 | 235 | Returns 236 | ------- 237 | df : pd.DataFrame 238 | """ 239 | root_dir = pathlib.Path(root_dir) 240 | image_dir = root_dir / image_dir_name 241 | image_dir.exists() 242 | files = sorted(image_dir.rglob(f"**/*{extension}")) 243 | data = [] 244 | if files: 245 | for file in files: 246 | metadata = json.load(file.open()) 247 | if use_full_path: 248 | metadata["filepath"] = ( 249 | (root_dir / metadata["filepath"]).absolute().as_posix() 250 | ) 251 | data.append(metadata) 252 | return pd.DataFrame(data) 253 | else: 254 | print(f"Found 0 files with extension={extension} in {image_dir}.") 255 | return pd.DataFrame() 256 | -------------------------------------------------------------------------------- /notebooks/utils/PlotTypeClass.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import pathlib 4 | 5 | 6 | class _PlotTypeBaseClass(object): 7 | def __init__(self, *args, **kwargs): 8 | raise NotImplementedError("This must be implemented in child class") 9 | 10 | def get_filepaths(self, *args, **kwargs): 11 | raise NotImplementedError("This must be implemented in child class") 12 | 13 | def get_isel_str(self, da, isel_dict): 14 | """ 15 | If diag metadata passes isel_dict option, we need that reflected in file name. 16 | 17 | This subroutine produces an additional string of the form var1_val1.var2_val2..., 18 | where isel_dict is equivalent to da.sel(var1=val1,var2=val2). Note the switch 19 | from .isel to .sel -- val1 should be da.var1[isel_dict[var1]] (the true value, 20 | rather than the index) 21 | """ 22 | # Return empty string if isel_dict is empty dictionary 23 | isel_list = [] 24 | for varname in isel_dict: 25 | value = da[varname].data 26 | try: 27 | # Use two digits after decimal for floats 28 | isel_list.append(f"{varname}--{value:.2f}") 29 | except: 30 | # Otherwise just include the variable value (e.g. strings) 31 | isel_list.append(f"{varname}--{value}") 32 | isel_str = "__".join(isel_list) 33 | if len(isel_str) > 0: 34 | isel_str = "." + isel_str 35 | return isel_str 36 | 37 | def savefig(self, fig, root_dir="images", **kwargs): 38 | """ 39 | Saves fig as a PNG, with the file name determined by the other parameters. 
40 | 41 | Also writes metadata about image file to a JSON file 42 | """ 43 | 44 | # Always use tight_layout 45 | fig.tight_layout() 46 | 47 | # Remove trailing slash from root_dir 48 | if root_dir[-1] == "/": 49 | root_dir = root_dir[:-1] 50 | 51 | # Set up dictionary for metadata 52 | metadata = self.metadata 53 | filepath, jsonpath = self.get_filepaths() 54 | metadata["filepath"] = os.path.join( 55 | self.metadata["plot_type"], f"{filepath}.png" 56 | ) 57 | filepath = os.path.join( 58 | root_dir, self.metadata["casename"], metadata["filepath"] 59 | ) 60 | jsonpath = os.path.join( 61 | root_dir, 62 | self.metadata["casename"], 63 | self.metadata["plot_type"], 64 | f"{jsonpath}.json", 65 | ) 66 | 67 | for path in [filepath, jsonpath]: 68 | parent_dir = pathlib.Path(path).parent 69 | parent_dir.mkdir(parents=True, exist_ok=True) 70 | 71 | fig.savefig(filepath, **kwargs) 72 | with open(jsonpath, "w") as fp: 73 | json.dump(metadata, fp) 74 | 75 | 76 | ################################################################################ 77 | 78 | 79 | class SummaryMapClass(_PlotTypeBaseClass): 80 | def __init__(self, da, casename, datestamp, apply_log10, isel_dict): 81 | self.metadata = dict() 82 | self.metadata["plot_type"] = "summary_map" 83 | self.metadata["varname"] = da.name 84 | self.metadata["casename"] = casename 85 | self.metadata["date"] = datestamp 86 | self.metadata["apply_log10"] = apply_log10 87 | self.metadata["sel_dict"] = dict() 88 | for varname in isel_dict: 89 | value = da[varname].data 90 | try: 91 | # Use two digits after decimal for floats 92 | str_val = f"{value:.2f}" 93 | except: 94 | # Otherwise just include the variable value (e.g. strings) 95 | str_val = f"{value}" 96 | self.metadata["sel_dict"][varname] = str_val 97 | self.isel_str = self.get_isel_str(da, isel_dict) 98 | 99 | def get_filepaths(self): 100 | log_str = "" if not self.metadata["apply_log10"] else ".log10" 101 | file_prefix = f"{self.metadata['varname']}.{self.metadata['date']}{self.isel_str}{log_str}" 102 | filepath = file_prefix 103 | jsonpath = os.path.join("metadata", file_prefix) 104 | 105 | return filepath, jsonpath 106 | 107 | 108 | ################################################################################ 109 | 110 | 111 | class SummaryTSClass(_PlotTypeBaseClass): 112 | def __init__(self, da, casename, start_date, end_date, isel_dict): 113 | self.metadata = dict() 114 | self.metadata["plot_type"] = "time_series" 115 | self.metadata["varname"] = da.name 116 | self.metadata["casename"] = casename 117 | self.metadata["time_period"] = f"{start_date}_{end_date}" 118 | self.metadata["sel_dict"] = dict() 119 | for varname in isel_dict: 120 | value = da[varname].data 121 | try: 122 | # Use two digits after decimal for floats 123 | str_val = f"{value:.2f}" 124 | except: 125 | # Otherwise just include the variable value (e.g. 
strings) 126 | str_val = f"{value}" 127 | self.metadata["sel_dict"][varname] = str_val 128 | self.isel_str = self.get_isel_str(da, isel_dict) 129 | 130 | def get_filepaths(self): 131 | file_prefix = ( 132 | f"{self.metadata['varname']}.{self.metadata['time_period']}{self.isel_str}" 133 | ) 134 | filepath = file_prefix 135 | jsonpath = os.path.join("metadata", file_prefix) 136 | 137 | return filepath, jsonpath 138 | 139 | 140 | ################################################################################ 141 | 142 | 143 | class SummaryHistClass(_PlotTypeBaseClass): 144 | def __init__(self, da, casename, apply_log10, start_date, end_date, isel_dict): 145 | self.metadata = dict() 146 | self.metadata["plot_type"] = "histogram" 147 | self.metadata["varname"] = da.name 148 | self.metadata["casename"] = casename 149 | self.metadata["apply_log10"] = apply_log10 150 | self.metadata["time_period"] = f"{start_date}_{end_date}" 151 | self.metadata["sel_dict"] = dict() 152 | for varname in isel_dict: 153 | value = da[varname].data 154 | try: 155 | # Use two digits after decimal for floats 156 | str_val = f"{value:.2f}" 157 | except: 158 | # Otherwise just include the variable value (e.g. strings) 159 | str_val = f"{value}" 160 | self.metadata["sel_dict"][varname] = str_val 161 | self.isel_str = self.get_isel_str(da, isel_dict) 162 | 163 | def get_filepaths(self): 164 | log_str = "" if not self.metadata["apply_log10"] else ".log10" 165 | file_prefix = f"{self.metadata['varname']}.{self.metadata['time_period']}{self.isel_str}{log_str}" 166 | filepath = file_prefix 167 | jsonpath = os.path.join("metadata", file_prefix) 168 | 169 | return filepath, jsonpath 170 | 171 | 172 | ################################################################################ 173 | 174 | 175 | class TrendMapClass(_PlotTypeBaseClass): 176 | def __init__(self, da, casename, start_date, end_date, isel_dict): 177 | self.metadata = dict() 178 | self.metadata["plot_type"] = "trend_map" 179 | self.metadata["varname"] = da.name 180 | self.metadata["casename"] = casename 181 | self.metadata["time_period"] = f"{start_date}_{end_date}" 182 | self.metadata["sel_dict"] = dict() 183 | for varname in isel_dict: 184 | value = da[varname].data 185 | try: 186 | # Use two digits after decimal for floats 187 | str_val = f"{value:.2f}" 188 | except: 189 | # Otherwise just include the variable value (e.g. strings) 190 | str_val = f"{value}" 191 | self.metadata["sel_dict"][varname] = str_val 192 | self.isel_str = self.get_isel_str(da, isel_dict) 193 | 194 | def get_filepaths(self): 195 | file_prefix = ( 196 | f"{self.metadata['varname']}.{self.metadata['time_period']}{self.isel_str}" 197 | ) 198 | filepath = os.path.join(file_prefix) 199 | jsonpath = os.path.join("metadata", file_prefix) 200 | 201 | return filepath, jsonpath 202 | 203 | 204 | ################################################################################ 205 | 206 | 207 | class TrendHistClass(_PlotTypeBaseClass): 208 | def __init__(self, da, casename, start_date, end_date, isel_dict): 209 | self.metadata = dict() 210 | self.metadata["plot_type"] = "trend_hist" 211 | self.metadata["varname"] = da.name 212 | self.metadata["casename"] = casename 213 | self.metadata["time_period"] = f"{start_date}_{end_date}" 214 | self.metadata["sel_dict"] = dict() 215 | for varname in isel_dict: 216 | value = da[varname].data 217 | try: 218 | # Use two digits after decimal for floats 219 | str_val = f"{value:.2f}" 220 | except: 221 | # Otherwise just include the variable value (e.g. 
strings) 222 | str_val = f"{value}" 223 | self.metadata["sel_dict"][varname] = str_val 224 | self.isel_str = self.get_isel_str(da, isel_dict) 225 | 226 | def get_filepaths(self): 227 | file_prefix = ( 228 | f"{self.metadata['varname']}.{self.metadata['time_period']}{self.isel_str}" 229 | ) 230 | filepath = os.path.join(file_prefix) 231 | jsonpath = os.path.join("metadata", file_prefix) 232 | 233 | return filepath, jsonpath 234 | -------------------------------------------------------------------------------- /notebooks/gen_csv.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from utils import generate_plot_catalog\n", 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "name": "stdout", 20 | "output_type": "stream", 21 | "text": [ 22 | "Generating csv for histogram plots in g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001\n", 23 | "Generating csv for summary_map plots in g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001\n", 24 | "Generating csv for time_series plots in g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001\n", 25 | "Generating csv for trend_hist plots in g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001\n", 26 | "Found 0 files with extension=.json in images/g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001/trend_hist.\n", 27 | "Generating csv for trend_map plots in g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001\n", 28 | "Found 0 files with extension=.json in images/g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001/trend_map.\n", 29 | "Generating csv for histogram plots in g.e22.G1850ECO_JRA_HR.TL319_t13.003\n", 30 | "Generating csv for summary_map plots in g.e22.G1850ECO_JRA_HR.TL319_t13.003\n", 31 | "Generating csv for time_series plots in g.e22.G1850ECO_JRA_HR.TL319_t13.003\n", 32 | "Generating csv for trend_hist plots in g.e22.G1850ECO_JRA_HR.TL319_t13.003\n", 33 | "Generating csv for trend_map plots in g.e22.G1850ECO_JRA_HR.TL319_t13.003\n", 34 | "Generating csv for histogram plots in g.e22.G1850ECO_JRA_HR.TL319_t13.004\n", 35 | "Generating csv for summary_map plots in g.e22.G1850ECO_JRA_HR.TL319_t13.004\n", 36 | "Generating csv for time_series plots in g.e22.G1850ECO_JRA_HR.TL319_t13.004\n", 37 | "Generating csv for trend_hist plots in g.e22.G1850ECO_JRA_HR.TL319_t13.004\n", 38 | "Generating csv for trend_map plots in g.e22.G1850ECO_JRA_HR.TL319_t13.004\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | "df = dict()\n", 44 | "for casename in [\n", 45 | " \"g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001\",\n", 46 | " \"g.e22.G1850ECO_JRA_HR.TL319_t13.003\",\n", 47 | " \"g.e22.G1850ECO_JRA_HR.TL319_t13.004\",\n", 48 | "]:\n", 49 | " df[casename] = pd.DataFrame({})\n", 50 | " for plottype in [\n", 51 | " \"histogram\",\n", 52 | " \"summary_map\",\n", 53 | " \"time_series\",\n", 54 | " \"trend_hist\",\n", 55 | " \"trend_map\",\n", 56 | " ]:\n", 57 | " print(f\"Generating csv for {plottype} plots in {casename}\")\n", 58 | " df[casename] = pd.concat(\n", 59 | " [\n", 60 | " df[casename],\n", 61 | " generate_plot_catalog(\n", 62 | " \"./\",\n", 63 | " image_dir_name=f\"images/{casename}/{plottype}\",\n", 64 | " use_full_path=False,\n", 65 | " ),\n", 66 | " ]\n", 67 | " )\n", 68 | " df[casename].to_csv(\n", 69 | " f\"images/{casename}/png_catalog.csv\",\n", 70 | " compression=None,\n", 71 | " index=False,\n", 72 | " )" 73 | ] 74 | }, 75 | { 76 
| "cell_type": "code", 77 | "execution_count": 3, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "data": { 82 | "text/html": [ 83 | "
\n", 84 | "\n", 97 | "\n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | "
plot_typevarnamecasenameapply_log10time_periodsel_dictfilepathdate
0histogramCaCO3_FLUX_100mg.e22.G1850ECO_JRA_HR.TL319_t13.004False0001-01-01_0001-12-31{}histogram/CaCO3_FLUX_100m.0001-01-01_0001-12-3...NaN
1histogramCaCO3_FLUX_100mg.e22.G1850ECO_JRA_HR.TL319_t13.004True0001-01-01_0001-12-31{}histogram/CaCO3_FLUX_100m.0001-01-01_0001-12-3...NaN
2histogramCaCO3_FLUX_100mg.e22.G1850ECO_JRA_HR.TL319_t13.004False0002-01-01_0002-12-31{}histogram/CaCO3_FLUX_100m.0002-01-01_0002-12-3...NaN
3histogramCaCO3_FLUX_100mg.e22.G1850ECO_JRA_HR.TL319_t13.004True0002-01-01_0002-12-31{}histogram/CaCO3_FLUX_100m.0002-01-01_0002-12-3...NaN
4histogramCaCO3_FLUX_100mg.e22.G1850ECO_JRA_HR.TL319_t13.004False0003-01-01_0003-12-31{}histogram/CaCO3_FLUX_100m.0003-01-01_0003-12-3...NaN
...........................
25trend_mapSiO3g.e22.G1850ECO_JRA_HR.TL319_t13.004NaN0002-01-01_0004-12-31{'basins': 'Atlantic'}trend_map/SiO3.0002-01-01_0004-12-31.basins--A...NaN
26trend_mapSiO3g.e22.G1850ECO_JRA_HR.TL319_t13.004NaN0002-01-01_0004-12-31{'basins': 'Global'}trend_map/SiO3.0002-01-01_0004-12-31.basins--G...NaN
27trend_mapSiO3g.e22.G1850ECO_JRA_HR.TL319_t13.004NaN0002-01-01_0004-12-31{'basins': 'Indian'}trend_map/SiO3.0002-01-01_0004-12-31.basins--I...NaN
28trend_mapSiO3g.e22.G1850ECO_JRA_HR.TL319_t13.004NaN0002-01-01_0004-12-31{'basins': 'Pacific'}trend_map/SiO3.0002-01-01_0004-12-31.basins--P...NaN
29trend_mapSiO3g.e22.G1850ECO_JRA_HR.TL319_t13.004NaN0002-01-01_0004-12-31{'z_t': '35109.35'}trend_map/SiO3.0002-01-01_0004-12-31.z_t--3510...NaN
\n", 235 | "

1244 rows × 8 columns

\n", 236 | "
" 237 | ], 238 | "text/plain": [ 239 | " plot_type varname casename \\\n", 240 | "0 histogram CaCO3_FLUX_100m g.e22.G1850ECO_JRA_HR.TL319_t13.004 \n", 241 | "1 histogram CaCO3_FLUX_100m g.e22.G1850ECO_JRA_HR.TL319_t13.004 \n", 242 | "2 histogram CaCO3_FLUX_100m g.e22.G1850ECO_JRA_HR.TL319_t13.004 \n", 243 | "3 histogram CaCO3_FLUX_100m g.e22.G1850ECO_JRA_HR.TL319_t13.004 \n", 244 | "4 histogram CaCO3_FLUX_100m g.e22.G1850ECO_JRA_HR.TL319_t13.004 \n", 245 | ".. ... ... ... \n", 246 | "25 trend_map SiO3 g.e22.G1850ECO_JRA_HR.TL319_t13.004 \n", 247 | "26 trend_map SiO3 g.e22.G1850ECO_JRA_HR.TL319_t13.004 \n", 248 | "27 trend_map SiO3 g.e22.G1850ECO_JRA_HR.TL319_t13.004 \n", 249 | "28 trend_map SiO3 g.e22.G1850ECO_JRA_HR.TL319_t13.004 \n", 250 | "29 trend_map SiO3 g.e22.G1850ECO_JRA_HR.TL319_t13.004 \n", 251 | "\n", 252 | " apply_log10 time_period sel_dict \\\n", 253 | "0 False 0001-01-01_0001-12-31 {} \n", 254 | "1 True 0001-01-01_0001-12-31 {} \n", 255 | "2 False 0002-01-01_0002-12-31 {} \n", 256 | "3 True 0002-01-01_0002-12-31 {} \n", 257 | "4 False 0003-01-01_0003-12-31 {} \n", 258 | ".. ... ... ... \n", 259 | "25 NaN 0002-01-01_0004-12-31 {'basins': 'Atlantic'} \n", 260 | "26 NaN 0002-01-01_0004-12-31 {'basins': 'Global'} \n", 261 | "27 NaN 0002-01-01_0004-12-31 {'basins': 'Indian'} \n", 262 | "28 NaN 0002-01-01_0004-12-31 {'basins': 'Pacific'} \n", 263 | "29 NaN 0002-01-01_0004-12-31 {'z_t': '35109.35'} \n", 264 | "\n", 265 | " filepath date \n", 266 | "0 histogram/CaCO3_FLUX_100m.0001-01-01_0001-12-3... NaN \n", 267 | "1 histogram/CaCO3_FLUX_100m.0001-01-01_0001-12-3... NaN \n", 268 | "2 histogram/CaCO3_FLUX_100m.0002-01-01_0002-12-3... NaN \n", 269 | "3 histogram/CaCO3_FLUX_100m.0002-01-01_0002-12-3... NaN \n", 270 | "4 histogram/CaCO3_FLUX_100m.0003-01-01_0003-12-3... NaN \n", 271 | ".. ... ... \n", 272 | "25 trend_map/SiO3.0002-01-01_0004-12-31.basins--A... NaN \n", 273 | "26 trend_map/SiO3.0002-01-01_0004-12-31.basins--G... NaN \n", 274 | "27 trend_map/SiO3.0002-01-01_0004-12-31.basins--I... NaN \n", 275 | "28 trend_map/SiO3.0002-01-01_0004-12-31.basins--P... NaN \n", 276 | "29 trend_map/SiO3.0002-01-01_0004-12-31.z_t--3510... 
NaN \n", 277 | "\n", 278 | "[1244 rows x 8 columns]" 279 | ] 280 | }, 281 | "execution_count": 3, 282 | "metadata": {}, 283 | "output_type": "execute_result" 284 | } 285 | ], 286 | "source": [ 287 | "df[\"g.e22.G1850ECO_JRA_HR.TL319_t13.004\"]" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 4, 293 | "metadata": {}, 294 | "outputs": [ 295 | { 296 | "data": { 297 | "text/plain": [ 298 | "'histogram/CaCO3_FLUX_100m.0001-01-01_0001-12-31.png'" 299 | ] 300 | }, 301 | "execution_count": 4, 302 | "metadata": {}, 303 | "output_type": "execute_result" 304 | } 305 | ], 306 | "source": [ 307 | "df[\"g.e22.G1850ECO_JRA_HR.TL319_t13.004\"][\"filepath\"].to_list()[0]" 308 | ] 309 | } 310 | ], 311 | "metadata": { 312 | "kernelspec": { 313 | "display_name": "Python [conda env:miniconda3-hires-marbl]", 314 | "language": "python", 315 | "name": "conda-env-miniconda3-hires-marbl-py" 316 | }, 317 | "language_info": { 318 | "codemirror_mode": { 319 | "name": "ipython", 320 | "version": 3 321 | }, 322 | "file_extension": ".py", 323 | "mimetype": "text/x-python", 324 | "name": "python", 325 | "nbconvert_exporter": "python", 326 | "pygments_lexer": "ipython3", 327 | "version": "3.7.8" 328 | } 329 | }, 330 | "nbformat": 4, 331 | "nbformat_minor": 4 332 | } 333 | -------------------------------------------------------------------------------- /notebooks/utils/Plotting.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tools to find and open files associated with the runs 3 | """ 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import xarray as xr 8 | import cftime 9 | import datetime 10 | 11 | # local modules, not available through __init__ 12 | from .utils import time_year_plus_frac, round_sig 13 | from .utils_units import conv_units 14 | from .PlotTypeClass import ( 15 | SummaryMapClass, 16 | SummaryTSClass, 17 | SummaryHistClass, 18 | TrendMapClass, 19 | TrendHistClass, 20 | ) 21 | 22 | ################################################################################ 23 | 24 | 25 | def compare_fields_at_lat_lon( 26 | list_of_das_in, nlat, nlon, individual_plots=False, filename=None 27 | ): 28 | 29 | # This shouldn't be hard-coded... but how else to get? 
30 | xticks = 365 + np.array([0, 31, 59, 90, 120, 151]) 31 | xlabels = ["Jan 1", "Feb 1", "Mar 1", "Apr 1", "May 1", "June 1"] 32 | yticks = np.linspace(0, 17e4, 18) 33 | 34 | list_of_das = [] 35 | for da in list_of_das_in: 36 | list_of_das.append(da.isel(nlat=nlat, nlon=nlon).compute()) 37 | 38 | # Get longitude and latitude (hard-coded to assume we want W and S) 39 | long_west = 360 - list_of_das[0]["TLONG"].data 40 | lat_south = -list_of_das[0]["TLAT"].data 41 | 42 | if individual_plots: 43 | nrows = int(np.ceil(len(list_of_das) / 2)) 44 | fig, axes = plt.subplots( 45 | nrows=nrows, ncols=2, figsize=(9 * nrows, 10.5), sharex=True 46 | ) 47 | 48 | # Hard-coded title is also a bad idea 49 | fig.suptitle(f"Mix Layer Depth at ({long_west:.2f} W, {lat_south:.2f} S)") 50 | 51 | for n, da in enumerate(list_of_das): 52 | plt.subplot(nrows, 2, n + 1) 53 | da.plot() 54 | plt.title(f"Run {(n+1):03}") 55 | # plt.xlim((np.min(xticks), np.max(xticks))) 56 | # plt.xticks(xticks, xlabels) 57 | 58 | # Only label yticks on left-most column 59 | if n % 2 == 0: 60 | plt.yticks(yticks) 61 | else: 62 | plt.yticks(yticks, "") 63 | plt.ylabel("") 64 | 65 | # Only add xlabel on last row 66 | if (n + 1) / 2 == nrows: 67 | plt.xlabel("Date (year 0001)") 68 | else: 69 | plt.xlabel("") 70 | else: 71 | fig = plt.figure(figsize=(9.0, 5.25), clear=True) 72 | fig.suptitle(f"Mix Layer Depth at ({long_west:.2f} W, {lat_south:.2f} S)") 73 | 74 | for da in list_of_das: 75 | da.plot() 76 | plt.title("All 4 runs overlay") 77 | # plt.xlim((np.min(xticks), np.max(xticks))) 78 | # plt.xticks(xticks, xlabels) 79 | plt.yticks(yticks) 80 | plt.xlabel("Date (year 0001)") 81 | 82 | if filename: 83 | fig.savefig(filename) 84 | 85 | return fig 86 | 87 | 88 | ################################################################################ 89 | 90 | 91 | def plot_dict_with_date_keys(dict_in, title, legend=None): 92 | """ 93 | Assume that keys of dict_in are 'YYYYMMDD' and values are numeric 94 | """ 95 | time_units = "days since 0001-01-01 0:00:00" 96 | time = [] 97 | array_val = [] 98 | for date in dict_in.keys(): 99 | if "log" not in date: 100 | (year, month, day) = date.split("-") 101 | time.append(cftime.DatetimeNoLeap(int(year), int(month), int(day))) 102 | array_val.append(dict_in[date]) 103 | 104 | if type(array_val[0]) == list: 105 | dim2 = len(array_val[0]) 106 | da = xr.DataArray(array_val, dims=["time", "dim2"]) 107 | else: 108 | dim2 = None 109 | da = xr.DataArray(array_val, dims="time") 110 | da["time"] = time 111 | 112 | fig = plt.figure(figsize=(9.0, 5.25), clear=True) 113 | if dim2: 114 | for dim2ind in range(dim2): 115 | da.isel(dim2=dim2ind).plot() 116 | else: 117 | da.plot() 118 | if legend: 119 | plt.legend(legend) 120 | plt.title(title) 121 | plt.show() 122 | 123 | 124 | # return fig 125 | 126 | ################################################################################ 127 | 128 | 129 | def _extract_field_from_file(ds, varname, nlat, nlon): 130 | return ds[varname].isel(nlat=nlat, nlon=nlon).compute() 131 | 132 | 133 | ################################################################################ 134 | 135 | 136 | def summary_plot_global_ts( 137 | ds, da, diag_metadata, time_coarsen_len=None, **plot_options 138 | ): 139 | casename = ds.attrs["title"] 140 | save_pngs = plot_options.get("save_pngs", False) 141 | if save_pngs: 142 | root_dir = plot_options.get("root_dir", "images") 143 | kwargs = plot_options.get("savefig_kwargs", {}) 144 | isel_dict = diag_metadata.get("isel_dict", {}) 145 | 146 | reduce_dims 
= da.dims[-2:] 147 | weights = ds["TAREA"].fillna(0) 148 | da_weighted = da.weighted(weights) 149 | spatial_op = diag_metadata.get("spatial_op", "average") 150 | if spatial_op == "average": 151 | to_plot = da_weighted.mean(dim=reduce_dims) 152 | to_plot.attrs = da.attrs 153 | if "display_units" in diag_metadata: 154 | to_plot = conv_units(to_plot, diag_metadata["display_units"]) 155 | if spatial_op == "integrate": 156 | to_plot = da_weighted.sum(dim=reduce_dims) 157 | to_plot.attrs = da.attrs 158 | to_plot.attrs["units"] += f" {weights.attrs['units']}" 159 | if "integral_display_units" in diag_metadata: 160 | to_plot = conv_units( 161 | to_plot, 162 | diag_metadata["integral_display_units"], 163 | units_scalef=diag_metadata.get("integral_unit_conv"), 164 | ) 165 | # do not use to_plot.plot.line("-o") because of incorrect time axis values 166 | # https://github.com/pydata/xarray/issues/4401 167 | fig, ax = plt.subplots() 168 | ax.plot(time_year_plus_frac(to_plot, "time"), to_plot.values, "-o") 169 | ax.set_xlabel(xr.plot.utils.label_from_attrs(to_plot["time"])) 170 | ax.set_ylabel(xr.plot.utils.label_from_attrs(to_plot)) 171 | ax.set_title(to_plot._title_for_slice()) 172 | if time_coarsen_len is not None: 173 | tlen = len(to_plot.time) 174 | tlen_trunc = (tlen // time_coarsen_len) * time_coarsen_len 175 | to_plot_trunc = to_plot.isel(time=slice(0, tlen_trunc)) 176 | to_plot_coarse = to_plot_trunc.coarsen({"time": time_coarsen_len}).mean() 177 | ax.plot( 178 | time_year_plus_frac(to_plot_coarse, "time"), to_plot_coarse.values, "-o" 179 | ) 180 | title = ax.get_title() 181 | if title != "": 182 | title += ", " 183 | title += f"last mean value={round_sig(to_plot_coarse.values[-1],4)}" 184 | ax.set_title(title) 185 | if save_pngs: 186 | str_datestamp = f'{ds[ds["time"].attrs["bounds"]].load().data[0,0]}' 187 | first_datestamp = str_datestamp.split(" ")[0] 188 | str_datestamp = ( 189 | f'{ds[ds["time"].attrs["bounds"]].data[-1,-1]-datetime.timedelta(days=1)}' 190 | ) 191 | last_datestamp = str_datestamp.split(" ")[0] 192 | summary_ts = SummaryTSClass( 193 | da, casename, first_datestamp, last_datestamp, isel_dict 194 | ) 195 | summary_ts.savefig(fig, root_dir=root_dir, **kwargs) 196 | else: 197 | plt.show() 198 | plt.close(fig) 199 | 200 | 201 | ################################################################################ 202 | 203 | 204 | def summary_plot_histogram(ds, da, diag_metadata, lines_per_plot=12, **plot_options): 205 | save_pngs = plot_options.get("save_pngs", False) 206 | casename = ds.attrs["title"] 207 | if save_pngs: 208 | root_dir = plot_options.get("root_dir", "images") 209 | kwargs = plot_options.get("savefig_kwargs", {}) 210 | isel_dict = diag_metadata.get("isel_dict", {}) 211 | 212 | # histogram, all time levels in one plot 213 | hist_bins = 20 214 | hist_log = True 215 | 216 | # Loop length 217 | t_cnt = len(da["time"]) 218 | for apply_log10 in _apply_log10_vals(diag_metadata): 219 | t_ind_beg = 0 220 | fig, ax = plt.subplots() 221 | # fig.tight_layout() 222 | for t_ind in range(t_cnt): 223 | to_plot = da.isel(time=t_ind) 224 | if "display_units" in diag_metadata: 225 | to_plot = conv_units(to_plot, diag_metadata["display_units"]) 226 | if apply_log10: 227 | to_plot = np.log10(xr.where(to_plot > 0, to_plot, np.nan)) 228 | to_plot.name = f"log10({to_plot.name})" 229 | # to_plot.plot.hist(bins=hist_bins, log=hist_log, histtype="step") 230 | to_plot.plot.hist(ax=ax, bins=hist_bins, log=hist_log, histtype="step") 231 | if t_ind % lines_per_plot == lines_per_plot - 1: 232 | 
t_beg = ds[ds["time"].attrs["bounds"]].values[t_ind_beg, 0] 233 | t_str_beg = f"{t_beg.year:04}-{t_beg.month:02}-{t_beg.day:02}" 234 | t_ind_end = t_ind 235 | t_end = ds[ds["time"].attrs["bounds"]].values[ 236 | t_ind_end, -1 237 | ] - datetime.timedelta(days=1) 238 | t_str_end = f"{t_end.year:04}-{t_end.month:02}-{t_end.day:02}" 239 | plt.title(f"Histogram: {t_str_beg} : {t_str_end}") 240 | t_ind_beg = t_ind_end + 1 241 | if save_pngs: 242 | summary_hist = SummaryHistClass( 243 | da, casename, apply_log10, t_str_beg, t_str_end, isel_dict 244 | ) 245 | summary_hist.savefig(fig, root_dir=root_dir, **kwargs) 246 | else: 247 | plt.show() 248 | plt.close(fig) 249 | if t_ind != t_cnt - 1: 250 | fig, ax = plt.subplots() 251 | 252 | if t_ind % lines_per_plot != lines_per_plot - 1: 253 | t_beg = ds[ds["time"].attrs["bounds"]].values[t_ind_beg, 0] 254 | t_str_beg = f"{t_beg.year:04}-{t_beg.month:02}-{t_beg.day:02}" 255 | t_ind_end = t_ind 256 | t_end = ds[ds["time"].attrs["bounds"]].values[ 257 | t_ind_end, -1 258 | ] - datetime.timedelta(days=1) 259 | t_str_end = f"{t_end.year:04}-{t_end.month:02}-{t_end.day:02}" 260 | plt.title(f"Histogram: {t_str_beg} : {t_str_end}") 261 | if save_pngs: 262 | summary_hist = SummaryHistClass( 263 | da, casename, t_str_beg, t_str_end, isel_dict 264 | ) 265 | summary_hist.savefig(fig, root_dir=root_dir, **kwargs) 266 | else: 267 | plt.show() 268 | plt.close(fig) 269 | 270 | 271 | ################################################################################ 272 | 273 | 274 | def summary_plot_maps(ds, da, diag_metadata, **plot_options): 275 | 276 | save_pngs = plot_options.get("save_pngs", False) 277 | casename = ds.attrs["title"] 278 | if save_pngs: 279 | root_dir = plot_options.get("root_dir", "images") 280 | kwargs = plot_options.get("savefig_kwargs", {}) 281 | isel_dict = diag_metadata.get("isel_dict", {}) 282 | 283 | # maps, 1 plots for time level 284 | cmap = "plasma" 285 | 286 | for apply_log10 in _apply_log10_vals(diag_metadata): 287 | vmin = diag_metadata.get("map_vmin") 288 | vmax = diag_metadata.get("map_vmax") 289 | if apply_log10: 290 | if vmin is not None: 291 | vmin = np.log10(vmin) if vmin > 0.0 else None 292 | if vmax is not None: 293 | vmax = np.log10(vmax) if vmax > 0.0 else None 294 | for t_ind in range(len(da["time"])): 295 | to_plot = da.isel(time=t_ind) 296 | if "display_units" in diag_metadata: 297 | to_plot = conv_units(to_plot, diag_metadata["display_units"]) 298 | if apply_log10: 299 | to_plot = np.log10(xr.where(to_plot > 0.0, to_plot, np.nan)) 300 | to_plot.name = f"log10({to_plot.name})" 301 | 302 | ax = to_plot.plot(cmap=cmap, vmin=vmin, vmax=vmax) 303 | fig = ax.get_figure() 304 | if save_pngs: 305 | datestamp = f"{da.time[t_ind].data.item()}".split(" ")[0] 306 | summary_map = SummaryMapClass( 307 | da, casename, datestamp, apply_log10, isel_dict 308 | ) 309 | summary_map.savefig(fig, root_dir=root_dir, **kwargs) 310 | else: 311 | plt.show() 312 | plt.close(fig) 313 | 314 | 315 | ################################################################################ 316 | 317 | 318 | def trend_plot(ds, da, vmin=None, vmax=None, invert_yaxis=False, **plot_options): 319 | 320 | save_pngs = plot_options.get("save_pngs", False) 321 | casename = ds.attrs["title"] 322 | if save_pngs: 323 | root_dir = plot_options.get("root_dir", "images") 324 | kwargs = plot_options.get("savefig_kwargs", {}) 325 | isel_dict = plot_options.get("isel_dict", {}) 326 | t_beg = ds[ds["time"].attrs["bounds"]].values[0, 0] 327 | t_str_beg = 
f"{t_beg.year:04}-{t_beg.month:02}-{t_beg.day:02}" 328 | t_end = ds[ds["time"].attrs["bounds"]].values[-1, -1] - datetime.timedelta( 329 | days=1 330 | ) 331 | t_str_end = f"{t_end.year:04}-{t_end.month:02}-{t_end.day:02}" 332 | 333 | trend = da.polyfit("time", 1).polyfit_coefficients.sel(degree=1) 334 | trend.name = da.name + " Trend" 335 | trend.attrs["long_name"] = da.attrs["long_name"] + " Trend" 336 | nsec_per_yr = 1.0e9 * 86400 * 365 337 | trend = nsec_per_yr * trend 338 | trend.attrs["units"] = da.attrs["units"] + "/yr" 339 | trend.load() 340 | 341 | fig, ax = plt.subplots() 342 | trend.plot.hist(bins=20, log=True, ax=ax) 343 | plt.title(da._title_for_slice()) 344 | if save_pngs: 345 | trend_hist = TrendHistClass(da, casename, t_str_beg, t_str_end, isel_dict) 346 | trend_hist.savefig(fig, root_dir=root_dir, **kwargs) 347 | else: 348 | plt.show() 349 | plt.close(fig) 350 | 351 | fig, ax = plt.subplots() 352 | trend.plot.pcolormesh(cmap="plasma", vmin=vmin, vmax=vmax, ax=ax) 353 | plt.title(da._title_for_slice()) 354 | if invert_yaxis: 355 | ax.invert_yaxis() 356 | if save_pngs: 357 | trend_map = TrendMapClass(da, casename, t_str_beg, t_str_end, isel_dict) 358 | trend_map.savefig(fig, root_dir=root_dir, **kwargs) 359 | else: 360 | plt.show() 361 | plt.close(fig) 362 | 363 | 364 | ################################################################################ 365 | 366 | 367 | def _apply_log10_vals(diag_metadata): 368 | if diag_metadata.get("apply_log10", False): 369 | return [False, True] 370 | else: 371 | return [False] 372 | 373 | 374 | ################################################################################ 375 | -------------------------------------------------------------------------------- /notebooks/utils/CaseClass.py: -------------------------------------------------------------------------------- 1 | """ 2 | Class to use to access output (log and netCDF) from CESM runs 3 | """ 4 | 5 | import glob 6 | import os 7 | import gzip as gz 8 | import cftime 9 | import numpy as np 10 | import xarray as xr 11 | 12 | # local modules, not available through __init__ 13 | from .config import add_first_date_and_reformat 14 | 15 | from .utils import time_set_mid, dict_copy_vals, print_key_metadata 16 | 17 | ################################################################################ 18 | 19 | 20 | class CaseClass(object): 21 | 22 | # Constructor [goal: get an intake-esm catalog into memory; read from disk or generate it] 23 | def __init__( 24 | self, casenames, output_roots, verbose=False, 25 | ): 26 | """ 27 | casenames: a string or list containing the name(s) of the case(s) to include in the object 28 | output_roots: a string or list containing the name(s) of the directories to search for log / netCDF files 29 | * netCDF files may be in one of three locations: 30 | 1. history files may be in {output_root} itself 31 | [e.g. output_root = RUNDIR] 32 | 2. history files may be in {output_root}/{component}/hist 33 | [e.g. output_root = DOUT_S] 34 | 3. time series files may be in {output_root}/{component}/proc/tseries/{freq} 35 | [e.g. output_root = root of pyReshaper output] 36 | * log files may be in one of two locations 37 | 1. {output_root} itself [e.g. output_root = RUNDIR] 38 | 2. {output_root}/logs [e.g. 
output_root = DOUT_S] 39 | """ 40 | if type(casenames) == str: 41 | casenames = [casenames] 42 | if type(casenames) != list: 43 | raise ValueError(f"{casenames} is not a string or list") 44 | 45 | if type(output_roots) == str: 46 | output_roots = [output_roots] 47 | if type(output_roots) != list: 48 | raise ValueError(f"{output_roots} is not a string or list") 49 | 50 | self._casenames = casenames 51 | self._output_roots = [] 52 | for output_dir in output_roots: 53 | if os.path.isdir(output_dir): 54 | self._output_roots.append(output_dir) 55 | self._verbose = verbose 56 | # TODO: figure out how to let this configuration be user-specified (maybe YAML?) 57 | self._stream_metadata = dict() 58 | self._stream_metadata["pop.h"] = {"comp": "ocn", "freq": "month_1"} 59 | self._stream_metadata["pop.h.nday1"] = {"comp": "ocn", "freq": "day_1"} 60 | self._stream_metadata["pop.h.nyear1"] = {"comp": "ocn", "freq": "year_1"} 61 | self._stream_metadata["cice.h"] = {"comp": "ice", "freq": "month_1"} 62 | self._stream_metadata["cice.h1"] = {"comp": "ice", "freq": "day_1"} 63 | self._log_filenames = self._find_log_files() 64 | self._history_filenames, self._timeseries_filenames = self._find_nc_files() 65 | self._dataset_files = dict() 66 | self._dataset_src = dict() 67 | 68 | self.log_contents = dict() 69 | 70 | ############################################################################ 71 | 72 | def get_co2calc_warning_cnt(self, max_it=4): 73 | self._read_log("cesm") 74 | 75 | warning_count = dict() 76 | # For each date, pull value from most recent log file 77 | for date in self.log_contents["cesm"]: 78 | logs = list(self.log_contents["cesm"][date].keys()) 79 | logs.sort() 80 | warning_count[date] = [] 81 | for it in range(1, max_it + 1): 82 | warning_count[date].append( 83 | sum( 84 | [ 85 | f"MARBL WARNING (marbl_co2calc_mod:drtsafe): (marbl_co2calc_mod:drtsafe) it = {it}" 86 | in entry 87 | for entry in self.log_contents["cesm"][date][logs[-1]] 88 | ] 89 | ) 90 | ) 91 | 92 | return warning_count 93 | 94 | ############################################################################ 95 | 96 | def _get_single_year_timeseries_files(self, year, stream, varname): 97 | timeseries_filenames = [ 98 | filename 99 | for filename in self._timeseries_filenames[stream] 100 | if (f".{varname}." 
in filename and f".{year:04}" in filename) 101 | ] 102 | return timeseries_filenames 103 | 104 | ############################################################################ 105 | 106 | def get_timeseries_files(self, year, stream, varnames=None): 107 | if type(varnames) == str: 108 | varnames = [varnames] 109 | if not (type(varnames) == list or varnames is None): 110 | raise ValueError( 111 | f"varnames = {varnames} which is not None, a string, or a list" 112 | ) 113 | 114 | timeseries_filenames = [] 115 | if varnames: 116 | for varname in varnames: 117 | timeseries_filenames.extend( 118 | self._get_single_year_timeseries_files(year, stream, varname) 119 | ) 120 | else: 121 | timeseries_filenames = self._get_single_year_timeseries_files(year, stream) 122 | 123 | return timeseries_filenames 124 | 125 | ############################################################################ 126 | 127 | def check_for_year_in_timeseries_files(self, year, stream): 128 | """ 129 | Return True if {stream} has any timeseries files from {year} 130 | """ 131 | return any( 132 | [ 133 | f".{year:04}" in filename 134 | for filename in self._timeseries_filenames[stream] 135 | ] 136 | ) 137 | 138 | ############################################################################ 139 | 140 | def get_history_files(self, year, stream): 141 | return [ 142 | filename 143 | for filename in self._history_filenames[stream] 144 | if f"{stream}.{year:04}" in filename 145 | ] 146 | 147 | ############################################################################ 148 | 149 | def _find_log_files(self): 150 | """ 151 | Look in each _output_roots dir (and /logs) for cesm.log, ocn.log, and cpl.log files 152 | """ 153 | files = dict() 154 | for component in ["cesm", "ocn", "cpl"]: 155 | files[component] = [] 156 | for output_dir in self._output_roots: 157 | files[component].extend( 158 | glob.glob(os.path.join(output_dir, f"{component}.log.*")) 159 | ) 160 | if os.path.isdir(os.path.join(output_dir, "logs")): 161 | files[component].extend( 162 | glob.glob( 163 | os.path.join(output_dir, "logs", f"{component}.log.*") 164 | ) 165 | ) 166 | return files 167 | 168 | ############################################################################ 169 | 170 | def _find_nc_files(self): 171 | """ 172 | Look for netcdf files in each output_root directory, as well as 173 | {component}/hist and {component}/proc/tseries/{freq} subdirectories 174 | """ 175 | hist_files = dict() 176 | ts_files = dict() 177 | for stream in self._stream_metadata: 178 | hist_files[stream] = [] 179 | ts_files[stream] = [] 180 | comp = self._stream_metadata[stream]["comp"] 181 | freq = self._stream_metadata[stream]["freq"] 182 | for casename in self._casenames: 183 | for output_dir in self._output_roots: 184 | if self._verbose: 185 | print(f"Checking {output_dir} for {stream} files...") 186 | # (1) Look for history files in output_dir 187 | # TODO: need better way to avoid wrong stream than .0* 188 | # (do not want to glob *.pop.h.nday1.* when looking for pop.h files) 189 | pattern = f"{casename}.{stream}.0*.nc" 190 | files_found = glob.glob(os.path.join(output_dir, pattern)) 191 | files_found.sort() 192 | hist_files[stream].extend(files_found) 193 | 194 | # (2) look for history files that might be in {output_dir}/{comp}/hist 195 | # TODO: need better way to avoid wrong stream than .0* 196 | # (do not want to glob *.pop.h.nday1.* when looking for pop.h files) 197 | hist_dir = os.path.join(output_dir, comp, "hist") 198 | if os.path.isdir(hist_dir): 199 | pattern = 
f"{casename}.{stream}.0*.nc" 200 | files_found = glob.glob(os.path.join(hist_dir, pattern)) 201 | files_found.sort() 202 | hist_files[stream].extend(files_found) 203 | 204 | # (3) look for time series files that might be in {output_dir}/{comp}/proc/time_series/{freq} 205 | tseries_dir = os.path.join( 206 | output_dir, comp, "proc", "tseries", freq 207 | ) 208 | if os.path.isdir(tseries_dir): 209 | pattern = f"{casename}.{stream}.*.nc" 210 | files_found = glob.glob(os.path.join(tseries_dir, pattern)) 211 | files_found.sort() 212 | ts_files[stream].extend(files_found) 213 | 214 | return hist_files, ts_files 215 | 216 | ############################################################################ 217 | 218 | def _read_log(self, component): 219 | """ 220 | Read all log files from specified component. Returns a dict where keys 221 | are dates and values are contents of log from that date; if multiple 222 | logs contain the same date, uses the most recent. 223 | """ 224 | if component in self.log_contents: 225 | return 226 | if component not in self._log_filenames: 227 | raise ValueError(f"No known {component}.log files") 228 | 229 | datestamps = {"cesm": "model date =", "cpl": "tStamp"} 230 | try: 231 | datestamp = datestamps[component] 232 | except: 233 | raise ValueError(f"Do not know how to find dates in {component}.log") 234 | 235 | all_dates = [] 236 | contents = dict() 237 | for log in self._log_filenames[component]: 238 | # Open file 239 | is_gz = log.endswith("gz") 240 | if is_gz: 241 | local_open = gz.open 242 | mode = "rt" 243 | else: 244 | local_open = open 245 | mode = "r" 246 | with local_open(log, mode) as f: 247 | single_log_contents = f.readlines() 248 | 249 | # Look for datestamps in log; if none found, save contents as 'date_unknown' 250 | date_inds = np.where([datestamp in entry for entry in single_log_contents])[ 251 | 0 252 | ] 253 | if len(date_inds) == 0: 254 | date = log.split("/")[-1] 255 | if date not in contents: 256 | contents[date] = dict() 257 | contents[date][log] = single_log_contents 258 | continue 259 | 260 | # Set up list of dates and np array of indices 261 | dates_in_log = [ 262 | entry.split(datestamp)[1].strip()[:8] 263 | for entry in np.array(single_log_contents)[date_inds].tolist() 264 | ] 265 | # add first day of run to dates_in_log, and prepend 0 to date_inds 266 | date_inds = np.insert(date_inds, 0, 0) 267 | dates_in_log = add_first_date_and_reformat(dates_in_log) 268 | 269 | # for each date, add contents to dictionary 270 | for n, date in enumerate(dates_in_log[:-1]): 271 | if date not in contents: 272 | contents[date] = dict() 273 | contents[date][log] = single_log_contents[ 274 | date_inds[n] : date_inds[n + 1] 275 | ] 276 | 277 | # Need to account for partial days from runs that die 278 | # e.g. model crashes midway through 00010104 => need an 00010105 stamp (since we're using datestamp from end of the day, e.g. 
midnight the next day) 279 | if not is_gz: 280 | date = dates_in_log[-1] 281 | if date not in contents: 282 | contents[date] = dict() 283 | contents[date][log] = single_log_contents[date_inds[-1] :] 284 | 285 | self.log_contents[component] = dict() 286 | for key in sorted(contents): 287 | self.log_contents[component][key] = contents[key] 288 | 289 | ############################################################################ 290 | 291 | def get_catalog(self): 292 | """ 293 | Return intake esm catalog that was created / read in constructor 294 | """ 295 | return self.catalog 296 | 297 | ############################################################################ 298 | 299 | def get_dataset_source(self, stream, year, varname): 300 | 301 | # Does _dataset_src[stream] exist? 302 | if stream not in self._dataset_src: 303 | print(f"No datasets have been returned from {stream}") 304 | return None 305 | 306 | # Does _dataset_src[stream][year] exist? 307 | if year not in self._dataset_src[stream]: 308 | print( 309 | f"No datasets covering year {year:04} have been returned from {stream}" 310 | ) 311 | return None 312 | 313 | # Does _dataset_src[stream][year][varname] exist? 314 | if varname not in self._dataset_src[stream][year]: 315 | print( 316 | f"No dataset containing {varname} from year {year:04} have been returned from {stream}" 317 | ) 318 | return None 319 | 320 | return self._dataset_src[stream][year][varname] 321 | 322 | ############################################################################ 323 | 324 | def gen_dataset( 325 | self, 326 | varnames, 327 | stream, 328 | vars_to_keep=None, 329 | start_year=1, 330 | end_year=61, 331 | quiet=False, 332 | debug=False, 333 | **kwargs, 334 | ): 335 | """ 336 | Open all history files from a specified stream. Returns a dict where keys 337 | are stream names and values are xarray Datasets 338 | 339 | Pared-down API for working with intake-esm catalog. 340 | Users familiar with intake-esm may prefer self.get_catalog() and then querying directly. 
341 | """ 342 | if type(varnames) == str: 343 | varnames = [varnames] 344 | if type(varnames) != list: 345 | raise ValueError(f"{varnames} is not a string or list") 346 | 347 | if stream not in self._dataset_files: 348 | self._dataset_files[stream] = dict() 349 | self._dataset_src[stream] = dict() 350 | 351 | # Set some defaults to pass to open_mfdataset, then apply kwargs argument 352 | open_mfdataset_kwargs = dict() 353 | # data_vars="minimal", to avoid introducing time dimension to time-invariant fields 354 | open_mfdataset_kwargs["data_vars"] = "minimal" 355 | # compat="override", to skip var consistency checks (for speed) 356 | open_mfdataset_kwargs["compat"] = "override" 357 | # coords="minimal", because coords cannot be default="different" if compat="override" 358 | open_mfdataset_kwargs["coords"] = "minimal" 359 | # parallel=True to open files in parallel 360 | open_mfdataset_kwargs["parallel"] = True 361 | open_mfdataset_kwargs.update(kwargs) 362 | 363 | # Pull specific keys from open_mfdataset_kwargs to pass to xr.concat 364 | concat_keys = ["data_vars", "compat", "coords"] 365 | concat_kwargs = { 366 | key: value 367 | for key, value in open_mfdataset_kwargs.items() 368 | if key in concat_keys 369 | } 370 | 371 | # Make sure these variables are kept in all datasets 372 | _vars_to_keep = ["time_bound", "TAREA"] 373 | if vars_to_keep is not None: 374 | if type(vars_to_keep) == str: 375 | vars_to_keep = [vars_to_keep] 376 | if type(vars_to_keep) != list: 377 | raise ValueError(f"{vars_to_keep} is not a string or list") 378 | _vars_to_keep.extend(vars_to_keep) 379 | 380 | # Pare down time series file list (only contains years and variables we are interested in) 381 | ds_timeseries_per_var = [] 382 | for varname in varnames: 383 | timeseries_filenames = [] 384 | for year in range(start_year, end_year + 1): 385 | if year not in self._dataset_files[stream]: 386 | self._dataset_files[stream][year] = dict() 387 | self._dataset_src[stream][year] = dict() 388 | self._dataset_files[stream][year][varname] = self.get_timeseries_files( 389 | year, stream, varname 390 | ) 391 | if self._dataset_files[stream][year][varname]: 392 | self._dataset_src[stream][year][varname] = "time series" 393 | timeseries_filenames.extend( 394 | self._dataset_files[stream][year][varname] 395 | ) 396 | if timeseries_filenames: 397 | dsmf = xr.open_mfdataset(timeseries_filenames, **open_mfdataset_kwargs)[ 398 | [varname] + _vars_to_keep 399 | ] 400 | with xr.open_dataset(timeseries_filenames[0])[ 401 | [varname] + _vars_to_keep 402 | ] as ds0: 403 | if debug: 404 | print(open_mfdataset_kwargs) 405 | print_key_metadata( 406 | dsmf, "timeseries_filenames open_mfdataset dsmf" 407 | ) 408 | print_key_metadata( 409 | ds0, "timeseries_filenames open_mfdataset ds0" 410 | ) 411 | dict_copy_vals(ds0.encoding, dsmf.encoding, "unlimited_dims") 412 | dict_copy_vals( 413 | ds0["time"].encoding, 414 | dsmf["time"].encoding, 415 | ["dtype", "_FillValue", "units", "calendar"], 416 | ) 417 | ds_timeseries_per_var.append(dsmf) 418 | 419 | if ds_timeseries_per_var: 420 | ds_timeseries = xr.merge(ds_timeseries_per_var, combine_attrs="override") 421 | ds0 = ds_timeseries_per_var[0] 422 | if debug: 423 | print_key_metadata( 424 | ds_timeseries, "ds_timeseries_per_var merge ds_timeseries" 425 | ) 426 | print_key_metadata(ds0, "ds_timeseries_per_var merge ds0") 427 | dict_copy_vals(ds0.encoding, ds_timeseries.encoding, "unlimited_dims") 428 | tb_name_ts = ds_timeseries["time"].attrs["bounds"] 429 | tb = ds_timeseries[tb_name_ts] 430 | if 
tb.dtype == np.dtype("O"): 431 | start_year = int(tb.values[-1, 1].strftime("%Y")) 432 | else: 433 | # NOTE: this block will be used if decode_times=False in open_mfdataset() 434 | # If decode_times=False because cftime can not decode the time dimension, 435 | # then this will likely fail and we'll need a better way to determine 436 | # the last year read from time series. Maybe pull from filenames? 437 | decoded_tb = cftime.num2date( 438 | tb.values[-1, 1], 439 | tb.attrs["units"], 440 | calendar=ds_timeseries["time"].attrs["calendar"], 441 | ) 442 | start_year = int(decoded_tb.strftime("%Y")) 443 | 444 | # Pare down history file list 445 | history_filenames = [] 446 | for year in range(start_year, end_year + 1): 447 | if year not in self._dataset_files[stream]: 448 | self._dataset_files[stream][year] = dict() 449 | self._dataset_src[stream][year] = dict() 450 | self._dataset_files[stream][year][varname] = self.get_history_files( 451 | year, stream 452 | ) 453 | if self._dataset_files[stream][year][varname]: 454 | self._dataset_src[stream][year][varname] = "hist" 455 | history_filenames.extend(self._dataset_files[stream][year][varname]) 456 | 457 | if history_filenames: 458 | ds_history = xr.open_mfdataset(history_filenames, **open_mfdataset_kwargs)[ 459 | varnames + _vars_to_keep 460 | ] 461 | with xr.open_dataset(history_filenames[0])[varnames + _vars_to_keep] as ds0: 462 | if debug: 463 | print_key_metadata( 464 | ds_history, "history_filenames open_mfdataset ds_history" 465 | ) 466 | print_key_metadata(ds0, "history_filenames open_mfdataset ds0") 467 | dict_copy_vals(ds0.encoding, ds_history.encoding, "unlimited_dims") 468 | dict_copy_vals( 469 | ds0["time"].encoding, 470 | ds_history["time"].encoding, 471 | ["dtype", "_FillValue", "units", "calendar"], 472 | ) 473 | 474 | # Concatenate discovered datasets 475 | if ds_timeseries_per_var: 476 | if history_filenames: 477 | print( 478 | f'Time series ends at {ds_timeseries["time_bound"].values[-1,1]}, history files begin at {ds_history["time_bound"].values[0,0]}' 479 | ) 480 | ds = xr.concat([ds_timeseries, ds_history], dim="time", **concat_kwargs) 481 | if debug: 482 | print_key_metadata(ds, "xr.concat ds") 483 | print_key_metadata(ds_timeseries, "xr.concat ds_timeseries") 484 | print_key_metadata(ds_history, "xr.concat ds_history") 485 | for ds_src in [ds_timeseries, ds_history]: 486 | dict_copy_vals( 487 | ds_src["time"].encoding, 488 | ds["time"].encoding, 489 | ["dtype", "_FillValue", "units", "calendar"], 490 | ) 491 | else: 492 | ds = ds_timeseries 493 | else: 494 | if history_filenames: 495 | ds = ds_history 496 | else: 497 | raise ValueError( 498 | f"Can not find requested variables between {start_year:04} and {end_year:04}" 499 | ) 500 | 501 | ds = time_set_mid(ds, "time") 502 | 503 | if not quiet: 504 | print(f'Datasets contain a total of {ds.sizes["time"]} time samples') 505 | tb_name = ds["time"].attrs["bounds"] 506 | if not quiet: 507 | print(f"Last average written at {ds[tb_name].values[-1, 1]}") 508 | return ds 509 | --------------------------------------------------------------------------------
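For reference, a minimal usage sketch of the CaseClass and plotting utilities documented above, modeled on the plot-suite notebooks earlier in this repository. The case name and campaign path are the ones used in those notebooks; the diag_metadata entry is a simplified, hypothetical stand-in for an entry of the notebooks' diag_metadata list, and the snippet assumes it is run from the notebooks/ directory so that "import utils" resolves to notebooks/utils.

import os
import utils

casename = "g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001"
campaign_root = os.path.join(
    os.path.sep, "glade", "campaign", "cesm", "development",
    "bgcwg", "projects", "1deg_cocco_JRA", "cases",
)

# Locate history files, time series files, and logs for the case
case = utils.CaseClass(casename, os.path.join(campaign_root, casename))

# Hypothetical, pared-down diagnostic entry (the notebooks read these from a YAML list)
diag_metadata = {"varname": "FG_CO2"}

# Open years 95-104 of the monthly POP stream and generate summary plots as PNGs
ds = case.gen_dataset(diag_metadata["varname"], "pop.h", start_year=95, end_year=104)
da = ds[diag_metadata["varname"]]  # assumes no isel_dict selection is needed

utils.summary_plot_global_ts(
    ds, da, diag_metadata, time_coarsen_len=12, save_pngs=True, savefig_kwargs={"dpi": 72}
)
utils.summary_plot_histogram(
    ds, da, diag_metadata, save_pngs=True, savefig_kwargs={"dpi": 72}
)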