├── runtime.txt ├── requirements.txt ├── Procfile ├── notebooks ├── utils │ ├── cime.py │ ├── __init__.py │ ├── config.py │ ├── utils_units.py │ ├── compare_ts_and_hist.py │ ├── utils.py │ ├── PlotTypeClass.py │ ├── Plotting.py │ └── CaseClass.py ├── run_all.py ├── dashboard.yaml ├── run_notebooks.sh ├── plot_suite_maps_0095_1deg.ipynb ├── diag_metadata.yaml ├── compare_ts_and_hist_003.ipynb ├── plot_suite_maps_0001_003.ipynb ├── plot_suite_maps_0001_004.ipynb ├── trend_maps.003.ipynb ├── trend_maps.004.ipynb ├── plot_suite_003.ipynb ├── plot_suite_004.ipynb ├── compare_ts_and_hist_004.ipynb ├── plot_suite_1deg.ipynb └── gen_csv.ipynb ├── .github └── workflows │ ├── verify_pre-commit.yaml │ └── ci.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── environments └── environment.yaml ├── tests ├── test_utils_units.py ├── xr_ds_ex.py └── test_utils.py ├── data_reshaping ├── pop.h_t13.sh ├── cice.h_t13.sh ├── cice.h1_t13.sh ├── pop.h.nyear1_t13.sh ├── pop.h.nday1_t13.sh └── run_all.py └── README.md /runtime.txt: -------------------------------------------------------------------------------- 1 | python-3.9.2 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | hvplot 2 | jupyterlab 3 | fsspec 4 | psutil 5 | aiohttp 6 | git+https://github.com/andersy005/panelify.git 7 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: panel serve --address="0.0.0.0" --port=$PORT notebooks/Interactive_Dashboard.ipynb --allow-websocket-origin=hires-cesm-analysis.dokku.projectpythia.org --stats-log-frequency 100000 --mem-log-frequency 100000 2 | -------------------------------------------------------------------------------- /notebooks/utils/cime.py: -------------------------------------------------------------------------------- 1 | """ 2 | methods specific to CIME, but independent of models/components that are run with CIME 3 | """ 4 | 5 | import subprocess 6 | 7 | 8 | def cime_xmlquery(caseroot, varname): 9 | """run CIME's xmlquery for varname in the directory caseroot, return the value""" 10 | return subprocess.check_output( 11 | ["./xmlquery", "--value", varname], cwd=caseroot 12 | ).decode() 13 | -------------------------------------------------------------------------------- /notebooks/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # make methods available for usage externally and in notebooks 2 | 3 | from .CaseClass import CaseClass 4 | from .Plotting import ( 5 | compare_fields_at_lat_lon, 6 | plot_dict_with_date_keys, 7 | summary_plot_global_ts, 8 | summary_plot_histogram, 9 | summary_plot_maps, 10 | trend_plot, 11 | ) 12 | from .utils import ( 13 | gen_output_roots_from_caseroot, 14 | get_varnames_from_metadata_list, 15 | timeseries_and_history_comparison, 16 | generate_plot_catalog, 17 | ) 18 | -------------------------------------------------------------------------------- /.github/workflows/verify_pre-commit.yaml: -------------------------------------------------------------------------------- 1 | name: Run all pre-commit checks one more time 2 | 3 | on: 4 | push: 5 | branches: "*" 6 | pull_request: 7 | branches: master 8 | 9 | jobs: 10 | pre-commit: 11 | name: pre-commit 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - name: checkout 16 | uses: actions/checkout@v2 17 | 18 | - 
name: set up python 19 | uses: actions/setup-python@v2 20 | with: 21 | python-version: 3.8 22 | 23 | - name: Run pre-commit 24 | uses: pre-commit/action@v2.0.0 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/.ipynb_checkpoints 2 | **/__pycache__ 3 | notebooks/logs 4 | data_reshaping/logs 5 | 6 | # ignore images directory, except for the image catalog 7 | # complicated, but it works... https://stackoverflow.com/a/16318111 8 | notebooks/images/* 9 | !notebooks/images/g.e22.G1850ECO_JRA_HR.TL319_t13.003 10 | !notebooks/images/g.e22.G1850ECO_JRA_HR.TL319_t13.004 11 | !notebooks/images/g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001 12 | notebooks/images/g.e22.G1850ECO_JRA_HR.TL319_t13.003/* 13 | notebooks/images/g.e22.G1850ECO_JRA_HR.TL319_t13.004/* 14 | notebooks/images/g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001/* 15 | !notebooks/images/*/png_catalog.csv 16 | -------------------------------------------------------------------------------- /notebooks/run_all.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | This script is intended for developers to rerun all dask-free notebooks without 4 | launching JupyterHub or a jupyter lab session. 5 | It relies on the run_notebooks function. 6 | """ 7 | 8 | import os 9 | 10 | # For now, plot_suite and trend_maps don't run with nbconvert 11 | # It may be NCAR_jobqueue related... 12 | notebooks = [] 13 | notebooks.append("Sanity\ Check.ipynb") 14 | notebooks.append("Pull\ info\ from\ logs.ipynb") 15 | notebooks.append(f"compare_ts_and_hist_*.ipynb") 16 | notebooks.append(f"plot_suite_maps_*.ipynb") 17 | 18 | cmd = "./run_notebooks.sh " + " ".join(notebooks) 19 | os.system(cmd) 20 | -------------------------------------------------------------------------------- /notebooks/utils/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper functions to find files in the various directories 3 | """ 4 | 5 | import os 6 | 7 | 8 | ################################################################################ 9 | 10 | 11 | def add_first_date_and_reformat(date_list): 12 | new_list = [] 13 | for date in date_list: 14 | year = int(date[:4]) 15 | month = int(date[4:6]) 16 | day = int(date[6:]) 17 | if len(new_list) == 0: 18 | if day > 1: 19 | first_date = f"{year:04}-{month:02}-{(day-1):02}" 20 | else: 21 | first_date = "first" 22 | new_list.append(first_date) 23 | new_list.append(f"{year:04}-{month:02}-{day:02}") 24 | return new_list 25 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v3.2.0 4 | hooks: 5 | - id: trailing-whitespace 6 | - id: end-of-file-fixer 7 | - id: check-yaml 8 | 9 | - repo: https://github.com/ambv/black 10 | rev: 19.10b0 # consistent with conda environment 11 | hooks: 12 | - id: black 13 | args: [] 14 | 15 | - repo: https://github.com/deathbeds/prenotebook 16 | rev: f5bdb72a400f1a56fe88109936c83aa12cc349fa 17 | hooks: 18 | - id: prenotebook 19 | args: 20 | [ 21 | '--keep-output', 22 | '--keep-metadata', 23 | '--keep-execution-count', 24 | '--keep-empty', 25 | ] 26 | -------------------------------------------------------------------------------- /notebooks/dashboard.yaml: 
-------------------------------------------------------------------------------- 1 | summary_map: 2 | keys: 3 | - "casename" 4 | - "varname" 5 | - "date" 6 | - "apply_log10" 7 | - "sel_dict" 8 | column_widget_types: 9 | date: "discrete_slider" 10 | 11 | time_series: 12 | keys: 13 | - "casename" 14 | - "varname" 15 | - "time_period" 16 | - "sel_dict" 17 | 18 | histogram: 19 | keys: 20 | - "casename" 21 | - "varname" 22 | - "time_period" 23 | - "apply_log10" 24 | - "sel_dict" 25 | column_widget_types: 26 | time_period: "discrete_slider" 27 | 28 | trend_hist: 29 | keys: 30 | - "casename" 31 | - "varname" 32 | - "time_period" 33 | - "sel_dict" 34 | 35 | trend_map: 36 | keys: 37 | - "casename" 38 | - "varname" 39 | - "time_period" 40 | - "sel_dict" 41 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: Continuous Integration 2 | on: 3 | push: 4 | branches: 5 | - "*" 6 | pull_request: 7 | branches: 8 | - "master" 9 | 10 | jobs: 11 | build: 12 | name: Test on ubuntu 13 | runs-on: ubuntu-latest 14 | strategy: 15 | fail-fast: false 16 | steps: 17 | - name: checkout 18 | uses: actions/checkout@v2 19 | 20 | - name: Install conda 21 | uses: conda-incubator/setup-miniconda@v2 22 | with: 23 | auto-update-conda: true 24 | activate-environment: hires-marbl 25 | environment-file: environments/environment.yaml 26 | auto-activate-base: false 27 | 28 | - name: Show conda environment 29 | shell: bash -l {0} 30 | run: conda list 31 | 32 | - name: Run Tests 33 | shell: bash -l {0} 34 | run: pytest -v tests/ 35 | -------------------------------------------------------------------------------- /environments/environment.yaml: -------------------------------------------------------------------------------- 1 | name: hires-marbl 2 | channels: 3 | - conda-forge 4 | - nodefaults 5 | dependencies: 6 | - aiohttp 7 | - bokeh 8 | - bottleneck 9 | - cartopy 10 | - cf-units 11 | - cftime 12 | - cmocean 13 | - dask-jobqueue>=0.7.2 14 | - dask-labextension 15 | - dask-mpi 16 | - dask==2021.7.0 17 | - distributed==2021.7.0 18 | - eofs 19 | - esmpy 20 | - fsspec 21 | - hvplot 22 | - intake 23 | - intake-esm 24 | - ipykernel 25 | - ipywidgets 26 | - jupyter-server-proxy 27 | - jupyterlab>=3 28 | - matplotlib==3.4.2 29 | - metpy 30 | - nc-time-axis 31 | - ncar-jobqueue 32 | - netcdf4 33 | - nodejs 34 | - numba 35 | - numpy 36 | - pandas 37 | - pint 38 | - pip 39 | - pop-tools 40 | - pre-commit 41 | - pytest 42 | - python=3.9 43 | - scipy 44 | - seaborn 45 | - seawater 46 | - statsmodels 47 | - toolz 48 | - tqdm 49 | - watermark 50 | - xarray==0.18.2 51 | - xesmf 52 | - xgcm 53 | - xhistogram 54 | - xrft 55 | - zarr 56 | - pip 57 | - pip: 58 | - -r ../requirements.txt 59 | -------------------------------------------------------------------------------- /notebooks/utils/utils_units.py: -------------------------------------------------------------------------------- 1 | """ 2 | utility functions related to units 3 | """ 4 | 5 | import re 6 | 7 | from pint import UnitRegistry 8 | import xarray as xr 9 | 10 | 11 | def conv_units(da, units_out, units_scalef=None): 12 | """ 13 | return a copy of da, with units converted to units_out 14 | """ 15 | # use apply_ufunc to preserve dask-ness of da 16 | func = lambda values: _conv_units_np( 17 | values, da.attrs["units"], units_out, units_scalef 18 | ) 19 | da_out = xr.apply_ufunc( 20 | func, da, keep_attrs=True, dask="parallelized", 
output_dtypes=[da.dtype] 21 | ) 22 | da_out.attrs["units"] = units_out 23 | da_out.encoding = da.encoding 24 | return da_out 25 | 26 | 27 | def _clean_units(units): 28 | """replace some troublesome unit terms with acceptable replacements""" 29 | replacements = { 30 | "kgC": "kg", 31 | "gC": "g", 32 | "gC13": "g", 33 | "gC14": "g", 34 | "gN": "g", 35 | "unitless": "1", 36 | "years": "common_years", 37 | "yr": "common_year", 38 | "meq": "mmol", 39 | "neq": "nmol", 40 | } 41 | units_split = re.split(r"( |\(|\)|\^|\*|/|-[0-9]+|[0-9]+)", units) 42 | units_split_repl = [ 43 | replacements[token] if token in replacements else token for token in units_split 44 | ] 45 | return "".join(units_split_repl) 46 | 47 | 48 | def _conv_units_np(values, units_in, units_out, units_scalef=None): 49 | """ 50 | return a copy of numpy array values, with units converted from units_in to units_out 51 | """ 52 | ureg = UnitRegistry() 53 | values_in_pint = ureg.Quantity(values, ureg(_clean_units(units_in))) 54 | if units_scalef is not None: 55 | values_in_pint *= ureg(_clean_units(units_scalef)) 56 | values_out_pint = values_in_pint.to(_clean_units(units_out)) 57 | return values_out_pint.magnitude 58 | -------------------------------------------------------------------------------- /tests/test_utils_units.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import pytest 6 | import xarray as xr 7 | import numpy as np 8 | 9 | sys.path.append(os.path.abspath(os.path.join("notebooks", "utils"))) 10 | sys.path.append(os.path.abspath("tests")) 11 | from utils_units import _clean_units, conv_units 12 | from xr_ds_ex import xr_ds_ex 13 | 14 | nyrs = 3 15 | var_const = False 16 | 17 | 18 | @pytest.mark.parametrize( 19 | "units_in, units_out", 20 | [ 21 | # basic example, straight from dictionary 22 | ("years", "common_years"), 23 | # ensure 'gC' in 'degC' doesn't get converted 24 | ("degC", "degC"), 25 | # matches within expressions 26 | ("gN leaf/m^2", "g leaf/m^2"), # LNC 27 | ("gC/m^2/s", "g/m^2/s"), # AR 28 | ("meq/m^3", "mmol/m^3"), # ALK 29 | ( 30 | "(centimeter^2)(meq/m^3 cm/s)", 31 | "(centimeter^2)(mmol/m^3 cm/s)", 32 | ), # ALK_RIV_FLUX integral 33 | ("neq/cm3", "nmol/cm3"), # ABIO_ALK_SURF 34 | ("degC*cm/s", "degC*cm/s"), # T_FLUX_EXCH_INTRF 35 | ("days since 0001-01-01 00:00:00", "days since 0001-01-01 00:00:00"), # time 36 | # multiple matches 37 | ("gC/gN", "g/g"), # LEAFCN 38 | ], 39 | ) 40 | def test_clean_units(units_in, units_out): 41 | assert _clean_units(units_in) == units_out 42 | 43 | 44 | @pytest.mark.parametrize("apply_chunk", [True, False]) 45 | @pytest.mark.parametrize("add_encoding", [True, False]) 46 | def test_conv_units(apply_chunk, add_encoding): 47 | da = xr_ds_ex()["var_ex"] 48 | da.attrs["units"] = "kg" 49 | da.attrs["long_name"] = "var_ex" 50 | if apply_chunk: 51 | da = da.chunk({"time": 12}) 52 | if add_encoding: 53 | da.encoding["_FillValue"] = None 54 | 55 | da_out = conv_units(da, "g") 56 | 57 | assert da_out.attrs["units"] == "g" 58 | assert da_out.encoding == da.encoding 59 | assert da_out.chunks == da.chunks 60 | assert np.all(da_out.values == 1000.0 * da.values) 61 | -------------------------------------------------------------------------------- /notebooks/utils/compare_ts_and_hist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | A script to verify that converting from history files to time series worked as 
expected 4 | """ 5 | 6 | import xarray as xr 7 | from . import CaseClass 8 | 9 | 10 | def compare_ts_and_hist( 11 | casename, output_roots, stream, year, exclude_vars=["time_bound", "time_bounds"], 12 | ): 13 | """ 14 | Generate a CaseClass object from a given casename. For a given stream 15 | and year, open the history files from the case. Then loop through the 16 | variables (excluding time_bound in POP and time_bounds in CICE) and 17 | verify that those fields are available in time series. 18 | """ 19 | # Set some defaults to pass to open_mfdataset, then apply kwargs argument 20 | open_mfdataset_kwargs = dict() 21 | # data_vars="minimal", to avoid introducing time dimension to time-invariant fields 22 | open_mfdataset_kwargs["data_vars"] = "minimal" 23 | # compat="override", to skip var consistency checks (for speed) 24 | open_mfdataset_kwargs["compat"] = "override" 25 | # coords="minimal", because coords cannot be default="different" if compat="override" 26 | open_mfdataset_kwargs["coords"] = "minimal" 27 | # parallel=True to open files in parallel 28 | open_mfdataset_kwargs["parallel"] = True 29 | 30 | found_all = True 31 | 32 | case = CaseClass.CaseClass(casename, output_roots) 33 | # Return if no time series is available 34 | if not case.check_for_year_in_timeseries_files(year, stream): 35 | return "no time series" 36 | 37 | # Return if no history files are available 38 | history_filenames = case.get_history_files(year, stream) 39 | if len(history_filenames) == 0: 40 | return "no history" 41 | 42 | # Open history files to build dataset 43 | ds_hist = xr.open_mfdataset(history_filenames, **open_mfdataset_kwargs) 44 | vars_to_check = [ 45 | var 46 | for var in ds_hist.data_vars 47 | if "time" in ds_hist[var].coords and not var in exclude_vars 48 | ] 49 | 50 | # Look for each variable in time series 51 | for var in vars_to_check: 52 | if len(case.get_timeseries_files(year, stream, var)) == 0: 53 | print(f"No time series files for {var} in year {year:04}") 54 | found_all = False 55 | 56 | # Return "same" if all variables were found, otherwise return "datasets differ" 57 | if not found_all: 58 | return "datasets differ" 59 | return "same" 60 | 61 | 62 | ######################## 63 | 64 | if __name__ == "__main__": 65 | print("Feature not implemented yet") 66 | -------------------------------------------------------------------------------- /tests/xr_ds_ex.py: -------------------------------------------------------------------------------- 1 | """function for example xarray.Dataset object""" 2 | 3 | import cftime 4 | import numpy as np 5 | import xarray as xr 6 | 7 | days_1yr = np.array( 8 | [31.0, 28.0, 31.0, 30.0, 31.0, 30.0, 31.0, 31.0, 30.0, 31.0, 30.0, 31.0] 9 | ) 10 | 11 | 12 | def gen_time_bounds_values(nyrs=3): 13 | """return numpy array of values of month boundaries""" 14 | time_edges = np.insert(np.cumsum(np.tile(days_1yr, nyrs)), 0, 0) 15 | return np.stack((time_edges[:-1], time_edges[1:]), axis=1) 16 | 17 | 18 | def xr_ds_ex(decode_times=True, nyrs=3, var_const=True, time_mid=True): 19 | """return an example xarray.Dataset object, useful for testing functions""" 20 | 21 | # set up values for Dataset, nyrs yrs of analytic monthly values 22 | time_bounds_values = gen_time_bounds_values(nyrs) 23 | if time_mid: 24 | time_values = 0.5 * time_bounds_values[:, 0] + 0.5 * time_bounds_values[:, 1] 25 | else: 26 | time_values = 0.25 * time_bounds_values[:, 0] + 0.75 * time_bounds_values[:, 1] 27 | time_values_yr = time_values / 365.0 28 | if var_const: 29 | var_values = 
np.ones_like(time_values_yr) 30 | else: 31 | var_values = np.sin(np.pi * time_values_yr) * np.exp(-0.1 * time_values_yr) 32 | 33 | time_units = "days since 0001-01-01" 34 | calendar = "noleap" 35 | 36 | if decode_times: 37 | time_values = cftime.num2date(time_values, time_units, calendar) 38 | time_bounds_values = cftime.num2date(time_bounds_values, time_units, calendar) 39 | 40 | # create Dataset, including time_bounds 41 | time_var = xr.DataArray( 42 | time_values, 43 | name="time", 44 | dims="time", 45 | coords={"time": time_values}, 46 | attrs={"bounds": "time_bounds"}, 47 | ) 48 | if not decode_times: 49 | time_var.attrs["units"] = time_units 50 | time_var.attrs["calendar"] = calendar 51 | time_bounds = xr.DataArray( 52 | time_bounds_values, 53 | name="time_bounds", 54 | dims=("time", "d2"), 55 | coords={"time": time_var}, 56 | ) 57 | var = xr.DataArray( 58 | var_values, name="var_ex", dims="time", coords={"time": time_var} 59 | ) 60 | ds = var.to_dataset() 61 | days_in_month = xr.DataArray( 62 | np.tile(days_1yr, nyrs).squeeze(), 63 | name="days_in_month", 64 | dims="time", 65 | coords={"time": time_var}, 66 | ) 67 | ds = xr.merge([ds, time_bounds, days_in_month]) 68 | 69 | if decode_times: 70 | ds.time.encoding["units"] = time_units 71 | ds.time.encoding["calendar"] = calendar 72 | 73 | return ds 74 | -------------------------------------------------------------------------------- /notebooks/run_notebooks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage () { 4 | echo "$0 NOTEBOOK [NOTEBOOK2 ... NOTEBOOKN]" 5 | echo "Submit job(s) to run all notebooks on casper node via jupyter nbconvert" 6 | echo "" 7 | echo "For each specified file, the full call is:" 8 | echo "jupyter nbconvert --to notebook --inplace --ExecutePreprocessor.kernel_name=python \\ 9 | --ExecutePreprocessor.timeout=3600 --execute NOTEBOOK" 10 | echo "" 11 | echo "Output from the pbs job is written in the logs/ directory," 12 | echo "which will be created if it does not exist." 13 | } 14 | 15 | ######################### 16 | 17 | # Function that creates a temporary script 18 | # that is submitted via qsub 19 | submit_pbs_script () { 20 | 21 | nbname=`echo ${notebook} | sed -e "s/ /_/g"` 22 | 23 | echo "running ${notebook}.ipynb..." 24 | cat > ${nbname}.sub << EOF 25 | #!/bin/bash 26 | # 27 | #PBS -N ${nbname} 28 | #PBS -A P93300606 29 | #PBS -l select=1:ncpus=1:mem=100G 30 | #PBS -l walltime=6:00:00 31 | #PBS -q casper 32 | #PBS -j oe 33 | #PBS -m ea 34 | 35 | ${set_env} 36 | jupyter nbconvert --to notebook --inplace --ExecutePreprocessor.kernel_name=python \\ 37 | --ExecutePreprocessor.timeout=3600 --execute "${notebook}.ipynb" 38 | EOF 39 | 40 | qsub ${nbname}.sub 41 | rm -f ${nbname}.sub 42 | } 43 | 44 | ######################## 45 | 46 | # Function that creates a temporary script 47 | # that is submitted via sbatch 48 | submit_slurm_script () { 49 | 50 | nbname=`echo ${notebook} | sed -e "s/ /_/g"` 51 | 52 | echo "running ${notebook}.ipynb..." 
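# Build a temporary Slurm batch script from the heredoc below, submit it with sbatch,
# and then remove it. This mirrors submit_pbs_script above; the loop at the bottom of
# this file currently calls submit_pbs_script, so this Slurm variant is only used if
# that call is switched to submit_slurm_script.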
53 | cat > ${nbname}.sub << EOF 54 | #!/bin/bash 55 | # 56 | #SBATCH -n 16 57 | #SBATCH -N 1 58 | #SBATCH --ntasks-per-node=16 59 | #SBATCH -t 6:00:00 60 | #SBATCH -p dav 61 | #SBATCH -J ${nbname} 62 | #SBATCH --account=P93300606 63 | #SBATCH --mem 100G 64 | #SBATCH -e logs/${nbname}.err.%J 65 | #SBATCH -o logs/${nbname}.out.%J 66 | #SBATCH --mail-type=ALL 67 | #SBATCH --mail-user=${USER}@ucar.edu 68 | #SBATCH -m block 69 | 70 | ${set_env} 71 | jupyter nbconvert --to notebook --inplace --ExecutePreprocessor.kernel_name=python \\ 72 | --ExecutePreprocessor.timeout=3600 --execute "${notebook}.ipynb" 73 | EOF 74 | 75 | sbatch ${nbname}.sub 76 | rm -f ${nbname}.sub 77 | } 78 | 79 | ######################### 80 | 81 | if [ $# == 0 ]; then 82 | usage 83 | exit 1 84 | fi 85 | 86 | for args in "$@" 87 | do 88 | if [ "$args" == "-h" ] || [ "$args" == "--help" ]; then 89 | usage 90 | exit 0 91 | fi 92 | done 93 | 94 | # not sure why conda activate doesn't work but source activate does... 95 | set_env="export PATH=/glade/work/${USER}/miniconda3/bin/:$PATH ; source activate hires-marbl || exit -1" 96 | 97 | # make sure log directory exists 98 | mkdir -p logs 99 | 100 | for notebook_full in "$@" 101 | do 102 | if [ ! -f "${notebook_full}" ]; then 103 | echo "WARNING: can not find ${notebook_full}" 104 | continue 105 | fi 106 | notebook=`echo ${notebook_full} | cut -d '.' -f 1` 107 | submit_pbs_script $notebook 108 | done 109 | -------------------------------------------------------------------------------- /data_reshaping/pop.h_t13.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | # 3 | #SBATCH -n 64 4 | #SBATCH -N 4 5 | #SBATCH --ntasks-per-node=16 6 | #SBATCH -t 24:00:00 7 | #SBATCH -p dav 8 | #SBATCH -J Pop.h_t13 9 | #SBATCH --account=P93300606 10 | #SBATCH --mem 100G 11 | #SBATCH -e logs/Pop.h_t13.err.%J 12 | #SBATCH -o logs/Pop.h_t13.out.%J 13 | #SBATCH -m block 14 | # 15 | module purge 16 | conda deactivate || echo "conda not loaded" 17 | # 18 | # PARSE COMMAND LINE ARGUMENTS 19 | CASE=${1} ; export CASE 20 | ARCHIVE_ROOT=${2} 21 | YEAR=${3} 22 | echo "Reshaping year ${YEAR} for ${CASE}..." 23 | # 24 | cd /glade/p/cesm/postprocessing_dav/cesm-env2/bin 25 | . activate 26 | # 27 | module load intel/17.0.1 28 | module load ncarenv 29 | module load ncarcompilers 30 | module load impi 31 | module load netcdf/4.6.1 32 | module load nco/4.7.4 33 | module load ncl/6.4.0 34 | # 35 | HIST=pop.h ; export HIST 36 | # 37 | PATH=/glade/p/cesm/postprocessing_dav/cesm-env2/bin:/usr/local/bin:${PATH} ; export PATH 38 | # 39 | NCKS=`which ncks` ; export NCKS 40 | PROCHOST=`hostname`;export PROCHOST 41 | # 42 | BASEDIR=/glade/u/home/strandwg/CCP_Processing_Suite 43 | LOCALDSK=${ARCHIVE_ROOT}/${CASE} ; export LOCALDSK 44 | PROCBASE=/glade/scratch/$USER/T13/${CASE} ; export PROCBASE 45 | # 46 | HTYP=`echo $HIST | cut -d'.' -f1` ; export HTYP 47 | case "$HTYP" in 48 | cam2 | cam ) 49 | COMP_NAME=atm ;; 50 | cism ) 51 | COMP_NAME=glc ;; 52 | clm2 ) 53 | COMP_NAME=lnd ;; 54 | pop ) 55 | COMP_NAME=ocn ;; 56 | rtm | mosart ) 57 | COMP_NAME=rof ;; 58 | cice | csim ) 59 | COMP_NAME=ice ;; 60 | * ) 61 | echo "Unable to continue because "$HIST" not known." 62 | exit 1 ;; 63 | esac 64 | # 65 | LOCAL_HIST=${LOCALDSK}/${COMP_NAME}/hist ; export LOCAL_HIST 66 | LOCAL_PROC=${PROCBASE}/${HIST}/proc ; export LOCAL_PROC 67 | CACHEDIR=${LOCAL_PROC}/COMPLETED ; export CACHEDIR 68 | # 69 | VERBOSITY=0 ; export VERBOSITY 70 | PREFIX="${CACHEDIR}/${CASE}.${HIST}." 
; export PREFIX 71 | NCFORMAT=netcdf4c ; export NCFORMAT ; export NCFORMAT 72 | # 73 | if [ ! -d $LOCAL_PROC ] ; then 74 | mkdir -p $LOCAL_PROC 75 | fi 76 | if [ ! -d $CACHEDIR ] ; then 77 | mkdir -p $CACHEDIR 78 | fi 79 | # 80 | cd $LOCAL_PROC 81 | ln -s -f $BASEDIR/run_slice2series_dav Transpose_Data 82 | # 83 | rm -f ${CASE}.${HIST}.*nc 84 | if [ ! -f ${LOCAL_PROC}/.DONE.${CASE}.${HIST}.${YEAR} ] ; then 85 | ln -s -f ${LOCAL_HIST}/${CASE}.${HIST}.${YEAR}*nc . 86 | NHISTF=`/bin/ls ${CASE}.${HIST}.${YEAR}*nc | wc -l` 87 | if [ $NHISTF -eq 12 ] ; then 88 | OUTTIME="${YEAR}01-${YEAR}12" 89 | SUFFIX=".${OUTTIME}.nc" ; export SUFFIX 90 | echo -n "TS transpose_data start: " ; date 91 | ./Transpose_Data 92 | if [ $? -ne 0 ] ; then 93 | echo "Transpose_Data failed" 94 | exit 1 95 | fi 96 | echo -n "TS transpose_data end : " ; date 97 | touch ${LOCAL_PROC}/.DONE.${CASE}.${HIST}.${YEAR} 98 | else 99 | echo "File count mismatch on "${CASE}"."${HIST}"."${YEAR}": "${NHISTF}" instead of 12" 100 | fi 101 | fi 102 | # 103 | echo -n "TS COMPLETE: " ; date 104 | # 105 | exit 106 | -------------------------------------------------------------------------------- /data_reshaping/cice.h_t13.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | # 3 | #SBATCH -n 64 4 | #SBATCH -N 4 5 | #SBATCH --ntasks-per-node=16 6 | #SBATCH -t 24:00:00 7 | #SBATCH -p dav 8 | #SBATCH -J Cice.h_t13 9 | #SBATCH --account=P93300606 10 | #SBATCH --mem 100G 11 | #SBATCH -e logs/Cice.h_t13.err.%J 12 | #SBATCH -o logs/Cice.h_t13.out.%J 13 | #SBATCH -m block 14 | # 15 | module purge 16 | conda deactivate || echo "conda not loaded" 17 | # 18 | # PARSE COMMAND LINE ARGUMENTS 19 | CASE=${1} ; export CASE 20 | ARCHIVE_ROOT=${2} 21 | YEAR=${3} 22 | echo "Reshaping year ${YEAR} for ${CASE}..." 23 | # 24 | cd /glade/p/cesm/postprocessing_dav/cesm-env2/bin 25 | . activate 26 | # 27 | module load intel/17.0.1 28 | module load ncarenv 29 | module load ncarcompilers 30 | module load impi 31 | module load netcdf/4.6.1 32 | module load nco/4.7.4 33 | module load ncl/6.4.0 34 | # 35 | HIST=cice.h ; export HIST 36 | # 37 | PATH=/glade/p/cesm/postprocessing_dav/cesm-env2/bin:/usr/local/bin:${PATH} ; export PATH 38 | # 39 | NCKS=`which ncks` ; export NCKS 40 | PROCHOST=`hostname`;export PROCHOST 41 | # 42 | BASEDIR=/glade/u/home/strandwg/CCP_Processing_Suite 43 | LOCALDSK=${ARCHIVE_ROOT}/${CASE} ; export LOCALDSK 44 | PROCBASE=/glade/scratch/$USER/T13/${CASE} ; export PROCBASE 45 | # 46 | HTYP=`echo $HIST | cut -d'.' -f1` ; export HTYP 47 | case "$HTYP" in 48 | cam2 | cam ) 49 | COMP_NAME=atm ;; 50 | cism ) 51 | COMP_NAME=glc ;; 52 | clm2 ) 53 | COMP_NAME=lnd ;; 54 | pop ) 55 | COMP_NAME=ocn ;; 56 | rtm | mosart ) 57 | COMP_NAME=rof ;; 58 | cice | csim ) 59 | COMP_NAME=ice ;; 60 | * ) 61 | echo "Unable to continue because "$HIST" not known." 62 | exit 1 ;; 63 | esac 64 | # 65 | LOCAL_HIST=${LOCALDSK}/${COMP_NAME}/hist ; export LOCAL_HIST 66 | LOCAL_PROC=${PROCBASE}/${HIST}/proc ; export LOCAL_PROC 67 | CACHEDIR=${LOCAL_PROC}/COMPLETED ; export CACHEDIR 68 | # 69 | VERBOSITY=0 ; export VERBOSITY 70 | PREFIX="${CACHEDIR}/${CASE}.${HIST}." ; export PREFIX 71 | NCFORMAT=netcdf4c ; export NCFORMAT ; export NCFORMAT 72 | # 73 | if [ ! -d $LOCAL_PROC ] ; then 74 | mkdir -p $LOCAL_PROC 75 | fi 76 | if [ ! -d $CACHEDIR ] ; then 77 | mkdir -p $CACHEDIR 78 | fi 79 | # 80 | cd $LOCAL_PROC 81 | ln -s -f $BASEDIR/run_slice2series_dav Transpose_Data 82 | # 83 | rm -f ${CASE}.${HIST}.*nc 84 | if [ ! 
-f ${LOCAL_PROC}/.DONE.${CASE}.${HIST}.${YEAR} ] ; then 85 | ln -s -f ${LOCAL_HIST}/${CASE}.${HIST}.${YEAR}*nc . 86 | NHISTF=`/bin/ls ${CASE}.${HIST}.${YEAR}*nc | wc -l` 87 | if [ $NHISTF -eq 12 ] ; then 88 | OUTTIME="${YEAR}01-${YEAR}12" 89 | SUFFIX=".${OUTTIME}.nc" ; export SUFFIX 90 | echo -n "TS transpose_data start: " ; date 91 | ./Transpose_Data 92 | if [ $? -ne 0 ] ; then 93 | echo "Transpose_Data failed" 94 | exit 1 95 | fi 96 | echo -n "TS transpose_data end : " ; date 97 | touch ${LOCAL_PROC}/.DONE.${CASE}.${HIST}.${YEAR} 98 | else 99 | echo "File count mismatch on "${CASE}"."${HIST}"."${YEAR}": "${NHISTF}" instead of 12" 100 | fi 101 | fi 102 | # 103 | echo -n "TS COMPLETE: " ; date 104 | # 105 | exit 106 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | [![GitHub Workflow Status](https://img.shields.io/github/workflow/status/marbl-ecosys/HiRes-CESM-analysis/Continuous%20Integration?label=CI&logo=github&style=for-the-badge)](https://github.com/marbl-ecosys/HiRes-CESM-analysis/actions?query=workflow%3A%22Continuous+Integration%22) 3 | [![GitHub Workflow Status](https://img.shields.io/github/workflow/status/marbl-ecosys/HiRes-CESM-analysis/Run%20all%20pre-commit%20checks%20one%20more%20time?label=code-style&style=for-the-badge)](https://github.com/marbl-ecosys/HiRes-CESM-analysis/actions?query=workflow%3A%22Run+all+pre-commit+checks+one+more+time%22) 4 | 5 | # HiRes-CESM Analysis 6 | 7 | - [HiRes-CESM Analysis](#hires-cesm-analysis) 8 | - [For Developers](#for-developers) 9 | - [Keep your conda environment up to date](#keep-your-conda-environment-up-to-date) 10 | - [Use `pre-commit` to test code before committing](#use-pre-commit-to-test-code-before-committing) 11 | - [Run `pytest` after modifying python in `utils/`](#run-pytest-after-modifying-python-in-utils) 12 | 13 | This repository is building a set of tools for analyzing BGC output in a high-resolution POP run. 14 | 15 | ## For Developers 16 | 17 | A few recommended practices to incorporate in your development sandbox: 18 | 19 | ### Keep your conda environment up to date 20 | 21 | The first time you check out this repository, run 22 | 23 | ``` 24 | $ conda env create -f environments/environment.yaml 25 | ``` 26 | 27 | If you notice the YAML file has changed after you fetch changes from GitHub, 28 | update the environment with 29 | 30 | ``` 31 | $ conda env update -f environments/environment.yaml 32 | ``` 33 | 34 | If the `env update` command fails, you can remove the environment and re-create it: 35 | 36 | ``` 37 | $ conda env remove --name hires-marbl 38 | $ conda env create -f environments/environment.yaml 39 | ``` 40 | 41 | ### Use `pre-commit` to test code before committing 42 | 43 | Please take advantage of the pre-commit package to ensure that `black` is run before committing: 44 | 45 | ``` 46 | $ pre-commit install --install-hooks # set up pre-commit 47 | $ pre-commit run -a # check all the files currently in the repo 48 | ``` 49 | 50 | The pre-commit package is already installed via the `hires-marbl` conda environment. 51 | There is a GitHub action to run these checks on all pull requests, 52 | but running them locally via pre-commit will reduce the number of failed actions. 53 | NOTE: for some reason, to properly install `pre-commit` on the CISL systems, 54 | the above command must be run from `casper` rather than `cheyenne`.
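If only one hook fails, it can be faster to re-run that hook by itself rather than the full suite; for example, to run just the `black` hook (the hook id comes from `.pre-commit-config.yaml`; the file path below is only an illustration):

```
$ pre-commit run black --files notebooks/utils/utils.py
```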
55 | 56 | Note that pre-commit creates a virtual environment using specific tags of each package. 57 | As newer versions of `black` become available on `conda-forge`, we will update the pre-commit environment. 58 | 59 | ### Run `pytest` after modifying python in `utils/` 60 | 61 | To test some of the python code in `notebooks/utils/`, run `pytest`. 62 | These tests can be run from the top level of this repository by running 63 | 64 | ``` 65 | $ pytest tests/ 66 | ``` 67 | 68 | If you add new code to this directory, 69 | consider writing small tests to ensure it is running as expected. 70 | -------------------------------------------------------------------------------- /data_reshaping/cice.h1_t13.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | # 3 | #SBATCH -n 64 4 | #SBATCH -N 4 5 | #SBATCH --ntasks-per-node=16 6 | #SBATCH -t 24:00:00 7 | #SBATCH -p dav 8 | #SBATCH -J Cice.h1_t13 9 | #SBATCH --account=P93300606 10 | #SBATCH --mem 100G 11 | #SBATCH -e logs/Cice.h1_t13.err.%J 12 | #SBATCH -o logs/Cice.h1_t13.out.%J 13 | #SBATCH -m block 14 | # 15 | module purge 16 | conda deactivate || echo "conda not loaded" 17 | # 18 | # PARSE COMMAND LINE ARGUMENTS 19 | CASE=${1} ; export CASE 20 | ARCHIVE_ROOT=${2} 21 | YEAR=${3} 22 | echo "Reshaping year ${YEAR} for ${CASE}..." 23 | # 24 | cd /glade/p/cesm/postprocessing_dav/cesm-env2/bin 25 | . activate 26 | # 27 | module load intel/17.0.1 28 | module load ncarenv 29 | module load ncarcompilers 30 | module load impi 31 | module load netcdf/4.6.1 32 | module load nco/4.7.4 33 | module load ncl/6.4.0 34 | # 35 | HIST=cice.h1 ; export HIST 36 | # 37 | PATH=/glade/p/cesm/postprocessing_dav/cesm-env2/bin:/usr/local/bin:${PATH} ; export PATH 38 | # 39 | NCKS=`which ncks` ; export NCKS 40 | PROCHOST=`hostname`;export PROCHOST 41 | # 42 | BASEDIR=/glade/u/home/strandwg/CCP_Processing_Suite 43 | LOCALDSK=${ARCHIVE_ROOT}/${CASE} ; export LOCALDSK 44 | PROCBASE=/glade/scratch/$USER/T13/${CASE} ; export PROCBASE 45 | # 46 | HTYP=`echo $HIST | cut -d'.' -f1` ; export HTYP 47 | case "$HTYP" in 48 | cam2 | cam ) 49 | COMP_NAME=atm ;; 50 | cism ) 51 | COMP_NAME=glc ;; 52 | clm2 ) 53 | COMP_NAME=lnd ;; 54 | pop ) 55 | COMP_NAME=ocn ;; 56 | rtm | mosart ) 57 | COMP_NAME=rof ;; 58 | cice | csim ) 59 | COMP_NAME=ice ;; 60 | * ) 61 | echo "Unable to continue because "$HIST" not known." 62 | exit 1 ;; 63 | esac 64 | # 65 | LOCAL_HIST=${LOCALDSK}/${COMP_NAME}/hist ; export LOCAL_HIST 66 | LOCAL_PROC=${PROCBASE}/${HIST}/proc ; export LOCAL_PROC 67 | CACHEDIR=${LOCAL_PROC}/COMPLETED ; export CACHEDIR 68 | # 69 | VERBOSITY=0 ; export VERBOSITY 70 | PREFIX="${CACHEDIR}/${CASE}.${HIST}." ; export PREFIX 71 | NCFORMAT=netcdf4c ; export NCFORMAT ; export NCFORMAT 72 | # 73 | if [ ! -d $LOCAL_PROC ] ; then 74 | mkdir -p $LOCAL_PROC 75 | fi 76 | if [ ! -d $CACHEDIR ] ; then 77 | mkdir -p $CACHEDIR 78 | fi 79 | # 80 | cd $LOCAL_PROC 81 | ln -s -f $BASEDIR/run_slice2series_dav Transpose_Data 82 | # 83 | rm -f ${CASE}.${HIST}.*nc 84 | if [ ! -f ${LOCAL_PROC}/.DONE.${CASE}.${HIST}.${YEAR} ] ; then 85 | ln -s -f ${LOCAL_HIST}/${CASE}.${HIST}.${YEAR}*nc . 86 | NHISTF=`/bin/ls ${CASE}.${HIST}.${YEAR}*nc | wc -l` 87 | if [ $NHISTF -eq 365 ] ; then 88 | OUTTIME="${YEAR}0101-${YEAR}1231" 89 | SUFFIX=".${OUTTIME}.nc" ; export SUFFIX 90 | echo -n "TS transpose_data start: " ; date 91 | ./Transpose_Data 92 | if [ $? 
-ne 0 ] ; then 93 | echo "Transpose_Data failed" 94 | exit 1 95 | fi 96 | echo -n "TS transpose_data end : " ; date 97 | touch ${LOCAL_PROC}/.DONE.${CASE}.${HIST}.${YEAR} 98 | else 99 | echo "File count mismatch on "${CASE}"."${HIST}"."${YEAR}": "${NHISTF}" instead of 365" 100 | fi 101 | fi 102 | # 103 | echo -n "TS COMPLETE: " ; date 104 | # 105 | exit 106 | -------------------------------------------------------------------------------- /data_reshaping/pop.h.nyear1_t13.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | # 3 | #SBATCH -n 64 4 | #SBATCH -N 4 5 | #SBATCH --ntasks-per-node=16 6 | #SBATCH -t 24:00:00 7 | #SBATCH -p dav 8 | #SBATCH -J Pop.h.nyear1_t13 9 | #SBATCH --account=P93300606 10 | #SBATCH --mem 100G 11 | #SBATCH -e logs/Pop.h.nyear1_t13.err.%J 12 | #SBATCH -o logs/Pop.h.nyear1_t13.out.%J 13 | #SBATCH -m block 14 | # 15 | module purge 16 | conda deactivate || echo "conda not loaded" 17 | # 18 | # PARSE COMMAND LINE ARGUMENTS 19 | CASE=${1} ; export CASE 20 | ARCHIVE_ROOT=${2} 21 | YEAR=${3} 22 | echo "Reshaping year ${YEAR} for ${CASE}..." 23 | # 24 | cd /glade/p/cesm/postprocessing_dav/cesm-env2/bin 25 | . activate 26 | # 27 | module load intel/17.0.1 28 | module load ncarenv 29 | module load ncarcompilers 30 | module load impi 31 | module load netcdf/4.6.1 32 | module load nco/4.7.4 33 | module load ncl/6.4.0 34 | # 35 | HIST=pop.h.nyear1 ; export HIST 36 | # 37 | PATH=/glade/p/cesm/postprocessing_dav/cesm-env2/bin:/usr/local/bin:${PATH} ; export PATH 38 | # 39 | NCKS=`which ncks` ; export NCKS 40 | PROCHOST=`hostname`;export PROCHOST 41 | # 42 | BASEDIR=/glade/u/home/strandwg/CCP_Processing_Suite 43 | LOCALDSK=${ARCHIVE_ROOT}/${CASE} ; export LOCALDSK 44 | PROCBASE=/glade/scratch/$USER/T13/${CASE} ; export PROCBASE 45 | # 46 | HTYP=`echo $HIST | cut -d'.' -f1` ; export HTYP 47 | case "$HTYP" in 48 | cam2 | cam ) 49 | COMP_NAME=atm ;; 50 | cism ) 51 | COMP_NAME=glc ;; 52 | clm2 ) 53 | COMP_NAME=lnd ;; 54 | pop ) 55 | COMP_NAME=ocn ;; 56 | rtm | mosart ) 57 | COMP_NAME=rof ;; 58 | cice | csim ) 59 | COMP_NAME=ice ;; 60 | * ) 61 | echo "Unable to continue because "$HIST" not known." 62 | exit 1 ;; 63 | esac 64 | # 65 | LOCAL_HIST=${LOCALDSK}/${COMP_NAME}/hist ; export LOCAL_HIST 66 | LOCAL_PROC=${PROCBASE}/${HIST}/proc ; export LOCAL_PROC 67 | CACHEDIR=${LOCAL_PROC}/COMPLETED ; export CACHEDIR 68 | # 69 | VERBOSITY=0 ; export VERBOSITY 70 | PREFIX="${CACHEDIR}/${CASE}.${HIST}." ; export PREFIX 71 | NCFORMAT=netcdf4c ; export NCFORMAT ; export NCFORMAT 72 | # 73 | if [ ! -d $LOCAL_PROC ] ; then 74 | mkdir -p $LOCAL_PROC 75 | fi 76 | if [ ! -d $CACHEDIR ] ; then 77 | mkdir -p $CACHEDIR 78 | fi 79 | # 80 | cd $LOCAL_PROC 81 | ln -s -f $BASEDIR/run_slice2series_dav Transpose_Data 82 | # 83 | rm -f ${CASE}.${HIST}.*nc 84 | if [ ! -f ${LOCAL_PROC}/.DONE.${CASE}.${HIST}.${YEAR} ] ; then 85 | ln -s -f ${LOCAL_HIST}/${CASE}.${HIST}.${YEAR}*nc . 86 | NHISTF=`/bin/ls ${CASE}.${HIST}.${YEAR}*nc | wc -l` 87 | if [ $NHISTF -eq 1 ] ; then 88 | OUTTIME="${YEAR}-${YEAR}" 89 | SUFFIX=".${OUTTIME}.nc" ; export SUFFIX 90 | echo -n "TS transpose_data start: " ; date 91 | ./Transpose_Data 92 | if [ $? 
-ne 0 ] ; then 93 | echo "Transpose_Data failed" 94 | exit 1 95 | fi 96 | echo -n "TS transpose_data end : " ; date 97 | touch ${LOCAL_PROC}/.DONE.${CASE}.${HIST}.${YEAR} 98 | else 99 | echo "File count mismatch on "${CASE}"."${HIST}"."${YEAR}": "${NHISTF}" instead of 1" 100 | fi 101 | fi 102 | # 103 | echo -n "TS COMPLETE: " ; date 104 | # 105 | exit 106 | -------------------------------------------------------------------------------- /data_reshaping/pop.h.nday1_t13.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | # 3 | #SBATCH -n 64 4 | #SBATCH -N 4 5 | #SBATCH --ntasks-per-node=16 6 | #SBATCH -t 24:00:00 7 | #SBATCH -p dav 8 | #SBATCH -J Pop.h.nday1_t13 9 | #SBATCH --account=P93300606 10 | #SBATCH --mem 100G 11 | #SBATCH -e logs/Pop.h.nday1_t13.err.%J 12 | #SBATCH -o logs/Pop.h.nday1_t13.out.%J 13 | #SBATCH -m block 14 | # 15 | module purge 16 | conda deactivate || echo "conda not loaded" 17 | # 18 | # PARSE COMMAND LINE ARGUMENTS 19 | CASE=${1} ; export CASE 20 | ARCHIVE_ROOT=${2} 21 | YEAR=${3} 22 | echo "Reshaping year ${YEAR} for ${CASE}..." 23 | # 24 | cd /glade/p/cesm/postprocessing_dav/cesm-env2/bin 25 | . activate 26 | # 27 | module load intel/17.0.1 28 | module load ncarenv 29 | module load ncarcompilers 30 | module load impi 31 | module load netcdf/4.6.1 32 | module load nco/4.7.4 33 | module load ncl/6.4.0 34 | # 35 | HIST=pop.h.nday1 ; export HIST 36 | # 37 | PATH=/glade/p/cesm/postprocessing_dav/cesm-env2/bin:/usr/local/bin:${PATH} ; export PATH 38 | # 39 | NCKS=`which ncks` ; export NCKS 40 | PROCHOST=`hostname`;export PROCHOST 41 | # 42 | BASEDIR=/glade/u/home/strandwg/CCP_Processing_Suite 43 | LOCALDSK=${ARCHIVE_ROOT}/${CASE} ; export LOCALDSK 44 | PROCBASE=/glade/scratch/$USER/T13/${CASE} ; export PROCBASE 45 | # 46 | HTYP=`echo $HIST | cut -d'.' -f1` ; export HTYP 47 | case "$HTYP" in 48 | cam2 | cam ) 49 | COMP_NAME=atm ;; 50 | cism ) 51 | COMP_NAME=glc ;; 52 | clm2 ) 53 | COMP_NAME=lnd ;; 54 | pop ) 55 | COMP_NAME=ocn ;; 56 | rtm | mosart ) 57 | COMP_NAME=rof ;; 58 | cice | csim ) 59 | COMP_NAME=ice ;; 60 | * ) 61 | echo "Unable to continue because "$HIST" not known." 62 | exit 1 ;; 63 | esac 64 | # 65 | LOCAL_HIST=${LOCALDSK}/${COMP_NAME}/hist ; export LOCAL_HIST 66 | LOCAL_PROC=${PROCBASE}/${HIST}/proc ; export LOCAL_PROC 67 | CACHEDIR=${LOCAL_PROC}/COMPLETED ; export CACHEDIR 68 | # 69 | VERBOSITY=0 ; export VERBOSITY 70 | PREFIX="${CACHEDIR}/${CASE}.${HIST}." ; export PREFIX 71 | NCFORMAT=netcdf4c ; export NCFORMAT ; export NCFORMAT 72 | # 73 | if [ ! -d $LOCAL_PROC ] ; then 74 | mkdir -p $LOCAL_PROC 75 | fi 76 | if [ ! -d $CACHEDIR ] ; then 77 | mkdir -p $CACHEDIR 78 | fi 79 | # 80 | cd $LOCAL_PROC 81 | ln -s -f $BASEDIR/run_slice2series_dav Transpose_Data 82 | # 83 | rm -f ${CASE}.${HIST}.*nc 84 | if [ ! -f ${LOCAL_PROC}/.DONE.${CASE}.${HIST}.${YEAR} ] ; then 85 | ln -s -f ${LOCAL_HIST}/${CASE}.${HIST}.${YEAR}*nc . 86 | NHISTF=`/bin/ls ${CASE}.${HIST}.${YEAR}*nc | wc -l` 87 | if [ $NHISTF -eq 12 ] ; then 88 | OUTTIME="${YEAR}0101-${YEAR}1231" 89 | SUFFIX=".${OUTTIME}.nc" ; export SUFFIX 90 | echo -n "TS transpose_data start: " ; date 91 | ./Transpose_Data 92 | if [ $? 
-ne 0 ] ; then 93 | echo "Transpose_Data failed" 94 | exit 1 95 | fi 96 | echo -n "TS transpose_data end : " ; date 97 | touch ${LOCAL_PROC}/.DONE.${CASE}.${HIST}.${YEAR} 98 | else 99 | echo "File count mismatch on "${CASE}"."${HIST}"."${YEAR}": "${NHISTF}" instead of 12" 100 | fi 101 | fi 102 | # 103 | echo -n "TS COMPLETE: " ; date 104 | # 105 | exit 106 | -------------------------------------------------------------------------------- /data_reshaping/run_all.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | 4 | 5 | def _parse_args(): 6 | """ Parse command line arguments """ 7 | 8 | import argparse 9 | 10 | parser = argparse.ArgumentParser( 11 | description="Submit scripts to reshape highres BGC output", 12 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 13 | ) 14 | 15 | # Required: specify year 16 | parser.add_argument( 17 | "-y", 18 | "--years", 19 | action="store", 20 | dest="years", 21 | type=int, 22 | required=True, 23 | nargs="+", 24 | help="Year of run to convert to time series", 25 | ) 26 | 27 | # Optional: which case to convert 28 | # if this tool is made public, drop the default and require case as well 29 | parser.add_argument( 30 | "-c", 31 | "--case", 32 | action="store", 33 | dest="case", 34 | type=str, 35 | default="g.e22.G1850ECO_JRA_HR.TL319_t13.004", 36 | help="Suffix of case to convert to time series", 37 | ) 38 | 39 | # Optional: location of DOUT_S_ROOT 40 | archive_default = os.path.join( 41 | os.sep, "glade", "scratch", os.environ["USER"], "archive" 42 | ) 43 | parser.add_argument( 44 | "-a", 45 | "--archive-root", 46 | action="store", 47 | dest="archive_root", 48 | type=str, 49 | default=archive_default, 50 | help="base of DOUT_S_ROOT", 51 | ) 52 | 53 | # Optional: specify which scripts to run 54 | parser.add_argument( 55 | "-s", 56 | "--scripts", 57 | action="store", 58 | dest="scripts", 59 | type=str, 60 | nargs="+", 61 | default=[ 62 | "pop.h_t13.sh", 63 | "pop.h.nday1_t13.sh", 64 | "cice.h_t13.sh", 65 | "pop.h.nyear1_t13.sh", 66 | "cice.h1_t13.sh", 67 | ], 68 | help="Scripts to submit to slurm", 69 | ) 70 | 71 | # Optional: is this a dry-run? If so, don't submit anything 72 | parser.add_argument( 73 | "-d", 74 | "--dry-run", 75 | action="store_true", 76 | dest="dryrun", 77 | help="If true, do not actually submit job", 78 | ) 79 | 80 | # Optional: By default, slurm will email users when jobs start and finish 81 | parser.add_argument( 82 | "--no-mail", 83 | action="store_false", 84 | dest="send_mail", 85 | help="If true, send SLURM emails to {user}@ucar.edu", 86 | ) 87 | 88 | return parser.parse_args() 89 | 90 | 91 | ################### 92 | 93 | if __name__ == "__main__": 94 | args = _parse_args() 95 | case = args.case 96 | archive_root = args.archive_root 97 | mail_opt = ( 98 | f"--mail-type=ALL --mail-user={os.environ['USER']}@ucar.edu" 99 | if args.send_mail 100 | else "--mail-type=NONE" 101 | ) 102 | 103 | for yr in args.years: 104 | year = f"{yr:04}" 105 | for script in args.scripts: 106 | print(f"Submitting {script} for year {year} of {case}...") 107 | cmd = f"sbatch {mail_opt} --dependency=singleton {script} {case} {archive_root} {year}" 108 | if not args.dryrun: 109 | # note: the --dependency=singleton option means only one job per job name 110 | # Some jobs had been crashing, and I think it was due to temporary 111 | # files clobbering each other? But only having one pop.h_t13.sh job 112 | # at a time seems to have prevented these issues. 
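# For reference, with the default --case and --archive-root values the composed
# command looks something like (illustrative; <user> is the login name):
#   sbatch --mail-type=ALL --mail-user=<user>@ucar.edu --dependency=singleton pop.h_t13.sh g.e22.G1850ECO_JRA_HR.TL319_t13.004 /glade/scratch/<user>/archive 0001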
113 | os.system(cmd) 114 | else: 115 | print(f"Command to run: {cmd}") 116 | -------------------------------------------------------------------------------- /notebooks/plot_suite_maps_0095_1deg.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import yaml\n", 11 | "\n", 12 | "import utils\n", 13 | "\n", 14 | "%matplotlib inline" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "casename = \"g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001\"\n", 24 | "year = 95\n", 25 | "campaign_root = os.path.join(\n", 26 | " os.path.sep,\n", 27 | " \"glade\",\n", 28 | " \"campaign\",\n", 29 | " \"cesm\",\n", 30 | " \"development\",\n", 31 | " \"bgcwg\",\n", 32 | " \"projects\",\n", 33 | " \"1deg_cocco_JRA\",\n", 34 | " \"cases\",\n", 35 | ")\n", 36 | "\n", 37 | "# Set up CaseClass object\n", 38 | "case = utils.CaseClass(casename, os.path.join(campaign_root, casename))" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "with open(\"diag_metadata.yaml\", mode=\"r\") as fptr:\n", 48 | " diag_metadata_list = yaml.safe_load(fptr)\n", 49 | "\n", 50 | "varnames = utils.get_varnames_from_metadata_list(diag_metadata_list)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 4, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "def summary_plots(ds, diag_metadata, save_pngs=False):\n", 60 | " varname = diag_metadata[\"varname\"]\n", 61 | " print(varname)\n", 62 | " da = ds[varname].isel(diag_metadata.get(\"isel_dict\"))\n", 63 | "\n", 64 | " utils.summary_plot_maps(\n", 65 | " ds,\n", 66 | " da,\n", 67 | " diag_metadata,\n", 68 | " save_pngs=save_pngs,\n", 69 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 70 | " )" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 5, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "name": "stdout", 80 | "output_type": "stream", 81 | "text": [ 82 | "Datasets contain a total of 12 time samples\n", 83 | "Last average written at 0096-01-01 00:00:00\n", 84 | "POC_FLUX_100m\n", 85 | "CaCO3_FLUX_100m\n", 86 | "FG_CO2\n", 87 | "DpCO2\n", 88 | "PH\n", 89 | "spChl\n", 90 | "photoC_sp_zint\n", 91 | "coccoChl\n", 92 | "photoC_cocco_zint\n", 93 | "diatChl\n", 94 | "photoC_diat_zint\n", 95 | "diazChl\n", 96 | "photoC_diaz_zint\n", 97 | "NHx_SURFACE_EMIS\n", 98 | "NH4\n", 99 | "O2_ZMIN\n", 100 | "O2_ZMIN_DEPTH\n", 101 | "O2\n", 102 | "PO4\n", 103 | "PO4\n", 104 | "NO3\n", 105 | "NO3\n", 106 | "SiO3\n", 107 | "SiO3\n" 108 | ] 109 | } 110 | ], 111 | "source": [ 112 | "stream = \"pop.h\"\n", 113 | "ds = case.gen_dataset(varnames, stream, start_year=year, end_year=year)\n", 114 | "for diag_metadata in diag_metadata_list:\n", 115 | " # ds = case.gen_dataset(\n", 116 | " # diag_metadata[\"varname\"], stream, start_year=year, end_year=year\n", 117 | " # )\n", 118 | " summary_plots(ds, diag_metadata, save_pngs=True)" 119 | ] 120 | } 121 | ], 122 | "metadata": { 123 | "kernelspec": { 124 | "display_name": "Python [conda env:hires-marbl]", 125 | "language": "python", 126 | "name": "conda-env-hires-marbl-py" 127 | }, 128 | "language_info": { 129 | "codemirror_mode": { 130 | "name": "ipython", 131 | "version": 3 132 | }, 133 | "file_extension": ".py", 134 | 
"mimetype": "text/x-python", 135 | "name": "python", 136 | "nbconvert_exporter": "python", 137 | "pygments_lexer": "ipython3", 138 | "version": "3.7.8" 139 | } 140 | }, 141 | "nbformat": 4, 142 | "nbformat_minor": 4 143 | } 144 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import pytest 6 | import cftime 7 | import numpy as np 8 | import xarray as xr 9 | 10 | sys.path.append(os.path.abspath(os.path.join("notebooks"))) 11 | sys.path.append(os.path.abspath("tests")) 12 | from utils.utils import time_year_plus_frac, time_set_mid, repl_coord, round_sig 13 | from xr_ds_ex import gen_time_bounds_values, xr_ds_ex 14 | 15 | nyrs = 300 16 | var_const = False 17 | 18 | 19 | @pytest.mark.parametrize("decode_times1", [True, False]) 20 | @pytest.mark.parametrize("decode_times2", [True, False]) 21 | @pytest.mark.parametrize("apply_chunk1", [True, False]) 22 | def test_repl_coord(decode_times1, decode_times2, apply_chunk1): 23 | ds1 = time_set_mid(xr_ds_ex(decode_times1, nyrs=nyrs, var_const=var_const), "time") 24 | if apply_chunk1: 25 | ds1 = ds1.chunk({"time": 12}) 26 | 27 | # change time:bounds attribute variable rename corresponding variable 28 | tb_name_old = ds1["time"].attrs["bounds"] 29 | tb_name_new = tb_name_old + "_new" 30 | ds1["time"].attrs["bounds"] = tb_name_new 31 | ds1 = ds1.rename({tb_name_old: tb_name_new}) 32 | 33 | # verify that repl_coord on xr_ds_ex gives same results as 34 | # 1) executing time_set_mid 35 | # 2) manually changing bounds 36 | ds2 = repl_coord( 37 | "time", ds1, xr_ds_ex(decode_times2, nyrs=nyrs, var_const=var_const) 38 | ) 39 | assert ds2.identical(ds1) 40 | 41 | assert ds2["time"].encoding == ds1["time"].encoding 42 | assert ds2["time"].chunks == ds1["time"].chunks 43 | 44 | 45 | @pytest.mark.parametrize("decode_times", [True, False]) 46 | @pytest.mark.parametrize("deep", [True, False]) 47 | @pytest.mark.parametrize("apply_chunk", [True, False]) 48 | def test_time_set_mid(decode_times, deep, apply_chunk): 49 | ds = xr_ds_ex(decode_times, nyrs=nyrs, var_const=var_const, time_mid=False) 50 | if apply_chunk: 51 | ds = ds.chunk({"time": 12}) 52 | 53 | mid_month_values = gen_time_bounds_values(nyrs).mean(axis=1) 54 | if decode_times: 55 | time_encoding = ds["time"].encoding 56 | expected_values = cftime.num2date( 57 | mid_month_values, time_encoding["units"], time_encoding["calendar"] 58 | ) 59 | else: 60 | expected_values = mid_month_values 61 | 62 | ds_out = time_set_mid(ds, "time", deep) 63 | 64 | assert ds_out.attrs == ds.attrs 65 | assert ds_out.encoding == ds.encoding 66 | assert ds_out.chunks == ds.chunks 67 | 68 | for varname in ds.variables: 69 | assert ds_out[varname].attrs == ds[varname].attrs 70 | assert ds_out[varname].encoding == ds[varname].encoding 71 | assert ds_out[varname].chunks == ds[varname].chunks 72 | if varname == "time": 73 | assert np.all(ds_out[varname].values == expected_values) 74 | else: 75 | assert np.all(ds_out[varname].values == ds[varname].values) 76 | assert (ds_out[varname].data is ds[varname].data) == (not deep) 77 | 78 | # verify that values are independent of ds being chunked in time 79 | ds_chunk = xr_ds_ex( 80 | decode_times, nyrs=nyrs, var_const=var_const, time_mid=False 81 | ).chunk({"time": 6}) 82 | ds_chunk_out = time_set_mid(ds_chunk, "time") 83 | assert ds_chunk_out.identical(ds_out) 84 | 85 | 86 | 
@pytest.mark.parametrize("decode_times", [True, False]) 87 | def test_time_year_plus_frac(decode_times): 88 | ds = xr_ds_ex(decode_times, nyrs=nyrs, var_const=var_const) 89 | 90 | # call time_year_plus_frac to ensure that it doesn't raise an exception 91 | ty = time_year_plus_frac(ds, "time") 92 | 93 | 94 | @pytest.mark.parametrize( 95 | "x, ndigits, expected", 96 | [ 97 | (0.0, 1, 0.0), 98 | (0.0, 2, 0.0), 99 | (1.25, 1, 1.0), 100 | (1.25, 3, 1.25), 101 | (12.5, 1, 10.0), 102 | (12.5, 2, 12.0), # round to even 103 | (12.5, 3, 12.5), 104 | (12.5, 4, 12.5), 105 | (13.5, 1, 10.0), 106 | (13.5, 2, 14.0), # round to even 107 | (13.5, 3, 13.5), 108 | (13.52, 3, 13.5), 109 | (13.48, 3, 13.5), 110 | (13.5, 4, 13.5), 111 | ], 112 | ) 113 | def test_round_sig(x, ndigits, expected): 114 | assert round_sig(x, ndigits) == expected 115 | -------------------------------------------------------------------------------- /notebooks/diag_metadata.yaml: -------------------------------------------------------------------------------- 1 | - varname: POC_FLUX_100m 2 | spatial_op: integrate 3 | integral_unit_conv: (12 g)/(mol) # convert from mol to g C 4 | integral_display_units: Pg / yr 5 | display_units: mol / m^2 / yr 6 | map_vmin: 1.0e-2 7 | map_vmax: 10.0 8 | apply_log10: True 9 | 10 | - varname: CaCO3_FLUX_100m 11 | spatial_op: integrate 12 | integral_unit_conv: (12 g)/(mol) # convert from mol to g C 13 | integral_display_units: Pg / yr 14 | display_units: mol / m^2 / yr 15 | map_vmin: 1.0e-3 16 | map_vmax: 3.0 17 | apply_log10: True 18 | 19 | - varname: FG_CO2 20 | spatial_op: integrate 21 | integral_unit_conv: (12 g)/(mol) # convert from mol to g C 22 | integral_display_units: Pg / yr 23 | display_units: mol / m^2 / yr 24 | map_vmin: -10.0 25 | map_vmax: 10.0 26 | 27 | - varname: DpCO2 28 | spatial_op: average 29 | map_vmin: -75.0 30 | map_vmax: 75.0 31 | 32 | - varname: PH 33 | spatial_op: average 34 | map_vmin: 8.0 35 | map_vmax: 8.4 36 | 37 | - varname: spChl 38 | isel_dict: 39 | z_t_150m: 0 40 | spatial_op: average 41 | map_vmin: 1.0e-3 42 | map_vmax: 1.0e-1 43 | apply_log10: True 44 | 45 | - varname: photoC_sp_zint 46 | spatial_op: integrate 47 | integral_unit_conv: (12 g)/(mol) # convert from mol to g C 48 | integral_display_units: Pg / yr 49 | display_units: mol / m^2 / yr 50 | map_vmin: 1.0e-2 51 | map_vmax: 15.0 52 | apply_log10: True 53 | 54 | - varname: coccoChl 55 | isel_dict: 56 | z_t_150m: 0 57 | spatial_op: average 58 | map_vmin: 1.0e-3 59 | map_vmax: 1.0 60 | apply_log10: True 61 | 62 | - varname: photoC_cocco_zint 63 | spatial_op: integrate 64 | integral_unit_conv: (12 g)/(mol) # convert from mol to g C 65 | integral_display_units: Pg / yr 66 | display_units: mol / m^2 / yr 67 | map_vmin: 1.0e-2 68 | map_vmax: 15.0 69 | apply_log10: True 70 | 71 | - varname: diatChl 72 | isel_dict: 73 | z_t_150m: 0 74 | spatial_op: average 75 | map_vmin: 1.0e-3 76 | map_vmax: 10.0 77 | apply_log10: True 78 | 79 | - varname: photoC_diat_zint 80 | spatial_op: integrate 81 | integral_unit_conv: (12 g)/(mol) # convert from mol to g C 82 | integral_display_units: Pg / yr 83 | display_units: mol / m^2 / yr 84 | map_vmin: 1.0e-2 85 | map_vmax: 15.0 86 | apply_log10: True 87 | 88 | - varname: diazChl 89 | isel_dict: 90 | z_t_150m: 0 91 | spatial_op: average 92 | map_vmin: 1.0e-4 93 | map_vmax: 1.0e-2 94 | apply_log10: True 95 | 96 | - varname: photoC_diaz_zint 97 | spatial_op: integrate 98 | integral_unit_conv: (12 g)/(mol) # convert from mol to g C 99 | integral_display_units: Pg / yr 100 | display_units: mol / 
m^2 / yr 101 | map_vmin: 1.0e-2 102 | map_vmax: 1.0 103 | apply_log10: True 104 | 105 | - varname: NHx_SURFACE_EMIS 106 | spatial_op: integrate 107 | integral_unit_conv: (14 g)/(mol) # convert from mol to g N 108 | integral_display_units: Tg yr^-1 109 | display_units: mol / m^2 / yr 110 | map_vmin: 1.0e-5 111 | map_vmax: 0.01 112 | apply_log10: True 113 | 114 | - varname: NH4 115 | isel_dict: 116 | z_t: 0 117 | spatial_op: average 118 | map_vmin: 1.0e-3 119 | map_vmax: 3.0 120 | apply_log10: True 121 | 122 | - varname: O2_ZMIN 123 | spatial_op: average 124 | map_vmin: -5.0 125 | map_vmax: 50.0 126 | 127 | - varname: O2_ZMIN_DEPTH 128 | spatial_op: average 129 | display_units: m 130 | map_vmin: 0.0 131 | map_vmax: 1000.0 132 | 133 | - varname: O2 134 | isel_dict: 135 | z_t: 28 136 | spatial_op: average 137 | map_vmin: 1.0 138 | map_vmax: 300.0 139 | apply_log10: True 140 | 141 | - varname: PO4 142 | isel_dict: 143 | z_t: 0 144 | spatial_op: average 145 | map_vmin: 1.0e-2 146 | map_vmax: 2.2 147 | apply_log10: True 148 | 149 | - varname: PO4 150 | isel_dict: 151 | z_t: 28 152 | spatial_op: average 153 | map_vmin: 0.5 154 | map_vmax: 3.5 155 | 156 | - varname: NO3 157 | isel_dict: 158 | z_t: 0 159 | spatial_op: average 160 | map_vmin: 1.0e-2 161 | map_vmax: 35.0 162 | apply_log10: True 163 | 164 | - varname: NO3 165 | isel_dict: 166 | z_t: 28 167 | spatial_op: average 168 | map_vmin: 10.0 169 | map_vmax: 35.0 170 | 171 | - varname: SiO3 172 | isel_dict: 173 | z_t: 0 174 | spatial_op: average 175 | map_vmin: 1.0 176 | map_vmax: 75.0 177 | apply_log10: True 178 | 179 | - varname: SiO3 180 | isel_dict: 181 | z_t: 28 182 | spatial_op: average 183 | map_vmin: 0.0 184 | map_vmax: 100.0 185 | -------------------------------------------------------------------------------- /notebooks/compare_ts_and_hist_003.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "\n", 11 | "import utils" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stdout", 21 | "output_type": "stream", 22 | "text": [ 23 | "Checking year 0001...\n", 24 | "... checking stream pop.h.nyear1 ...\n", 25 | "Skipping stream pop.h.nyear1 for year 0001 because there are no history files\n", 26 | "... checking stream pop.h.nday1 ...\n", 27 | "Skipping stream pop.h.nday1 for year 0001 because there are no history files\n", 28 | "... checking stream pop.h ...\n", 29 | "Skipping stream pop.h for year 0001 because there are no history files\n", 30 | "... checking stream cice.h ...\n", 31 | "Skipping stream cice.h for year 0001 because there are no history files\n", 32 | "Could not find time series for all variables in year 0001\n", 33 | "----\n", 34 | "Checking year 0002...\n", 35 | "... checking stream pop.h.nyear1 ...\n", 36 | "Skipping stream pop.h.nyear1 for year 0002 because there are no history files\n", 37 | "... checking stream pop.h.nday1 ...\n", 38 | "Skipping stream pop.h.nday1 for year 0002 because there are no history files\n", 39 | "... checking stream pop.h ...\n", 40 | "Skipping stream pop.h for year 0002 because there are no history files\n", 41 | "... checking stream cice.h1 ...\n", 42 | "Skipping stream cice.h1 for year 0002 because there are no history files\n", 43 | "... 
checking stream cice.h ...\n", 44 | "Skipping stream cice.h for year 0002 because there are no history files\n", 45 | "Could not find time series for all variables in year 0002\n", 46 | "----\n", 47 | "Checking year 0003...\n", 48 | "... checking stream pop.h.nyear1 ...\n", 49 | "Skipping stream pop.h.nyear1 for year 0003 because there are no history files\n", 50 | "... checking stream pop.h.nday1 ...\n", 51 | "Skipping stream pop.h.nday1 for year 0003 because there are no history files\n", 52 | "... checking stream pop.h ...\n", 53 | "Skipping stream pop.h for year 0003 because there are no history files\n", 54 | "... checking stream cice.h1 ...\n", 55 | "Skipping stream cice.h1 for year 0003 because there are no history files\n", 56 | "... checking stream cice.h ...\n", 57 | "Skipping stream cice.h for year 0003 because there are no history files\n", 58 | "Could not find time series for all variables in year 0003\n", 59 | "----\n", 60 | "Checking year 0004...\n", 61 | "... checking stream pop.h.nyear1 ...\n", 62 | "Skipping stream pop.h.nyear1 for year 0004 because there are no history files\n", 63 | "... checking stream pop.h.nday1 ...\n", 64 | "Skipping stream pop.h.nday1 for year 0004 because there are no history files\n", 65 | "... checking stream pop.h ...\n", 66 | "Skipping stream pop.h for year 0004 because there are no history files\n", 67 | "... checking stream cice.h1 ...\n", 68 | "Skipping stream cice.h1 for year 0004 because there are no history files\n", 69 | "... checking stream cice.h ...\n", 70 | "Skipping stream cice.h for year 0004 because there are no history files\n", 71 | "Could not find time series for all variables in year 0004\n", 72 | "----\n", 73 | "Checking year 0005...\n", 74 | "... checking stream pop.h.nyear1 ...\n", 75 | "Could not find time series for year 0005\n", 76 | "CPU times: user 207 ms, sys: 86.5 ms, total: 294 ms\n", 77 | "Wall time: 1.39 s\n" 78 | ] 79 | } 80 | ], 81 | "source": [ 82 | "%%time\n", 83 | "\n", 84 | "casename = \"g.e22.G1850ECO_JRA_HR.TL319_t13.003\"\n", 85 | "\n", 86 | "# Directories to search for netCDF files\n", 87 | "caseroot = os.path.join(os.sep, \"glade\", \"work\", \"mlevy\", \"hi-res_BGC_JRA\", \"cases\")\n", 88 | "campaign_root = os.path.join(os.sep, \"glade\", \"campaign\", \"cesm\", \"development\", \"bgcwg\", \"projects\", \"hi-res_JRA\", \"cases\")\n", 89 | "output_roots = [os.path.join(campaign_root, casename, \"output\")]\n", 90 | "output_roots += utils.gen_output_roots_from_caseroot(os.path.join(caseroot, casename))\n", 91 | "\n", 92 | "utils.timeseries_and_history_comparison(casename, output_roots)" 93 | ] 94 | } 95 | ], 96 | "metadata": { 97 | "kernelspec": { 98 | "display_name": "Python [conda env:hires-marbl]", 99 | "language": "python", 100 | "name": "conda-env-hires-marbl-py" 101 | }, 102 | "language_info": { 103 | "codemirror_mode": { 104 | "name": "ipython", 105 | "version": 3 106 | }, 107 | "file_extension": ".py", 108 | "mimetype": "text/x-python", 109 | "name": "python", 110 | "nbconvert_exporter": "python", 111 | "pygments_lexer": "ipython3", 112 | "version": "3.7.8" 113 | } 114 | }, 115 | "nbformat": 4, 116 | "nbformat_minor": 4 117 | } 118 | -------------------------------------------------------------------------------- /notebooks/plot_suite_maps_0001_003.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "execution": {} 8 | }, 9 | "outputs": [], 10 | "source": [ 11 
| "import os\n", 12 | "import yaml\n", 13 | "\n", 14 | "import utils\n", 15 | "\n", 16 | "%matplotlib inline" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "casename = \"g.e22.G1850ECO_JRA_HR.TL319_t13.003\"\n", 26 | "year = 1\n", 27 | "campaign_root = os.path.join(\n", 28 | " os.sep,\n", 29 | " \"glade\",\n", 30 | " \"campaign\",\n", 31 | " \"cesm\",\n", 32 | " \"development\",\n", 33 | " \"bgcwg\",\n", 34 | " \"projects\",\n", 35 | " \"hi-res_JRA\",\n", 36 | " \"cases\",\n", 37 | ")\n", 38 | "\n", 39 | "# Set up CaseClass object\n", 40 | "case = utils.CaseClass(\n", 41 | " casename, os.path.join(campaign_root, casename, \"output\")\n", 42 | ")" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": { 49 | "execution": {} 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "with open(\"diag_metadata.yaml\", mode=\"r\") as fptr:\n", 54 | " diag_metadata_list = yaml.safe_load(fptr)\n", 55 | "\n", 56 | "# varnames = utils.get_varnames_from_metadata_list(diag_metadata_list)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 4, 62 | "metadata": { 63 | "execution": {} 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "def summary_plots(ds, diag_metadata, save_pngs=False):\n", 68 | " varname = diag_metadata[\"varname\"]\n", 69 | " print(varname)\n", 70 | " da = ds[varname].isel(diag_metadata.get(\"isel_dict\"))\n", 71 | "\n", 72 | " utils.summary_plot_maps(\n", 73 | " ds,\n", 74 | " da,\n", 75 | " diag_metadata,\n", 76 | " save_pngs=save_pngs,\n", 77 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 78 | " )" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 5, 84 | "metadata": { 85 | "execution": {} 86 | }, 87 | "outputs": [ 88 | { 89 | "name": "stdout", 90 | "output_type": "stream", 91 | "text": [ 92 | "Datasets contain a total of 12 time samples\n", 93 | "Last average written at 0002-01-01 00:00:00\n", 94 | "POC_FLUX_100m\n", 95 | "Datasets contain a total of 12 time samples\n", 96 | "Last average written at 0002-01-01 00:00:00\n", 97 | "CaCO3_FLUX_100m\n", 98 | "Datasets contain a total of 12 time samples\n", 99 | "Last average written at 0002-01-01 00:00:00\n", 100 | "FG_CO2\n", 101 | "Datasets contain a total of 12 time samples\n", 102 | "Last average written at 0002-01-01 00:00:00\n", 103 | "DpCO2\n", 104 | "Datasets contain a total of 12 time samples\n", 105 | "Last average written at 0002-01-01 00:00:00\n", 106 | "PH\n", 107 | "Datasets contain a total of 12 time samples\n", 108 | "Last average written at 0002-01-01 00:00:00\n", 109 | "spChl\n", 110 | "Datasets contain a total of 12 time samples\n", 111 | "Last average written at 0002-01-01 00:00:00\n", 112 | "photoC_sp_zint\n", 113 | "Datasets contain a total of 12 time samples\n", 114 | "Last average written at 0002-01-01 00:00:00\n", 115 | "coccoChl\n", 116 | "Datasets contain a total of 12 time samples\n", 117 | "Last average written at 0002-01-01 00:00:00\n", 118 | "photoC_cocco_zint\n", 119 | "Datasets contain a total of 12 time samples\n", 120 | "Last average written at 0002-01-01 00:00:00\n", 121 | "diatChl\n", 122 | "Datasets contain a total of 12 time samples\n", 123 | "Last average written at 0002-01-01 00:00:00\n", 124 | "photoC_diat_zint\n", 125 | "Datasets contain a total of 12 time samples\n", 126 | "Last average written at 0002-01-01 00:00:00\n", 127 | "diazChl\n", 128 | "Datasets contain a total of 12 time samples\n", 129 | 
"Last average written at 0002-01-01 00:00:00\n", 130 | "photoC_diaz_zint\n", 131 | "Datasets contain a total of 12 time samples\n", 132 | "Last average written at 0002-01-01 00:00:00\n", 133 | "NHx_SURFACE_EMIS\n", 134 | "Datasets contain a total of 12 time samples\n", 135 | "Last average written at 0002-01-01 00:00:00\n", 136 | "NH4\n", 137 | "Datasets contain a total of 12 time samples\n", 138 | "Last average written at 0002-01-01 00:00:00\n", 139 | "O2_ZMIN\n", 140 | "Datasets contain a total of 12 time samples\n", 141 | "Last average written at 0002-01-01 00:00:00\n", 142 | "O2_ZMIN_DEPTH\n", 143 | "Datasets contain a total of 12 time samples\n", 144 | "Last average written at 0002-01-01 00:00:00\n", 145 | "O2\n", 146 | "Datasets contain a total of 12 time samples\n", 147 | "Last average written at 0002-01-01 00:00:00\n", 148 | "PO4\n", 149 | "Datasets contain a total of 12 time samples\n", 150 | "Last average written at 0002-01-01 00:00:00\n", 151 | "PO4\n", 152 | "Datasets contain a total of 12 time samples\n", 153 | "Last average written at 0002-01-01 00:00:00\n", 154 | "NO3\n", 155 | "Datasets contain a total of 12 time samples\n", 156 | "Last average written at 0002-01-01 00:00:00\n", 157 | "NO3\n", 158 | "Datasets contain a total of 12 time samples\n", 159 | "Last average written at 0002-01-01 00:00:00\n", 160 | "SiO3\n", 161 | "Datasets contain a total of 12 time samples\n", 162 | "Last average written at 0002-01-01 00:00:00\n", 163 | "SiO3\n" 164 | ] 165 | } 166 | ], 167 | "source": [ 168 | "stream = \"pop.h\"\n", 169 | "# ds = case.gen_dataset(varnames, stream, start_year=year, end_year=year)\n", 170 | "for diag_metadata in diag_metadata_list:\n", 171 | " ds = case.gen_dataset(\n", 172 | " diag_metadata[\"varname\"], stream, start_year=year, end_year=year\n", 173 | " )\n", 174 | " summary_plots(ds, diag_metadata, save_pngs=True)" 175 | ] 176 | } 177 | ], 178 | "metadata": { 179 | "kernelspec": { 180 | "display_name": "Python [conda env:hires-marbl]", 181 | "language": "python", 182 | "name": "conda-env-hires-marbl-py" 183 | }, 184 | "language_info": { 185 | "codemirror_mode": { 186 | "name": "ipython", 187 | "version": 3 188 | }, 189 | "file_extension": ".py", 190 | "mimetype": "text/x-python", 191 | "name": "python", 192 | "nbconvert_exporter": "python", 193 | "pygments_lexer": "ipython3", 194 | "version": "3.7.8" 195 | } 196 | }, 197 | "nbformat": 4, 198 | "nbformat_minor": 4 199 | } 200 | -------------------------------------------------------------------------------- /notebooks/plot_suite_maps_0001_004.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "execution": {} 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import os\n", 12 | "import yaml\n", 13 | "\n", 14 | "import utils\n", 15 | "\n", 16 | "%matplotlib inline" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "casename = \"g.e22.G1850ECO_JRA_HR.TL319_t13.004\"\n", 26 | "year = 1\n", 27 | "campaign_root = os.path.join(\n", 28 | " os.sep,\n", 29 | " \"glade\",\n", 30 | " \"campaign\",\n", 31 | " \"cesm\",\n", 32 | " \"development\",\n", 33 | " \"bgcwg\",\n", 34 | " \"projects\",\n", 35 | " \"hi-res_JRA\",\n", 36 | " \"cases\",\n", 37 | ")\n", 38 | "\n", 39 | "# Set up CaseClass object\n", 40 | "case = utils.CaseClass(\n", 41 | " casename, os.path.join(campaign_root, casename, \"output\")\n", 
42 | ")" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": { 49 | "execution": {} 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "with open(\"diag_metadata.yaml\", mode=\"r\") as fptr:\n", 54 | " diag_metadata_list = yaml.safe_load(fptr)\n", 55 | "\n", 56 | "# varnames = utils.get_varnames_from_metadata_list(diag_metadata_list)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 4, 62 | "metadata": { 63 | "execution": {} 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "def summary_plots(ds, diag_metadata, save_pngs=False):\n", 68 | " varname = diag_metadata[\"varname\"]\n", 69 | " print(varname)\n", 70 | " da = ds[varname].isel(diag_metadata.get(\"isel_dict\"))\n", 71 | "\n", 72 | " utils.summary_plot_maps(\n", 73 | " ds,\n", 74 | " da,\n", 75 | " diag_metadata,\n", 76 | " save_pngs=save_pngs,\n", 77 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 78 | " )" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 5, 84 | "metadata": { 85 | "execution": {} 86 | }, 87 | "outputs": [ 88 | { 89 | "name": "stdout", 90 | "output_type": "stream", 91 | "text": [ 92 | "Datasets contain a total of 12 time samples\n", 93 | "Last average written at 0002-01-01 00:00:00\n", 94 | "POC_FLUX_100m\n", 95 | "Datasets contain a total of 12 time samples\n", 96 | "Last average written at 0002-01-01 00:00:00\n", 97 | "CaCO3_FLUX_100m\n", 98 | "Datasets contain a total of 12 time samples\n", 99 | "Last average written at 0002-01-01 00:00:00\n", 100 | "FG_CO2\n", 101 | "Datasets contain a total of 12 time samples\n", 102 | "Last average written at 0002-01-01 00:00:00\n", 103 | "DpCO2\n", 104 | "Datasets contain a total of 12 time samples\n", 105 | "Last average written at 0002-01-01 00:00:00\n", 106 | "PH\n", 107 | "Datasets contain a total of 12 time samples\n", 108 | "Last average written at 0002-01-01 00:00:00\n", 109 | "spChl\n", 110 | "Datasets contain a total of 12 time samples\n", 111 | "Last average written at 0002-01-01 00:00:00\n", 112 | "photoC_sp_zint\n", 113 | "Datasets contain a total of 12 time samples\n", 114 | "Last average written at 0002-01-01 00:00:00\n", 115 | "coccoChl\n", 116 | "Datasets contain a total of 12 time samples\n", 117 | "Last average written at 0002-01-01 00:00:00\n", 118 | "photoC_cocco_zint\n", 119 | "Datasets contain a total of 12 time samples\n", 120 | "Last average written at 0002-01-01 00:00:00\n", 121 | "diatChl\n", 122 | "Datasets contain a total of 12 time samples\n", 123 | "Last average written at 0002-01-01 00:00:00\n", 124 | "photoC_diat_zint\n", 125 | "Datasets contain a total of 12 time samples\n", 126 | "Last average written at 0002-01-01 00:00:00\n", 127 | "diazChl\n", 128 | "Datasets contain a total of 12 time samples\n", 129 | "Last average written at 0002-01-01 00:00:00\n", 130 | "photoC_diaz_zint\n", 131 | "Datasets contain a total of 12 time samples\n", 132 | "Last average written at 0002-01-01 00:00:00\n", 133 | "NHx_SURFACE_EMIS\n", 134 | "Datasets contain a total of 12 time samples\n", 135 | "Last average written at 0002-01-01 00:00:00\n", 136 | "NH4\n", 137 | "Datasets contain a total of 12 time samples\n", 138 | "Last average written at 0002-01-01 00:00:00\n", 139 | "O2_ZMIN\n", 140 | "Datasets contain a total of 12 time samples\n", 141 | "Last average written at 0002-01-01 00:00:00\n", 142 | "O2_ZMIN_DEPTH\n", 143 | "Datasets contain a total of 12 time samples\n", 144 | "Last average written at 0002-01-01 00:00:00\n", 145 | "O2\n", 146 | 
"Datasets contain a total of 12 time samples\n", 147 | "Last average written at 0002-01-01 00:00:00\n", 148 | "PO4\n", 149 | "Datasets contain a total of 12 time samples\n", 150 | "Last average written at 0002-01-01 00:00:00\n", 151 | "PO4\n", 152 | "Datasets contain a total of 12 time samples\n", 153 | "Last average written at 0002-01-01 00:00:00\n", 154 | "NO3\n", 155 | "Datasets contain a total of 12 time samples\n", 156 | "Last average written at 0002-01-01 00:00:00\n", 157 | "NO3\n", 158 | "Datasets contain a total of 12 time samples\n", 159 | "Last average written at 0002-01-01 00:00:00\n", 160 | "SiO3\n", 161 | "Datasets contain a total of 12 time samples\n", 162 | "Last average written at 0002-01-01 00:00:00\n", 163 | "SiO3\n" 164 | ] 165 | } 166 | ], 167 | "source": [ 168 | "stream = \"pop.h\"\n", 169 | "# ds = case.gen_dataset(varnames, stream, start_year=year, end_year=year)\n", 170 | "for diag_metadata in diag_metadata_list:\n", 171 | " ds = case.gen_dataset(\n", 172 | " diag_metadata[\"varname\"], stream, start_year=year, end_year=year\n", 173 | " )\n", 174 | " summary_plots(ds, diag_metadata, save_pngs=True)" 175 | ] 176 | } 177 | ], 178 | "metadata": { 179 | "kernelspec": { 180 | "display_name": "Python [conda env:hires-marbl]", 181 | "language": "python", 182 | "name": "conda-env-hires-marbl-py" 183 | }, 184 | "language_info": { 185 | "codemirror_mode": { 186 | "name": "ipython", 187 | "version": 3 188 | }, 189 | "file_extension": ".py", 190 | "mimetype": "text/x-python", 191 | "name": "python", 192 | "nbconvert_exporter": "python", 193 | "pygments_lexer": "ipython3", 194 | "version": "3.7.8" 195 | } 196 | }, 197 | "nbformat": 4, 198 | "nbformat_minor": 4 199 | } 200 | -------------------------------------------------------------------------------- /notebooks/trend_maps.003.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import glob\n", 10 | "import os\n", 11 | "\n", 12 | "import dask.distributed\n", 13 | "import matplotlib.pyplot as plt\n", 14 | "import ncar_jobqueue\n", 15 | "import xarray as xr\n", 16 | "\n", 17 | "import utils\n", 18 | "from utils.utils import time_set_mid\n", 19 | "\n", 20 | "%matplotlib inline\n", 21 | "%load_ext autoreload\n", 22 | "%autoreload 2\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "casename = \"g.e22.G1850ECO_JRA_HR.TL319_t13.003\"\n", 32 | "campaign_root = os.path.join(\n", 33 | " os.sep,\n", 34 | " \"glade\",\n", 35 | " \"campaign\",\n", 36 | " \"cesm\",\n", 37 | " \"development\",\n", 38 | " \"bgcwg\",\n", 39 | " \"projects\",\n", 40 | " \"hi-res_JRA\",\n", 41 | " \"cases\",\n", 42 | ")\n", 43 | "\n", 44 | "# Set up CaseClass object\n", 45 | "case = utils.CaseClass(\n", 46 | " casename, os.path.join(campaign_root, casename, \"output\")\n", 47 | ")" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 3, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "dashboard_link=https://jupyterhub.hpc.ucar.edu/stable/user/mlevy/proxy/8787/status\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "cluster = ncar_jobqueue.NCARCluster(\n", 65 | " cores=2, memory=\"64 GB\", processes=2, walltime=\"6:00:00\"\n", 66 | ")\n", 67 | "cluster.scale(n=8) # n = number of workers\n", 68 | 
"print(f\"dashboard_link={cluster.dashboard_link}\")\n", 69 | "client = dask.distributed.Client(cluster)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "Datasets contain a total of 36 time samples\n", 82 | "Last average written at 0005-01-01 00:00:00\n" 83 | ] 84 | } 85 | ], 86 | "source": [ 87 | "stream = \"pop.h\"\n", 88 | "varnames = [\"PO4\", \"NO3\", \"SiO3\", \"O2\", \"DIC\", \"ALK\"]\n", 89 | "ds_4d = case.gen_dataset(\n", 90 | " varnames,\n", 91 | " stream,\n", 92 | " start_year=2,\n", 93 | " end_year=4,\n", 94 | ")\n", 95 | "\n", 96 | "ds_3d = ds_4d.isel(z_t=28).chunk({\"time\": 36, \"nlat\": 300, \"nlon\": 900})" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 5, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "za_dir = f\"/glade/campaign/cesm/development/bgcwg/projects/hi-res_JRA/cases/{casename}/output/ocn/proc/za\"\n", 106 | "ds_list = []\n", 107 | "for var in varnames:\n", 108 | " filename_pattern = f\"{za_dir}/za_{casename}.pop.h.{var}.??????-??????.nc\"\n", 109 | " za_filenames = glob.glob(filename_pattern)\n", 110 | " za_filenames.sort()\n", 111 | " ds_tmp = xr.open_mfdataset(\n", 112 | " za_filenames,\n", 113 | " data_vars=\"minimal\",\n", 114 | " compat=\"override\",\n", 115 | " coords=\"minimal\",\n", 116 | " ).chunk({\"basins\": 1, \"time\": 36, \"z_t\": 62, \"lat_t\": 2400})\n", 117 | " ds_list.append(\n", 118 | " time_set_mid(ds_tmp, \"time\").assign_coords(\n", 119 | " {\"basins\": [\"Global\", \"Pacific\", \"Indian\", \"Atlantic\"]}\n", 120 | " )\n", 121 | " )\n", 122 | "ds_za = xr.merge(ds_list, compat=\"override\", join=\"left\")\n", 123 | "ds_za.attrs = ds_3d.attrs" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 6, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "def trend_plots(varname, map_vminmax=None, za_vminmax=None, save_pngs=False):\n", 133 | " da = ds_3d[varname]\n", 134 | " utils.trend_plot(\n", 135 | " ds_3d,\n", 136 | " da,\n", 137 | " vmin=-map_vminmax,\n", 138 | " vmax=map_vminmax,\n", 139 | " save_pngs=save_pngs,\n", 140 | " isel_dict={\"z_t\": 0},\n", 141 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 142 | " )\n", 143 | "\n", 144 | " for basin_ind in range(ds_za.dims[\"basins\"]):\n", 145 | " da = ds_za[varname].isel(basins=basin_ind)\n", 146 | " utils.trend_plot(\n", 147 | " ds_za,\n", 148 | " da,\n", 149 | " vmin=-za_vminmax,\n", 150 | " vmax=za_vminmax,\n", 151 | " invert_yaxis=True,\n", 152 | " save_pngs=save_pngs,\n", 153 | " isel_dict={\"basins\": 0},\n", 154 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 155 | " )" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 7, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "trend_plots(\"PO4\", map_vminmax=0.1, za_vminmax=0.05, save_pngs=True)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 8, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "trend_plots(\"NO3\", map_vminmax=1.0, za_vminmax=0.5, save_pngs=True)" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 9, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "trend_plots(\"SiO3\", map_vminmax=5.0, za_vminmax=2.0, save_pngs=True)" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 10, 188 | 
"metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "trend_plots(\"O2\", map_vminmax=5.0, za_vminmax=2.0, save_pngs=True)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 11, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "trend_plots(\"DIC\", map_vminmax=10.0, za_vminmax=5.0, save_pngs=True)" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 12, 206 | "metadata": {}, 207 | "outputs": [], 208 | "source": [ 209 | "trend_plots(\"ALK\", map_vminmax=10.0, za_vminmax=5.0, save_pngs=True)" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 13, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "client.close()\n", 219 | "cluster.close()" 220 | ] 221 | } 222 | ], 223 | "metadata": { 224 | "kernelspec": { 225 | "display_name": "Python [conda env:miniconda3-hires-marbl]", 226 | "language": "python", 227 | "name": "conda-env-miniconda3-hires-marbl-py" 228 | }, 229 | "language_info": { 230 | "codemirror_mode": { 231 | "name": "ipython", 232 | "version": 3 233 | }, 234 | "file_extension": ".py", 235 | "mimetype": "text/x-python", 236 | "name": "python", 237 | "nbconvert_exporter": "python", 238 | "pygments_lexer": "ipython3", 239 | "version": "3.7.8" 240 | } 241 | }, 242 | "nbformat": 4, 243 | "nbformat_minor": 4 244 | } 245 | -------------------------------------------------------------------------------- /notebooks/trend_maps.004.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import glob\n", 11 | "\n", 12 | "import dask.distributed\n", 13 | "import matplotlib.pyplot as plt\n", 14 | "import ncar_jobqueue\n", 15 | "import xarray as xr\n", 16 | "\n", 17 | "import utils\n", 18 | "from utils.utils import time_set_mid\n", 19 | "\n", 20 | "%matplotlib inline\n", 21 | "%load_ext autoreload\n", 22 | "%autoreload 2\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "casename = \"g.e22.G1850ECO_JRA_HR.TL319_t13.004\"\n", 32 | "campaign_root = os.path.join(\n", 33 | " os.sep,\n", 34 | " \"glade\",\n", 35 | " \"campaign\",\n", 36 | " \"cesm\",\n", 37 | " \"development\",\n", 38 | " \"bgcwg\",\n", 39 | " \"projects\",\n", 40 | " \"hi-res_JRA\",\n", 41 | " \"cases\",\n", 42 | ")\n", 43 | "\n", 44 | "# Set up CaseClass object\n", 45 | "case = utils.CaseClass(\n", 46 | " casename, os.path.join(campaign_root, casename, \"output\")\n", 47 | ")" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 3, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "dashboard_link=https://jupyterhub.hpc.ucar.edu/stable/user/mlevy/proxy/8787/status\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "cluster = ncar_jobqueue.NCARCluster(\n", 65 | " cores=2, memory=\"64 GB\", processes=2, walltime=\"6:00:00\"\n", 66 | ")\n", 67 | "cluster.scale(n=8) # n = number of workers\n", 68 | "print(f\"dashboard_link={cluster.dashboard_link}\")\n", 69 | "client = dask.distributed.Client(cluster)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "Datasets contain a total of 36 time samples\n", 82 | "Last average 
written at 0005-01-01 00:00:00\n" 83 | ] 84 | } 85 | ], 86 | "source": [ 87 | "stream = \"pop.h\"\n", 88 | "varnames = [\"PO4\", \"NO3\", \"SiO3\", \"O2\", \"DIC\", \"ALK\"]\n", 89 | "ds_4d = case.gen_dataset(\n", 90 | " varnames,\n", 91 | " stream,\n", 92 | " start_year=2,\n", 93 | " end_year=4,\n", 94 | ")\n", 95 | "\n", 96 | "ds_3d = ds_4d.isel(z_t=28).chunk({\"time\": 36, \"nlat\": 300, \"nlon\": 900})" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 5, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "za_dir = f\"/glade/campaign/cesm/development/bgcwg/projects/hi-res_JRA/cases/{casename}/output/ocn/proc/za\"\n", 106 | "ds_list = []\n", 107 | "for var in varnames:\n", 108 | " filename_pattern = f\"{za_dir}/za_{casename}.pop.h.{var}.??????-??????.nc\"\n", 109 | " za_filenames = glob.glob(filename_pattern)\n", 110 | " za_filenames.sort()\n", 111 | " ds_tmp = xr.open_mfdataset(\n", 112 | " za_filenames,\n", 113 | " data_vars=\"minimal\",\n", 114 | " compat=\"override\",\n", 115 | " coords=\"minimal\",\n", 116 | " ).chunk({\"basins\": 1, \"time\": 36, \"z_t\": 62, \"lat_t\": 2400})\n", 117 | " ds_list.append(\n", 118 | " time_set_mid(ds_tmp, \"time\").assign_coords(\n", 119 | " {\"basins\": [\"Global\", \"Pacific\", \"Indian\", \"Atlantic\"]}\n", 120 | " )\n", 121 | " )\n", 122 | "ds_za = xr.merge(ds_list, compat=\"override\", join=\"left\")\n", 123 | "ds_za.attrs = ds_3d.attrs" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 6, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "def trend_plots(varname, map_vminmax=None, za_vminmax=None, save_pngs=False):\n", 133 | " # da = ds_4d[varname][:, 28, :, :].chunk({\"time\": 36, \"nlat\": 300, \"nlon\": 900})\n", 134 | " da = ds_3d[varname]\n", 135 | " utils.trend_plot(\n", 136 | " ds_3d,\n", 137 | " da,\n", 138 | " vmin=-map_vminmax,\n", 139 | " vmax=map_vminmax,\n", 140 | " save_pngs=save_pngs,\n", 141 | " isel_dict={\"z_t\": 0},\n", 142 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 143 | " )\n", 144 | "\n", 145 | " for basin_ind in range(ds_za.dims[\"basins\"]):\n", 146 | " da = ds_za[varname].isel(basins=basin_ind)\n", 147 | " da = da\n", 148 | " utils.trend_plot(\n", 149 | " ds_za,\n", 150 | " da,\n", 151 | " vmin=-za_vminmax,\n", 152 | " vmax=za_vminmax,\n", 153 | " invert_yaxis=True,\n", 154 | " save_pngs=save_pngs,\n", 155 | " isel_dict={\"basins\": 0},\n", 156 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 157 | " )" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 7, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "trend_plots(\"PO4\", map_vminmax=0.1, za_vminmax=0.05, save_pngs=True)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 8, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "trend_plots(\"NO3\", map_vminmax=1.0, za_vminmax=0.5, save_pngs=True)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 9, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "trend_plots(\"SiO3\", map_vminmax=5.0, za_vminmax=2.0, save_pngs=True)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 10, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "trend_plots(\"O2\", map_vminmax=5.0, za_vminmax=2.0, save_pngs=True)" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 11, 199 | "metadata": {}, 200 | "outputs": 
[], 201 | "source": [ 202 | "trend_plots(\"DIC\", map_vminmax=10.0, za_vminmax=5.0, save_pngs=True)" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 12, 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "trend_plots(\"ALK\", map_vminmax=10.0, za_vminmax=5.0, save_pngs=True)" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 13, 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [ 220 | "client.close()\n", 221 | "cluster.close()" 222 | ] 223 | } 224 | ], 225 | "metadata": { 226 | "kernelspec": { 227 | "display_name": "Python [conda env:miniconda3-hires-marbl]", 228 | "language": "python", 229 | "name": "conda-env-miniconda3-hires-marbl-py" 230 | }, 231 | "language_info": { 232 | "codemirror_mode": { 233 | "name": "ipython", 234 | "version": 3 235 | }, 236 | "file_extension": ".py", 237 | "mimetype": "text/x-python", 238 | "name": "python", 239 | "nbconvert_exporter": "python", 240 | "pygments_lexer": "ipython3", 241 | "version": "3.7.8" 242 | } 243 | }, 244 | "nbformat": 4, 245 | "nbformat_minor": 4 246 | } 247 | -------------------------------------------------------------------------------- /notebooks/plot_suite_003.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import warnings\n", 11 | "\n", 12 | "import dask\n", 13 | "import ncar_jobqueue\n", 14 | "import yaml\n", 15 | "\n", 16 | "import utils\n", 17 | "\n", 18 | "%matplotlib inline" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "with open(\"diag_metadata.yaml\", mode=\"r\") as fptr:\n", 28 | " diag_metadata_list = yaml.safe_load(fptr)\n", 29 | "\n", 30 | "# varnames = utils.get_varnames_from_metadata_list(diag_metadata_list)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "def summary_plots(ds, diag_metadata, save_pngs=False):\n", 40 | " varname = diag_metadata[\"varname\"]\n", 41 | " print(varname)\n", 42 | " da = ds[varname].isel(diag_metadata.get(\"isel_dict\"))\n", 43 | "\n", 44 | " utils.summary_plot_global_ts(\n", 45 | " ds,\n", 46 | " da,\n", 47 | " diag_metadata,\n", 48 | " time_coarsen_len=12,\n", 49 | " save_pngs=save_pngs,\n", 50 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 51 | " )\n", 52 | "\n", 53 | " utils.summary_plot_histogram(\n", 54 | " ds,\n", 55 | " da,\n", 56 | " diag_metadata,\n", 57 | " save_pngs=save_pngs,\n", 58 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 59 | " )" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 4, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "name": "stdout", 69 | "output_type": "stream", 70 | "text": [ 71 | "dashboard_link=https://jupyterhub.hpc.ucar.edu/stable/user/mlevy/proxy/8787/status\n" 72 | ] 73 | } 74 | ], 75 | "source": [ 76 | "cluster = ncar_jobqueue.NCARCluster(\n", 77 | " cores=2, memory=\"64 GB\", processes=2, walltime=\"6:00:00\"\n", 78 | ")\n", 79 | "cluster.scale(n=8) # n = number of workers\n", 80 | "print(f\"dashboard_link={cluster.dashboard_link}\")" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 5, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "casename = 
\"g.e22.G1850ECO_JRA_HR.TL319_t13.003\"\n", 90 | "campaign_root = os.path.join(\n", 91 | " os.sep,\n", 92 | " \"glade\",\n", 93 | " \"campaign\",\n", 94 | " \"cesm\",\n", 95 | " \"development\",\n", 96 | " \"bgcwg\",\n", 97 | " \"projects\",\n", 98 | " \"hi-res_JRA\",\n", 99 | " \"cases\",\n", 100 | ")\n", 101 | "\n", 102 | "# Set up CaseClass object\n", 103 | "case = utils.CaseClass(\n", 104 | " casename, os.path.join(campaign_root, casename, \"output\")\n", 105 | ")" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 6, 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "name": "stdout", 115 | "output_type": "stream", 116 | "text": [ 117 | "Datasets contain a total of 48 time samples\n", 118 | "Last average written at 0005-01-01 00:00:00\n", 119 | "POC_FLUX_100m\n", 120 | "Datasets contain a total of 48 time samples\n", 121 | "Last average written at 0005-01-01 00:00:00\n", 122 | "CaCO3_FLUX_100m\n", 123 | "Datasets contain a total of 48 time samples\n", 124 | "Last average written at 0005-01-01 00:00:00\n", 125 | "FG_CO2\n", 126 | "Datasets contain a total of 48 time samples\n", 127 | "Last average written at 0005-01-01 00:00:00\n", 128 | "DpCO2\n", 129 | "Datasets contain a total of 48 time samples\n", 130 | "Last average written at 0005-01-01 00:00:00\n", 131 | "PH\n", 132 | "Datasets contain a total of 48 time samples\n", 133 | "Last average written at 0005-01-01 00:00:00\n", 134 | "spChl\n", 135 | "Datasets contain a total of 48 time samples\n", 136 | "Last average written at 0005-01-01 00:00:00\n", 137 | "photoC_sp_zint\n", 138 | "Datasets contain a total of 48 time samples\n", 139 | "Last average written at 0005-01-01 00:00:00\n", 140 | "coccoChl\n", 141 | "Datasets contain a total of 48 time samples\n", 142 | "Last average written at 0005-01-01 00:00:00\n", 143 | "photoC_cocco_zint\n", 144 | "Datasets contain a total of 48 time samples\n", 145 | "Last average written at 0005-01-01 00:00:00\n", 146 | "diatChl\n", 147 | "Datasets contain a total of 48 time samples\n", 148 | "Last average written at 0005-01-01 00:00:00\n", 149 | "photoC_diat_zint\n", 150 | "Datasets contain a total of 48 time samples\n", 151 | "Last average written at 0005-01-01 00:00:00\n", 152 | "diazChl\n", 153 | "Datasets contain a total of 48 time samples\n", 154 | "Last average written at 0005-01-01 00:00:00\n", 155 | "photoC_diaz_zint\n", 156 | "Datasets contain a total of 48 time samples\n", 157 | "Last average written at 0005-01-01 00:00:00\n", 158 | "NHx_SURFACE_EMIS\n", 159 | "Datasets contain a total of 48 time samples\n", 160 | "Last average written at 0005-01-01 00:00:00\n", 161 | "NH4\n", 162 | "Datasets contain a total of 48 time samples\n", 163 | "Last average written at 0005-01-01 00:00:00\n", 164 | "O2_ZMIN\n", 165 | "Datasets contain a total of 48 time samples\n", 166 | "Last average written at 0005-01-01 00:00:00\n", 167 | "O2_ZMIN_DEPTH\n", 168 | "Datasets contain a total of 48 time samples\n", 169 | "Last average written at 0005-01-01 00:00:00\n", 170 | "O2\n", 171 | "Datasets contain a total of 48 time samples\n", 172 | "Last average written at 0005-01-01 00:00:00\n", 173 | "PO4\n", 174 | "Datasets contain a total of 48 time samples\n", 175 | "Last average written at 0005-01-01 00:00:00\n", 176 | "PO4\n", 177 | "Datasets contain a total of 48 time samples\n", 178 | "Last average written at 0005-01-01 00:00:00\n", 179 | "NO3\n", 180 | "Datasets contain a total of 48 time samples\n", 181 | "Last average written at 0005-01-01 00:00:00\n", 182 | "NO3\n", 183 | 
"Datasets contain a total of 48 time samples\n", 184 | "Last average written at 0005-01-01 00:00:00\n", 185 | "SiO3\n", 186 | "Datasets contain a total of 48 time samples\n", 187 | "Last average written at 0005-01-01 00:00:00\n", 188 | "SiO3\n" 189 | ] 190 | } 191 | ], 192 | "source": [ 193 | "with dask.distributed.Client(cluster) as client:\n", 194 | " stream = \"pop.h\"\n", 195 | " for diag_metadata in diag_metadata_list:\n", 196 | " ds = case.gen_dataset(diag_metadata[\"varname\"], stream)\n", 197 | " summary_plots(ds, diag_metadata, save_pngs=True)" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 7, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "cluster.close()" 207 | ] 208 | } 209 | ], 210 | "metadata": { 211 | "kernelspec": { 212 | "display_name": "Python [conda env:miniconda3-hires-marbl]", 213 | "language": "python", 214 | "name": "conda-env-miniconda3-hires-marbl-py" 215 | }, 216 | "language_info": { 217 | "codemirror_mode": { 218 | "name": "ipython", 219 | "version": 3 220 | }, 221 | "file_extension": ".py", 222 | "mimetype": "text/x-python", 223 | "name": "python", 224 | "nbconvert_exporter": "python", 225 | "pygments_lexer": "ipython3", 226 | "version": "3.7.8" 227 | } 228 | }, 229 | "nbformat": 4, 230 | "nbformat_minor": 4 231 | } 232 | -------------------------------------------------------------------------------- /notebooks/plot_suite_004.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import warnings\n", 11 | "\n", 12 | "import dask\n", 13 | "import ncar_jobqueue\n", 14 | "import yaml\n", 15 | "\n", 16 | "import utils\n", 17 | "\n", 18 | "%matplotlib inline" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "with open(\"diag_metadata.yaml\", mode=\"r\") as fptr:\n", 28 | " diag_metadata_list = yaml.safe_load(fptr)\n", 29 | "\n", 30 | "# varnames = utils.get_varnames_from_metadata_list(diag_metadata_list)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "def summary_plots(ds, diag_metadata, save_pngs=False):\n", 40 | " varname = diag_metadata[\"varname\"]\n", 41 | " print(varname)\n", 42 | " da = ds[varname].isel(diag_metadata.get(\"isel_dict\"))\n", 43 | "\n", 44 | " utils.summary_plot_global_ts(\n", 45 | " ds,\n", 46 | " da,\n", 47 | " diag_metadata,\n", 48 | " time_coarsen_len=12,\n", 49 | " save_pngs=save_pngs,\n", 50 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 51 | " )\n", 52 | "\n", 53 | " utils.summary_plot_histogram(\n", 54 | " ds,\n", 55 | " da,\n", 56 | " diag_metadata,\n", 57 | " save_pngs=save_pngs,\n", 58 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 59 | " )" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 4, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "name": "stdout", 69 | "output_type": "stream", 70 | "text": [ 71 | "dashboard_link=https://jupyterhub.hpc.ucar.edu/stable/user/mlevy/proxy/8787/status\n" 72 | ] 73 | } 74 | ], 75 | "source": [ 76 | "cluster = ncar_jobqueue.NCARCluster(\n", 77 | " cores=2, memory=\"64 GB\", processes=2, walltime=\"6:00:00\"\n", 78 | ")\n", 79 | "cluster.scale(n=8) # n = number of workers\n", 80 | 
"print(f\"dashboard_link={cluster.dashboard_link}\")" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 5, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "casename = \"g.e22.G1850ECO_JRA_HR.TL319_t13.004\"\n", 90 | "campaign_root = os.path.join(\n", 91 | " os.sep,\n", 92 | " \"glade\",\n", 93 | " \"campaign\",\n", 94 | " \"cesm\",\n", 95 | " \"development\",\n", 96 | " \"bgcwg\",\n", 97 | " \"projects\",\n", 98 | " \"hi-res_JRA\",\n", 99 | " \"cases\",\n", 100 | ")\n", 101 | "\n", 102 | "# Set up CaseClass object\n", 103 | "case = utils.CaseClass(\n", 104 | " casename, os.path.join(campaign_root, casename, \"output\")\n", 105 | ")" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 6, 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "name": "stdout", 115 | "output_type": "stream", 116 | "text": [ 117 | "Datasets contain a total of 204 time samples\n", 118 | "Last average written at 0018-01-01 00:00:00\n", 119 | "POC_FLUX_100m\n", 120 | "Datasets contain a total of 204 time samples\n", 121 | "Last average written at 0018-01-01 00:00:00\n", 122 | "CaCO3_FLUX_100m\n", 123 | "Datasets contain a total of 204 time samples\n", 124 | "Last average written at 0018-01-01 00:00:00\n", 125 | "FG_CO2\n", 126 | "Datasets contain a total of 204 time samples\n", 127 | "Last average written at 0018-01-01 00:00:00\n", 128 | "DpCO2\n", 129 | "Datasets contain a total of 204 time samples\n", 130 | "Last average written at 0018-01-01 00:00:00\n", 131 | "PH\n", 132 | "Datasets contain a total of 204 time samples\n", 133 | "Last average written at 0018-01-01 00:00:00\n", 134 | "spChl\n", 135 | "Datasets contain a total of 204 time samples\n", 136 | "Last average written at 0018-01-01 00:00:00\n", 137 | "photoC_sp_zint\n", 138 | "Datasets contain a total of 204 time samples\n", 139 | "Last average written at 0018-01-01 00:00:00\n", 140 | "coccoChl\n", 141 | "Datasets contain a total of 204 time samples\n", 142 | "Last average written at 0018-01-01 00:00:00\n", 143 | "photoC_cocco_zint\n", 144 | "Datasets contain a total of 204 time samples\n", 145 | "Last average written at 0018-01-01 00:00:00\n", 146 | "diatChl\n", 147 | "Datasets contain a total of 204 time samples\n", 148 | "Last average written at 0018-01-01 00:00:00\n", 149 | "photoC_diat_zint\n", 150 | "Datasets contain a total of 204 time samples\n", 151 | "Last average written at 0018-01-01 00:00:00\n", 152 | "diazChl\n", 153 | "Datasets contain a total of 204 time samples\n", 154 | "Last average written at 0018-01-01 00:00:00\n", 155 | "photoC_diaz_zint\n", 156 | "Datasets contain a total of 204 time samples\n", 157 | "Last average written at 0018-01-01 00:00:00\n", 158 | "NHx_SURFACE_EMIS\n", 159 | "Datasets contain a total of 204 time samples\n", 160 | "Last average written at 0018-01-01 00:00:00\n", 161 | "NH4\n", 162 | "Datasets contain a total of 204 time samples\n", 163 | "Last average written at 0018-01-01 00:00:00\n", 164 | "O2_ZMIN\n", 165 | "Datasets contain a total of 204 time samples\n", 166 | "Last average written at 0018-01-01 00:00:00\n", 167 | "O2_ZMIN_DEPTH\n", 168 | "Datasets contain a total of 204 time samples\n", 169 | "Last average written at 0018-01-01 00:00:00\n", 170 | "O2\n", 171 | "Datasets contain a total of 204 time samples\n", 172 | "Last average written at 0018-01-01 00:00:00\n", 173 | "PO4\n", 174 | "Datasets contain a total of 204 time samples\n", 175 | "Last average written at 0018-01-01 00:00:00\n", 176 | "PO4\n", 177 | "Datasets contain a total of 204 
time samples\n", 178 | "Last average written at 0018-01-01 00:00:00\n", 179 | "NO3\n", 180 | "Datasets contain a total of 204 time samples\n", 181 | "Last average written at 0018-01-01 00:00:00\n", 182 | "NO3\n", 183 | "Datasets contain a total of 204 time samples\n", 184 | "Last average written at 0018-01-01 00:00:00\n", 185 | "SiO3\n", 186 | "Datasets contain a total of 204 time samples\n", 187 | "Last average written at 0018-01-01 00:00:00\n", 188 | "SiO3\n" 189 | ] 190 | } 191 | ], 192 | "source": [ 193 | "with dask.distributed.Client(cluster) as client:\n", 194 | " stream = \"pop.h\"\n", 195 | " # ds = case.gen_dataset(varnames, stream)\n", 196 | " for diag_metadata in diag_metadata_list:\n", 197 | " ds = case.gen_dataset(diag_metadata[\"varname\"], stream, end_year=17)\n", 198 | " summary_plots(ds, diag_metadata, save_pngs=True)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 7, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "cluster.close()" 208 | ] 209 | } 210 | ], 211 | "metadata": { 212 | "kernelspec": { 213 | "display_name": "Python [conda env:miniconda3-hires-marbl]", 214 | "language": "python", 215 | "name": "conda-env-miniconda3-hires-marbl-py" 216 | }, 217 | "language_info": { 218 | "codemirror_mode": { 219 | "name": "ipython", 220 | "version": 3 221 | }, 222 | "file_extension": ".py", 223 | "mimetype": "text/x-python", 224 | "name": "python", 225 | "nbconvert_exporter": "python", 226 | "pygments_lexer": "ipython3", 227 | "version": "3.7.8" 228 | } 229 | }, 230 | "nbformat": 4, 231 | "nbformat_minor": 4 232 | } 233 | -------------------------------------------------------------------------------- /notebooks/compare_ts_and_hist_004.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "\n", 11 | "import utils" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stdout", 21 | "output_type": "stream", 22 | "text": [ 23 | "Checking year 0001...\n", 24 | "... checking stream pop.h.nyear1 ...\n", 25 | "... checking stream pop.h.nday1 ...\n", 26 | "... checking stream pop.h ...\n", 27 | "... checking stream cice.h ...\n", 28 | "All variables available in time series for year 0001\n", 29 | "----\n", 30 | "Checking year 0002...\n", 31 | "... checking stream pop.h.nyear1 ...\n", 32 | "... checking stream pop.h.nday1 ...\n", 33 | "... checking stream pop.h ...\n", 34 | "... checking stream cice.h1 ...\n", 35 | "... checking stream cice.h ...\n", 36 | "All variables available in time series for year 0002\n", 37 | "----\n", 38 | "Checking year 0003...\n", 39 | "... checking stream pop.h.nyear1 ...\n", 40 | "... checking stream pop.h.nday1 ...\n", 41 | "... checking stream pop.h ...\n", 42 | "... checking stream cice.h1 ...\n", 43 | "... checking stream cice.h ...\n", 44 | "All variables available in time series for year 0003\n", 45 | "----\n", 46 | "Checking year 0004...\n", 47 | "... checking stream pop.h.nyear1 ...\n", 48 | "... checking stream pop.h.nday1 ...\n", 49 | "... checking stream pop.h ...\n", 50 | "... checking stream cice.h1 ...\n", 51 | "... checking stream cice.h ...\n", 52 | "All variables available in time series for year 0004\n", 53 | "----\n", 54 | "Checking year 0005...\n", 55 | "... checking stream pop.h.nyear1 ...\n", 56 | "... 
checking stream pop.h.nday1 ...\n", 57 | "... checking stream pop.h ...\n", 58 | "... checking stream cice.h1 ...\n", 59 | "... checking stream cice.h ...\n", 60 | "All variables available in time series for year 0005\n", 61 | "----\n", 62 | "Checking year 0006...\n", 63 | "... checking stream pop.h.nyear1 ...\n", 64 | "... checking stream pop.h.nday1 ...\n", 65 | "... checking stream pop.h ...\n", 66 | "... checking stream cice.h1 ...\n", 67 | "... checking stream cice.h ...\n", 68 | "All variables available in time series for year 0006\n", 69 | "----\n", 70 | "Checking year 0007...\n", 71 | "... checking stream pop.h.nyear1 ...\n", 72 | "... checking stream pop.h.nday1 ...\n", 73 | "... checking stream pop.h ...\n", 74 | "... checking stream cice.h1 ...\n", 75 | "... checking stream cice.h ...\n", 76 | "All variables available in time series for year 0007\n", 77 | "----\n", 78 | "Checking year 0008...\n", 79 | "... checking stream pop.h.nyear1 ...\n", 80 | "... checking stream pop.h.nday1 ...\n", 81 | "... checking stream pop.h ...\n", 82 | "... checking stream cice.h1 ...\n", 83 | "... checking stream cice.h ...\n", 84 | "All variables available in time series for year 0008\n", 85 | "----\n", 86 | "Checking year 0009...\n", 87 | "... checking stream pop.h.nyear1 ...\n", 88 | "... checking stream pop.h.nday1 ...\n", 89 | "... checking stream pop.h ...\n", 90 | "... checking stream cice.h1 ...\n", 91 | "... checking stream cice.h ...\n", 92 | "All variables available in time series for year 0009\n", 93 | "----\n", 94 | "Checking year 0010...\n", 95 | "... checking stream pop.h.nyear1 ...\n", 96 | "... checking stream pop.h.nday1 ...\n", 97 | "... checking stream pop.h ...\n", 98 | "... checking stream cice.h1 ...\n", 99 | "... checking stream cice.h ...\n", 100 | "All variables available in time series for year 0010\n", 101 | "----\n", 102 | "Checking year 0011...\n", 103 | "... checking stream pop.h.nyear1 ...\n", 104 | "... checking stream pop.h.nday1 ...\n", 105 | "... checking stream pop.h ...\n", 106 | "... checking stream cice.h1 ...\n", 107 | "... checking stream cice.h ...\n", 108 | "All variables available in time series for year 0011\n", 109 | "----\n", 110 | "Checking year 0012...\n", 111 | "... checking stream pop.h.nyear1 ...\n", 112 | "... checking stream pop.h.nday1 ...\n", 113 | "... checking stream pop.h ...\n", 114 | "... checking stream cice.h1 ...\n", 115 | "... checking stream cice.h ...\n", 116 | "All variables available in time series for year 0012\n", 117 | "----\n", 118 | "Checking year 0013...\n", 119 | "... checking stream pop.h.nyear1 ...\n", 120 | "... checking stream pop.h.nday1 ...\n", 121 | "... checking stream pop.h ...\n", 122 | "... checking stream cice.h1 ...\n", 123 | "... checking stream cice.h ...\n", 124 | "All variables available in time series for year 0013\n", 125 | "----\n", 126 | "Checking year 0014...\n", 127 | "... checking stream pop.h.nyear1 ...\n", 128 | "... checking stream pop.h.nday1 ...\n", 129 | "... checking stream pop.h ...\n", 130 | "... checking stream cice.h1 ...\n", 131 | "... checking stream cice.h ...\n", 132 | "All variables available in time series for year 0014\n", 133 | "----\n", 134 | "Checking year 0015...\n", 135 | "... checking stream pop.h.nyear1 ...\n", 136 | "... checking stream pop.h.nday1 ...\n", 137 | "... checking stream pop.h ...\n", 138 | "... checking stream cice.h1 ...\n", 139 | "... 
checking stream cice.h ...\n", 140 | "All variables available in time series for year 0015\n", 141 | "----\n", 142 | "Checking year 0016...\n", 143 | "... checking stream pop.h.nyear1 ...\n", 144 | "... checking stream pop.h.nday1 ...\n", 145 | "... checking stream pop.h ...\n", 146 | "... checking stream cice.h1 ...\n", 147 | "... checking stream cice.h ...\n", 148 | "All variables available in time series for year 0016\n", 149 | "----\n", 150 | "Checking year 0017...\n", 151 | "... checking stream pop.h.nyear1 ...\n", 152 | "... checking stream pop.h.nday1 ...\n", 153 | "... checking stream pop.h ...\n", 154 | "... checking stream cice.h1 ...\n", 155 | "... checking stream cice.h ...\n", 156 | "All variables available in time series for year 0017\n", 157 | "----\n", 158 | "Checking year 0018...\n", 159 | "... checking stream pop.h.nyear1 ...\n", 160 | "Could not find time series for year 0018\n", 161 | "CPU times: user 4min 46s, sys: 3min 16s, total: 8min 3s\n", 162 | "Wall time: 11min 3s\n" 163 | ] 164 | } 165 | ], 166 | "source": [ 167 | "%%time\n", 168 | "\n", 169 | "casename = \"g.e22.G1850ECO_JRA_HR.TL319_t13.004\"\n", 170 | "\n", 171 | "# Directories to search for netCDF files\n", 172 | "caseroot = os.path.join(os.sep, \"glade\", \"work\", \"mlevy\", \"hi-res_BGC_JRA\", \"cases\")\n", 173 | "campaign_root = os.path.join(os.sep, \"glade\", \"campaign\", \"cesm\", \"development\", \"bgcwg\", \"projects\", \"hi-res_JRA\", \"cases\")\n", 174 | "output_roots = [os.path.join(campaign_root, casename, \"output\")]\n", 175 | "output_roots += utils.gen_output_roots_from_caseroot(os.path.join(caseroot, casename))\n", 176 | "\n", 177 | "utils.timeseries_and_history_comparison(casename, output_roots)" 178 | ] 179 | } 180 | ], 181 | "metadata": { 182 | "kernelspec": { 183 | "display_name": "Python [conda env:hires-marbl]", 184 | "language": "python", 185 | "name": "conda-env-hires-marbl-py" 186 | }, 187 | "language_info": { 188 | "codemirror_mode": { 189 | "name": "ipython", 190 | "version": 3 191 | }, 192 | "file_extension": ".py", 193 | "mimetype": "text/x-python", 194 | "name": "python", 195 | "nbconvert_exporter": "python", 196 | "pygments_lexer": "ipython3", 197 | "version": "3.7.8" 198 | } 199 | }, 200 | "nbformat": 4, 201 | "nbformat_minor": 4 202 | } 203 | -------------------------------------------------------------------------------- /notebooks/plot_suite_1deg.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import warnings\n", 11 | "\n", 12 | "import dask\n", 13 | "import ncar_jobqueue\n", 14 | "import yaml\n", 15 | "\n", 16 | "import utils\n", 17 | "\n", 18 | "%matplotlib inline" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "with open(\"diag_metadata.yaml\", mode=\"r\") as fptr:\n", 28 | " diag_metadata_list = yaml.safe_load(fptr)\n", 29 | "\n", 30 | "# varnames = utils.get_varnames_from_metadata_list(diag_metadata_list)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "def summary_plots(ds, diag_metadata, save_pngs=False):\n", 40 | " varname = diag_metadata[\"varname\"]\n", 41 | " print(varname)\n", 42 | " da = ds[varname].isel(diag_metadata.get(\"isel_dict\"))\n", 43 | "\n", 44 | " 
utils.summary_plot_global_ts(\n", 45 | " ds,\n", 46 | " da,\n", 47 | " diag_metadata,\n", 48 | " time_coarsen_len=12,\n", 49 | " save_pngs=save_pngs,\n", 50 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 51 | " )\n", 52 | "\n", 53 | " utils.summary_plot_histogram(\n", 54 | " ds,\n", 55 | " da,\n", 56 | " diag_metadata,\n", 57 | " save_pngs=save_pngs,\n", 58 | " savefig_kwargs={\"dpi\": 72}, # match default behavior of savefig\n", 59 | " )" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 4, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "name": "stdout", 69 | "output_type": "stream", 70 | "text": [ 71 | "dashboard_link=https://jupyterhub.hpc.ucar.edu/stable/user/mlevy/proxy/36124/status\n" 72 | ] 73 | }, 74 | { 75 | "name": "stderr", 76 | "output_type": "stream", 77 | "text": [ 78 | "/glade/work/mlevy/miniconda3/envs/hires-marbl/lib/python3.7/site-packages/distributed/node.py:155: UserWarning: Port 8787 is already in use.\n", 79 | "Perhaps you already have a cluster running?\n", 80 | "Hosting the HTTP server on port 36124 instead\n", 81 | " http_address[\"port\"], self.http_server.port\n" 82 | ] 83 | } 84 | ], 85 | "source": [ 86 | "cluster = ncar_jobqueue.NCARCluster(\n", 87 | " cores=2, memory=\"64 GB\", processes=2, walltime=\"6:00:00\"\n", 88 | ")\n", 89 | "cluster.scale(n=8) # n = number of workers\n", 90 | "print(f\"dashboard_link={cluster.dashboard_link}\")" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 5, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "casename = \"g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001\"\n", 100 | "year = 95\n", 101 | "campaign_root = os.path.join(\n", 102 | " os.path.sep,\n", 103 | " \"glade\",\n", 104 | " \"campaign\",\n", 105 | " \"cesm\",\n", 106 | " \"development\",\n", 107 | " \"bgcwg\",\n", 108 | " \"projects\",\n", 109 | " \"1deg_cocco_JRA\",\n", 110 | " \"cases\",\n", 111 | ")\n", 112 | "\n", 113 | "# Set up CaseClass object\n", 114 | "case = utils.CaseClass(casename, os.path.join(campaign_root, casename))" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 6, 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "name": "stdout", 124 | "output_type": "stream", 125 | "text": [ 126 | "Datasets contain a total of 120 time samples\n", 127 | "Last average written at 0105-01-01 00:00:00\n", 128 | "POC_FLUX_100m\n", 129 | "Datasets contain a total of 120 time samples\n", 130 | "Last average written at 0105-01-01 00:00:00\n", 131 | "CaCO3_FLUX_100m\n", 132 | "Datasets contain a total of 120 time samples\n", 133 | "Last average written at 0105-01-01 00:00:00\n", 134 | "FG_CO2\n", 135 | "Datasets contain a total of 120 time samples\n", 136 | "Last average written at 0105-01-01 00:00:00\n", 137 | "DpCO2\n", 138 | "Datasets contain a total of 120 time samples\n", 139 | "Last average written at 0105-01-01 00:00:00\n", 140 | "PH\n", 141 | "Datasets contain a total of 120 time samples\n", 142 | "Last average written at 0105-01-01 00:00:00\n", 143 | "spChl\n", 144 | "Datasets contain a total of 120 time samples\n", 145 | "Last average written at 0105-01-01 00:00:00\n", 146 | "photoC_sp_zint\n", 147 | "Datasets contain a total of 120 time samples\n", 148 | "Last average written at 0105-01-01 00:00:00\n", 149 | "coccoChl\n", 150 | "Datasets contain a total of 120 time samples\n", 151 | "Last average written at 0105-01-01 00:00:00\n", 152 | "photoC_cocco_zint\n", 153 | "Datasets contain a total of 120 time samples\n", 154 | "Last average written 
at 0105-01-01 00:00:00\n", 155 | "diatChl\n", 156 | "Datasets contain a total of 120 time samples\n", 157 | "Last average written at 0105-01-01 00:00:00\n", 158 | "photoC_diat_zint\n", 159 | "Datasets contain a total of 120 time samples\n", 160 | "Last average written at 0105-01-01 00:00:00\n", 161 | "diazChl\n", 162 | "Datasets contain a total of 120 time samples\n", 163 | "Last average written at 0105-01-01 00:00:00\n", 164 | "photoC_diaz_zint\n", 165 | "Datasets contain a total of 120 time samples\n", 166 | "Last average written at 0105-01-01 00:00:00\n", 167 | "NHx_SURFACE_EMIS\n", 168 | "Datasets contain a total of 120 time samples\n", 169 | "Last average written at 0105-01-01 00:00:00\n", 170 | "NH4\n", 171 | "Datasets contain a total of 120 time samples\n", 172 | "Last average written at 0105-01-01 00:00:00\n", 173 | "O2_ZMIN\n", 174 | "Datasets contain a total of 120 time samples\n", 175 | "Last average written at 0105-01-01 00:00:00\n", 176 | "O2_ZMIN_DEPTH\n", 177 | "Datasets contain a total of 120 time samples\n", 178 | "Last average written at 0105-01-01 00:00:00\n", 179 | "O2\n", 180 | "Datasets contain a total of 120 time samples\n", 181 | "Last average written at 0105-01-01 00:00:00\n", 182 | "PO4\n", 183 | "Datasets contain a total of 120 time samples\n", 184 | "Last average written at 0105-01-01 00:00:00\n", 185 | "PO4\n", 186 | "Datasets contain a total of 120 time samples\n", 187 | "Last average written at 0105-01-01 00:00:00\n", 188 | "NO3\n", 189 | "Datasets contain a total of 120 time samples\n", 190 | "Last average written at 0105-01-01 00:00:00\n", 191 | "NO3\n", 192 | "Datasets contain a total of 120 time samples\n", 193 | "Last average written at 0105-01-01 00:00:00\n", 194 | "SiO3\n", 195 | "Datasets contain a total of 120 time samples\n", 196 | "Last average written at 0105-01-01 00:00:00\n", 197 | "SiO3\n" 198 | ] 199 | } 200 | ], 201 | "source": [ 202 | "with dask.distributed.Client(cluster) as client:\n", 203 | " stream = \"pop.h\"\n", 204 | " # ds = case.gen_dataset(varnames, stream)\n", 205 | " for diag_metadata in diag_metadata_list:\n", 206 | " ds = case.gen_dataset(\n", 207 | " diag_metadata[\"varname\"], stream, start_year=95, end_year=104\n", 208 | " )\n", 209 | " summary_plots(ds, diag_metadata, save_pngs=True)" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 7, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "cluster.close()" 219 | ] 220 | } 221 | ], 222 | "metadata": { 223 | "kernelspec": { 224 | "display_name": "Python [conda env:miniconda3-hires-marbl]", 225 | "language": "python", 226 | "name": "conda-env-miniconda3-hires-marbl-py" 227 | }, 228 | "language_info": { 229 | "codemirror_mode": { 230 | "name": "ipython", 231 | "version": 3 232 | }, 233 | "file_extension": ".py", 234 | "mimetype": "text/x-python", 235 | "name": "python", 236 | "nbconvert_exporter": "python", 237 | "pygments_lexer": "ipython3", 238 | "version": "3.7.8" 239 | } 240 | }, 241 | "nbformat": 4, 242 | "nbformat_minor": 4 243 | } 244 | -------------------------------------------------------------------------------- /notebooks/utils/utils.py: -------------------------------------------------------------------------------- 1 | """utility functions""" 2 | 3 | import math 4 | 5 | import cftime 6 | import numpy as np 7 | import xarray as xr 8 | import pathlib 9 | import pandas as pd 10 | import json 11 | 12 | from .compare_ts_and_hist import compare_ts_and_hist 13 | from .cime import cime_xmlquery 14 | 15 | 
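# Usage sketch for the time helpers defined below (illustrative only; the file
# name here is hypothetical, not one of the cases analyzed in these notebooks):
#
#     ds = xr.open_dataset("some_monthly_history_file.nc")  # hypothetical file
#     ds = time_set_mid(ds, "time")             # midpoints of time bounds (no-op if "time" has no bounds attr)
#     t_frac = time_year_plus_frac(ds, "time")  # numpy array of year + fraction-of-year values
#     print(round_sig(float(t_frac[-1]), 4))    # keep 4 significant digits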
################################################################################ 16 | 17 | 18 | def repl_coord(coordname, ds1, ds2): 19 | """ 20 | Return copy of d2 with coordinate coordname replaced, using coordname from ds1. 21 | Drop ds2.coordname.attrs['bounds'] in result, if ds2.coordname has bounds attribute. 22 | Add ds1.coordname.attrs['bounds'] to result, if ds1.coordname has bounds attribute. 23 | Except for coordname, the returned Dataset is a non-deep copy of ds2. 24 | """ 25 | if "bounds" in ds2[coordname].attrs: 26 | tb_name = ds2[coordname].attrs["bounds"] 27 | ds_out = ds2.drop(tb_name).assign_coords({coordname: ds1[coordname]}) 28 | else: 29 | ds_out = ds2.assign_coords({coordname: ds1[coordname]}) 30 | if "bounds" in ds1[coordname].attrs: 31 | tb_name = ds1[coordname].attrs["bounds"] 32 | ds_out = xr.merge([ds_out, ds1[tb_name]]) 33 | return ds_out 34 | 35 | 36 | ################################################################################ 37 | 38 | 39 | def time_set_mid(ds, time_name, deep=False): 40 | """ 41 | Return copy of ds with values of ds[time_name] replaced with midpoints of 42 | ds[time_name].attrs['bounds'], if bounds attribute exists. 43 | Except for time_name, the returned Dataset is a copy of ds2. 44 | The copy is deep or not depending on the argument deep. 45 | """ 46 | 47 | ds_out = ds.copy(deep=deep) 48 | 49 | if "bounds" not in ds[time_name].attrs: 50 | return ds_out 51 | 52 | tb_name = ds[time_name].attrs["bounds"] 53 | tb = ds[tb_name] 54 | bounds_dim = next(dim for dim in tb.dims if dim != time_name) 55 | 56 | # Use da = da.copy(data=...), in order to preserve attributes and encoding. 57 | 58 | # If tb is an array of datetime objects then encode time before averaging. 59 | # Do this because computing the mean on datetime objects with xarray fails 60 | # if the time span is 293 or more years. 
61 | # https://github.com/klindsay28/CESM2_coup_carb_cycle_JAMES/issues/7 62 | if tb.dtype == np.dtype("O"): 63 | units = "days since 0001-01-01" 64 | calendar = "noleap" 65 | tb_vals = cftime.date2num(ds[tb_name].values, units=units, calendar=calendar) 66 | tb_mid_decode = cftime.num2date( 67 | tb_vals.mean(axis=1), units=units, calendar=calendar 68 | ) 69 | ds_out[time_name] = ds[time_name].copy(data=tb_mid_decode) 70 | else: 71 | ds_out[time_name] = ds[time_name].copy(data=tb.mean(bounds_dim)) 72 | 73 | return ds_out 74 | 75 | 76 | ################################################################################ 77 | 78 | 79 | def time_year_plus_frac(ds, time_name): 80 | """return time variable, as numpy array of year plus fraction of year values""" 81 | 82 | # this is straightforward if time has units='days since 0000-01-01' and calendar='noleap' 83 | # so convert specification of time to that representation 84 | 85 | # get time values as an np.ndarray of cftime objects 86 | if np.dtype(ds[time_name]) == np.dtype("O"): 87 | tvals_cftime = ds[time_name].values 88 | else: 89 | tvals_cftime = cftime.num2date( 90 | ds[time_name].values, 91 | ds[time_name].attrs["units"], 92 | ds[time_name].attrs["calendar"], 93 | ) 94 | 95 | # convert cftime objects to representation mentioned above 96 | tvals_days = cftime.date2num( 97 | tvals_cftime, "days since 0000-01-01", calendar="noleap" 98 | ) 99 | 100 | return tvals_days / 365.0 101 | 102 | 103 | ################################################################################ 104 | 105 | 106 | def round_sig(x, ndigits): 107 | """round x to ndigits precision""" 108 | if x == 0: 109 | return x 110 | ndigits_offset = math.floor(math.log10(abs(x))) 111 | return round(x, ndigits - 1 - ndigits_offset) 112 | 113 | 114 | ################################################################################ 115 | 116 | 117 | def get_varnames_from_metadata_list(diag_metadata_list): 118 | varnames = [] 119 | for diag_metadata in diag_metadata_list: 120 | if diag_metadata["varname"] not in varnames: 121 | varnames.append(diag_metadata["varname"]) 122 | return varnames 123 | 124 | 125 | ################################################################################ 126 | 127 | 128 | def gen_output_roots_from_caseroot(caseroot): 129 | if type(caseroot) == str: 130 | caseroot = [caseroot] 131 | if type(caseroot) != list: 132 | raise TypeError("caseroot must be a str or list, {caseroot} is not acceptable") 133 | 134 | output_roots = [] 135 | for single_root in caseroot: 136 | vars_to_check = ["RUNDIR"] 137 | if cime_xmlquery(single_root, "DOUT_S") == "TRUE": 138 | vars_to_check.append("DOUT_S_ROOT") 139 | for xml_var_to_query in vars_to_check: 140 | output_roots.append(cime_xmlquery(single_root, xml_var_to_query)) 141 | 142 | return output_roots 143 | 144 | 145 | ################################################################################ 146 | 147 | 148 | def timeseries_and_history_comparison(casename, output_roots): 149 | for year in range(1, 62): 150 | has_ts = True 151 | found_all = True 152 | print(f"Checking year {year:04}...") 153 | for stream in ["pop.h.nyear1", "pop.h.nday1", "pop.h", "cice.h1", "cice.h"]: 154 | has_hist = True 155 | # There is no cice.h1 time series for 0001 so skip check 156 | if stream == "cice.h1" and year == 1: 157 | continue 158 | # Run test 159 | print(f"... 
checking stream {stream} ...") 160 | comp_test = compare_ts_and_hist(casename, output_roots, stream, year) 161 | # Check ends when there are no history files for comparison 162 | if comp_test == "no time series": 163 | has_ts = False 164 | break 165 | 166 | # Skip years when there are no history files 167 | # (Assume those years were already checked prior to deleting history files) 168 | if comp_test == "no history": 169 | print( 170 | f"Skipping stream {stream} for year {year:04} because there are no history files" 171 | ) 172 | has_hist = False 173 | continue 174 | 175 | found_all = found_all and (comp_test == "same") 176 | 177 | if not has_ts: 178 | print(f"Could not find time series for year {year:04}") 179 | break 180 | if has_hist and found_all: 181 | print(f"All variables available in time series for year {year:04}") 182 | else: 183 | print(f"Could not find time series for all variables in year {year:04}") 184 | print("----") 185 | 186 | 187 | ################################################################################ 188 | 189 | 190 | def dict_copy_vals(src, dst, keys, abort_on_mismatch=True): 191 | for key in keys if type(keys) == list else [keys]: 192 | if key in src: 193 | if key in dst and abort_on_mismatch: 194 | if dst[key] != src[key]: 195 | raise ValueError( 196 | f"{key} exists in dst and src and dst values mismatch" 197 | ) 198 | else: 199 | dst[key] = src[key] 200 | 201 | 202 | ################################################################################ 203 | 204 | 205 | def print_key_metadata(ds, msg=None): 206 | print(64 * "*") 207 | if msg is not None: 208 | print(msg) 209 | print(64 * "*") 210 | for attr_name in ["chunks", "attrs", "encoding"]: 211 | print("ds." + attr_name) 212 | print(getattr(ds, attr_name)) 213 | print(32 * "*") 214 | for attr_name in ["chunks", "attrs", "encoding"]: 215 | print("ds['time']." + attr_name) 216 | print(getattr(ds["time"], attr_name)) 217 | print(32 * "*") 218 | 219 | 220 | ################################################################################ 221 | 222 | 223 | def generate_plot_catalog( 224 | root_dir, image_dir_name="images", extension=".json", use_full_path=True 225 | ): 226 | """ 227 | Generate a single dataframe from plot attributes saved in json files. 228 | Parameters 229 | ---------- 230 | root_dir : str, pathlib.Path 231 | The root directory 232 | extension : str, default `.json.` 233 | file extension to look for. 
234 | 235 | Returns 236 | ------- 237 | df : pd.DataFrame 238 | """ 239 | root_dir = pathlib.Path(root_dir) 240 | image_dir = root_dir / image_dir_name 241 | image_dir.exists() 242 | files = sorted(image_dir.rglob(f"**/*{extension}")) 243 | data = [] 244 | if files: 245 | for file in files: 246 | metadata = json.load(file.open()) 247 | if use_full_path: 248 | metadata["filepath"] = ( 249 | (root_dir / metadata["filepath"]).absolute().as_posix() 250 | ) 251 | data.append(metadata) 252 | return pd.DataFrame(data) 253 | else: 254 | print(f"Found 0 files with extension={extension} in {image_dir}.") 255 | return pd.DataFrame() 256 | -------------------------------------------------------------------------------- /notebooks/utils/PlotTypeClass.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import pathlib 4 | 5 | 6 | class _PlotTypeBaseClass(object): 7 | def __init__(self, *args, **kwargs): 8 | raise NotImplementedError("This must be implemented in child class") 9 | 10 | def get_filepaths(self, *args, **kwargs): 11 | raise NotImplementedError("This must be implemented in child class") 12 | 13 | def get_isel_str(self, da, isel_dict): 14 | """ 15 | If diag metadata passes isel_dict option, we need that reflected in file name. 16 | 17 | This subroutine produces an additional string of the form var1_val1.var2_val2..., 18 | where isel_dict is equivalent to da.sel(var1=val1,var2=val2). Note the switch 19 | from .isel to .sel -- val1 should be da.var1[isel_dict[var1]] (the true value, 20 | rather than the index) 21 | """ 22 | # Return empty string if isel_dict is empty dictionary 23 | isel_list = [] 24 | for varname in isel_dict: 25 | value = da[varname].data 26 | try: 27 | # Use two digits after decimal for floats 28 | isel_list.append(f"{varname}--{value:.2f}") 29 | except: 30 | # Otherwise just include the variable value (e.g. strings) 31 | isel_list.append(f"{varname}--{value}") 32 | isel_str = "__".join(isel_list) 33 | if len(isel_str) > 0: 34 | isel_str = "." + isel_str 35 | return isel_str 36 | 37 | def savefig(self, fig, root_dir="images", **kwargs): 38 | """ 39 | Saves fig as a PNG, with the file name determined by the other parameters. 
40 | 41 | Also writes metadata about image file to a JSON file 42 | """ 43 | 44 | # Always use tight_layout 45 | fig.tight_layout() 46 | 47 | # Remove trailing slash from root_dir 48 | if root_dir[-1] == "/": 49 | root_dir = root_dir[:-1] 50 | 51 | # Set up dictionary for metadata 52 | metadata = self.metadata 53 | filepath, jsonpath = self.get_filepaths() 54 | metadata["filepath"] = os.path.join( 55 | self.metadata["plot_type"], f"{filepath}.png" 56 | ) 57 | filepath = os.path.join( 58 | root_dir, self.metadata["casename"], metadata["filepath"] 59 | ) 60 | jsonpath = os.path.join( 61 | root_dir, 62 | self.metadata["casename"], 63 | self.metadata["plot_type"], 64 | f"{jsonpath}.json", 65 | ) 66 | 67 | for path in [filepath, jsonpath]: 68 | parent_dir = pathlib.Path(path).parent 69 | parent_dir.mkdir(parents=True, exist_ok=True) 70 | 71 | fig.savefig(filepath, **kwargs) 72 | with open(jsonpath, "w") as fp: 73 | json.dump(metadata, fp) 74 | 75 | 76 | ################################################################################ 77 | 78 | 79 | class SummaryMapClass(_PlotTypeBaseClass): 80 | def __init__(self, da, casename, datestamp, apply_log10, isel_dict): 81 | self.metadata = dict() 82 | self.metadata["plot_type"] = "summary_map" 83 | self.metadata["varname"] = da.name 84 | self.metadata["casename"] = casename 85 | self.metadata["date"] = datestamp 86 | self.metadata["apply_log10"] = apply_log10 87 | self.metadata["sel_dict"] = dict() 88 | for varname in isel_dict: 89 | value = da[varname].data 90 | try: 91 | # Use two digits after decimal for floats 92 | str_val = f"{value:.2f}" 93 | except: 94 | # Otherwise just include the variable value (e.g. strings) 95 | str_val = f"{value}" 96 | self.metadata["sel_dict"][varname] = str_val 97 | self.isel_str = self.get_isel_str(da, isel_dict) 98 | 99 | def get_filepaths(self): 100 | log_str = "" if not self.metadata["apply_log10"] else ".log10" 101 | file_prefix = f"{self.metadata['varname']}.{self.metadata['date']}{self.isel_str}{log_str}" 102 | filepath = file_prefix 103 | jsonpath = os.path.join("metadata", file_prefix) 104 | 105 | return filepath, jsonpath 106 | 107 | 108 | ################################################################################ 109 | 110 | 111 | class SummaryTSClass(_PlotTypeBaseClass): 112 | def __init__(self, da, casename, start_date, end_date, isel_dict): 113 | self.metadata = dict() 114 | self.metadata["plot_type"] = "time_series" 115 | self.metadata["varname"] = da.name 116 | self.metadata["casename"] = casename 117 | self.metadata["time_period"] = f"{start_date}_{end_date}" 118 | self.metadata["sel_dict"] = dict() 119 | for varname in isel_dict: 120 | value = da[varname].data 121 | try: 122 | # Use two digits after decimal for floats 123 | str_val = f"{value:.2f}" 124 | except: 125 | # Otherwise just include the variable value (e.g. 
strings) 126 | str_val = f"{value}" 127 | self.metadata["sel_dict"][varname] = str_val 128 | self.isel_str = self.get_isel_str(da, isel_dict) 129 | 130 | def get_filepaths(self): 131 | file_prefix = ( 132 | f"{self.metadata['varname']}.{self.metadata['time_period']}{self.isel_str}" 133 | ) 134 | filepath = file_prefix 135 | jsonpath = os.path.join("metadata", file_prefix) 136 | 137 | return filepath, jsonpath 138 | 139 | 140 | ################################################################################ 141 | 142 | 143 | class SummaryHistClass(_PlotTypeBaseClass): 144 | def __init__(self, da, casename, apply_log10, start_date, end_date, isel_dict): 145 | self.metadata = dict() 146 | self.metadata["plot_type"] = "histogram" 147 | self.metadata["varname"] = da.name 148 | self.metadata["casename"] = casename 149 | self.metadata["apply_log10"] = apply_log10 150 | self.metadata["time_period"] = f"{start_date}_{end_date}" 151 | self.metadata["sel_dict"] = dict() 152 | for varname in isel_dict: 153 | value = da[varname].data 154 | try: 155 | # Use two digits after decimal for floats 156 | str_val = f"{value:.2f}" 157 | except: 158 | # Otherwise just include the variable value (e.g. strings) 159 | str_val = f"{value}" 160 | self.metadata["sel_dict"][varname] = str_val 161 | self.isel_str = self.get_isel_str(da, isel_dict) 162 | 163 | def get_filepaths(self): 164 | log_str = "" if not self.metadata["apply_log10"] else ".log10" 165 | file_prefix = f"{self.metadata['varname']}.{self.metadata['time_period']}{self.isel_str}{log_str}" 166 | filepath = file_prefix 167 | jsonpath = os.path.join("metadata", file_prefix) 168 | 169 | return filepath, jsonpath 170 | 171 | 172 | ################################################################################ 173 | 174 | 175 | class TrendMapClass(_PlotTypeBaseClass): 176 | def __init__(self, da, casename, start_date, end_date, isel_dict): 177 | self.metadata = dict() 178 | self.metadata["plot_type"] = "trend_map" 179 | self.metadata["varname"] = da.name 180 | self.metadata["casename"] = casename 181 | self.metadata["time_period"] = f"{start_date}_{end_date}" 182 | self.metadata["sel_dict"] = dict() 183 | for varname in isel_dict: 184 | value = da[varname].data 185 | try: 186 | # Use two digits after decimal for floats 187 | str_val = f"{value:.2f}" 188 | except: 189 | # Otherwise just include the variable value (e.g. strings) 190 | str_val = f"{value}" 191 | self.metadata["sel_dict"][varname] = str_val 192 | self.isel_str = self.get_isel_str(da, isel_dict) 193 | 194 | def get_filepaths(self): 195 | file_prefix = ( 196 | f"{self.metadata['varname']}.{self.metadata['time_period']}{self.isel_str}" 197 | ) 198 | filepath = os.path.join(file_prefix) 199 | jsonpath = os.path.join("metadata", file_prefix) 200 | 201 | return filepath, jsonpath 202 | 203 | 204 | ################################################################################ 205 | 206 | 207 | class TrendHistClass(_PlotTypeBaseClass): 208 | def __init__(self, da, casename, start_date, end_date, isel_dict): 209 | self.metadata = dict() 210 | self.metadata["plot_type"] = "trend_hist" 211 | self.metadata["varname"] = da.name 212 | self.metadata["casename"] = casename 213 | self.metadata["time_period"] = f"{start_date}_{end_date}" 214 | self.metadata["sel_dict"] = dict() 215 | for varname in isel_dict: 216 | value = da[varname].data 217 | try: 218 | # Use two digits after decimal for floats 219 | str_val = f"{value:.2f}" 220 | except: 221 | # Otherwise just include the variable value (e.g. 
strings) 222 | str_val = f"{value}" 223 | self.metadata["sel_dict"][varname] = str_val 224 | self.isel_str = self.get_isel_str(da, isel_dict) 225 | 226 | def get_filepaths(self): 227 | file_prefix = ( 228 | f"{self.metadata['varname']}.{self.metadata['time_period']}{self.isel_str}" 229 | ) 230 | filepath = os.path.join(file_prefix) 231 | jsonpath = os.path.join("metadata", file_prefix) 232 | 233 | return filepath, jsonpath 234 | -------------------------------------------------------------------------------- /notebooks/gen_csv.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from utils import generate_plot_catalog\n", 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "name": "stdout", 20 | "output_type": "stream", 21 | "text": [ 22 | "Generating csv for histogram plots in g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001\n", 23 | "Generating csv for summary_map plots in g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001\n", 24 | "Generating csv for time_series plots in g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001\n", 25 | "Generating csv for trend_hist plots in g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001\n", 26 | "Found 0 files with extension=.json in images/g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001/trend_hist.\n", 27 | "Generating csv for trend_map plots in g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001\n", 28 | "Found 0 files with extension=.json in images/g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001/trend_map.\n", 29 | "Generating csv for histogram plots in g.e22.G1850ECO_JRA_HR.TL319_t13.003\n", 30 | "Generating csv for summary_map plots in g.e22.G1850ECO_JRA_HR.TL319_t13.003\n", 31 | "Generating csv for time_series plots in g.e22.G1850ECO_JRA_HR.TL319_t13.003\n", 32 | "Generating csv for trend_hist plots in g.e22.G1850ECO_JRA_HR.TL319_t13.003\n", 33 | "Generating csv for trend_map plots in g.e22.G1850ECO_JRA_HR.TL319_t13.003\n", 34 | "Generating csv for histogram plots in g.e22.G1850ECO_JRA_HR.TL319_t13.004\n", 35 | "Generating csv for summary_map plots in g.e22.G1850ECO_JRA_HR.TL319_t13.004\n", 36 | "Generating csv for time_series plots in g.e22.G1850ECO_JRA_HR.TL319_t13.004\n", 37 | "Generating csv for trend_hist plots in g.e22.G1850ECO_JRA_HR.TL319_t13.004\n", 38 | "Generating csv for trend_map plots in g.e22.G1850ECO_JRA_HR.TL319_t13.004\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | "df = dict()\n", 44 | "for casename in [\n", 45 | " \"g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001\",\n", 46 | " \"g.e22.G1850ECO_JRA_HR.TL319_t13.003\",\n", 47 | " \"g.e22.G1850ECO_JRA_HR.TL319_t13.004\",\n", 48 | "]:\n", 49 | " df[casename] = pd.DataFrame({})\n", 50 | " for plottype in [\n", 51 | " \"histogram\",\n", 52 | " \"summary_map\",\n", 53 | " \"time_series\",\n", 54 | " \"trend_hist\",\n", 55 | " \"trend_map\",\n", 56 | " ]:\n", 57 | " print(f\"Generating csv for {plottype} plots in {casename}\")\n", 58 | " df[casename] = pd.concat(\n", 59 | " [\n", 60 | " df[casename],\n", 61 | " generate_plot_catalog(\n", 62 | " \"./\",\n", 63 | " image_dir_name=f\"images/{casename}/{plottype}\",\n", 64 | " use_full_path=False,\n", 65 | " ),\n", 66 | " ]\n", 67 | " )\n", 68 | " df[casename].to_csv(\n", 69 | " f\"images/{casename}/png_catalog.csv\",\n", 70 | " compression=None,\n", 71 | " index=False,\n", 72 | " )" 73 | ] 74 | }, 75 | { 76 
| "cell_type": "code", 77 | "execution_count": 3, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "data": { 82 | "text/html": [ 83 | "
\n", 84 | "\n", 97 | "\n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | "
plot_typevarnamecasenameapply_log10time_periodsel_dictfilepathdate
0histogramCaCO3_FLUX_100mg.e22.G1850ECO_JRA_HR.TL319_t13.004False0001-01-01_0001-12-31{}histogram/CaCO3_FLUX_100m.0001-01-01_0001-12-3...NaN
1histogramCaCO3_FLUX_100mg.e22.G1850ECO_JRA_HR.TL319_t13.004True0001-01-01_0001-12-31{}histogram/CaCO3_FLUX_100m.0001-01-01_0001-12-3...NaN
2histogramCaCO3_FLUX_100mg.e22.G1850ECO_JRA_HR.TL319_t13.004False0002-01-01_0002-12-31{}histogram/CaCO3_FLUX_100m.0002-01-01_0002-12-3...NaN
3histogramCaCO3_FLUX_100mg.e22.G1850ECO_JRA_HR.TL319_t13.004True0002-01-01_0002-12-31{}histogram/CaCO3_FLUX_100m.0002-01-01_0002-12-3...NaN
4histogramCaCO3_FLUX_100mg.e22.G1850ECO_JRA_HR.TL319_t13.004False0003-01-01_0003-12-31{}histogram/CaCO3_FLUX_100m.0003-01-01_0003-12-3...NaN
...........................
25trend_mapSiO3g.e22.G1850ECO_JRA_HR.TL319_t13.004NaN0002-01-01_0004-12-31{'basins': 'Atlantic'}trend_map/SiO3.0002-01-01_0004-12-31.basins--A...NaN
26trend_mapSiO3g.e22.G1850ECO_JRA_HR.TL319_t13.004NaN0002-01-01_0004-12-31{'basins': 'Global'}trend_map/SiO3.0002-01-01_0004-12-31.basins--G...NaN
27trend_mapSiO3g.e22.G1850ECO_JRA_HR.TL319_t13.004NaN0002-01-01_0004-12-31{'basins': 'Indian'}trend_map/SiO3.0002-01-01_0004-12-31.basins--I...NaN
28trend_mapSiO3g.e22.G1850ECO_JRA_HR.TL319_t13.004NaN0002-01-01_0004-12-31{'basins': 'Pacific'}trend_map/SiO3.0002-01-01_0004-12-31.basins--P...NaN
29trend_mapSiO3g.e22.G1850ECO_JRA_HR.TL319_t13.004NaN0002-01-01_0004-12-31{'z_t': '35109.35'}trend_map/SiO3.0002-01-01_0004-12-31.z_t--3510...NaN
\n", 235 | "

1244 rows × 8 columns

\n", 236 | "
" 237 | ], 238 | "text/plain": [ 239 | " plot_type varname casename \\\n", 240 | "0 histogram CaCO3_FLUX_100m g.e22.G1850ECO_JRA_HR.TL319_t13.004 \n", 241 | "1 histogram CaCO3_FLUX_100m g.e22.G1850ECO_JRA_HR.TL319_t13.004 \n", 242 | "2 histogram CaCO3_FLUX_100m g.e22.G1850ECO_JRA_HR.TL319_t13.004 \n", 243 | "3 histogram CaCO3_FLUX_100m g.e22.G1850ECO_JRA_HR.TL319_t13.004 \n", 244 | "4 histogram CaCO3_FLUX_100m g.e22.G1850ECO_JRA_HR.TL319_t13.004 \n", 245 | ".. ... ... ... \n", 246 | "25 trend_map SiO3 g.e22.G1850ECO_JRA_HR.TL319_t13.004 \n", 247 | "26 trend_map SiO3 g.e22.G1850ECO_JRA_HR.TL319_t13.004 \n", 248 | "27 trend_map SiO3 g.e22.G1850ECO_JRA_HR.TL319_t13.004 \n", 249 | "28 trend_map SiO3 g.e22.G1850ECO_JRA_HR.TL319_t13.004 \n", 250 | "29 trend_map SiO3 g.e22.G1850ECO_JRA_HR.TL319_t13.004 \n", 251 | "\n", 252 | " apply_log10 time_period sel_dict \\\n", 253 | "0 False 0001-01-01_0001-12-31 {} \n", 254 | "1 True 0001-01-01_0001-12-31 {} \n", 255 | "2 False 0002-01-01_0002-12-31 {} \n", 256 | "3 True 0002-01-01_0002-12-31 {} \n", 257 | "4 False 0003-01-01_0003-12-31 {} \n", 258 | ".. ... ... ... \n", 259 | "25 NaN 0002-01-01_0004-12-31 {'basins': 'Atlantic'} \n", 260 | "26 NaN 0002-01-01_0004-12-31 {'basins': 'Global'} \n", 261 | "27 NaN 0002-01-01_0004-12-31 {'basins': 'Indian'} \n", 262 | "28 NaN 0002-01-01_0004-12-31 {'basins': 'Pacific'} \n", 263 | "29 NaN 0002-01-01_0004-12-31 {'z_t': '35109.35'} \n", 264 | "\n", 265 | " filepath date \n", 266 | "0 histogram/CaCO3_FLUX_100m.0001-01-01_0001-12-3... NaN \n", 267 | "1 histogram/CaCO3_FLUX_100m.0001-01-01_0001-12-3... NaN \n", 268 | "2 histogram/CaCO3_FLUX_100m.0002-01-01_0002-12-3... NaN \n", 269 | "3 histogram/CaCO3_FLUX_100m.0002-01-01_0002-12-3... NaN \n", 270 | "4 histogram/CaCO3_FLUX_100m.0003-01-01_0003-12-3... NaN \n", 271 | ".. ... ... \n", 272 | "25 trend_map/SiO3.0002-01-01_0004-12-31.basins--A... NaN \n", 273 | "26 trend_map/SiO3.0002-01-01_0004-12-31.basins--G... NaN \n", 274 | "27 trend_map/SiO3.0002-01-01_0004-12-31.basins--I... NaN \n", 275 | "28 trend_map/SiO3.0002-01-01_0004-12-31.basins--P... NaN \n", 276 | "29 trend_map/SiO3.0002-01-01_0004-12-31.z_t--3510... 
NaN \n", 277 | "\n", 278 | "[1244 rows x 8 columns]" 279 | ] 280 | }, 281 | "execution_count": 3, 282 | "metadata": {}, 283 | "output_type": "execute_result" 284 | } 285 | ], 286 | "source": [ 287 | "df[\"g.e22.G1850ECO_JRA_HR.TL319_t13.004\"]" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 4, 293 | "metadata": {}, 294 | "outputs": [ 295 | { 296 | "data": { 297 | "text/plain": [ 298 | "'histogram/CaCO3_FLUX_100m.0001-01-01_0001-12-31.png'" 299 | ] 300 | }, 301 | "execution_count": 4, 302 | "metadata": {}, 303 | "output_type": "execute_result" 304 | } 305 | ], 306 | "source": [ 307 | "df[\"g.e22.G1850ECO_JRA_HR.TL319_t13.004\"][\"filepath\"].to_list()[0]" 308 | ] 309 | } 310 | ], 311 | "metadata": { 312 | "kernelspec": { 313 | "display_name": "Python [conda env:miniconda3-hires-marbl]", 314 | "language": "python", 315 | "name": "conda-env-miniconda3-hires-marbl-py" 316 | }, 317 | "language_info": { 318 | "codemirror_mode": { 319 | "name": "ipython", 320 | "version": 3 321 | }, 322 | "file_extension": ".py", 323 | "mimetype": "text/x-python", 324 | "name": "python", 325 | "nbconvert_exporter": "python", 326 | "pygments_lexer": "ipython3", 327 | "version": "3.7.8" 328 | } 329 | }, 330 | "nbformat": 4, 331 | "nbformat_minor": 4 332 | } 333 | -------------------------------------------------------------------------------- /notebooks/utils/Plotting.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tools to find and open files associated with the runs 3 | """ 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import xarray as xr 8 | import cftime 9 | import datetime 10 | 11 | # local modules, not available through __init__ 12 | from .utils import time_year_plus_frac, round_sig 13 | from .utils_units import conv_units 14 | from .PlotTypeClass import ( 15 | SummaryMapClass, 16 | SummaryTSClass, 17 | SummaryHistClass, 18 | TrendMapClass, 19 | TrendHistClass, 20 | ) 21 | 22 | ################################################################################ 23 | 24 | 25 | def compare_fields_at_lat_lon( 26 | list_of_das_in, nlat, nlon, individual_plots=False, filename=None 27 | ): 28 | 29 | # This shouldn't be hard-coded... but how else to get? 
30 | xticks = 365 + np.array([0, 31, 59, 90, 120, 151]) 31 | xlabels = ["Jan 1", "Feb 1", "Mar 1", "Apr 1", "May 1", "June 1"] 32 | yticks = np.linspace(0, 17e4, 18) 33 | 34 | list_of_das = [] 35 | for da in list_of_das_in: 36 | list_of_das.append(da.isel(nlat=nlat, nlon=nlon).compute()) 37 | 38 | # Get longitude and latitude (hard-coded to assume we want W and S) 39 | long_west = 360 - list_of_das[0]["TLONG"].data 40 | lat_south = -list_of_das[0]["TLAT"].data 41 | 42 | if individual_plots: 43 | nrows = int(np.ceil(len(list_of_das) / 2)) 44 | fig, axes = plt.subplots( 45 | nrows=nrows, ncols=2, figsize=(9 * nrows, 10.5), sharex=True 46 | ) 47 | 48 | # Hard-coded title is also a bad idea 49 | fig.suptitle(f"Mix Layer Depth at ({long_west:.2f} W, {lat_south:.2f} S)") 50 | 51 | for n, da in enumerate(list_of_das): 52 | plt.subplot(nrows, 2, n + 1) 53 | da.plot() 54 | plt.title(f"Run {(n+1):03}") 55 | # plt.xlim((np.min(xticks), np.max(xticks))) 56 | # plt.xticks(xticks, xlabels) 57 | 58 | # Only label yticks on left-most column 59 | if n % 2 == 0: 60 | plt.yticks(yticks) 61 | else: 62 | plt.yticks(yticks, "") 63 | plt.ylabel("") 64 | 65 | # Only add xlabel on last row 66 | if (n + 1) / 2 == nrows: 67 | plt.xlabel("Date (year 0001)") 68 | else: 69 | plt.xlabel("") 70 | else: 71 | fig = plt.figure(figsize=(9.0, 5.25), clear=True) 72 | fig.suptitle(f"Mix Layer Depth at ({long_west:.2f} W, {lat_south:.2f} S)") 73 | 74 | for da in list_of_das: 75 | da.plot() 76 | plt.title("All 4 runs overlay") 77 | # plt.xlim((np.min(xticks), np.max(xticks))) 78 | # plt.xticks(xticks, xlabels) 79 | plt.yticks(yticks) 80 | plt.xlabel("Date (year 0001)") 81 | 82 | if filename: 83 | fig.savefig(filename) 84 | 85 | return fig 86 | 87 | 88 | ################################################################################ 89 | 90 | 91 | def plot_dict_with_date_keys(dict_in, title, legend=None): 92 | """ 93 | Assume that keys of dict_in are 'YYYYMMDD' and values are numeric 94 | """ 95 | time_units = "days since 0001-01-01 0:00:00" 96 | time = [] 97 | array_val = [] 98 | for date in dict_in.keys(): 99 | if "log" not in date: 100 | (year, month, day) = date.split("-") 101 | time.append(cftime.DatetimeNoLeap(int(year), int(month), int(day))) 102 | array_val.append(dict_in[date]) 103 | 104 | if type(array_val[0]) == list: 105 | dim2 = len(array_val[0]) 106 | da = xr.DataArray(array_val, dims=["time", "dim2"]) 107 | else: 108 | dim2 = None 109 | da = xr.DataArray(array_val, dims="time") 110 | da["time"] = time 111 | 112 | fig = plt.figure(figsize=(9.0, 5.25), clear=True) 113 | if dim2: 114 | for dim2ind in range(dim2): 115 | da.isel(dim2=dim2ind).plot() 116 | else: 117 | da.plot() 118 | if legend: 119 | plt.legend(legend) 120 | plt.title(title) 121 | plt.show() 122 | 123 | 124 | # return fig 125 | 126 | ################################################################################ 127 | 128 | 129 | def _extract_field_from_file(ds, varname, nlat, nlon): 130 | return ds[varname].isel(nlat=nlat, nlon=nlon).compute() 131 | 132 | 133 | ################################################################################ 134 | 135 | 136 | def summary_plot_global_ts( 137 | ds, da, diag_metadata, time_coarsen_len=None, **plot_options 138 | ): 139 | casename = ds.attrs["title"] 140 | save_pngs = plot_options.get("save_pngs", False) 141 | if save_pngs: 142 | root_dir = plot_options.get("root_dir", "images") 143 | kwargs = plot_options.get("savefig_kwargs", {}) 144 | isel_dict = diag_metadata.get("isel_dict", {}) 145 | 146 | reduce_dims 
= da.dims[-2:] 147 | weights = ds["TAREA"].fillna(0) 148 | da_weighted = da.weighted(weights) 149 | spatial_op = diag_metadata.get("spatial_op", "average") 150 | if spatial_op == "average": 151 | to_plot = da_weighted.mean(dim=reduce_dims) 152 | to_plot.attrs = da.attrs 153 | if "display_units" in diag_metadata: 154 | to_plot = conv_units(to_plot, diag_metadata["display_units"]) 155 | if spatial_op == "integrate": 156 | to_plot = da_weighted.sum(dim=reduce_dims) 157 | to_plot.attrs = da.attrs 158 | to_plot.attrs["units"] += f" {weights.attrs['units']}" 159 | if "integral_display_units" in diag_metadata: 160 | to_plot = conv_units( 161 | to_plot, 162 | diag_metadata["integral_display_units"], 163 | units_scalef=diag_metadata.get("integral_unit_conv"), 164 | ) 165 | # do not use to_plot.plot.line("-o") because of incorrect time axis values 166 | # https://github.com/pydata/xarray/issues/4401 167 | fig, ax = plt.subplots() 168 | ax.plot(time_year_plus_frac(to_plot, "time"), to_plot.values, "-o") 169 | ax.set_xlabel(xr.plot.utils.label_from_attrs(to_plot["time"])) 170 | ax.set_ylabel(xr.plot.utils.label_from_attrs(to_plot)) 171 | ax.set_title(to_plot._title_for_slice()) 172 | if time_coarsen_len is not None: 173 | tlen = len(to_plot.time) 174 | tlen_trunc = (tlen // time_coarsen_len) * time_coarsen_len 175 | to_plot_trunc = to_plot.isel(time=slice(0, tlen_trunc)) 176 | to_plot_coarse = to_plot_trunc.coarsen({"time": time_coarsen_len}).mean() 177 | ax.plot( 178 | time_year_plus_frac(to_plot_coarse, "time"), to_plot_coarse.values, "-o" 179 | ) 180 | title = ax.get_title() 181 | if title != "": 182 | title += ", " 183 | title += f"last mean value={round_sig(to_plot_coarse.values[-1],4)}" 184 | ax.set_title(title) 185 | if save_pngs: 186 | str_datestamp = f'{ds[ds["time"].attrs["bounds"]].load().data[0,0]}' 187 | first_datestamp = str_datestamp.split(" ")[0] 188 | str_datestamp = ( 189 | f'{ds[ds["time"].attrs["bounds"]].data[-1,-1]-datetime.timedelta(days=1)}' 190 | ) 191 | last_datestamp = str_datestamp.split(" ")[0] 192 | summary_ts = SummaryTSClass( 193 | da, casename, first_datestamp, last_datestamp, isel_dict 194 | ) 195 | summary_ts.savefig(fig, root_dir=root_dir, **kwargs) 196 | else: 197 | plt.show() 198 | plt.close(fig) 199 | 200 | 201 | ################################################################################ 202 | 203 | 204 | def summary_plot_histogram(ds, da, diag_metadata, lines_per_plot=12, **plot_options): 205 | save_pngs = plot_options.get("save_pngs", False) 206 | casename = ds.attrs["title"] 207 | if save_pngs: 208 | root_dir = plot_options.get("root_dir", "images") 209 | kwargs = plot_options.get("savefig_kwargs", {}) 210 | isel_dict = diag_metadata.get("isel_dict", {}) 211 | 212 | # histogram, all time levels in one plot 213 | hist_bins = 20 214 | hist_log = True 215 | 216 | # Loop length 217 | t_cnt = len(da["time"]) 218 | for apply_log10 in _apply_log10_vals(diag_metadata): 219 | t_ind_beg = 0 220 | fig, ax = plt.subplots() 221 | # fig.tight_layout() 222 | for t_ind in range(t_cnt): 223 | to_plot = da.isel(time=t_ind) 224 | if "display_units" in diag_metadata: 225 | to_plot = conv_units(to_plot, diag_metadata["display_units"]) 226 | if apply_log10: 227 | to_plot = np.log10(xr.where(to_plot > 0, to_plot, np.nan)) 228 | to_plot.name = f"log10({to_plot.name})" 229 | # to_plot.plot.hist(bins=hist_bins, log=hist_log, histtype="step") 230 | to_plot.plot.hist(ax=ax, bins=hist_bins, log=hist_log, histtype="step") 231 | if t_ind % lines_per_plot == lines_per_plot - 1: 232 | 
t_beg = ds[ds["time"].attrs["bounds"]].values[t_ind_beg, 0] 233 | t_str_beg = f"{t_beg.year:04}-{t_beg.month:02}-{t_beg.day:02}" 234 | t_ind_end = t_ind 235 | t_end = ds[ds["time"].attrs["bounds"]].values[ 236 | t_ind_end, -1 237 | ] - datetime.timedelta(days=1) 238 | t_str_end = f"{t_end.year:04}-{t_end.month:02}-{t_end.day:02}" 239 | plt.title(f"Histogram: {t_str_beg} : {t_str_end}") 240 | t_ind_beg = t_ind_end + 1 241 | if save_pngs: 242 | summary_hist = SummaryHistClass( 243 | da, casename, apply_log10, t_str_beg, t_str_end, isel_dict 244 | ) 245 | summary_hist.savefig(fig, root_dir=root_dir, **kwargs) 246 | else: 247 | plt.show() 248 | plt.close(fig) 249 | if t_ind != t_cnt - 1: 250 | fig, ax = plt.subplots() 251 | 252 | if t_ind % lines_per_plot != lines_per_plot - 1: 253 | t_beg = ds[ds["time"].attrs["bounds"]].values[t_ind_beg, 0] 254 | t_str_beg = f"{t_beg.year:04}-{t_beg.month:02}-{t_beg.day:02}" 255 | t_ind_end = t_ind 256 | t_end = ds[ds["time"].attrs["bounds"]].values[ 257 | t_ind_end, -1 258 | ] - datetime.timedelta(days=1) 259 | t_str_end = f"{t_end.year:04}-{t_end.month:02}-{t_end.day:02}" 260 | plt.title(f"Histogram: {t_str_beg} : {t_str_end}") 261 | if save_pngs: 262 | summary_hist = SummaryHistClass( 263 | da, casename, t_str_beg, t_str_end, isel_dict 264 | ) 265 | summary_hist.savefig(fig, root_dir=root_dir, **kwargs) 266 | else: 267 | plt.show() 268 | plt.close(fig) 269 | 270 | 271 | ################################################################################ 272 | 273 | 274 | def summary_plot_maps(ds, da, diag_metadata, **plot_options): 275 | 276 | save_pngs = plot_options.get("save_pngs", False) 277 | casename = ds.attrs["title"] 278 | if save_pngs: 279 | root_dir = plot_options.get("root_dir", "images") 280 | kwargs = plot_options.get("savefig_kwargs", {}) 281 | isel_dict = diag_metadata.get("isel_dict", {}) 282 | 283 | # maps, 1 plots for time level 284 | cmap = "plasma" 285 | 286 | for apply_log10 in _apply_log10_vals(diag_metadata): 287 | vmin = diag_metadata.get("map_vmin") 288 | vmax = diag_metadata.get("map_vmax") 289 | if apply_log10: 290 | if vmin is not None: 291 | vmin = np.log10(vmin) if vmin > 0.0 else None 292 | if vmax is not None: 293 | vmax = np.log10(vmax) if vmax > 0.0 else None 294 | for t_ind in range(len(da["time"])): 295 | to_plot = da.isel(time=t_ind) 296 | if "display_units" in diag_metadata: 297 | to_plot = conv_units(to_plot, diag_metadata["display_units"]) 298 | if apply_log10: 299 | to_plot = np.log10(xr.where(to_plot > 0.0, to_plot, np.nan)) 300 | to_plot.name = f"log10({to_plot.name})" 301 | 302 | ax = to_plot.plot(cmap=cmap, vmin=vmin, vmax=vmax) 303 | fig = ax.get_figure() 304 | if save_pngs: 305 | datestamp = f"{da.time[t_ind].data.item()}".split(" ")[0] 306 | summary_map = SummaryMapClass( 307 | da, casename, datestamp, apply_log10, isel_dict 308 | ) 309 | summary_map.savefig(fig, root_dir=root_dir, **kwargs) 310 | else: 311 | plt.show() 312 | plt.close(fig) 313 | 314 | 315 | ################################################################################ 316 | 317 | 318 | def trend_plot(ds, da, vmin=None, vmax=None, invert_yaxis=False, **plot_options): 319 | 320 | save_pngs = plot_options.get("save_pngs", False) 321 | casename = ds.attrs["title"] 322 | if save_pngs: 323 | root_dir = plot_options.get("root_dir", "images") 324 | kwargs = plot_options.get("savefig_kwargs", {}) 325 | isel_dict = plot_options.get("isel_dict", {}) 326 | t_beg = ds[ds["time"].attrs["bounds"]].values[0, 0] 327 | t_str_beg = 
f"{t_beg.year:04}-{t_beg.month:02}-{t_beg.day:02}" 328 | t_end = ds[ds["time"].attrs["bounds"]].values[-1, -1] - datetime.timedelta( 329 | days=1 330 | ) 331 | t_str_end = f"{t_end.year:04}-{t_end.month:02}-{t_end.day:02}" 332 | 333 | trend = da.polyfit("time", 1).polyfit_coefficients.sel(degree=1) 334 | trend.name = da.name + " Trend" 335 | trend.attrs["long_name"] = da.attrs["long_name"] + " Trend" 336 | nsec_per_yr = 1.0e9 * 86400 * 365 337 | trend = nsec_per_yr * trend 338 | trend.attrs["units"] = da.attrs["units"] + "/yr" 339 | trend.load() 340 | 341 | fig, ax = plt.subplots() 342 | trend.plot.hist(bins=20, log=True, ax=ax) 343 | plt.title(da._title_for_slice()) 344 | if save_pngs: 345 | trend_hist = TrendHistClass(da, casename, t_str_beg, t_str_end, isel_dict) 346 | trend_hist.savefig(fig, root_dir=root_dir, **kwargs) 347 | else: 348 | plt.show() 349 | plt.close(fig) 350 | 351 | fig, ax = plt.subplots() 352 | trend.plot.pcolormesh(cmap="plasma", vmin=vmin, vmax=vmax, ax=ax) 353 | plt.title(da._title_for_slice()) 354 | if invert_yaxis: 355 | ax.invert_yaxis() 356 | if save_pngs: 357 | trend_map = TrendMapClass(da, casename, t_str_beg, t_str_end, isel_dict) 358 | trend_map.savefig(fig, root_dir=root_dir, **kwargs) 359 | else: 360 | plt.show() 361 | plt.close(fig) 362 | 363 | 364 | ################################################################################ 365 | 366 | 367 | def _apply_log10_vals(diag_metadata): 368 | if diag_metadata.get("apply_log10", False): 369 | return [False, True] 370 | else: 371 | return [False] 372 | 373 | 374 | ################################################################################ 375 | -------------------------------------------------------------------------------- /notebooks/utils/CaseClass.py: -------------------------------------------------------------------------------- 1 | """ 2 | Class to use to access output (log and netCDF) from CESM runs 3 | """ 4 | 5 | import glob 6 | import os 7 | import gzip as gz 8 | import cftime 9 | import numpy as np 10 | import xarray as xr 11 | 12 | # local modules, not available through __init__ 13 | from .config import add_first_date_and_reformat 14 | 15 | from .utils import time_set_mid, dict_copy_vals, print_key_metadata 16 | 17 | ################################################################################ 18 | 19 | 20 | class CaseClass(object): 21 | 22 | # Constructor [goal: get an intake-esm catalog into memory; read from disk or generate it] 23 | def __init__( 24 | self, casenames, output_roots, verbose=False, 25 | ): 26 | """ 27 | casenames: a string or list containing the name(s) of the case(s) to include in the object 28 | output_roots: a string or list containing the name(s) of the directories to search for log / netCDF files 29 | * netCDF files may be in one of three locations: 30 | 1. history files may be in {output_root} itself 31 | [e.g. output_root = RUNDIR] 32 | 2. history files may be in {output_root}/{component}/hist 33 | [e.g. output_root = DOUT_S] 34 | 3. time series files may be in {output_root}/{component}/proc/tseries/{freq} 35 | [e.g. output_root = root of pyReshaper output] 36 | * log files may be in one of two locations 37 | 1. {output_root} itself [e.g. output_root = RUNDIR] 38 | 2. {output_root}/logs [e.g. 
output_root = DOUT_S] 39 | """ 40 | if type(casenames) == str: 41 | casenames = [casenames] 42 | if type(casenames) != list: 43 | raise ValueError(f"{casenames} is not a string or list") 44 | 45 | if type(output_roots) == str: 46 | output_roots = [output_roots] 47 | if type(output_roots) != list: 48 | raise ValueError(f"{output_roots} is not a string or list") 49 | 50 | self._casenames = casenames 51 | self._output_roots = [] 52 | for output_dir in output_roots: 53 | if os.path.isdir(output_dir): 54 | self._output_roots.append(output_dir) 55 | self._verbose = verbose 56 | # TODO: figure out how to let this configuration be user-specified (maybe YAML?) 57 | self._stream_metadata = dict() 58 | self._stream_metadata["pop.h"] = {"comp": "ocn", "freq": "month_1"} 59 | self._stream_metadata["pop.h.nday1"] = {"comp": "ocn", "freq": "day_1"} 60 | self._stream_metadata["pop.h.nyear1"] = {"comp": "ocn", "freq": "year_1"} 61 | self._stream_metadata["cice.h"] = {"comp": "ice", "freq": "month_1"} 62 | self._stream_metadata["cice.h1"] = {"comp": "ice", "freq": "day_1"} 63 | self._log_filenames = self._find_log_files() 64 | self._history_filenames, self._timeseries_filenames = self._find_nc_files() 65 | self._dataset_files = dict() 66 | self._dataset_src = dict() 67 | 68 | self.log_contents = dict() 69 | 70 | ############################################################################ 71 | 72 | def get_co2calc_warning_cnt(self, max_it=4): 73 | self._read_log("cesm") 74 | 75 | warning_count = dict() 76 | # For each date, pull value from most recent log file 77 | for date in self.log_contents["cesm"]: 78 | logs = list(self.log_contents["cesm"][date].keys()) 79 | logs.sort() 80 | warning_count[date] = [] 81 | for it in range(1, max_it + 1): 82 | warning_count[date].append( 83 | sum( 84 | [ 85 | f"MARBL WARNING (marbl_co2calc_mod:drtsafe): (marbl_co2calc_mod:drtsafe) it = {it}" 86 | in entry 87 | for entry in self.log_contents["cesm"][date][logs[-1]] 88 | ] 89 | ) 90 | ) 91 | 92 | return warning_count 93 | 94 | ############################################################################ 95 | 96 | def _get_single_year_timeseries_files(self, year, stream, varname): 97 | timeseries_filenames = [ 98 | filename 99 | for filename in self._timeseries_filenames[stream] 100 | if (f".{varname}." 
in filename and f".{year:04}" in filename) 101 | ] 102 | return timeseries_filenames 103 | 104 | ############################################################################ 105 | 106 | def get_timeseries_files(self, year, stream, varnames=None): 107 | if type(varnames) == str: 108 | varnames = [varnames] 109 | if not (type(varnames) == list or varnames is None): 110 | raise ValueError( 111 | f"varnames = {varnames} which is not None, a string, or a list" 112 | ) 113 | 114 | timeseries_filenames = [] 115 | if varnames: 116 | for varname in varnames: 117 | timeseries_filenames.extend( 118 | self._get_single_year_timeseries_files(year, stream, varname) 119 | ) 120 | else: 121 | timeseries_filenames = self._get_single_year_timeseries_files(year, stream) 122 | 123 | return timeseries_filenames 124 | 125 | ############################################################################ 126 | 127 | def check_for_year_in_timeseries_files(self, year, stream): 128 | """ 129 | Return True if {stream} has any timeseries files from {year} 130 | """ 131 | return any( 132 | [ 133 | f".{year:04}" in filename 134 | for filename in self._timeseries_filenames[stream] 135 | ] 136 | ) 137 | 138 | ############################################################################ 139 | 140 | def get_history_files(self, year, stream): 141 | return [ 142 | filename 143 | for filename in self._history_filenames[stream] 144 | if f"{stream}.{year:04}" in filename 145 | ] 146 | 147 | ############################################################################ 148 | 149 | def _find_log_files(self): 150 | """ 151 | Look in each _output_roots dir (and /logs) for cesm.log, ocn.log, and cpl.log files 152 | """ 153 | files = dict() 154 | for component in ["cesm", "ocn", "cpl"]: 155 | files[component] = [] 156 | for output_dir in self._output_roots: 157 | files[component].extend( 158 | glob.glob(os.path.join(output_dir, f"{component}.log.*")) 159 | ) 160 | if os.path.isdir(os.path.join(output_dir, "logs")): 161 | files[component].extend( 162 | glob.glob( 163 | os.path.join(output_dir, "logs", f"{component}.log.*") 164 | ) 165 | ) 166 | return files 167 | 168 | ############################################################################ 169 | 170 | def _find_nc_files(self): 171 | """ 172 | Look for netcdf files in each output_root directory, as well as 173 | {component}/hist and {component}/proc/tseries/{freq} subdirectories 174 | """ 175 | hist_files = dict() 176 | ts_files = dict() 177 | for stream in self._stream_metadata: 178 | hist_files[stream] = [] 179 | ts_files[stream] = [] 180 | comp = self._stream_metadata[stream]["comp"] 181 | freq = self._stream_metadata[stream]["freq"] 182 | for casename in self._casenames: 183 | for output_dir in self._output_roots: 184 | if self._verbose: 185 | print(f"Checking {output_dir} for {stream} files...") 186 | # (1) Look for history files in output_dir 187 | # TODO: need better way to avoid wrong stream than .0* 188 | # (do not want to glob *.pop.h.nday1.* when looking for pop.h files) 189 | pattern = f"{casename}.{stream}.0*.nc" 190 | files_found = glob.glob(os.path.join(output_dir, pattern)) 191 | files_found.sort() 192 | hist_files[stream].extend(files_found) 193 | 194 | # (2) look for history files that might be in {output_dir}/{comp}/hist 195 | # TODO: need better way to avoid wrong stream than .0* 196 | # (do not want to glob *.pop.h.nday1.* when looking for pop.h files) 197 | hist_dir = os.path.join(output_dir, comp, "hist") 198 | if os.path.isdir(hist_dir): 199 | pattern = 
f"{casename}.{stream}.0*.nc" 200 | files_found = glob.glob(os.path.join(hist_dir, pattern)) 201 | files_found.sort() 202 | hist_files[stream].extend(files_found) 203 | 204 | # (3) look for time series files that might be in {output_dir}/{comp}/proc/time_series/{freq} 205 | tseries_dir = os.path.join( 206 | output_dir, comp, "proc", "tseries", freq 207 | ) 208 | if os.path.isdir(tseries_dir): 209 | pattern = f"{casename}.{stream}.*.nc" 210 | files_found = glob.glob(os.path.join(tseries_dir, pattern)) 211 | files_found.sort() 212 | ts_files[stream].extend(files_found) 213 | 214 | return hist_files, ts_files 215 | 216 | ############################################################################ 217 | 218 | def _read_log(self, component): 219 | """ 220 | Read all log files from specified component. Returns a dict where keys 221 | are dates and values are contents of log from that date; if multiple 222 | logs contain the same date, uses the most recent. 223 | """ 224 | if component in self.log_contents: 225 | return 226 | if component not in self._log_filenames: 227 | raise ValueError(f"No known {component}.log files") 228 | 229 | datestamps = {"cesm": "model date =", "cpl": "tStamp"} 230 | try: 231 | datestamp = datestamps[component] 232 | except: 233 | raise ValueError(f"Do not know how to find dates in {component}.log") 234 | 235 | all_dates = [] 236 | contents = dict() 237 | for log in self._log_filenames[component]: 238 | # Open file 239 | is_gz = log.endswith("gz") 240 | if is_gz: 241 | local_open = gz.open 242 | mode = "rt" 243 | else: 244 | local_open = open 245 | mode = "r" 246 | with local_open(log, mode) as f: 247 | single_log_contents = f.readlines() 248 | 249 | # Look for datestamps in log; if none found, save contents as 'date_unknown' 250 | date_inds = np.where([datestamp in entry for entry in single_log_contents])[ 251 | 0 252 | ] 253 | if len(date_inds) == 0: 254 | date = log.split("/")[-1] 255 | if date not in contents: 256 | contents[date] = dict() 257 | contents[date][log] = single_log_contents 258 | continue 259 | 260 | # Set up list of dates and np array of indices 261 | dates_in_log = [ 262 | entry.split(datestamp)[1].strip()[:8] 263 | for entry in np.array(single_log_contents)[date_inds].tolist() 264 | ] 265 | # add first day of run to dates_in_log, and prepend 0 to date_inds 266 | date_inds = np.insert(date_inds, 0, 0) 267 | dates_in_log = add_first_date_and_reformat(dates_in_log) 268 | 269 | # for each date, add contents to dictionary 270 | for n, date in enumerate(dates_in_log[:-1]): 271 | if date not in contents: 272 | contents[date] = dict() 273 | contents[date][log] = single_log_contents[ 274 | date_inds[n] : date_inds[n + 1] 275 | ] 276 | 277 | # Need to account for partial days from runs that die 278 | # e.g. model crashes midway through 00010104 => need an 00010105 stamp (since we're using datestamp from end of the day, e.g. 
midnight the next day) 279 | if not is_gz: 280 | date = dates_in_log[-1] 281 | if date not in contents: 282 | contents[date] = dict() 283 | contents[date][log] = single_log_contents[date_inds[-1] :] 284 | 285 | self.log_contents[component] = dict() 286 | for key in sorted(contents): 287 | self.log_contents[component][key] = contents[key] 288 | 289 | ############################################################################ 290 | 291 | def get_catalog(self): 292 | """ 293 | Return intake esm catalog that was created / read in constructor 294 | """ 295 | return self.catalog 296 | 297 | ############################################################################ 298 | 299 | def get_dataset_source(self, stream, year, varname): 300 | 301 | # Does _dataset_src[stream] exist? 302 | if stream not in self._dataset_src: 303 | print(f"No datasets have been returned from {stream}") 304 | return None 305 | 306 | # Does _dataset_src[stream][year] exist? 307 | if year not in self._dataset_src[stream]: 308 | print( 309 | f"No datasets covering year {year:04} have been returned from {stream}" 310 | ) 311 | return None 312 | 313 | # Does _dataset_src[stream][year][varname] exist? 314 | if varname not in self._dataset_src[stream][year]: 315 | print( 316 | f"No dataset containing {varname} from year {year:04} have been returned from {stream}" 317 | ) 318 | return None 319 | 320 | return self._dataset_src[stream][year][varname] 321 | 322 | ############################################################################ 323 | 324 | def gen_dataset( 325 | self, 326 | varnames, 327 | stream, 328 | vars_to_keep=None, 329 | start_year=1, 330 | end_year=61, 331 | quiet=False, 332 | debug=False, 333 | **kwargs, 334 | ): 335 | """ 336 | Open all history files from a specified stream. Returns a dict where keys 337 | are stream names and values are xarray Datasets 338 | 339 | Pared-down API for working with intake-esm catalog. 340 | Users familiar with intake-esm may prefer self.get_catalog() and then querying directly. 
341 | """ 342 | if type(varnames) == str: 343 | varnames = [varnames] 344 | if type(varnames) != list: 345 | raise ValueError(f"{varnames} is not a string or list") 346 | 347 | if stream not in self._dataset_files: 348 | self._dataset_files[stream] = dict() 349 | self._dataset_src[stream] = dict() 350 | 351 | # Set some defaults to pass to open_mfdataset, then apply kwargs argument 352 | open_mfdataset_kwargs = dict() 353 | # data_vars="minimal", to avoid introducing time dimension to time-invariant fields 354 | open_mfdataset_kwargs["data_vars"] = "minimal" 355 | # compat="override", to skip var consistency checks (for speed) 356 | open_mfdataset_kwargs["compat"] = "override" 357 | # coords="minimal", because coords cannot be default="different" if compat="override" 358 | open_mfdataset_kwargs["coords"] = "minimal" 359 | # parallel=True to open files in parallel 360 | open_mfdataset_kwargs["parallel"] = True 361 | open_mfdataset_kwargs.update(kwargs) 362 | 363 | # Pull specific keys from open_mfdataset_kwargs to pass to xr.concat 364 | concat_keys = ["data_vars", "compat", "coords"] 365 | concat_kwargs = { 366 | key: value 367 | for key, value in open_mfdataset_kwargs.items() 368 | if key in concat_keys 369 | } 370 | 371 | # Make sure these variables are kept in all datasets 372 | _vars_to_keep = ["time_bound", "TAREA"] 373 | if vars_to_keep is not None: 374 | if type(vars_to_keep) == str: 375 | vars_to_keep = [vars_to_keep] 376 | if type(vars_to_keep) != list: 377 | raise ValueError(f"{vars_to_keep} is not a string or list") 378 | _vars_to_keep.extend(vars_to_keep) 379 | 380 | # Pare down time series file list (only contains years and variables we are interested in) 381 | ds_timeseries_per_var = [] 382 | for varname in varnames: 383 | timeseries_filenames = [] 384 | for year in range(start_year, end_year + 1): 385 | if year not in self._dataset_files[stream]: 386 | self._dataset_files[stream][year] = dict() 387 | self._dataset_src[stream][year] = dict() 388 | self._dataset_files[stream][year][varname] = self.get_timeseries_files( 389 | year, stream, varname 390 | ) 391 | if self._dataset_files[stream][year][varname]: 392 | self._dataset_src[stream][year][varname] = "time series" 393 | timeseries_filenames.extend( 394 | self._dataset_files[stream][year][varname] 395 | ) 396 | if timeseries_filenames: 397 | dsmf = xr.open_mfdataset(timeseries_filenames, **open_mfdataset_kwargs)[ 398 | [varname] + _vars_to_keep 399 | ] 400 | with xr.open_dataset(timeseries_filenames[0])[ 401 | [varname] + _vars_to_keep 402 | ] as ds0: 403 | if debug: 404 | print(open_mfdataset_kwargs) 405 | print_key_metadata( 406 | dsmf, "timeseries_filenames open_mfdataset dsmf" 407 | ) 408 | print_key_metadata( 409 | ds0, "timeseries_filenames open_mfdataset ds0" 410 | ) 411 | dict_copy_vals(ds0.encoding, dsmf.encoding, "unlimited_dims") 412 | dict_copy_vals( 413 | ds0["time"].encoding, 414 | dsmf["time"].encoding, 415 | ["dtype", "_FillValue", "units", "calendar"], 416 | ) 417 | ds_timeseries_per_var.append(dsmf) 418 | 419 | if ds_timeseries_per_var: 420 | ds_timeseries = xr.merge(ds_timeseries_per_var, combine_attrs="override") 421 | ds0 = ds_timeseries_per_var[0] 422 | if debug: 423 | print_key_metadata( 424 | ds_timeseries, "ds_timeseries_per_var merge ds_timeseries" 425 | ) 426 | print_key_metadata(ds0, "ds_timeseries_per_var merge ds0") 427 | dict_copy_vals(ds0.encoding, ds_timeseries.encoding, "unlimited_dims") 428 | tb_name_ts = ds_timeseries["time"].attrs["bounds"] 429 | tb = ds_timeseries[tb_name_ts] 430 | if 
tb.dtype == np.dtype("O"): 431 | start_year = int(tb.values[-1, 1].strftime("%Y")) 432 | else: 433 | # NOTE: this block will be used if decode_times=False in open_mfdataset() 434 | # If decode_times=False because cftime can not decode the time dimension, 435 | # then this will likely fail and we'll need a better way to determine 436 | # the last year read from time series. Maybe pull from filenames? 437 | decoded_tb = cftime.num2date( 438 | tb.values[-1, 1], 439 | tb.attrs["units"], 440 | calendar=ds_timeseries["time"].attrs["calendar"], 441 | ) 442 | start_year = int(decoded_tb.strftime("%Y")) 443 | 444 | # Pare down history file list 445 | history_filenames = [] 446 | for year in range(start_year, end_year + 1): 447 | if year not in self._dataset_files[stream]: 448 | self._dataset_files[stream][year] = dict() 449 | self._dataset_src[stream][year] = dict() 450 | self._dataset_files[stream][year][varname] = self.get_history_files( 451 | year, stream 452 | ) 453 | if self._dataset_files[stream][year][varname]: 454 | self._dataset_src[stream][year][varname] = "hist" 455 | history_filenames.extend(self._dataset_files[stream][year][varname]) 456 | 457 | if history_filenames: 458 | ds_history = xr.open_mfdataset(history_filenames, **open_mfdataset_kwargs)[ 459 | varnames + _vars_to_keep 460 | ] 461 | with xr.open_dataset(history_filenames[0])[varnames + _vars_to_keep] as ds0: 462 | if debug: 463 | print_key_metadata( 464 | ds_history, "history_filenames open_mfdataset ds_history" 465 | ) 466 | print_key_metadata(ds0, "history_filenames open_mfdataset ds0") 467 | dict_copy_vals(ds0.encoding, ds_history.encoding, "unlimited_dims") 468 | dict_copy_vals( 469 | ds0["time"].encoding, 470 | ds_history["time"].encoding, 471 | ["dtype", "_FillValue", "units", "calendar"], 472 | ) 473 | 474 | # Concatenate discovered datasets 475 | if ds_timeseries_per_var: 476 | if history_filenames: 477 | print( 478 | f'Time series ends at {ds_timeseries["time_bound"].values[-1,1]}, history files begin at {ds_history["time_bound"].values[0,0]}' 479 | ) 480 | ds = xr.concat([ds_timeseries, ds_history], dim="time", **concat_kwargs) 481 | if debug: 482 | print_key_metadata(ds, "xr.concat ds") 483 | print_key_metadata(ds_timeseries, "xr.concat ds_timeseries") 484 | print_key_metadata(ds_history, "xr.concat ds_history") 485 | for ds_src in [ds_timeseries, ds_history]: 486 | dict_copy_vals( 487 | ds_src["time"].encoding, 488 | ds["time"].encoding, 489 | ["dtype", "_FillValue", "units", "calendar"], 490 | ) 491 | else: 492 | ds = ds_timeseries 493 | else: 494 | if history_filenames: 495 | ds = ds_history 496 | else: 497 | raise ValueError( 498 | f"Can not find requested variables between {start_year:04} and {end_year:04}" 499 | ) 500 | 501 | ds = time_set_mid(ds, "time") 502 | 503 | if not quiet: 504 | print(f'Datasets contain a total of {ds.sizes["time"]} time samples') 505 | tb_name = ds["time"].attrs["bounds"] 506 | if not quiet: 507 | print(f"Last average written at {ds[tb_name].values[-1, 1]}") 508 | return ds 509 | --------------------------------------------------------------------------------
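For reference, a minimal usage sketch of the CaseClass and plotting utilities documented above, modeled on the plot-suite notebooks earlier in this repository. The case name and campaign path are the ones used in those notebooks; the diag_metadata entry is a simplified, hypothetical stand-in for an entry of the notebooks' diag_metadata list, and the snippet assumes it is run from the notebooks/ directory so that "import utils" resolves to notebooks/utils.

import os
import utils

casename = "g.e22b05.G1850ECOIAF_JRA.TL319_g17.cocco.001"
campaign_root = os.path.join(
    os.path.sep, "glade", "campaign", "cesm", "development",
    "bgcwg", "projects", "1deg_cocco_JRA", "cases",
)

# Locate history files, time series files, and logs for the case
case = utils.CaseClass(casename, os.path.join(campaign_root, casename))

# Hypothetical, pared-down diagnostic entry (the notebooks read these from a YAML list)
diag_metadata = {"varname": "FG_CO2"}

# Open years 95-104 of the monthly POP stream and generate summary plots as PNGs
ds = case.gen_dataset(diag_metadata["varname"], "pop.h", start_year=95, end_year=104)
da = ds[diag_metadata["varname"]]  # assumes no isel_dict selection is needed

utils.summary_plot_global_ts(
    ds, da, diag_metadata, time_coarsen_len=12, save_pngs=True, savefig_kwargs={"dpi": 72}
)
utils.summary_plot_histogram(
    ds, da, diag_metadata, save_pngs=True, savefig_kwargs={"dpi": 72}
)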