├── postproc_cosmo
    ├── constants.py
    ├── functions.py
    ├── settings.py
    └── extpar_adapt.py
├── step_01_extract_deltas
    ├── settings.py
    ├── constants.py
    ├── functions.py
    ├── CFday_cut_subdomain.sh
    ├── CFday_target_p_MPI-ESM1-2-HR.dat
    ├── Emon_add_top_from_Amon.sh
    ├── CFday_interp_to_plev.py
    ├── Emon_convert_hus_to_hur.py
    ├── extract_climate_delta.sh
    └── CFday_wget_scripts
    │   ├── wget_CFday_ssp585_ua.sh
    │   ├── wget_CFday_ssp585_va.sh
    │   └── wget_CFday_ssp585_hur.sh
├── .gitignore
├── constants.py
├── LICENSE
├── Documentations
    └── README_CMOR.md
├── parallel.py
├── README.md
├── settings.py
├── step_02_preproc_deltas.py
├── environment.yml
├── fields
    └── plot.py
└── step_03_apply_to_era.py


/postproc_cosmo/constants.py:
--------------------------------------------------------------------------------
1 | ../constants.py


--------------------------------------------------------------------------------
/postproc_cosmo/functions.py:
--------------------------------------------------------------------------------
1 | ../functions.py


--------------------------------------------------------------------------------
/postproc_cosmo/settings.py:
--------------------------------------------------------------------------------
1 | ../settings.py


--------------------------------------------------------------------------------
/step_01_extract_deltas/settings.py:
--------------------------------------------------------------------------------
1 | ../settings.py


--------------------------------------------------------------------------------
/step_01_extract_deltas/constants.py:
--------------------------------------------------------------------------------
1 | ../constants.py


--------------------------------------------------------------------------------
/step_01_extract_deltas/functions.py:
--------------------------------------------------------------------------------
1 | ../functions.py


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | target_grid*
 2 | bOut
 3 | __pycache__
 4 | .ipynb_checkpoints
 5 | 
 6 | Session.vim
 7 | .*.swp
 8 | 
 9 | *.txt
10 | *.nc
11 | 
12 | #xesmf
13 | PET*
14 | 


--------------------------------------------------------------------------------
/constants.py:
--------------------------------------------------------------------------------
1 | ### Source: COSMO source code, data_constants.f90
2 | # gas constant for dry air
3 | CON_RD = 287.05 # [J kg-1 K-1]
4 | # gravity constant (assumed constant over the entire profile)
5 | CON_G = 9.80665 # [m s-2]
6 | # ratio of molecular mass between water and dry air
7 | CON_MW_MD = 0.622 # [1]
8 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019, ETH Zurich 
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/step_01_extract_deltas/CFday_cut_subdomain.sh:
--------------------------------------------------------------------------------
 1 | download_data_dir=/net/o3/hymet_nobackup/heimc/data/pgw/download
 2 | subdom_data_dir=/net/o3/hymet_nobackup/heimc/data/pgw/download/subdomain
 3 | 
 4 | box=-73,37,-42,34
 5 | 
 6 | # Note: use ps,hurs,tas from Amon and zg from Emon
 7 | 
 8 | var_names=(ta hur ua va)
 9 | experiments=(historical ssp585)
10 | 
11 | for var_name in ${var_names[@]}; do
12 |     echo $var_name
13 |     for exp in ${experiments[@]}; do
14 |         echo $exp
15 |         if [[ "$exp" == "historical" ]]; then
16 |             years=(19850101-19891231 19900101-19941231 \
17 |                    19950101-19991231 20000101-20041231 \
18 |                    20050101-20091231 20100101-20141231)
19 |             years=(19850101-19891231 19900101-19941231)
20 |         elif [[ "$exp" == "ssp585" ]]; then
21 |             years=(20700101-20751231 20750101-20791231 \
22 |                    20800101-20841231 20850101-20891231 \
23 |                    20900101-20941231 20950101-20991231)
24 |             years=(20700101-20751231 20750101-20791231)
25 |         fi
26 |         for year in ${years[@]}; do
27 |             echo $year
28 |             cdo sellonlatbox,$box \
29 |                 $download_data_dir/${var_name}_CFday_MPI-ESM1-2-HR_${exp}_r1i1p1f1_gn_${year}.nc \
30 |                 $subdom_data_dir/${var_name}_CFday_MPI-ESM1-2-HR_${exp}_r1i1p1f1_gn_${year}.nc
31 |         done
32 |     done
33 | done
34 | 


--------------------------------------------------------------------------------
/step_01_extract_deltas/CFday_target_p_MPI-ESM1-2-HR.dat:
--------------------------------------------------------------------------------
  1 | 101000.
  2 | 100000.
  3 | 99000.
  4 | 98000.
  5 | 97000.
  6 | 96000.
  7 | 95000.
  8 | 94000.
  9 | 93000.
 10 | 92000.
 11 | 91000.
 12 | 90000.
 13 | 89000.
 14 | 88000.
 15 | 87000.
 16 | 86000.
 17 | 85000.
 18 | 84000.
 19 | 83000.
 20 | 82000.
 21 | 81000.
 22 | 80000.
 23 | 77500.
 24 | 75000.
 25 | 72500.
 26 | 70000.
 27 | 67500.
 28 | 65000.
 29 | 62500.
 30 | 60000.
 31 | 57500.
 32 | 55000.
 33 | 52500.
 34 | 50000.
 35 | 47500.
 36 | 45000.
 37 | 42500.
 38 | 40000.
 39 | 37500.
 40 | 35000.
 41 | 32500.
 42 | 30000.
 43 | 28000.
 44 | 26000.
 45 | 24000.
 46 | 22000.
 47 | 20000.
 48 | 19000.
 49 | 18000.
 50 | 17000.
 51 | 16000.
 52 | 15000.
 53 | 14000.
 54 | 13000.
 55 | 12000.
 56 | 11000.
 57 | 10401.23842568
 58 | 9464.68561727
 59 | 8611.75611196
 60 | 7834.96484375
 61 | 7127.05859375
 62 | 6480.85351562
 63 | 5891.58789062
 64 | 5354.86132812
 65 | 4865.74804688
 66 | 4419.8828125
 67 | 4013.44238281
 68 | 3642.94140625
 69 | 3305.20080566
 70 | 2997.32421875
 71 | 2716.67102051
 72 | 2460.83398438
 73 | 2227.61889648
 74 | 2015.02539062
 75 | 1821.22998047
 76 | 1644.57055664
 77 | 1483.60742188
 78 | 1337.09008789
 79 | 1203.85314941
 80 | 1082.81188965
 81 | 972.95776367
 82 | 873.35437012
 83 | 783.13342285
 84 | 701.49108887
 85 | 627.68395996
 86 | 561.02514648
 87 | 500.88085938
 88 | 446.6673584
 89 | 397.84741211
 90 | 353.92700195
 91 | 314.45275879
 92 | 279.00915527
 93 | 247.21544647
 94 | 218.72327423
 95 | 193.21455383
 96 | 170.39893341
 97 | 150.01178741
 98 | 131.81220245
 99 | 115.58110046
100 | 


--------------------------------------------------------------------------------
/Documentations/README_CMOR.md:
--------------------------------------------------------------------------------
 1 | # PGW-Simulation for CMOR input data
 2 | 
 3 | This is an attempt at a very practical explaination of how to set up a PGW simulation using global climate model data in the CMOR-Format as input (for example CMIP5 or CMIP6 data).
 4 | 
 5 | **What data to get?**
 6 | 
 7 | You will need data for the following variables: hur, ta, ua, va, zg, pa, hurs, tas, ts, tos
 8 | 
 9 | **What time resolution should one choose?**
10 | 
11 | Monthly mean data is the easiest. This is called e.g. Amon in CMOR. tos is part of Omon in CMOR.
12 | 
13 | **How to preprocess the data?**
14 | 
15 | For all variables we need to know how they will change under climate change. This needs to be expressed as a mean annual cycle of changes.
16 | In practice we can get a time slice of the "historical" period (HIST) and from a future period under a certain emission scenario (SCEN) such as "rcp85". A typical example: For the historical period, get data from 1971-2000. Then construct the mean annual cycle for 1971-2000, for example using the [cdo-command "ymonmean"](https://code.mpimet.mpg.de/projects/cdo/embedded/index.html#x1-5370002.8.33). Repeat for 2070-2099 and the rcp85 data. 
17 | Lastly, subtract the historical monthly-mean annual cycle from the future monthly-mean annual cycle. Save the result from the subtraction, or the difference between the two periods, as a single netcdf-file per variable (e.g. delta_ta.nc, delta_hurs.nc, ....).
18 | These netcdf files are needed as input for [setp_02_preproc_deltas.py](/setp_02_preproc_deltas.py), and the naming convention can be specified in [settings.py](/settings.py) (look for the dictionary "file_name_bases").
19 | 


--------------------------------------------------------------------------------
/postproc_cosmo/extpar_adapt.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | from netCDF4 import Dataset
 3 | from functions import load_delta
 4 | 
 5 | 
 6 | var_name_map = {
 7 |     'ts':   'T_CL',
 8 | }
 9 | 
10 | 
11 | 
12 | 
13 | def extpar_adapt(ext_file_path, delta_inp_path):
14 | 
15 |     ext_file = Dataset(ext_file_path, 'a')
16 | 
17 |     # update T_C
18 |     print('update deep soil temperature')
19 | 
20 |     delta_ts = load_delta(delta_inp_path, 'ts', None)
21 | 
22 |     ## Make sure dimensions are exactly the same.
23 |     ## There are numerical differences between CDO remapped objects
24 |     ## and xarray data...
25 |     #delta_ts = delta_ts.assign_coords(
26 |     #                {'rlat':ext_file.rlat.values,
27 |     #                 'rlon':ext_file.rlon.values})
28 | 
29 |     delta_ts_clim = delta_ts.mean(dim=['time'])
30 |     print(delta_ts_clim)
31 | 
32 |     ext_file['T_CL'][:] += delta_ts_clim.values.squeeze()
33 | 
34 |     ext_file.close()
35 |     print('Done.')
36 | 
37 | 
38 | 
39 | 
40 | 
41 | 
42 | 
43 | 
44 | if __name__ == "__main__":
45 | 
46 |     ## input arguments
47 |     parser = argparse.ArgumentParser(description =
48 |                     'COSMO-specific: Perturb Extpar soil temperature climatology with ts climate delta.')
49 |     # extpar file to modify
50 |     parser.add_argument('extpar_file_path', type=str,
51 |             help='Path to extpar file to modify T_CL.')
52 | 
53 |     # climate delta directory (already remapped to ERA5 grid)
54 |     parser.add_argument('-d', '--delta_input_dir', type=str, default=None,
55 |             help='Directory with GCM climate deltas to be used. ' +
56 |             'This directory should have a climate delta for ts ' +
57 |             'already horizontally remapped to the grid of ' +
58 |             'the extpar file which can perhaps be done with ' +
59 |             'step_02_preproc_deltas.py or otherwise with CDO.')
60 |     args = parser.parse_args()
61 |     print(args)
62 | 
63 |     extpar_adapt(args.extpar_file_path, args.delta_input_dir)
64 | 
65 | 


--------------------------------------------------------------------------------
/step_01_extract_deltas/Emon_add_top_from_Amon.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # base directory where output should be stored
 4 | out_base_dir=/net/o3/hymet_nobackup/heimc/data/pgw/deltas/native
 5 | 
 6 | # name of the GCM to extract data for
 7 | gcm_name=MPI-ESM1-2-HR
 8 | 
 9 | ## type of CMIP6 model output (e.g. monthly or daily, etc.)
10 | ## to use
11 | # high-resolution monthly data for only very few GCMs
12 | table_ID=Emon
13 | 
14 | ## select variables to add model top
15 | var_names=(ua va ta zg hur)
16 | #var_names=(hur)
17 | 
18 | ## CMIP experiments to use to compute climate deltas
19 | ## --> climate delta = future climatology - ERA climatology
20 | # CMIP experiment to use for ERA climatology 
21 | era_climate_experiment=historical
22 | # CMIP experiment to use for future climatology 
23 | future_climate_experiment=ssp585
24 | 
25 | # iterate over both experiments and climate delta
26 | experiments=($era_climate_experiment $future_climate_experiment delta)
27 | 
28 | 
29 | out_dir=$out_base_dir/$table_ID/$gcm_name
30 | 
31 | for var_name in ${var_names[@]}; do
32 |     echo "#################################################################"
33 |     echo $var_name
34 |     echo "#################################################################"
35 | 
36 | 
37 |     # add Amon model top values to Emon
38 |     if [[ "$table_ID" == "Emon" ]]; then
39 |         for experiment in ${experiments[@]}
40 |         do
41 |             echo $experiment
42 | 
43 |             #mv $out_dir/${var_name}_${experiment}.nc \
44 |             #    $out_dir/Emon_model_bottom_${var_name}_${experiment}.nc
45 |             cdo sellevel,100000,97500,95000,92500,90000,87500,85000,82500,80000,77500,75000,70000,65000,60000,55000,50000,45000,40000,35000,30000,25000,22500,20000,17500,15000,12500,10000 $out_dir/${var_name}_${experiment}.nc \
46 |                 $out_dir/Emon_model_bottom_${var_name}_${experiment}.nc
47 | 
48 |             Amon_out_base_dir=$out_base_dir/Amon
49 |             Amon_out_dir=$Amon_out_base_dir/$gcm_name
50 |             cdo sellevel,7000,5000,3000,2000,1000,500,100 \
51 |                 $Amon_out_dir/${var_name}_${experiment}.nc \
52 |                 $out_dir/Amon_model_top_${var_name}_${experiment}.nc
53 |             cdo -O merge \
54 |                 $out_dir/Emon_model_bottom_${var_name}_${experiment}.nc \
55 |                 $out_dir/Amon_model_top_${var_name}_${experiment}.nc \
56 |                 $out_dir/${var_name}_${experiment}.nc
57 |         done
58 |     fi
59 | 
60 | 
61 | done
62 | 
63 | 


--------------------------------------------------------------------------------
/parallel.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | description         Helper class for parallel computing.
  5 | author		    Christoph Heim
  6 | usage		    use in another script
  7 | """
  8 | ###############################################################################
  9 | import multiprocessing as mp
 10 | ###############################################################################
 11 | 
 12 | def starmap_helper(tup):
 13 |     func = tup['func']
 14 |     del tup['func']
 15 |     return(func(**tup))
 16 | 
 17 | 
 18 | def run_starmap(func, fargs={}, njobs=1, run_async=False):
 19 |     outputs = []
 20 |     if njobs > 1:
 21 |         pool = mp.Pool(processes=njobs)
 22 |         if run_async:
 23 |             outputs = pool.starmap_async(starmap_helper, fargs).get()
 24 |         else:
 25 |             outputs = pool.starmap(starmap_helper, fargs)
 26 |         pool.close()
 27 |         pool.join()
 28 |     else:
 29 |         for i in range(len(fargs)):
 30 |             out = func(**fargs[i])
 31 |             outputs.append(out)
 32 |     return(outputs)
 33 | 
 34 | 
 35 | 
 36 | class IterMP:
 37 | 
 38 |     def __init__(self, njobs=None, run_async=False):
 39 |         self.run_async = run_async
 40 | 
 41 |         if njobs is None:
 42 |             if len(sys.argv) > 1:
 43 |                 self.njobs = int(sys.argv[1])
 44 |             else:
 45 |                 self.njobs = 1
 46 |         else:
 47 |             self.njobs = njobs
 48 |         print('IterMP: njobs = '+str(self.njobs))
 49 | 
 50 |         self.output = None
 51 | 
 52 | 
 53 |     def run(self, func, fargs={}, step_args=None):
 54 |         outputs = []
 55 | 
 56 |         input = []
 57 |         for tI in range(len(step_args)):
 58 |             this_fargs = fargs.copy()
 59 |             if step_args is not None:
 60 |                 this_fargs.update(step_args[tI])
 61 | 
 62 |             if self.njobs > 1:
 63 |                 this_fargs['func'] = func
 64 |                 this_fargs = (this_fargs,)
 65 |             input.append(this_fargs)
 66 | 
 67 |         self.output = run_starmap(func, fargs=input,
 68 |                         njobs=self.njobs, run_async=self.run_async) 
 69 |         
 70 | 
 71 | 
 72 | 
 73 | 
 74 | def test_IMP(iter_arg, fixed_arg):
 75 |     #print(str(iter_arg) + ' ' + str(fixed_arg))
 76 |     work = []
 77 |     for i in range(int(1E7)):
 78 |         work.append(1)
 79 |     return(iter_arg)
 80 | 
 81 | 
 82 | 
 83 | if __name__ == '__main__':
 84 | 
 85 | 
 86 |     if len(sys.argv) > 1:
 87 |         njobs = int(sys.argv[1])
 88 |     else:
 89 |         njobs = 1
 90 |     
 91 |     # testing
 92 |     t0 = time.time()
 93 |     IMP = IterMP(njobs=njobs, run_async=False)
 94 |     fargs = {'fixed_arg':'fixed',}
 95 |     step_args = []
 96 |     for i in range(20):
 97 |         step_args.append({'iter_arg':i})
 98 |     IMP.run(test_IMP, fargs, step_args)
 99 |     print(IMP.output)
100 |     t1 = time.time()
101 |     print(t1 - t0)
102 | 
103 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Repository pgw-python
 2 | 
 3 | Software to modify ERA5 files to impose a large-scale climate change signal in ERA5 files 
 4 | as described e.g. here https://iopscience.iop.org/article/10.1088/1748-9326/ab4438 or also
 5 | here https://doi.org/10.1175/JCLI-D-18-0431.1
 6 | 
 7 | # General Documentation
 8 | To modify the ERA5 files, we need a climate change signal obtained from the difference between two GCM climatologies, HIST and SCEN. The climate change signal is thus SCEN-HIST and referred to as climate delta.
 9 | 
10 | The structure of the repository is built as follows:
11 | 
12 | The top level directory contains the central scripts to preprocess the GCM climate change signal [step_02_preproc_deltas.py](/step_02_preproc_deltas.py) and to modify the ERA5 files [step_03_apply_to_era.py](/step_03_apply_to_era.py) with the climate change signal.
13 | The subdirectory [step_01_extract_deltas](/step_01_extract_deltas/) contains less generic code that can serve as a template to obtain the GCM climatologies HIST, SCEN, as well as the climate delta SCEN-HIST from raw CMIP6 output. The starting point here is the script [extract_climate_delta.sh](/step_01_extract_deltas/extract_climate_delta.sh). This script has to be adjusted depending on the specific use case.
14 | 
15 | Note that essential usage-oriented information can be found by running `python step_02_preproc_deltas.py --help` and `python step_03_apply_to_era.py --help`.
16 | 
17 | Note that users that feed the processed ERA5 files into Int2lm (to run COSMO or ICON) should also modify the variable T_CL in their external parameter file, see postproc_cosmo for more information.
18 | 
19 | # Software Requirements
20 | 
21 | The software is written in python 3 and requires multiple python modules. The ennvironment-file **environment.yml** can be used to install a conda environment to run the software. More information about what conda is and how it works: https://docs.conda.io/projects/conda/en/latest/user-guide/index.html#
22 | 
23 | To install the enviroment, just execute `conda env create -f environment.yml` once conda is installed. 
24 | 
25 | # Workflows Based on Input Data
26 | 
27 | **Requeriments**
28 | 
29 | Annual climate deltas (SCEN-HIST) and a historical climatology (HIST) from a global climate model in either daily or monthly steps.
30 | Climate deltas refer to the difference between the fields predicted by the climate model between two different time periods (usually future and present). If climate model data in the CMOR format (e.g. CMIP simulations) will be used to force the PGW simulations there is a practical [documentation](/Documentations/README_CMOR.md) on which variables are needed.
31 | Template scripts to extract CMIP6 data are given in [step_01_extract_deltas](/step_01_extract_deltas/), e.g. [here](/step_01_extract_deltas/extract_climate_delta.sh).
32 | 
33 | After computing the raw climate deltas on the GCM grid, run the following scripts:
34 | 1) Only if using daily climate deltas instead of monthly (note that is not really recommended), smooth deltas in time: `python step_02_preproc_deltas.py smoothing [...]`
35 | 2) Regrid deltas to ERA5 grid: `python step_02_preproc_deltas.py regridding [...]`
36 | 3) Modify ERA5 files: `python step_03_apply_to_era.py [...]`
37 | 4) There may be some additional steps required for a specific limited-area model. For instance in COSMO, the deep soil temperature climatology has to be adjusted in the external parameters file. [postproc_cosmo](/postproc_cosmo/). 
38 | 5) After these steps, the limited-area-model-specific routine to convert ERA5 files to model initial and boundary conditions can be run using the modified ERA5 files as input.
39 | 
40 | # References
41 | To acknowledge this software cite the following article:
42 | 
43 | Brogli, R., Heim, C., Mensch, J., Sørland, S. L., & Schär, C. (2023). The pseudo-global-warming (PGW) approach: Methodology, software package PGW4ERA5 v1.1, validation and sensitivity analyses. Geoscientific Model Development, preprint. https://doi.org/10.5194/gmd-2022-167 
44 | 


--------------------------------------------------------------------------------
/settings.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | description     Settings namelist for all routines in PGW for ERA5
  5 | authors		Before 2022:    original developments by Roman Brogli
  6 |                 Since 2022:     upgrade to PGW for ERA5 by Christoph Heim 
  7 |                 2022:           udpates by Jonas Mensch
  8 | """
  9 | ##############################################################################
 10 | ##############################################################################
 11 | 
 12 | ### GENERAL SETTINGS
 13 | ##############################################################################
 14 | # debug output level
 15 | i_debug = 2 # [0-2]
 16 | 
 17 | # Input and output file naming convention for the HIST and climate delta
 18 | # (SCEN-HIST) files from the GCM.
 19 | # ({} is placeholder for variable name).
 20 | file_name_bases = {
 21 |     'SCEN-HIST':    '{}_delta.nc',
 22 |     'HIST':         '{}_historical.nc',
 23 | }
 24 | 
 25 | # File naming convention for ERA5 files to be read in and written out.
 26 | era5_file_name_base = 'cas{:%Y%m%d%H}0000.nc'
 27 | #era5_file_name_base = 'caf{:%Y%m%d%H}.nc'
 28 | 
 29 | # dimension names in ERA5 file
 30 | TIME_ERA        = 'time'
 31 | LON_ERA         = 'lon'
 32 | LAT_ERA         = 'lat'
 33 | LEV_ERA         = 'level'
 34 | HLEV_ERA        = 'level1'
 35 | SOIL_HLEV_ERA   = 'soil1'
 36 | 
 37 | # dimension names in GCM (used for all GCM variables except tos)
 38 | TIME_GCM        = 'time'
 39 | LON_GCM         = 'lon'
 40 | LAT_GCM         = 'lat'
 41 | PLEV_GCM        = 'plev'
 42 | LEV_GCM         = 'lev'
 43 | 
 44 | # dimension names in GCM ocean model (used for tos)
 45 | TIME_GCM_OCEAN  = 'time'
 46 | LON_GCM_OCEAN   = 'longitude'
 47 | LAT_GCM_OCEAN   = 'latitude'
 48 | 
 49 | ### VARIABLE LIST
 50 | ##############################################################################
 51 | # The names on the left side (dict keys) are CMOR convention names
 52 | # The names on the right side (dict values) are the name of the
 53 | # respective variables in the ERA5 files (Please adjust to ERA5 format used).
 54 | # Not all of these variables are required as climate deltas.
 55 | # Only zg,ta,hur,ua,va,tas,tos are required as climate delta (SCEN-HIST)
 56 | # while ps is required for the HIST climatology.
 57 | var_name_map = {
 58 | 
 59 |     ##### climate delta (SCEN-HIST) required
 60 |     ####################
 61 | 
 62 |     # 3D air temperature
 63 |     'ta'   :'T',
 64 |     # 3D lon-wind speed
 65 |     'ua'   :'U',
 66 |     # 3D lat-wind speed 
 67 |     'va'   :'V',
 68 |     # 3D air relative humidity
 69 |     'hur'  :'RELHUM',
 70 | 
 71 |     # geopotential
 72 |     'zg'   :'PHI', # used for pressure adjustment only
 73 | 
 74 |     # near-surface temperature
 75 |     'tas'  :None, # not modified in ERA5 (auxiliary field for computations)
 76 |     # near-surface relative humidity
 77 |     'hurs' :None, # not modified in ERA5 (auxiliary field for computations)
 78 |     # sea-surface temperature (SST)
 79 |     'tos'  :None, # not modified in ERA5 (auxiliary field for computations)
 80 | 
 81 | 
 82 |     ##### HIST climatology required
 83 |     ####################
 84 | 
 85 |     # surface pressure
 86 |     'ps'   :'PS', # auxiliary field for interpolation and pressure adjustm.
 87 | 
 88 | 
 89 |     ##### no GCM data required but ERA5 variable used by the code
 90 |     ####################
 91 | 
 92 |     # air specific humidity
 93 |     'hus'  :'QV',
 94 |     # surface geopotential
 95 |     'zgs'  :'FIS', # used for pressure adjustment
 96 |     # surface skin temperature
 97 |     'ts'   :'T_SKIN',
 98 |     # soil layer temperature
 99 |     'st'   :'T_SO',
100 |     # land area fraction
101 |     'sftlf':'FR_LAND',
102 |     # sea-ice area fraction
103 |     'sic':  'FR_SEA_ICE',
104 | }
105 | 
106 | 
107 | ### 02 PREPROCESS DELTAS 
108 | ##############################################################################
109 | 
110 | ### SMOOTHING
111 | ####################################
112 | 
113 | ### REGRIDDING
114 | ####################################
115 | # depending on whether the xesmf pacakge is installed, it can be used
116 | # for interpolation. Else, an xarray-based method is used.
117 | # the latter should be identical to XESMF
118 | # except for tiny differences that appear to originate from
119 | # numerical precision
120 | i_use_xesmf_regridding = 0
121 | 
122 | ## ## Nan-Ingoring kernel interpolation used for tos climate delta
123 | # maximum kernel radius
124 | # higher values imply that remote lakes (and bays) without GCM SST data will
125 | # receive data from further remote GCM SST grid points instead of falling
126 | # back to the tas (near surface temperature) climate delta
127 | nan_interp_kernel_radius = 1000000 # m
128 | # sharpness: decrease (increase) for smoother (sharper) interpolation
129 | nan_interp_sharpness = 4
130 | 
131 | 
132 | ### SURFACE PRESSURE ADJUSTMENT SETTINGS 
133 | ##########################################################################
134 | # reference pressure level
135 | # if set to None, the reference pressure level is chosen locally.
136 | # if the climate deltas have low vertical resolution (e.g. Amon data
137 | # with only 6 vertical levels between 1000-500 hPa), settting
138 | # p_ref_inp = None may help to improve the accuray of the
139 | # pressure adjustment. See publication for more information.
140 | p_ref_inp = 30000 # Pa
141 | #p_ref_inp = None
142 | # surface pressure adjustment factor in the iterative routine
143 | adj_factor = 0.95
144 | # convergence threshold (maximum geopotential error)
145 | # if procedure does not converge, raise this value a little bit.
146 | thresh_phi_ref_max_error = 0.15
147 | # maximum number of iterations before error is raised.
148 | max_n_iter = 20
149 | # re-interpolation turned on/off
150 | i_reinterp = 0
151 | ##########################################################################
152 | 
153 | 


--------------------------------------------------------------------------------
/step_01_extract_deltas/CFday_interp_to_plev.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | description     Extract CFday data
  5 | author			Christoph Heim
  6 | date created    21.02.2021
  7 | usage           no args
  8 | """
  9 | ###############################################################################
 10 | import os, argparse
 11 | import numpy as np
 12 | import xarray as xr
 13 | import matplotlib.pyplot as plt
 14 | from pathlib import Path
 15 | from functions import interp_logp_4d
 16 | from settings import (
 17 |     TIME_GCM, LEV_GCM, PLEV_GCM, LON_GCM, LAT_GCM,
 18 | )
 19 | ###############################################################################
 20 | 
 21 | 
 22 | ###############################################################################
 23 | ###############################################################################
 24 | ###############################################################################
 25 | 
 26 | 
 27 | if __name__ == '__main__':
 28 | 
 29 |     parser = argparse.ArgumentParser(description = 
 30 |                 'Interpolate CFday output to pressure levels')
 31 |     # variable name 
 32 |     parser.add_argument('var_names', type=str)
 33 |     # CMIP6 experiment
 34 |     parser.add_argument('experiment', type=str)
 35 |     args = parser.parse_args()
 36 | 
 37 |     inp_dir = '/net/o3/hymet_nobackup/heimc/data/pgw/download/subdomain'
 38 |     out_base_dir ='/net/o3/hymet_nobackup/heimc/data/pgw/download/interp_plev' 
 39 | 
 40 |     model_name = 'MPI-ESM1-2-HR'
 41 |     #var_name = 'ta'
 42 |     #experiment = 'ssp585'
 43 |     #experiment = 'historical'
 44 | 
 45 |     var_names = args.var_names.split(',')
 46 |     experiment = args.experiment
 47 | 
 48 |     times = {
 49 |         'ssp585': [
 50 |             '20700101-20741231',
 51 |             '20750101-20791231',
 52 |             '20800101-20841231',
 53 |             '20850101-20891231',
 54 |             '20900101-20941231',
 55 |             '20950101-20991231',
 56 |         ],
 57 |         'historical': [
 58 |             '19850101-19891231',
 59 |             '19900101-19941231',
 60 |             '19950101-19991231',
 61 |             '20000101-20041231',
 62 |             '20050101-20091231',
 63 |             '20100101-20141231',
 64 |         ],
 65 |     }
 66 |     for var_name in var_names:
 67 |         for time_ind in range(0,len(times[experiment])):
 68 |             print(time_ind)
 69 | 
 70 |             inp_file_name = '{}_CFday_{}_{}_r1i1p1f1_gn_{}.nc'.format(
 71 |                                     var_name, model_name, experiment, 
 72 |                                     times[experiment][time_ind])
 73 |             out_dir = os.path.join(out_base_dir, model_name)
 74 |             Path(out_dir).mkdir(parents=True, exist_ok=True)
 75 |             out_file_name = '{}_CFday_{}_{}_r1i1p1f1_gn_{}.nc'.format(
 76 |                                     var_name, model_name, experiment, 
 77 |                                     times[experiment][time_ind])
 78 | 
 79 |             # create input and output file paths
 80 |             out_file_path = os.path.join(out_dir, out_file_name)
 81 |             inp_file_path = os.path.join(inp_dir, inp_file_name)
 82 | 
 83 |             print('Process input file: \n{}\nto output file: \n{}'.format(
 84 |                     inp_file_path, out_file_path))
 85 | 
 86 |             ds = xr.open_dataset(inp_file_path)
 87 | 
 88 |             # sort pressure ascending
 89 |             ds = ds.reindex({LEV_GCM:ds['lev'][::-1]})
 90 |             # compute pressure on full levels
 91 |             source_P = (ds.ap + ds.b * ds.ps).transpose(
 92 |                                 TIME_GCM, LEV_GCM, LAT_GCM, LON_GCM)
 93 |             var = ds[var_name]
 94 | 
 95 | 
 96 | 
 97 |             ### Determine target pressure using tropical ocea-only domain
 98 |             #############################################################
 99 |             #mean_p = p.mean(dim=['lon','lat','time']).values
100 |             #print(np.around(mean_p[30:], -2))
101 |             #p_integ = np.arange(101000, 100000, -1000)
102 |             #p_integ = np.append(p_integ, np.arange(100000, 80000, -1000))
103 |             #p_integ = np.append(p_integ, np.arange(80000, 30000, -2500))
104 |             #p_integ = np.append(p_integ, np.arange(30000, 20000, -2000))
105 |             #p_integ = np.append(p_integ, np.arange(20000, 10000, -1000))
106 |             #p_integ = np.append(p_integ, mean_p[30:-22])
107 |             #print(p_integ)
108 |             ##plt.scatter(np.arange(len(p.lev.values)),
109 |             ##            p.mean(dim=['lon','lat','time']).values)
110 |             ##plt.show()
111 |             #quit()
112 | 
113 |             # load target pressure levels
114 |             targ_plev = np.sort(np.loadtxt('CFday_target_p_MPI-ESM1-2-HR.dat'))
115 |             targ_P = xr.DataArray(targ_plev, dims=[PLEV_GCM])
116 | 
117 |             # create 4d target pressure data array
118 |             targ_P = targ_P.expand_dims(
119 |                             dim={LON_GCM:var[LON_GCM],
120 |                                  LAT_GCM:var[LAT_GCM],
121 |                                  TIME_GCM:var[TIME_GCM]}).transpose(
122 |                                         TIME_GCM, PLEV_GCM, LAT_GCM, LON_GCM)
123 | 
124 |             # run interpolation from GCM model levels to constant pressure levels
125 |             var_out = interp_logp_4d(var, source_P, targ_P, extrapolate='constant',
126 |                                     time_key=TIME_GCM, lat_key=LAT_GCM,
127 |                                     lon_key=LON_GCM)
128 | 
129 |             # set pressure levels as coordinate
130 |             var_out = var_out.assign_coords(coords={PLEV_GCM:targ_plev})
131 |             
132 |             # sort for descending pressure
133 |             var_out = var_out.reindex(
134 |                     {PLEV_GCM:list(reversed(var_out[PLEV_GCM]))})
135 |             #print(var_out)
136 |             #var_out.to_netcdf('test.nc')
137 | 
138 |             # convert to dataset
139 |             ds_out = var_out.to_dataset(name=var_name)
140 | 
141 |             ## make sure to keep time encoding identical
142 |             ds_out.time.encoding = ds.time.encoding
143 |             ## make sure to keep attributes identical
144 |             for key,val in ds.time.attrs.items():
145 |                 ds_out.time.attrs[key] = val
146 |             for key,val in ds.lon.attrs.items():
147 |                 ds_out.lon.attrs[key] = val
148 |             for key,val in ds.lat.attrs.items():
149 |                 ds_out.lat.attrs[key] = val
150 |             for key,val in ds[var_name].attrs.items():
151 |                 ds_out[var_name].attrs[key] = val
152 | 
153 |             # save output file
154 |             ds_out.to_netcdf(out_file_path)
155 | 


--------------------------------------------------------------------------------
/step_01_extract_deltas/Emon_convert_hus_to_hur.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | description     Convert GCM specific humidity to relative humidity.
  5 | author			Christoph Heim
  6 | date created    08.02.2021
  7 | usage           no args
  8 | """
  9 | ###############################################################################
 10 | import os, argparse
 11 | import numpy as np
 12 | import xarray as xr
 13 | import matplotlib.pyplot as plt
 14 | ###############################################################################
 15 | 
 16 | def specific_to_relative_humidity(QV, P, T):
 17 |     """
 18 |     Compute relative humidity from specific humidity.
 19 |     """
 20 |     RH = 0.263 * P * QV *(np.exp(17.67*(T - 273.15)/(T-29.65)))**(-1)
 21 |     return(RH)
 22 | 
 23 | 
 24 | ###############################################################################
 25 | ###############################################################################
 26 | ###############################################################################
 27 | 
 28 | 
 29 | if __name__ == '__main__':
 30 | 
 31 |     parser = argparse.ArgumentParser(description = 
 32 |                 'Convert GCM specific humidity to relative humidity')
 33 |     # input specific humidity file
 34 |     parser.add_argument('hus_file', type=str)
 35 |     # input temperature file
 36 |     parser.add_argument('ta_file', type=str)
 37 |     # output relative humidity file
 38 |     parser.add_argument('hur_file', type=str)
 39 |     # Amon relative humidity file
 40 |     parser.add_argument('-a', '--amon_hur_file', type=None)
 41 |     args = parser.parse_args()
 42 | 
 43 |     #xr.set_options(keep_attrs=True)
 44 | 
 45 |     # temperature
 46 |     var_name = 'ta'
 47 |     ta = xr.open_dataset(args.ta_file, decode_cf=False).ta
 48 |     # specific humidity
 49 |     var_name = 'hus'
 50 |     ds = xr.open_dataset(args.hus_file, decode_cf=False)
 51 |     hus = ds.hus
 52 |     # pressure
 53 |     pa = ds.plev.expand_dims(
 54 |             dim={'lon':ds.lon, 'lat':ds.lat, 'time':ds.time})
 55 |     pa = pa.transpose('time','plev','lat','lon')
 56 | 
 57 |     if hus.shape != ta.shape:
 58 |         print(hus.shape)
 59 |         print(ta.shape)
 60 |         raise ValueError()
 61 | 
 62 |     hur = specific_to_relative_humidity(hus, pa, ta)
 63 | 
 64 |     ## If hur is given from the coarser dataset Amon
 65 |     ## it is here used to interpolate hur to the higher
 66 |     ## resolution using information from the high-resolved hur.
 67 |     ## The reason is that the high-resolved hur is computed
 68 |     ## based on monthly mean values and deviates a lot from 
 69 |     ## the true hur in the coarse data set on the coarse levels.
 70 |     ## Nevertheless, it contains information about the vertical
 71 |     ## variability and this is exploited here for a better
 72 |     ## informed vertical interpolation of the coarse hur
 73 |     ## to high resolution.
 74 |     ## This means that the above computed high-resolved hur
 75 |     ## is only indirectly used for the final hur output.
 76 |     amon_hur = xr.open_dataset(args.amon_hur_file, decode_cf=False).hur
 77 | 
 78 |     hur_interp = hur.copy()
 79 |     #print(amon_hur.plev.values)
 80 |     #print(hur.plev.values)
 81 | 
 82 |     for plev in hur.plev.values:
 83 |         if plev not in amon_hur.plev.values:
 84 |             print('{}: interpolate'.format(plev))
 85 |             plev_below = amon_hur.plev.where((amon_hur.plev-plev) > 0, np.nan)
 86 |             plev_below = amon_hur.plev.isel(plev=plev_below.argmin(dim='plev').values).values
 87 | 
 88 |             plev_above = amon_hur.plev.where((amon_hur.plev-plev) < 0, np.nan)
 89 |             plev_above = amon_hur.plev.isel(plev=plev_above.argmax(dim='plev').values).values
 90 | 
 91 |             #print(plev_above)
 92 |             #print(plev_below)
 93 | 
 94 |             hur_plev = hur.sel(plev=plev)
 95 |             hur_above = hur.sel(plev=plev_above)
 96 |             hur_below = hur.sel(plev=plev_below)
 97 | 
 98 |             #print(hur_above.isel(time=0,lon=10,lat=10).values)
 99 |             #print(hur_plev.isel(time=0,lon=10,lat=10).values)
100 |             #print(hur_below.isel(time=0,lon=10,lat=10).values)
101 | 
102 |             weight_above = 1 - np.abs(hur_plev - hur_above)/(
103 |                                 np.abs(hur_plev - hur_above) +
104 |                                 np.abs(hur_plev - hur_below))
105 |             weight_below = 1 - np.abs(hur_plev - hur_below)/(
106 |                                 np.abs(hur_plev - hur_above) +
107 |                                 np.abs(hur_plev - hur_below))
108 | 
109 |             #print(weight_above.isel(time=0,lon=10,lat=10).values)
110 |             #print(weight_below.isel(time=0,lon=10,lat=10).values)
111 | 
112 |             interp = (
113 |                 amon_hur.sel(plev=plev_above) * weight_above +
114 |                 amon_hur.sel(plev=plev_below) * weight_below 
115 |             )
116 |             #print(interp.isel(time=0,lon=10,lat=10).values)
117 |             #print()
118 |             hur_interp.loc[dict(plev=plev)] = interp
119 |             #quit()
120 |         else:
121 |             print('{}: take from Amon'.format(plev))
122 |             hur_interp.loc[dict(plev=plev)] = amon_hur.sel(plev=plev)
123 |     #quit()
124 | 
125 | 
126 |     handles = []
127 |     handle, = plt.plot(hur.mean(dim=['time','lon','lat']),
128 |              hur.plev, label='Emon hur=f(hus,ta)')
129 |     handles.append(handle)
130 |     handle, = plt.plot(amon_hur.mean(dim=['time','lon','lat']),
131 |              amon_hur.plev, label='Amon hur')
132 |     handles.append(handle)
133 |     handle, = plt.plot(hur_interp.mean(dim=['time','lon','lat']),
134 |              hur_interp.plev, label='Amon hur interpolated using Emon hur')
135 |     handles.append(handle)
136 |     plt.legend(handles=handles)
137 |     plt.ylim((100000,5000))
138 |     plt.ylabel('p [Pa]')
139 |     plt.xlabel('RH [%]')
140 |     plt.show()
141 |     ##quit()
142 | 
143 |     ds_out = ds.copy()
144 |     ds_out['hur'] = hur_interp
145 |     del ds_out['hus']
146 |     ds_out.attrs['variable_id'] = 'hur'
147 | 
148 |     ## make sure to keep time encoding identical
149 |     for key,val in ds.time.attrs.items():
150 |         ds_out.time.attrs[key] = val
151 |     for key,val in ds.lon.attrs.items():
152 |         ds_out.lon.attrs[key] = val
153 |     for key,val in ds.lat.attrs.items():
154 |         ds_out.lat.attrs[key] = val
155 |     for key,val in ds.hus.attrs.items():
156 |         if key == 'standard_name':
157 |             ds_out.hur.attrs[key] = 'relative_humidity'
158 |         if key == 'long_name':
159 |             ds_out.hur.attrs[key] = 'Relative Humidity'
160 |         else:
161 |             ds_out.hur.attrs[key] = val
162 | 
163 | 
164 |     ds_out.to_netcdf(args.hur_file)
165 | 
166 | 


--------------------------------------------------------------------------------
/step_02_preproc_deltas.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | description     PGW for ERA5 preprocessing of climate deltas
  5 | authors		Before 2022:    original developments by Roman Brogli
  6 |                 Since 2022:     upgrade to PGW for ERA5 by Christoph Heim 
  7 |                 2022:           udpates by Jonas Mensch
  8 | """
  9 | ##############################################################################
 10 | import os, argparse
 11 | import xarray as xr
 12 | import numpy as np
 13 | from pathlib import Path
 14 | from functions import filter_data, regrid_lat_lon, interp_wrapper
 15 | from settings import (
 16 |     i_debug,
 17 |     file_name_bases,
 18 |     LON_ERA, LAT_ERA,
 19 |     TIME_GCM, PLEV_GCM, LON_GCM, LAT_GCM,
 20 |     i_use_xesmf_regridding,
 21 |     nan_interp_kernel_radius,
 22 |     nan_interp_sharpness,
 23 | )
 24 | ##############################################################################
 25 | 
 26 | ## input arguments
 27 | parser = argparse.ArgumentParser(description =
 28 |             'PGW for ERA5: Preprocess GCM data before modifying ' +
 29 |             'the ERA5 files. The main PGW routine (step_03_apply_to_era.py) ' +
 30 |             'requires GCM climate delta files (SCEN-HIST) for ' +
 31 |             'ta,hur,ua,va,zg,hurs,tas,tos as well ' +
 32 |             'as the GCM HIST climatology file for ps. This script here by ' +
 33 |             'default preprocesses both the SCEN-HIST and HIST files ' +
 34 |             'for the variables it is run for. Both are thus required for ' +
 35 |             'every variable being processed. The script ' +
 36 |             'looks for the inputs files using the naming convention ' +
 37 |             '${var_name}_${file_name_base}.nc, where the ${file_name_base} ' +
 38 |             'for the SCEN-HIST and the HIST files ' +
 39 |             'can be set in settings.py. If this script ' +
 40 |             'is used to preprocess daily GCM data, one can run it twice and '+
 41 |             'store the intermediate results: once '+
 42 |             'for processing_step "smoothing" and once for "regridding". ' +
 43 |             'More details are given below.')
 44 | 
 45 | # processing step to perform during script execution
 46 | parser.add_argument('processing_step', type=str, 
 47 |             choices=['smoothing','regridding'],
 48 |             help='Possible processing steps are: ' +
 49 |             'smoothing: [For daily climate deltas, a smoothing of ' +
 50 |             'the annual cycle should be applied. For monthly ' +
 51 |             'climate deltas this is not necessary.] ' +
 52 |             'regridding: [If the climate deltas are not on the same ' +
 53 |             'horizontal grid as ERA5, they can be regridded here. '+
 54 |             'WARNING: The default interpolation routine ' +
 55 |             '(i_use_xesmf_regridding = 0) assumes a regular '+
 56 |             '(thus non-rotated) lat/lon grid for ' +
 57 |             'input (GCM data) and output (ERA5 data) grids! ' +
 58 |             'If this is not the case for the GCM data, using the ' +
 59 |             'xESMF package for regridding may help ' +
 60 |             '(i_use_xesmf_regridding = 1). However, such cases have not ' +
 61 |             'been tested in detail and may require code adjustments in the ' +
 62 |             'function "regrid_lat_lon" in "functions.py".]')
 63 | 
 64 | # input directory
 65 | parser.add_argument('-i', '--input_dir', type=str,
 66 |             help='Directory with input GCM data files (SCEN-HIST, HIST) ' +
 67 |             'for selected processing step.')
 68 | 
 69 | # output directory
 70 | parser.add_argument('-o', '--output_dir', type=str,
 71 |             help='Directory where the preprocessed output GCM data files ' +
 72 |             'for the selected processing step should be stored.')
 73 | 
 74 | # target ERA5 example file to take grid information
 75 | parser.add_argument('-e', '--era5_file_path', type=str, default=None,
 76 |             help='Path to example ERA5 file ' +
 77 |             'from which to take grid information for regridding.')
 78 | 
 79 | # variable(s) to process
 80 | parser.add_argument('-v', '--var_names', type=str,
 81 |             help='Variable names (e.g. ta) to process. Separate ' +
 82 |             'multiple variable names with "," (e.g. tas,ta). Default is ' +
 83 |             'to process all required variables ta,hur,ua,va,zg,hurs,tas,ps,tos,ts,siconc.',
 84 |             default='ta,hur,ua,va,zg,hurs,tas,ps,tos,ts,siconc')
 85 | 
 86 | 
 87 | args = parser.parse_args()
 88 | print(args)
 89 | ##############################################################################
 90 | 
 91 | # make sure required input arguments are set.
 92 | if args.input_dir is None:
 93 |     raise ValueError('Input directory (-i) is required.')
 94 | if args.output_dir is None:
 95 |     raise ValueError('Output directory (-o) is required.')
 96 | if (args.processing_step == 'regridding') and (args.era5_file_path is None):
 97 |     raise ValueError('era5_file_path is required for regridding step.')
 98 | 
 99 | # create output directory
100 | Path(args.output_dir).mkdir(exist_ok=True, parents=True)
101 | 
102 | # set up list of variable names
103 | var_names = args.var_names.split(',')
104 | print('Run {} for variable names {}.'.format(
105 |         args.processing_step, var_names))
106 | 
107 | 
108 | ##############################################################################
109 | # iterate over all variables to preprocess
110 | for var_name in var_names:
111 |     print(var_name)
112 |     # if var_name == 'ps':
113 |     #    clim_periods = ['HIST','SCEN-HIST']
114 |     # else:
115 |     #    clim_periods = ['SCEN-HIST']
116 |     clim_periods = ['HIST', 'SCEN-HIST']
117 |     # iterate over the two types of GCM data files
118 |     # (HIST and SCEN-HIST)
119 |     for clim_period in clim_periods:
120 | 
121 |         var_file_name = file_name_bases[clim_period].format(var_name)
122 | 
123 |         inp_file = os.path.join(args.input_dir, var_file_name)
124 |         out_file = os.path.join(args.output_dir, var_file_name)
125 | 
126 |         # open ERA5 file with target grid
127 |         ds_era5 = xr.open_dataset(args.era5_file_path)
128 | 
129 |         # smoothing
130 |         if args.processing_step == 'smoothing':
131 | 
132 |             filter_data(inp_file, var_name, out_file)
133 | 
134 |         # regridding
135 |         elif args.processing_step == 'regridding':
136 |             try:
137 |                 ds_gcm = xr.open_dataset(inp_file)
138 |             except:
139 |                 raise("Files for variable " + var_name + " are missing")
140 |             
141 |             ds_gcm = interp_wrapper(
142 |                 ds_gcm, 
143 |                 ds_era5, 
144 |                 var_name, 
145 |                 i_use_xesmf=i_use_xesmf_regridding,
146 |                 nan_interp_kernel_radius=nan_interp_kernel_radius,
147 |                 nan_interp_sharpness=nan_interp_sharpness,
148 |             )
149 |             
150 |             ds_gcm.to_netcdf(out_file)
151 | 


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
  1 | name: pgw
  2 | channels:
  3 |   - conda-forge
  4 |   - defaults
  5 | dependencies:
  6 |   - _libgcc_mutex=0.1=conda_forge
  7 |   - _openmp_mutex=4.5=2_gnu
  8 |   - alsa-lib=1.2.8=h166bdaf_0
  9 |   - attr=2.5.1=h166bdaf_1
 10 |   - bokeh=3.0.2=pyhd8ed1ab_0
 11 |   - brotli=1.0.9=h166bdaf_8
 12 |   - brotli-bin=1.0.9=h166bdaf_8
 13 |   - bzip2=1.0.8=h7f98852_4
 14 |   - c-ares=1.18.1=h7f98852_0
 15 |   - ca-certificates=2022.9.24=ha878542_0
 16 |   - certifi=2022.9.24=pyhd8ed1ab_0
 17 |   - cf_xarray=0.7.5=pyhd8ed1ab_0
 18 |   - cftime=1.6.2=py39h2ae25f5_1
 19 |   - click=8.0.4=py39hf3d152e_0
 20 |   - cloudpickle=2.2.0=pyhd8ed1ab_0
 21 |   - contourpy=1.0.6=py39hf939315_0
 22 |   - curl=7.86.0=h2283fc2_1
 23 |   - cycler=0.11.0=pyhd8ed1ab_0
 24 |   - cytoolz=0.12.0=py39hb9d737c_1
 25 |   - dask=2022.2.0=pyhd8ed1ab_0
 26 |   - dask-core=2022.2.0=pyhd8ed1ab_0
 27 |   - dbus=1.13.6=h5008d03_3
 28 |   - distributed=2022.2.0=py39hf3d152e_0
 29 |   - esmf=8.2.0=nompi_h61edca3_0
 30 |   - esmpy=8.2.0=nompi_py39hc8aa433_1
 31 |   - expat=2.5.0=h27087fc_0
 32 |   - fftw=3.3.10=nompi_hf0379b8_106
 33 |   - font-ttf-dejavu-sans-mono=2.37=hab24e00_0
 34 |   - font-ttf-inconsolata=3.000=h77eed37_0
 35 |   - font-ttf-source-code-pro=2.038=h77eed37_0
 36 |   - font-ttf-ubuntu=0.83=hab24e00_0
 37 |   - fontconfig=2.14.1=hc2a2eb6_0
 38 |   - fonts-conda-ecosystem=1=0
 39 |   - fonts-conda-forge=1=0
 40 |   - fonttools=4.38.0=py39hb9d737c_1
 41 |   - freetype=2.12.1=hca18f0e_1
 42 |   - fsspec=2022.11.0=pyhd8ed1ab_0
 43 |   - geos=3.11.1=h27087fc_0
 44 |   - gettext=0.21.1=h27087fc_0
 45 |   - glib=2.74.1=h6239696_1
 46 |   - glib-tools=2.74.1=h6239696_1
 47 |   - gst-plugins-base=1.21.2=h3e40eee_0
 48 |   - gstreamer=1.21.2=hd4edc92_0
 49 |   - gstreamer-orc=0.4.33=h166bdaf_0
 50 |   - hdf4=4.2.15=h9772cbc_5
 51 |   - hdf5=1.12.1=nompi_h4df4325_104
 52 |   - heapdict=1.0.1=py_0
 53 |   - icu=70.1=h27087fc_0
 54 |   - jack=1.9.21=h583fa2b_2
 55 |   - jinja2=3.1.2=pyhd8ed1ab_1
 56 |   - jpeg=9e=h166bdaf_2
 57 |   - keyutils=1.6.1=h166bdaf_0
 58 |   - kiwisolver=1.4.4=py39hf939315_1
 59 |   - krb5=1.19.3=h08a2579_0
 60 |   - lame=3.100=h166bdaf_1003
 61 |   - lcms2=2.14=h6ed2654_0
 62 |   - ld_impl_linux-64=2.39=hcc3a1bd_1
 63 |   - lerc=4.0.0=h27087fc_0
 64 |   - libblas=3.9.0=16_linux64_openblas
 65 |   - libbrotlicommon=1.0.9=h166bdaf_8
 66 |   - libbrotlidec=1.0.9=h166bdaf_8
 67 |   - libbrotlienc=1.0.9=h166bdaf_8
 68 |   - libcap=2.66=ha37c62d_0
 69 |   - libcblas=3.9.0=16_linux64_openblas
 70 |   - libclang=15.0.6=default_h2e3cab8_0
 71 |   - libclang13=15.0.6=default_h3a83d3e_0
 72 |   - libcups=2.3.3=h3e49a29_2
 73 |   - libcurl=7.86.0=h2283fc2_1
 74 |   - libdb=6.2.32=h9c3ff4c_0
 75 |   - libdeflate=1.14=h166bdaf_0
 76 |   - libedit=3.1.20191231=he28a2e2_2
 77 |   - libev=4.33=h516909a_1
 78 |   - libevent=2.1.10=h28343ad_4
 79 |   - libffi=3.4.2=h7f98852_5
 80 |   - libflac=1.4.2=h27087fc_0
 81 |   - libgcc-ng=12.2.0=h65d4601_19
 82 |   - libgcrypt=1.10.1=h166bdaf_0
 83 |   - libgfortran-ng=12.2.0=h69a702a_19
 84 |   - libgfortran5=12.2.0=h337968e_19
 85 |   - libglib=2.74.1=h606061b_1
 86 |   - libgomp=12.2.0=h65d4601_19
 87 |   - libgpg-error=1.45=hc0c96e0_0
 88 |   - libiconv=1.17=h166bdaf_0
 89 |   - liblapack=3.9.0=16_linux64_openblas
 90 |   - libllvm11=11.1.0=he0ac6c6_5
 91 |   - libllvm15=15.0.6=h63197d8_0
 92 |   - libnetcdf=4.8.1=nompi_h329d8a1_102
 93 |   - libnghttp2=1.47.0=hff17c54_1
 94 |   - libnsl=2.0.0=h7f98852_0
 95 |   - libogg=1.3.4=h7f98852_1
 96 |   - libopenblas=0.3.21=pthreads_h78a6416_3
 97 |   - libopus=1.3.1=h7f98852_1
 98 |   - libpng=1.6.39=h753d276_0
 99 |   - libpq=15.1=h67c24c5_1
100 |   - libsndfile=1.1.0=hcb278e6_1
101 |   - libsqlite=3.40.0=h753d276_0
102 |   - libssh2=1.10.0=hf14f497_3
103 |   - libstdcxx-ng=12.2.0=h46fd767_19
104 |   - libsystemd0=252=h2a991cd_0
105 |   - libtiff=4.4.0=h55922b4_4
106 |   - libtool=2.4.6=h9c3ff4c_1008
107 |   - libudev1=252=h166bdaf_0
108 |   - libuuid=2.32.1=h7f98852_1000
109 |   - libvorbis=1.3.7=h9c3ff4c_0
110 |   - libwebp-base=1.2.4=h166bdaf_0
111 |   - libxcb=1.13=h7f98852_1004
112 |   - libxkbcommon=1.0.3=he3ba5ed_0
113 |   - libxml2=2.10.3=h7463322_0
114 |   - libzip=1.9.2=hc929e4a_1
115 |   - libzlib=1.2.13=h166bdaf_4
116 |   - llvmlite=0.39.1=py39h7d9a04d_1
117 |   - locket=1.0.0=pyhd8ed1ab_0
118 |   - lz4-c=1.9.3=h9c3ff4c_1
119 |   - markupsafe=2.1.1=py39hb9d737c_2
120 |   - matplotlib=3.5.1=py39hf3d152e_0
121 |   - matplotlib-base=3.5.1=py39h2fa2bec_0
122 |   - mpg123=1.31.1=h27087fc_0
123 |   - msgpack-python=1.0.4=py39hf939315_1
124 |   - munkres=1.1.4=pyh9f0ad1d_0
125 |   - mysql-common=8.0.31=h26416b9_0
126 |   - mysql-libs=8.0.31=hbc51c84_0
127 |   - ncurses=6.3=h27087fc_1
128 |   - netcdf-fortran=4.5.4=nompi_h2b6e579_100
129 |   - netcdf4=1.5.8=nompi_py39h64b754b_101
130 |   - nspr=4.35=h27087fc_0
131 |   - nss=3.82=he02c5a1_0
132 |   - numba=0.56.4=py39h61ddf18_0
133 |   - numpy=1.23.5=py39h3d75532_0
134 |   - openjpeg=2.5.0=h7d73246_1
135 |   - openssl=3.0.7=h0b41bf4_1
136 |   - packaging=21.3=pyhd8ed1ab_0
137 |   - pandas=1.5.2=py39h4661b88_0
138 |   - partd=1.3.0=pyhd8ed1ab_0
139 |   - pcre2=10.40=hc3806b6_0
140 |   - pillow=9.2.0=py39hf3a2cdf_3
141 |   - pip=22.3.1=pyhd8ed1ab_0
142 |   - ply=3.11=py_1
143 |   - psutil=5.9.4=py39hb9d737c_0
144 |   - pthread-stubs=0.4=h36c2ea0_1001
145 |   - pulseaudio=16.1=h126f2b6_0
146 |   - pyparsing=3.0.9=pyhd8ed1ab_0
147 |   - pyqt=5.15.7=py39h18e9c17_2
148 |   - pyqt5-sip=12.11.0=py39h5a03fae_2
149 |   - python=3.9.15=hba424b6_0_cpython
150 |   - python-dateutil=2.8.2=pyhd8ed1ab_0
151 |   - python_abi=3.9=3_cp39
152 |   - pytz=2022.6=pyhd8ed1ab_0
153 |   - pyyaml=6.0=py39hb9d737c_5
154 |   - qt-main=5.15.6=he99da89_3
155 |   - readline=8.1.2=h0f457ee_0
156 |   - scipy=1.9.3=py39hddc5342_2
157 |   - setuptools=59.8.0=py39hf3d152e_1
158 |   - shapely=1.8.5=py39h76a96b7_2
159 |   - sip=6.7.5=py39h5a03fae_0
160 |   - six=1.16.0=pyh6c4a22f_0
161 |   - sortedcontainers=2.4.0=pyhd8ed1ab_0
162 |   - sparse=0.13.0=pyhd8ed1ab_0
163 |   - tblib=1.7.0=pyhd8ed1ab_0
164 |   - tk=8.6.12=h27826a3_0
165 |   - toml=0.10.2=pyhd8ed1ab_0
166 |   - toolz=0.12.0=pyhd8ed1ab_0
167 |   - tornado=6.1=py39hb9d737c_3
168 |   - tzdata=2022g=h191b570_0
169 |   - unicodedata2=15.0.0=py39hb9d737c_0
170 |   - wheel=0.38.4=pyhd8ed1ab_0
171 |   - xarray=2022.12.0=pyhd8ed1ab_0
172 |   - xcb-util=0.4.0=h166bdaf_0
173 |   - xcb-util-image=0.4.0=h166bdaf_0
174 |   - xcb-util-keysyms=0.4.0=h166bdaf_0
175 |   - xcb-util-renderutil=0.3.9=h166bdaf_0
176 |   - xcb-util-wm=0.4.1=h166bdaf_0
177 |   - xesmf=0.6.2=pyhd8ed1ab_0
178 |   - xorg-libxau=1.0.9=h7f98852_0
179 |   - xorg-libxdmcp=1.1.3=h7f98852_0
180 |   - xyzservices=2022.9.0=pyhd8ed1ab_0
181 |   - xz=5.2.6=h166bdaf_0
182 |   - yaml=0.2.5=h7f98852_2
183 |   - zict=2.2.0=pyhd8ed1ab_0
184 |   - zlib=1.2.13=h166bdaf_4
185 |   - zstd=1.5.2=h6239696_4
186 |   - pip:
187 |     - aiohttp==3.8.3
188 |     - aiosignal==1.3.1
189 |     - appdirs==1.4.4
190 |     - async-timeout==4.0.2
191 |     - attrs==22.1.0
192 |     - charset-normalizer==2.1.1
193 |     - frozenlist==1.3.3
194 |     - idna==3.4
195 |     - imageio==2.22.4
196 |     - multidict==6.0.3
197 |     - pooch==1.6.0
198 |     - pyproj==3.4.0
199 |     - pyvista==0.37.0
200 |     - requests==2.28.1
201 |     - scooby==0.7.0
202 |     - urllib3==1.26.13
203 |     - vtk==9.2.2
204 |     - wslink==1.9.1
205 |     - yarl==1.8.2
206 | prefix: /home/heimc/miniconda3/envs/pgw
207 | 


--------------------------------------------------------------------------------
/step_01_extract_deltas/extract_climate_delta.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | ##############################################################################
  3 | ## Template script to extract climate deltas from CMIP GCMs.
  4 | ## Computes climatologies and the climate deltas for specific CMIP 
  5 | ## experiments and members.
  6 | ## The script only serves as inspiration for the extraction of the climate
  7 | ## deltas and it is not generally valid and has to be adjusted 
  8 | ## for specific use case.
  9 | 
 10 | ##### IMPORTANT NOTES FOR Emon DATA USERS:
 11 | ## FIRST
 12 | ## hur is not available in the Emon output and has to be computed from hus.
 13 | ## This is not excat because it matters if hur is computed from hus
 14 | ## on the model levels every time the GCM writes an output or whether it is
 15 | ## computed on the monthly-aggregated output on pressure levels.
 16 | ## Therefore, the hur is used from the coarse resolution output group Amon.
 17 | ## However, the Amon output is vertically interpolated to the higher resolution
 18 | ## of Emon using information from the computed Emon hur=f(hus,ta)
 19 | ## data. This helps to do a "better informed" vertical interpolation of the
 20 | ## coarse Amon hur to the higher resolved Emon grid.
 21 | ## This steps are done by the convert_Emon_hus_to_hur.py script which is
 22 | ## called automatically in the current implementation.
 23 | ## Consequently, Amon must have been exctracted before Emon can be!
 24 | ## SECOND
 25 | ## After computing hur for Emon, the delta still has to be computed
 26 | ## by running the script again only for var_names=(hur).
 27 | ## (turn off i_exctract_vars but keep i_compute_delta)
 28 | ## THIRD
 29 | ## The Emon data does not reach up as far as the Amon data
 30 | ## Thus, after completing this script, run add_Emon_model_top.sh
 31 | ## to add the model top data from Amon to the Emon fields.
 32 | ## FOURTH
 33 | ## These Emon fixes are still a bit experimental and not very user-friendly.
 34 | ## Sorry for that!
 35 | ##############################################################################
 36 | Usage="
 37 | ##############################################################################
 38 | ## Template script to extract climate deltas from CMIP GCMs.
 39 | ## Computes climatologies and the climate deltas for specific CMIP 
 40 | ## experiments and members.
 41 | ## The script only serves as inspiration for the extraction of the climate
 42 | ## deltas and it is not generally valid and has to be adjusted 
 43 | ## for specific use case.
 44 | 
 45 | -h to see this message
 46 | <dataset_name> to get the desired delta from this dataset
 47 |     Curently supported are: Amon, Omon, Emon, day, CFday and SImon"
 48 | 
 49 | while getopts 'h' flag; do
 50 |   case "${flag}" in
 51 |     h) echo "$Usage" ; exit 0;
 52 |   esac
 53 | done
 54 | 
 55 | # USER SETTINGS
 56 | ##############################################################################
 57 | # base directory where cmip6 data is stored
 58 | cmip_data_dir=/net/atmos/data/cmip6/
 59 | # base directory where output should be stored
 60 | #
 61 | out_base_dir=.
 62 | 
 63 | # name of the GCM to extract data for
 64 | gcm_name=MPI-ESM1-2-HR
 65 | 
 66 | ## CMIP experiments to use to compute climate deltas
 67 | ## --> climate delta = future climatology - ERA climatology
 68 | # CMIP experiment to use for ERA climatology 
 69 | era_climate_experiment=historical
 70 | # CMIP experiment to use for future climatology 
 71 | future_climate_experiment=ssp585
 72 | 
 73 | 
 74 | ## type of CMIP6 model output (e.g. monthly or daily, etc.)
 75 | ## to use
 76 | # standard monthly output
 77 | #table_ID=Amon
 78 | ## high-resolution monthly data for only very few GCMs
 79 | #table_ID=Emon
 80 | ## standard daily output
 81 | #table_ID=day
 82 | ## CFMIP daily output
 83 | #table_ID=CFday
 84 | ## ocean monthly output
 85 | #table_ID=Omon
 86 | 
 87 | ##Experimental addition to use command-line args to select type
 88 | ##Can (should) be done in a foor loop so that it runs for multiple data sets at the same time
 89 | if [[ "$1" == "Amon" ]]; then
 90 |     table_ID=$1
 91 | elif [[ "$1" == "Omon" ]]; then
 92 |     table_ID=$1
 93 | elif [[ "$1" == "Emon" ]]; then
 94 |     table_ID=$1
 95 | elif [[ "$1" == "day" ]]; then
 96 |     table_ID=$1
 97 | elif [[ "$1" == "CFday" ]]; then
 98 |     table_ID=$1
 99 | elif [[ "$1" == "SImon" ]]; then
100 | 	table_ID=$1
101 | elif [ $# -eq 0 ]; then
102 |     echo "Please specify which dataset the deltas should be extracted from. See -h for more info"
103 |     exit 0
104 | else
105 |     txt="This dataset is currently not supported
106 | Check -h for all supported datasets"
107 |     echo "$txt"
108 |     exit 0
109 | fi
110 | 
111 | ## select variables to extract
112 | if [[ "$table_ID" == "Amon" ]]; then
113 |     var_names=(ts tas hurs ps ua va ta hur zg)
114 | elif [[ "$table_ID" == "day" ]]; then
115 |     var_names=(tas hurs ps ua va ta hur zg)
116 | elif [[ "$table_ID" == "Emon" ]]; then
117 |     var_names=(ua va ta hus zg)
118 | elif [[ "$table_ID" == "CFday" ]]; then
119 |     var_names=(ua va ta hur)
120 | elif [[ "$table_ID" == "Omon" ]]; then
121 |     var_names=(tos)
122 | elif [[ "$table_ID" == "SImon" ]]; then
123 |     var_names=(siconc)
124 | fi
125 | ## for Emon, compute hur delta separately after 
126 | ## it has been derived from hus
127 | ## (turn off i_exctract_vars but keep i_compute_delta)
128 | #var_names=(hur)
129 | 
130 | 
131 | # should variables be exctracted for the two climatologies=
132 | i_extract_vars=1
133 | # should climate deltas be computed?
134 | i_compute_delta=1
135 | # for Emon, hur is not available and needs to be approximated
136 | # using the high-resolution hus climatology, as well as the hur climatology
137 | if [[ "$table_ID" == "Emon" ]]; then
138 |     i_convert_hus_to_hur=1
139 | else
140 |     i_convert_hus_to_hur=0
141 | fi
142 | 
143 | ## subdomain for which to extract GCM data
144 | ## should be either global (0,360,-90,90)
145 | ## or anything larger than ERA5 subdomain
146 | ## except for storage and performance reasons, there is no benefit of
147 | ## using a subdomain.
148 | box=0,360,-90,90
149 | # subdomain
150 | #box=-74,40,-45,35
151 | #box=-73,37,-42,34
152 | 
153 | # select appropriate cdo time aggregation command
154 | # depending if input data is monthly or daily.
155 | if [[ "$table_ID" == "day" ]]; then
156 |     cdo_agg_command=ydaymean
157 | else 
158 |     cdo_agg_command=ymonmean
159 | fi
160 | 
161 | # iterate over both experiments to extract data
162 | experiments=($era_climate_experiment $future_climate_experiment)
163 | 
164 | ##############################################################################
165 | 
166 | out_dir=$out_base_dir/$table_ID/$gcm_name
167 | echo $out_dir
168 | mkdir -p $out_dir
169 | 
170 | for var_name in ${var_names[@]}; do
171 |     echo "#################################################################"
172 |     echo $var_name
173 |     echo "#################################################################"
174 | 
175 |     for experiment in ${experiments[@]}; do
176 |         echo "#######################################"
177 |         echo $experiment
178 |         echo "#######################################"
179 | 
180 |         if [[ $i_extract_vars == 1 ]]; then
181 | 
182 |             # data folder hierarchy for CMIP6
183 |             inp_dir=$cmip_data_dir/$experiment/$table_ID/$var_name/$gcm_name/r1i1p1f1/gn
184 | 
185 |             # start of the CMIP6 file names
186 |             file_name_base=${table_ID}_${gcm_name}_${experiment}_r1i1p1f1_gn
187 | 
188 |             ## overwrite old data
189 |             #rm $out_dir/${var_name}_${experiment}.nc
190 | 
191 |             ## compute ERA climatology
192 |             if [[ "$experiment" == "$era_climate_experiment" ]]; then
193 |                 # extract full time series
194 |                 cdo -L -sellonlatbox,$box \
195 |                     -selyear,1985/2014 \
196 |                     -cat \
197 |                     $inp_dir/${var_name}_${file_name_base}_19[8-9]*.nc \
198 |                     $inp_dir/${var_name}_${file_name_base}_20[0-1]*.nc \
199 |                     $out_dir/${var_name}_${experiment}_full.nc
200 | 
201 |             ## compute future experiment climatology
202 |             elif [[ "$experiment" == "$future_climate_experiment" ]]; then
203 |                 # extract full time series
204 |                 cdo -L -sellonlatbox,$box \
205 |                     -selyear,2070/2099 \
206 |                     -cat \
207 |                     $inp_dir/${var_name}_${file_name_base}_20[6-9]*.nc \
208 |                     $out_dir/${var_name}_${experiment}_full.nc
209 |             fi
210 | 
211 |             # aggregate to yearly monthly/daily means
212 |             # in principal this could be done directly during extraction
213 |             # step above. However, for Emon computation of hur from hus
214 |             # this should be done on the basis of monthly values not
215 |             # with the mean annual cycle. Due to this, the full time
216 |             # series is stored as well.
217 |             cdo -L -$cdo_agg_command \
218 |                 $out_dir/${var_name}_${experiment}_full.nc \
219 |                 $out_dir/${var_name}_${experiment}.nc
220 |         fi
221 | 
222 |         ## convert hus to hur if required
223 |         if [[ $i_convert_hus_to_hur == 1 ]]; then
224 |             if [[ "$var_name" == "hus" ]]; then
225 |                 echo Convert Emon hus to hur using Amon hur data.
226 | 
227 |                 Amon_out_dir=$out_base_dir/Amon/$gcm_name
228 | 
229 |                 python Emon_convert_hus_to_hur.py  \
230 |                     $out_dir/hus_${experiment}_full.nc \
231 |                     $out_dir/ta_${experiment}_full.nc \
232 |                     $out_dir/hur_${experiment}_full.nc \
233 |                     -a $Amon_out_dir/hur_${experiment}_full.nc
234 |                     
235 |                 # aggregate to yearly monthly/daily means
236 |                 cdo -L -$cdo_agg_command \
237 |                     $out_dir/hur_${experiment}_full.nc \
238 |                     $out_dir/hur_${experiment}.nc
239 |             fi
240 |         fi
241 | 
242 |     done
243 | 
244 |     ## compute delta (future climatology - ERA climatology)
245 |     if [[ $i_compute_delta == 1 ]]; then
246 |         cdo -L -sub $out_dir/${var_name}_$future_climate_experiment.nc \
247 |                 $out_dir/${var_name}_$era_climate_experiment.nc \
248 |                 $out_dir/${var_name}_delta.nc
249 |     fi
250 | 
251 | done
252 | 
253 | # link surface fields from Amon because they are not available
254 | # in Emon
255 | if [[ "$table_ID" == "Emon" ]]; then
256 |     cd $out_dir
257 |     ln -s $out_base_dir/Amon/$gcm_name/*s_*.nc .
258 |     cd -
259 | fi
260 | 
261 | 


--------------------------------------------------------------------------------
/fields/plot.py:
--------------------------------------------------------------------------------
  1 | # -------------------------------------------------------------------------------
  2 | # modules
  3 | #
  4 | import xarray as xr
  5 | import matplotlib.pyplot as plt
  6 | import numpy as np
  7 | import cartopy.crs as ccrs
  8 | import matplotlib.gridspec as gridspec
  9 | import cmcrameri.cm as cmc
 10 | from matplotlib.colors import BoundaryNorm, LinearSegmentedColormap
 11 | import matplotlib
 12 | 
 13 | font = {'size': 12}
 14 | matplotlib.rc('font', **font)
 15 | 
 16 | def drywet(numcolors, colormap):
 17 | 
 18 |     colors_blue = colormap(np.linspace(0.5, 1, 5))
 19 |     colors_white = np.array([1, 1, 1, 1])
 20 |     colors_brown = [[84, 48, 5, 255],
 21 |                     [140, 81, 10, 255],
 22 |                     [191, 129, 45, 255],
 23 |                     [223, 194, 125, 255],
 24 |                     [246, 232, 195, 255]]
 25 |     rgb = []
 26 |     for i in range(len(colors_brown)):
 27 |         z = [x / 255 for x in colors_brown[i]]
 28 |         rgb.append(z)
 29 |     colors = np.vstack((rgb, colors_white, colors_blue))
 30 | 
 31 |     cmap = LinearSegmentedColormap.from_list(name=colormap, colors=colors, N=numcolors)
 32 | 
 33 |     return cmap
 34 | 
 35 | # -------------------------------------------------------------------------------
 36 | # read data
 37 | # %%
 38 | var_name = 'FR_SEA_ICE'
 39 | 
 40 | sims = ['new', 'old', 'diff']
 41 | friac = {}
 42 | labels = {'new': 'Sea Ice update', 'old': 'Sea Ice static', 'diff': 'Difference between versions'}
 43 | 
 44 | for s in range(len(sims)):
 45 |     sim = sims[s]
 46 |     friac[sim] = {}
 47 |     friac[sim]['label'] = labels[sim]
 48 |     data = xr.open_dataset(f'{sim}_version.nc')
 49 |     dt = data[var_name].values[0, :, :]
 50 |     friac[sim][var_name] = dt
 51 | # %%
 52 | lat = xr.open_dataset('old_version.nc')['lat'].values[:]
 53 | lon = xr.open_dataset('old_version.nc')['lon'].values[:]
 54 | lat_, lon_ = np.meshgrid(lon, lat)
 55 | print("load done")
 56 | # -------------------------------------------------------------------------------
 57 | # plot
 58 | # %%
 59 | ar = 1.0  # initial aspect ratio for first trial
 60 | wi = 12  # height in inches #15
 61 | hi = 2.5  # width in inches #10
 62 | ncol = 3  # edit here
 63 | nrow = 1
 64 | axs, cs, gl = np.empty(shape=(nrow, ncol), dtype='object'), np.empty(shape=(nrow, ncol), dtype='object'), np.empty(shape=(nrow, ncol), dtype='object')
 65 | 
 66 | cmap1 = cmc.davos_r
 67 | levels1 = np.linspace(0, 100, 21, endpoint=True)
 68 | norm1 = BoundaryNorm(levels1, ncolors=cmap1.N, clip=True)
 69 | 
 70 | cmap2 = drywet(25, cmc.vik_r)
 71 | levels2 = np.linspace(0, 40, 11, endpoint=True)
 72 | norm2 = BoundaryNorm(levels2, ncolors=cmap2.N, clip=True)
 73 | 
 74 | # change here the lat and lon
 75 | map_ext = [-50, 50, 40, 90]
 76 | 
 77 | fig = plt.figure(figsize=(wi, hi))
 78 | left, bottom, right, top = 0.07, 0.01, 0.94, 0.95
 79 | gs = gridspec.GridSpec(nrows=1, ncols=3, left=left, bottom=bottom, right=right, top=top,
 80 |                        wspace=0.1, hspace=0.15)
 81 | 
 82 | for i in range(3):
 83 |     sim = sims[i]
 84 |     label = friac[sim]['label']
 85 |     axs[0, i] = fig.add_subplot(gs[0, i], projection=ccrs.PlateCarree())
 86 |     axs[0, i].set_extent(map_ext, crs=ccrs.PlateCarree())
 87 |     axs[0, i].coastlines(zorder=3)
 88 |     axs[0, i].stock_img()
 89 |     gl[0, i] = axs[0, i].gridlines(crs=ccrs.PlateCarree(), draw_labels=True, x_inline=False, y_inline=False, linewidth=1, color='grey', alpha=0.5, linestyle='--')
 90 |     gl[0, i].right_labels = False
 91 |     gl[0, i].top_labels = False
 92 |     gl[0, i].left_labels = False
 93 |     cs[0, i] = axs[0, i].pcolormesh(lon, lat, friac[sim][var_name], cmap=cmap1, norm=norm1, shading="auto",
 94 |                                     transform=ccrs.PlateCarree())
 95 |     axs[0, i].set_title(f'{label}', fontweight='bold', pad=6, fontsize=14, loc='center')
 96 | 
 97 | gl[0, 0].left_labels = True
 98 | 
 99 | cax = fig.add_axes(
100 |     [axs[0, 2].get_position().x1 + 0.01, axs[0, 2].get_position().y0, 0.01, axs[0, 2].get_position().height])
101 | cbar = fig.colorbar(cs[0, 1], cax=cax, orientation='vertical',
102 |                     ticks=np.linspace(0, 100, 6, endpoint=True))
103 | cbar.ax.tick_params(labelsize=14)
104 | 
105 | 
106 | axs[0, 0].text(-0.2, 0.5, 'Sea ice', ha='center', va='center', rotation='vertical',
107 |                transform=axs[0, 0].transAxes, fontsize=14, fontweight='bold')
108 | axs[0, 2].text(1.07, 1.09, '[%]', ha='center', va='center', rotation='horizontal',
109 |                transform=axs[0, 2].transAxes, fontsize=12)
110 | 
111 | fig.show()
112 | # plotpath = "/project/pr133/rxiang/figure/echam5/"
113 | # fig.savefig(plotpath + 'friac' + f'{mon}.png', dpi=500)
114 | plt.close(fig)
115 | """
116 | import cartopy.crs as ccrs
117 | import cartopy.feature as cfeature
118 | import matplotlib.pyplot as plt
119 | from matplotlib import cm
120 | import matplotlib.ticker as mticker
121 | import numpy as np
122 | from netCDF4 import Dataset
123 | import mplotutils as mpu
124 | import xarray as xr
125 | from matplotlib.colors import TwoSlopeNorm
126 | 
127 | 
128 | 
129 | def plot(filename, field, title, metric, lon, lat, max=None, min=None, 
130 |     lat_pole=90, lon_pole=-180, coastline=True, colormap="RdBu_r", 
131 |     centered_bar=False): 
132 | 
133 |     rotated_pole = ccrs.RotatedPole(pole_latitude=lat_pole, pole_longitude=lon_pole)
134 |     data_crs = ccrs.PlateCarree()
135 | 
136 |     # create the plot and set the size
137 |     plt.figure(figsize=(20,10))
138 |     axes = plt.axes(projection= rotated_pole)
139 | 
140 | 
141 |     # create country's borders and landsea mask
142 |     #land_50m = cfeature.NaturalEarthFeature('cultural', 'admin_0_countries', '50m', edgecolor='black', facecolor='none', linewidth=0.2)
143 |     broder_50m = cfeature.NaturalEarthFeature('physical', 'coastline', '50m', edgecolor='black', facecolor='none', linewidth=0.8)
144 |     if max is None:
145 |         max = np.nanmax(field)
146 |     if min is None:
147 |         min = np.nanmin(field)
148 |     # activate the labels and set countour of the countourf
149 |     draw_labels = True
150 |     reversed_cmap = False
151 |     levels = np.arange(min, max, 0.05)
152 |     color_map = plt.cm.get_cmap(colormap)
153 |     if reversed_cmap:
154 |         reversed_color_map = color_map.reversed()
155 |     else:
156 |         reversed_color_map = color_map
157 |     plt.gca().set_facecolor("dimgrey")
158 | 
159 |     if centered_bar:
160 |         norm = TwoSlopeNorm(vmin=min, vmax=max, vcenter=0.0001)
161 |         # plot in each subplot
162 |         h = plt.contourf(lon, lat, field[:,:], levels=levels, cmap=reversed_color_map, extend='both', norm=norm)
163 |     else:
164 |         h = plt.contourf(lon, lat, field[:,:], levels=levels, cmap=reversed_color_map , extend='both')
165 |     axes.set_title(title, fontsize=25, weight="bold")
166 | 
167 |     ## add borders and landsea mask
168 |     #axes.add_feature(land_50m)
169 |     if coastline:
170 |         axes.add_feature(broder_50m)
171 |     
172 |     gl = axes.gridlines(color='black', linestyle='--', linewidth=1., alpha=0.35, draw_labels=draw_labels, dms=True, x_inline=False, y_inline=False)
173 |     gl.ylocator     = mticker.FixedLocator(np.arange(-60, 80, 10))
174 |     gl.xlocator     = mticker.FixedLocator(np.arange(-100,  90 ,10))
175 |     gl.xlabel_style = {'size':12, 'rotation': 0, 'rotation_mode': 'anchor'}
176 |     gl.ylabel_style = {'size':12, 'rotation': 0, 'rotation_mode': 'anchor'}
177 |     gl.top_labels = False
178 |     gl.right_labels = False
179 |     gl.left_labels = False
180 |     gl.bottom_labels = True
181 |     gl.left_labels = True
182 | 
183 |     #set colorbar
184 |     cb = plt.colorbar(orientation="horizontal", shrink=0.4, pad=0.07, format="%.2f")
185 |     cb.ax.tick_params(labelsize=14)
186 |     cb.set_label(label= str(metric),fontsize=20)
187 | 
188 |     plt.tight_layout()
189 |     plt.savefig(str(filename) + ".png")
190 |     
191 | # create var wherre to store others
192 | result = np.zeros((1,224,544))
193 | 
194 | 
195 | era5_old = xr.open_dataset("old_version.nc")
196 | era5_new = xr.open_dataset("new_version.nc")
197 | tas = xr.open_dataset("ts_delta.nc")
198 | tos = xr.open_dataset("tos_delta.nc")
199 | 
200 | 
201 | lon = tos.variables['lon'][:]
202 | lat = tos.variables['lat'][:]
203 | month = 0
204 | print("loads done")
205 | plot("old_final_temp", era5_old.variables['T_SKIN'][0,:,:].values, "Final PGW Temperature w/o sea ice update", 
206 |      "T_SKIN [K]", lon, lat)
207 | print("1 done")
208 | plot("new_final_temp", era5_new.variables['T_SKIN'][0,:,:].values, "Final PGW Temperature with sea ice update", 
209 |      "T_SKIN [K]", lon, lat)
210 | print("2 done")
211 | plot("old_final_sic", era5_old.variables['FR_SEA_ICE'][0,:,:].values, "ERA5 Sea Ice", 
212 |      "Sea Ice frac [1]", lon, lat)
213 | print("3 done")
214 | plot("new_final_sic", era5_new.variables['FR_SEA_ICE'][0,:,:].values, "PGW Sea Ice", 
215 |      "Sea Ice frac [1]", lon, lat)
216 | print("4 done")
217 | 
218 | plot("diff_winter", result - tas.variables['tas'][month,:,:].values,"Differences between new and previous PGW versions for January", 
219 |      "TAS delta [K]",lon,lat )
220 | plot("tas_winter"+addon, tas.variables['tas'][month,:,:].values, "TAS field from previous PGW version for January", 
221 |       "TAS delta [K]",lon,lat )
222 | 
223 | plot ("cdo_winter"+addon, cdo.variables['tos'][month,:,:].values, "SST field from bi-linear interpolation for January", 
224 |       "SST delta [K]",lon,lat )
225 | plot ("sst"+addon, tos.variables['sst'][month,:,:].values, "SST field from NaN-ignoring interpolation using kernel interp for January", 
226 |       "SST delta [K]" ,lon,lat)
227 | 
228 | plot ("sst_tas_diff"+addon, tos.variables['sst'][month,:,:].values- tas.variables['tas'][month,:,:].values, "Differences between SST and TAS for January", 
229 |       "SST delta [K]",lon,lat)
230 | plot ("ice"+addon, era5.variables['FR_SEA_ICE'][0,:,:].values, "Sea ice fraction from ERA5 for January", 
231 |       "Ice fraction [%]",lon,lat, colormap="Blues")
232 | lon = christoph.variables['lon'][:]
233 | lat = christoph.variables['lat'][:]
234 | print(np.sum(christoph.variables['ts'][0,:,:].values))
235 | print(np.sum(christoph.variables['ts'][0,:,:].values)/ (len(lon)*len(lat)))
236 | plot ("heim", christoph.variables['ts'][0,:,:].values, "Differences between TS and SST for January", 
237 |       "Temperature [K]",lon,lat)
238 | 
239 | 
240 | def plot_paper(filename, field, title, metric, max=None, min=None, lat_pole=90, lon_pole=-180, 
241 |          coastline=True, colormap="RdBu_r"): 
242 | 
243 |     rotated_pole = ccrs.RotatedPole(pole_latitude = lat_pole, pole_longitude = lon_pole)
244 |     data_crs = ccrs.PlateCarree()
245 |     
246 |     # create the plot and set the size
247 |     fig, axs = plt.subplots(1,3, sharex=True, sharey=True , subplot_kw=dict(projection= rotated_pole), figsize = (20*3,10))
248 |     fig.subplots_adjust( wspace=0.05, left=0.05, right=0.99, bottom=0.12, top=0.92)
249 |     #fig.suptitle(r'$\Delta$'+ "SST comparison between native GCM data, bi-linear interpolation \n and NaN-ignoring interpolation", fontsize=30, weight="bold")
250 | 
251 |     # create country's borders and landsea mask
252 |     #land_50m = cfeature.NaturalEarthFeature('cultural', 'admin_0_countries', '50m', edgecolor='black', facecolor='none', linewidth=0.2)
253 |     broder_50m = cfeature.NaturalEarthFeature('physical', 'coastline', '50m', edgecolor='black', facecolor='none', linewidth=0.8)
254 |     if max == None:
255 |         max = np.nanmax(field)
256 |     if min == None:
257 |         min = np.nanmin(field)
258 |     # activate the labels and set countour of the countourf
259 |     draw_labels = True
260 |     reversed_cmap = False
261 |     levels = np.arange(min, max, 0.05)
262 |     color_map = plt.cm.get_cmap("Reds")
263 |     if reversed_cmap == True:
264 |         reversed_color_map = color_map.reversed()
265 |     else:
266 |         reversed_color_map = color_map
267 |     #plt.gca().set_facecolor("dimgrey")
268 |     #norm = TwoSlopeNorm(vmin=min, vmax = max, vcenter=0.0001)
269 |     origin_dim = raw_sst.coords['longitude'].values.shape
270 |     lon_raw = raw_sst.coords['longitude'].values.reshape(-1)
271 |     for i in range(len(lon_raw)):
272 |         if lon_raw[i] > 180:
273 |             lon_raw[i] -= 360    
274 |     # plot in each subplot
275 |     h1 = axs[0].contourf(lon_raw.reshape(origin_dim), raw_sst.coords['latitude'].values, raw_sst.variables['tos'][month,:,:].values, levels=levels, cmap=reversed_color_map , extend='both')
276 |     axs[0].set_title(r'$\Delta$SST on GCM ocean model grid', fontsize=25, weight="bold")
277 |     ## add borders and landsea mask
278 |     #axes.add_feature(land_50m)
279 |     if coastline:
280 |         axs[0].add_feature(broder_50m)
281 |     
282 |     gl = axs[0].gridlines(color='black', linestyle='--', linewidth=1., alpha=0.35, draw_labels=draw_labels, dms=True, x_inline=False, y_inline=False)
283 |     gl.ylocator     = mticker.FixedLocator(np.arange(-60, 80, 10))
284 |     gl.xlocator     = mticker.FixedLocator(np.arange(-100,  90 ,10))
285 |     gl.xlabel_style = {'size':12, 'rotation': 0, 'rotation_mode': 'anchor'}
286 |     gl.ylabel_style = {'size':12, 'rotation': 0, 'rotation_mode': 'anchor'}
287 |     gl.top_labels = False
288 |     gl.right_labels = False
289 |     gl.left_labels = False
290 |     gl.bottom_labels = True
291 |     gl.left_labels = True
292 |     
293 |         # plot in each subplot
294 |     h = axs[1].contourf(lon[210:-150], lat[30:-115], cdo.variables['tos'][month,:,:].values[30:-115,210:-150], levels=levels, cmap=reversed_color_map , extend='both')
295 |     axs[1].set_title(r'$\Delta$'+ "SST using bi-linear interpolation", fontsize=25, weight="bold")
296 |     ## add borders and landsea mask
297 |     #axes.add_feature(land_50m)
298 |     if coastline:
299 |         axs[1].add_feature(broder_50m)
300 |     
301 |     gl = axs[1].gridlines(color='black', linestyle='--', linewidth=1., alpha=0.35, draw_labels=draw_labels, dms=True, x_inline=False, y_inline=False)
302 |     gl.ylocator     = mticker.FixedLocator(np.arange(-60, 80, 10))
303 |     gl.xlocator     = mticker.FixedLocator(np.arange(-100,  90 ,10))
304 |     gl.xlabel_style = {'size':12, 'rotation': 0, 'rotation_mode': 'anchor'}
305 |     gl.ylabel_style = {'size':12, 'rotation': 0, 'rotation_mode': 'anchor'}
306 |     gl.top_labels = False
307 |     gl.right_labels = False
308 |     gl.left_labels = False
309 |     gl.bottom_labels = True
310 |     gl.left_labels = True
311 | 
312 |         # plot in each subplot
313 |     h = axs[2].contourf(lon[210:-150], lat[30:-115], tos.variables['sst'][month,:,:].values[30:-115,210:-150], levels=levels, cmap=reversed_color_map , extend='both')
314 |     axs[2].set_title(r'$\Delta$'+ "SST using NaN-ignoring interpolation", fontsize=25, weight="bold")
315 |     ## add borders and landsea mask
316 |     #axes.add_feature(land_50m)
317 |     if coastline:
318 |         axs[2].add_feature(broder_50m)
319 |     
320 |     gl = axs[2].gridlines(color='black', linestyle='--', linewidth=1., alpha=0.35, draw_labels=draw_labels, dms=True, x_inline=False, y_inline=False)
321 |     gl.ylocator     = mticker.FixedLocator(np.arange(-60, 80, 10))
322 |     gl.xlocator     = mticker.FixedLocator(np.arange(-100,  90 ,10))
323 |     gl.xlabel_style = {'size':12, 'rotation': 0, 'rotation_mode': 'anchor'}
324 |     gl.ylabel_style = {'size':12, 'rotation': 0, 'rotation_mode': 'anchor'}
325 |     gl.top_labels = False
326 |     gl.right_labels = False
327 |     gl.left_labels = False
328 |     gl.bottom_labels = True
329 |     gl.left_labels = True   
330 | 
331 |     #set colorbar
332 |     cb = mpu.colorbar(h1, axs[1], orientation = 'horizontal', pad = 0.15, aspect=50, format='%.1f') 
333 |     cb.ax.tick_params(labelsize=14)
334 |     cb.set_label(label="SST delta [K]",fontsize=20)
335 | 
336 |     #plt.tight_layout()
337 |     plt.savefig(str(filename) + ".png")
338 | 
339 | #plot_paper("jonas_figure"+addon, raw_sst.variables['tos'][month,:,:].values, "Combined TAS field from NaN-ignoring interpolation for January", 
340 | #     "TAS delta [K]")
341 | """
342 | 


--------------------------------------------------------------------------------
/step_03_apply_to_era.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | description     PGW for ERA5 main routine to update ERA5 files with climate
  5 |                 deltas to transition from ERA climate to PGW climate.
  6 | authors		Before 2022:    original developments by Roman Brogli
  7 |                 Since 2022:     upgrade to PGW for ERA5 by Christoph Heim 
  8 |                 2022:           udpates by Jonas Mensch
  9 | """
 10 | ##############################################################################
 11 | import argparse, os
 12 | import xarray as xr
 13 | import numpy as np
 14 | from argparse import RawDescriptionHelpFormatter
 15 | from pathlib import Path
 16 | from datetime import datetime, timedelta
 17 | from functions import (
 18 |     specific_to_relative_humidity,
 19 |     relative_to_specific_humidity,
 20 |     load_delta,
 21 |     load_delta_interp,
 22 |     integ_geopot,
 23 |     interp_logp_4d,
 24 |     determine_p_ref,
 25 |     integrate_tos
 26 |     )
 27 | from constants import CON_G, CON_RD
 28 | from parallel import IterMP
 29 | from settings import (
 30 |     i_debug,
 31 |     era5_file_name_base,
 32 |     var_name_map,
 33 |     TIME_ERA, LEV_ERA, HLEV_ERA, LON_ERA, LAT_ERA, SOIL_HLEV_ERA,
 34 |     TIME_GCM, PLEV_GCM,
 35 |     i_reinterp,
 36 |     p_ref_inp,
 37 |     thresh_phi_ref_max_error,
 38 |     max_n_iter,
 39 |     adj_factor,
 40 |     file_name_bases,
 41 |     )
 42 | ##############################################################################
 43 | 
 44 | def pgw_for_era5(inp_era_file_path, out_era_file_path,
 45 |                 delta_input_dir, era_step_dt,
 46 |                 ignore_top_pressure_error,
 47 |                 debug_mode=None):
 48 |     if i_debug >= 0:
 49 |         print('Start working on input file {}'.format(inp_era_file_path))
 50 | 
 51 |     #########################################################################
 52 |     ### PREPARATION STEPS
 53 |     #########################################################################
 54 |     # containers for variable computation
 55 |     vars_era = {}
 56 |     vars_pgw = {}
 57 |     deltas = {}
 58 | 
 59 |     # open data set
 60 |     era_file = xr.open_dataset(inp_era_file_path, decode_cf=False)
 61 | 
 62 |     ## compute pressure on ERA5 full levels and half levels
 63 |     # pressure on half levels
 64 |     pa_hl_era = (era_file.ak + 
 65 |                 era_file[var_name_map['ps']] * era_file.bk).transpose(
 66 |                 TIME_ERA, HLEV_ERA, LAT_ERA, LON_ERA)
 67 |     # if akm and akb coefficients (for full levels) exist, use them
 68 |     if 'akm' in era_file:
 69 |         akm = era_file.akm
 70 |         bkm = era_file.bkm
 71 |     # if akm and abk  coefficients do not exist, computed them
 72 |     # with the average of the half-level coefficients above and below
 73 |     else:
 74 |         akm = (
 75 |             0.5 * era_file.ak.diff(
 76 |             dim=HLEV_ERA, 
 77 |             label='lower').rename({HLEV_ERA:LEV_ERA}) + 
 78 |             era_file.ak.isel({HLEV_ERA:range(len(era_file.level1)-1)}).values
 79 |         )
 80 |         bkm = (
 81 |             0.5 * era_file.bk.diff(
 82 |             dim=HLEV_ERA, 
 83 |             label='lower').rename({HLEV_ERA:LEV_ERA}) + 
 84 |             era_file.bk.isel({HLEV_ERA:range(len(era_file.level1)-1)}).values
 85 |         )
 86 |     # pressure on full levels
 87 |     pa_era = (akm + era_file[var_name_map['ps']] * bkm).transpose(
 88 |                 TIME_ERA, LEV_ERA, LAT_ERA, LON_ERA)
 89 | 
 90 |     # compute relative humidity in ERA climate state
 91 |     era_file[var_name_map['hur']] = specific_to_relative_humidity(
 92 |                         era_file[var_name_map['hus']], 
 93 |                         pa_era, era_file[var_name_map['ta']]).transpose(
 94 |                         TIME_ERA, LEV_ERA, LAT_ERA, LON_ERA)
 95 | 
 96 |     #########################################################################
 97 |     ### UPDATE SURFACE AND SOIL TEMPERATURE
 98 |     #########################################################################
 99 |     # update surface skin temperature using SST delta over sea and and 
100 |     # surface skin temperature delta over land and over sea ice
101 |     if i_debug >= 2:
102 |         print('update surface skin temperature (ts)')
103 |     delta_siconc = load_delta(delta_input_dir, 'siconc',
104 |                         era_file[TIME_ERA], era_step_dt) 
105 |     era_file[var_name_map['sic']].values += delta_siconc.values/100
106 |     era_file[var_name_map['sic']].values = np.clip(
107 |                         era_file[var_name_map['sic']].values, 0, 1)
108 |     print(np.nanmin(era_file[var_name_map['sic']].values))
109 |     #deltas['siconc'] = delta_siconc
110 |     # load surface temperature climate delta
111 |     #(for grid points over land and sea ice)
112 |     delta_ts = load_delta(delta_input_dir, 'ts',
113 |                            era_file[TIME_ERA], era_step_dt)
114 |     # load SST climate delta (for grid points over open water)
115 |     delta_tos = load_delta(delta_input_dir, 'tos',
116 |                            era_file[TIME_ERA], era_step_dt)
117 |     # combine using land and sea-ice mask in ERA5
118 |     delta_ts_combined = integrate_tos(
119 |         delta_tos.values,
120 |         delta_ts.values, 
121 |         era_file[var_name_map['sftlf']].isel({TIME_ERA:0}).values, 
122 |         era_file[var_name_map['sic']].isel({TIME_ERA:0}).values
123 |     )
124 |     era_file[var_name_map['ts']].values += delta_ts_combined
125 |     delta_ts.values = delta_ts_combined
126 |     # store delta for output in case of --debug_mode = interpolate_full
127 |     deltas['ts'] = delta_ts
128 |     print(np.nanmin(era_file[var_name_map['sic']].values))
129 |     # update temperature of soil layers
130 |     if i_debug >= 2:
131 |         print('update soil layer temperature (st)')
132 |     # set climatological lower soil temperature delta to annual mean
133 |     # climate delta of surface skin temperature.
134 |     delta_st_clim = load_delta(delta_input_dir, 'ts',
135 |                             era_file[TIME_ERA], 
136 |                             target_date_time=None).mean(dim=[TIME_GCM])
137 |     # interpolate between surface temperature and deep soil temperature
138 |     # using exponential decay of annual cycle signal
139 |     delta_soilt = (
140 |             delta_st_clim + np.exp(-era_file.soil1/2.8) * 
141 |                     (delta_ts - delta_st_clim)
142 |     )
143 |     delta_soilt = delta_soilt.transpose(TIME_ERA, SOIL_HLEV_ERA, LAT_ERA, LON_ERA)
144 |     era_file[var_name_map['st']].values += delta_soilt
145 |     # store delta for output in case of --debug_mode = interpolate_full
146 |     deltas['st'] = delta_soilt
147 | 
148 |     #########################################################################
149 |     ### START UPDATING 3D FIELDS
150 |     #########################################################################
151 |     # If no re-interpolation is done, the final PGW climate state
152 |     # variables can be computed already now, before updating the 
153 |     # surface pressure. This means that the climate deltas or
154 |     # interpolated on the ERA5 model levels of the ERA climate state.
155 |     if not i_reinterp:
156 | 
157 |         ### interpolate climate deltas onto ERA5 grid
158 |         for var_name in ['ta','hur','ua','va']:
159 |             if i_debug >= 2:
160 |                 print('update {}'.format(var_name))
161 | 
162 |             ## interpolate climate deltas to ERA5 model levels
163 |             ## use ERA climate state
164 |             delta_var = load_delta_interp(delta_input_dir,
165 |                     var_name, pa_era, era_file[TIME_ERA], era_step_dt,
166 |                     ignore_top_pressure_error)
167 |             deltas[var_name] = delta_var
168 | 
169 |             ## compute PGW climate state variables
170 |             vars_pgw[var_name] = (
171 |                     era_file[var_name_map[var_name]] + 
172 |                     deltas[var_name]
173 |             )
174 | 
175 | 
176 |     #########################################################################
177 |     ### UPDATE SURFACE PRESSURE USING ITERATIVE PROCEDURE
178 |     #########################################################################
179 |     if i_debug >= 2:
180 |         print('###### Start with iterative surface pressure adjustment.')
181 |     # change in surface pressure between ERA and PGW climate states
182 |     delta_ps = xr.zeros_like(era_file[var_name_map['ps']])
183 |     # increment to adjust delta_ps with each iteration
184 |     adj_ps = xr.zeros_like(era_file[var_name_map['ps']])
185 |     # maximum error in geopotential (used in iteration)
186 |     phi_ref_max_error = np.inf
187 | 
188 |     it = 1
189 |     while phi_ref_max_error > thresh_phi_ref_max_error:
190 | 
191 |         # update surface pressure
192 |         delta_ps += adj_ps
193 |         ps_pgw = era_file[var_name_map['ps']] + delta_ps
194 | 
195 |         # recompute pressure on full and half levels
196 |         pa_pgw = (akm + ps_pgw * bkm).transpose(
197 |                     TIME_ERA, LEV_ERA, LAT_ERA, LON_ERA)
198 |         pa_hl_pgw = (era_file.ak + ps_pgw * era_file.bk).transpose(
199 |                     TIME_ERA, HLEV_ERA, LAT_ERA, LON_ERA)
200 | 
201 | 
202 |         if i_reinterp:
203 |             # interpolate ERA climate state variables as well as
204 |             # climate deltas onto updated model levels, and
205 |             # compute PGW climate state variables
206 |             if i_debug >= 2:
207 |                 print('reinterpolate ta and hur')
208 |             for var_name in ['ta', 'hur']:
209 |                 vars_era[var_name] = interp_logp_4d(
210 |                                 era_file[var_name_map[var_name]], 
211 |                                 pa_era, pa_pgw, extrapolate='constant')
212 |                 deltas[var_name] = load_delta_interp(delta_input_dir,
213 |                                                 var_name, pa_pgw,
214 |                                                 era_file[TIME_ERA], era_step_dt,
215 |                                                 ignore_top_pressure_error)
216 |                 vars_pgw[var_name] = vars_era[var_name] + deltas[var_name]
217 | 
218 |         # Determine current reference pressure (p_ref)
219 |         if p_ref_inp is None:
220 |             # get GCM pressure levels as candidates for reference pressure
221 |             p_ref_opts = load_delta(delta_input_dir, 'zg',
222 |                                 era_file[TIME_ERA], era_step_dt)[PLEV_GCM]
223 |             # maximum reference pressure in ERA and PGW climate states
224 |             # (take 95% of surface pressure to ensure that a few model
225 |             # levels are located in between which makes the solution
226 |             # smoother).
227 |             p_min_era = pa_hl_era.isel(
228 |                         {HLEV_ERA:len(pa_hl_era[HLEV_ERA])-1}) * 0.95
229 |             p_min_pgw = pa_hl_pgw.isel(
230 |                         {HLEV_ERA:len(pa_hl_era[HLEV_ERA])-1}) * 0.95
231 |             # reference pressure from a former iteration already set?
232 |             try:
233 |                 p_ref_last = p_ref
234 |             except UnboundLocalError:
235 |                 p_ref_last = None
236 |             # determine local reference pressure
237 |             p_ref = xr.apply_ufunc(determine_p_ref, p_min_era, p_min_pgw, 
238 |                     p_ref_opts, p_ref_last,
239 |                     input_core_dims=[[],[],[PLEV_GCM],[]],
240 |                     vectorize=True)
241 |             if HLEV_ERA in p_ref.coords:
242 |                 del p_ref[HLEV_ERA]
243 |             # make sure a reference pressure above the required model
244 |             # level could be found everywhere
245 |             if np.any(np.isnan(p_ref)):
246 |                 raise ValueError('No reference pressure level above the ' +
247 |                         'required local minimum pressure level could not ' +
248 |                         'be found everywhere. ' +
249 |                         'This is likely the case because your geopotential ' +
250 |                         'data set does not reach up high enough (e.g. only to ' +
251 |                         '500 hPa instead of e.g. 300 hPa?)')
252 |         else:
253 |             p_ref = p_ref_inp
254 | 
255 |         #p_sfc_era.to_netcdf('psfc_era.nc')
256 |         #p_ref.to_netcdf('pref.nc')
257 |         #quit()
258 | 
259 |         # convert relative humidity to speicifc humidity in pgw
260 |         # take PGW climate state temperature and relative humidity
261 |         # and pressure of current iteration
262 |         vars_pgw['hus'] = relative_to_specific_humidity(
263 |             vars_pgw['hur'], 
264 |             pa_pgw, 
265 |             vars_pgw['ta']
266 |         )
267 | 
268 |         # compute updated geopotential at reference pressure
269 |         phi_ref_pgw = integ_geopot(
270 |             pa_hl_pgw, 
271 |             era_file[var_name_map['zgs']], 
272 |             vars_pgw['ta'], 
273 |             vars_pgw['hus'], 
274 |             era_file[HLEV_ERA], 
275 |             p_ref
276 |         )
277 | 
278 |         # recompute original geopotential at currently used 
279 |         # reference pressure level
280 |         phi_ref_era = integ_geopot(
281 |             pa_hl_era, 
282 |             era_file[var_name_map['zgs']],
283 |             era_file[var_name_map['ta']], 
284 |             era_file[var_name_map['hus']], 
285 |             era_file[HLEV_ERA],
286 |             p_ref
287 |         )
288 | 
289 |         delta_phi_ref = phi_ref_pgw - phi_ref_era
290 | 
291 |         ## load climate delta at currently used reference pressure level
292 |         climate_delta_phi_ref = load_delta(delta_input_dir, 'zg',
293 |                             era_file[TIME_ERA], era_step_dt) * CON_G
294 |         climate_delta_phi_ref = climate_delta_phi_ref.sel({PLEV_GCM:p_ref})
295 |         del climate_delta_phi_ref[PLEV_GCM]
296 | 
297 |         # error in future geopotential
298 |         phi_ref_error = delta_phi_ref - climate_delta_phi_ref
299 | 
300 |         # adjust surface pressure by some amount in the right direction
301 |         adj_ps = - adj_factor * ps_pgw / (
302 |                 CON_RD * 
303 |                 vars_pgw['ta'].sel({LEV_ERA:np.max(era_file[LEV_ERA])})
304 |             ) * phi_ref_error
305 |         if LEV_ERA in adj_ps.coords:
306 |             del adj_ps[LEV_ERA]
307 | 
308 |         phi_ref_max_error = np.abs(phi_ref_error).max().values
309 |         if i_debug >= 2:
310 |             print('### iteration {:03d}, phi max error: {}'.
311 |                             format(it, phi_ref_max_error))
312 | 
313 |         it += 1
314 | 
315 |         if it > max_n_iter:
316 |             raise ValueError('ERROR! Pressure adjustment did not converge '+
317 |                   'for file {}. '.format(inp_era_file_path) +
318 |                   'Consider increasing the value for "max_n_iter" in ' +
319 |                   'settings.py')
320 | 
321 |     #########################################################################
322 |     ### FINISH UPDATING 3D FIELDS
323 |     #########################################################################
324 |     # store computed delta ps for output in case of 
325 |     # --debug_mode = interpolate_full
326 |     deltas['ps'] = ps_pgw - era_file.PS
327 | 
328 |     ## If re-interpolation is enabled, interpolate climate deltas for
329 |     ## ua and va onto final PGW climate state ERA5 model levels.
330 |     if i_reinterp:
331 |         for var_name in ['ua', 'va']:
332 |             if i_debug >= 2:
333 |                 print('add {}'.format(var_name))
334 |             var_era = interp_logp_4d(era_file[var_name_map[var_name]], 
335 |                             pa_era, pa_pgw, extrapolate='constant')
336 |             delta_var = load_delta_interp(delta_input_dir,
337 |                     var_name, pa_pgw,
338 |                     era_file[TIME_ERA], era_step_dt,
339 |                     ignore_top_pressure_error)
340 |             vars_pgw[var_name] = var_era + delta_var
341 |             # store delta for output in case of 
342 |             # --debug_mode = interpolate_full
343 |             deltas[var_name] = delta_var
344 | 
345 |     #########################################################################
346 |     ### DEBUG MODE
347 |     #########################################################################
348 |     ## for debug_mode == interpolate_full, write final climate deltas
349 |     ## to output directory
350 |     if debug_mode == 'interpolate_full':
351 |         var_names = ['ps','ta','hur','ua','va','st','ts']
352 |         for var_name in var_names:
353 |             print(var_name)
354 |             # creat output file name
355 |             out_file_path = os.path.join(Path(out_era_file_path).parents[0],
356 |                                 '{}_delta_{}'.format(var_name_map[var_name], 
357 |                                             Path(out_era_file_path).name))
358 |             # convert to dataset
359 |             delta = deltas[var_name].to_dataset(name=var_name_map[var_name])
360 |             # save climate delta
361 |             delta.to_netcdf(out_file_path, mode='w')
362 | 
363 |     #########################################################################
364 |     ### SAVE PGW ERA5 FILE
365 |     #########################################################################
366 |     ## for production mode, modify ERA5 file and save
367 |     else:
368 |         ## update 3D fields in ERA file
369 |         era_file[var_name_map['ps']] = ps_pgw
370 |         for var_name in ['ta','hus','ua','va']:
371 |             era_file[var_name_map[var_name]] = vars_pgw[var_name]
372 |         ## remove manually computed RH field in ERA5 file
373 |         del era_file[var_name_map['hur']]
374 | 
375 | 
376 |         ## save updated ERA5 file
377 |         print(np.nanmin(era_file[var_name_map['sic']].values))
378 |         era_file.to_netcdf(out_era_file_path, mode='w')
379 |         era_file.close()
380 |         if i_debug >= 1:
381 |             print('Done. Saved to file {}.'.format(out_era_file_path))
382 | 
383 | 
384 | 
385 | ##############################################################################
386 | 
387 | def debug_interpolate_time(
388 |                 inp_era_file_path, out_era_file_path,
389 |                 delta_input_dir, era_step_dt,
390 |                 ignore_top_pressure_error,
391 |                 debug_mode=None):
392 |     """
393 |     Debugging function to test time interpolation. Is called if input
394 |     inputg argument --debug_mode is set to "interpolate_time".
395 |     """
396 |     # load input ERA5 file
397 |     # in this debugging function, the only purpose of this is to obtain 
398 |     # the time format of the ERA5 file
399 |     era_file = xr.open_dataset(inp_era_file_path, decode_cf=False)
400 | 
401 |     var_names = ['tos','tas','hurs','ps','ta','hur','ua','va','zg']
402 |     for var_name in var_names:
403 |         print(var_name)
404 |         # creat output file name
405 |         out_file_path = os.path.join(Path(out_era_file_path).parents[0],
406 |                                     '{}_{}_{}'.format("delta",var_name, 
407 |                                         Path(out_era_file_path).name))
408 |         # load climate delta interpolated in time only
409 |         delta = load_delta(delta_input_dir, var_name, era_file[TIME_ERA], 
410 |                        target_date_time=era_step_dt)
411 |         # convert to dataset
412 |         delta = delta.to_dataset(name=var_name)
413 |         delta.to_netcdf(out_file_path, mode='w')
414 |     era_file.close()
415 | 
416 | 
417 | 
418 | 
419 | 
420 | 
421 | ##############################################################################
422 | if __name__ == "__main__":
423 |     ## input arguments
424 |     parser = argparse.ArgumentParser(description =
425 |     """
426 |     Perturb ERA5 with PGW climate deltas. Settings can be made in
427 |     "settings.py".
428 |     ##########################################################################
429 | 
430 |     Main function to update ERA5 files with the PGW signal.
431 |     The terminology used is HIST referring to the historical (or reference)
432 |     climatology, SCEN referring to the future (climate change scenario)
433 |     climatology, and SCEN-HIST (a.k.a. climate delta) referring to the
434 |     PGW signal which should be applied to the ERA5 files.
435 |     The script computes and adds a climate change signal for:
436 |         - ua
437 |         - va
438 |         - ta (including tas for interpolation near the surface)
439 |         - hus (computed using a hur and hurs climate delta)
440 |         - surface skin temperature (including SST) and soil temperature
441 |     and consequently iteratively updates ps to maintain hydrostatic
442 |     balance. During this, the climate delta for zg is additionally required.
443 |     A list of all climate deltas required is shown in settings.py.
444 | 
445 |     ##########################################################################
446 | 
447 |     If the variable names in the ERA5 files to be processed deviate from
448 |     the CMOR convention, the dict 'var_name_map' in the file 
449 |     settings.py allows to map between the CMOR names and the names in the ERA5
450 |     file. Also the coordinate names in the ERA5 or the GCM climate
451 |     delta files can be changed in settings.py, if required.
452 | 
453 |     ##########################################################################
454 | 
455 |     The code can be run in parallel on multiple ERA5 files at the same time.
456 |     See input arguments.
457 | 
458 |     ##########################################################################
459 | 
460 |     Note that epxloring the option --debug_mode (-D) can provide a lot of
461 |     insight into what the code does and can help gain confidence using the
462 |     code (see argument documentation below for more information).
463 | 
464 |     ##########################################################################
465 | 
466 |     Some more information about the iterative surface pressure
467 |     adjustment:
468 | 
469 |     - The procedure requires a reference pressure level (e.g. 500 hPa) for
470 |     which the geopotential is computed. Based on the deviation between the
471 |     computed and the GCM reference pressure geopotential, the surface pressure
472 |     is adjusted. Since the climate deltas may not always be available at 
473 |     native vertical GCM resolution, but the climate delta for the geopotential
474 |     on one specific pressure level itself is computed by the GCM using data
475 |     from all GCM model levels, this introduces an error in the surface
476 |     pressure adjustment used here. See publication for more details.
477 |     The higher (in terms of altitdue) the reference pressure is chosen, 
478 |     the larger this error may get. 
479 |     Alternatively, the reference pressure can be determined locally 
480 |     as the lowest possible pressure above the surface for which a climate 
481 |     delta for the geopotential is available (see settings.py).
482 | 
483 |     - If the iteration does not converge, 'thresh_phi_ref_max_error' in
484 |     the file settings.py may have to be raised a little bit. Setting
485 |     i_debug = 2 may help to diagnose if this helps.
486 | 
487 |     - As a default option, the climate deltas are interpolated to
488 |     the ERA5 model levels of the ERA climate state before the surface
489 |     pressure is adjusted (i_reinterp = 0).
490 |     There is an option implemented (i_reinterp = 1) in which the
491 |     deltas are re-interpolated on the updated ERA5 model levels
492 |     with each iteration of surface pressure adjustment. However, this
493 |     implies that the ERA5 fields are extrapolated at the surface
494 |     (if the surface pressure increases) the effect of which was not
495 |     tested in detail. The extrapolation is done assuming that the
496 |     boundary values are constant, which is not ideal for height-dependent
497 |     variables like e.g. temperature. As a default, it is recommended to set
498 |     i_reinterp = 0.
499 | 
500 |     ##########################################################################
501 | 
502 |     """, formatter_class=RawDescriptionHelpFormatter)
503 | 
504 |     # input era5 directory
505 |     parser.add_argument('-i', '--input_dir', type=str, default=None,
506 |             help='Directory with ERA5 input files to process. ' +
507 |                  'These files are not overwritten but copies will ' +
508 |                  'be save in --output_dir .')
509 | 
510 |     # output era5 directory
511 |     parser.add_argument('-o', '--output_dir', type=str, default=None,
512 |             help='Directory to store processed ERA5 files.')
513 | 
514 |     # first bc step to compute 
515 |     parser.add_argument('-f', '--first_era_step', type=str,
516 |             default='2006080200',
517 |             help='Date of first ERA5 time step to process. Format should ' +
518 |             'be YYYYMMDDHH.')
519 | 
520 |     # last bc step to compute 
521 |     parser.add_argument('-l', '--last_era_step', type=str,
522 |             default='2006080300',
523 |             help='Date of last ERA5 time step to process. Format should ' +
524 |             'be YYYYMMDDHH.')
525 | 
526 |     # delta hour increments
527 |     parser.add_argument('-H', '--hour_inc_step', type=int, default=3,
528 |             help='Hourly increment of the ERA5 time steps to process '+
529 |             'between --first_era_step and --last_era_step. Default value ' +
530 |             'is 3-hourly, i.e. (00, 03, 06, 09, 12, 15, 18, 21 UTC).')
531 | 
532 |     # climate delta directory (already remapped to ERA5 grid)
533 |     parser.add_argument('-d', '--delta_input_dir', type=str, default=None,
534 |             help='Directory with GCM climate deltas (SCEN-HIST) to be used. ' +
535 |             'This directory should have a climate delta for ta,hur,' +
536 |             'ua,va,zg,tas,hurs,ts,tos (e.g. ta_delta.nc), as well as the ' +
537 |             'HIST climatology value for ps (e.g. ps_historical.nc). ' +
538 |             'All files have to be horizontally remapped to the grid of ' +
539 |             'the ERA5 files used (see step_02_preproc_deltas.py).')
540 | 
541 |     # number of parallel jobs
542 |     parser.add_argument('-p', '--n_par', type=int, default=1,
543 |             help='Number of parallel tasks. Parallelization is done ' +
544 |             'on the level of individual ERA5 files being processed at ' +
545 |             'the same time.')
546 | 
547 |     # flag to ignore the error from to pressure extrapolation at the model top
548 |     parser.add_argument('-t', '--ignore_top_pressure_error',
549 |             action='store_true',
550 |             help='Flag to ignore an error due to pressure ' +
551 |             'extrapolation at the model top if GCM climate deltas reach ' +
552 |             'up less far than ERA5. This can only be done if ERA5 data ' +
553 |             'is not used by the limited-area model '+
554 |             'beyond the upper-most level of the GCM climate ' +
555 |             'deltas!!')
556 | 
557 |     # input era5 directory
558 |     parser.add_argument('-D', '--debug_mode', type=str, default=None,
559 |             help='If this flag is set, the ERA5 files will not be ' +
560 |                  'modified but instead the processed climate deltas '
561 |                  'are written to the output directory. There are two ' +
562 |                  'options: for "-D interpolate_time", the climate deltas ' +
563 |                  'are only interpolated to the time of the ERA5 files ' +
564 |                  'and then stored. for "-D interpolate_full", the ' +
565 |                  'full routine is run but instead of the processed ERA5 ' +
566 |                  'files, only the difference between the processed and ' +
567 |                  'the unprocessed ERA5 files is store (i.e. the climate ' +
568 |                  'deltas after full interpolation to the ERA5 grid).')
569 | 
570 | 
571 |     args = parser.parse_args()
572 |     ##########################################################################
573 | 
574 |     # make sure required input arguments are set.
575 |     if args.input_dir is None:
576 |         raise ValueError('Input directory (-i) is required.')
577 |     if args.output_dir is None:
578 |         raise ValueError('Output directory (-o) is required.')
579 |     if args.delta_input_dir is None:
580 |         raise ValueError('Delta input directory (-d) is required.')
581 | 
582 |     # check for debug mode
583 |     if args.debug_mode is not None:
584 |         if args.debug_mode not in ['interpolate_time', 'interpolate_full']:
585 |             raise ValueError('Invalid input for argument --debug_mode! ' +
586 |                             'Valid arguments are: ' +
587 |                              '"interpolate_time" or "interpolate_full"')
588 | 
589 |     # first date and last date to datetime object
590 |     first_era_step = datetime.strptime(args.first_era_step, '%Y%m%d%H')
591 |     last_era_step = datetime.strptime(args.last_era_step, '%Y%m%d%H')
592 | 
593 |     # time steps to process
594 |     era_step_dts = np.arange(first_era_step,
595 |                         last_era_step+timedelta(hours=args.hour_inc_step),
596 |                         timedelta(hours=args.hour_inc_step)).tolist()
597 | 
598 |     # if output directory doesn't exist create it
599 |     Path(args.output_dir).mkdir(parents=True, exist_ok=True)
600 | 
601 |     IMP = IterMP(njobs=args.n_par, run_async=True)
602 |     fargs = dict(
603 |         delta_input_dir = args.delta_input_dir,
604 |         ignore_top_pressure_error = args.ignore_top_pressure_error,
605 |         debug_mode = args.debug_mode,
606 |     )
607 |     step_args = []
608 | 
609 |     ##########################################################################
610 |     # iterate over time step and prepare function arguments
611 |     for era_step_dt in era_step_dts:
612 |         print(era_step_dt)
613 | 
614 |         # set output and input ERA5 file
615 |         inp_era_file_path = os.path.join(args.input_dir, 
616 |                 era5_file_name_base.format(era_step_dt))
617 |         out_era_file_path = os.path.join(args.output_dir, 
618 |                 era5_file_name_base.format(era_step_dt))
619 | 
620 |         step_args.append(dict(
621 |             inp_era_file_path = inp_era_file_path,
622 |             out_era_file_path = out_era_file_path,
623 |             era_step_dt = era_step_dt
624 |             )
625 |         )
626 | 
627 |     # choose either main function (pgw_for_era5) for production mode and
628 |     # debug mode "interpolate_full", or function time_interpolation 
629 |     # for debug mode "interpolate_time"
630 |     if (args.debug_mode is None) or (args.debug_mode == 'interpolate_full'):
631 |         run_function = pgw_for_era5
632 |     elif args.debug_mode == 'interpolate_time':
633 |         run_function = debug_interpolate_time
634 |     else:
635 |         raise NotImplementedError()
636 | 
637 |     # run in parallel if args.n_par > 1
638 |     IMP.run(run_function, fargs, step_args)
639 | 
640 | 


--------------------------------------------------------------------------------
/step_01_extract_deltas/CFday_wget_scripts/wget_CFday_ssp585_ua.sh:
--------------------------------------------------------------------------------
   1 | #!/bin/bash
   2 | ##############################################################################
   3 | # ESG Federation download script
   4 | #
   5 | # Template version: 1.2
   6 | # Generated by esgf-data.dkrz.de - 2022/02/21 20:59:57
   7 | # Search URL: https://esgf-data.dkrz.de/esg-search/wget/?distrib=false&dataset_id=CMIP6.ScenarioMIP.DKRZ.MPI-ESM1-2-HR.ssp585.r1i1p1f1.CFday.ua.gn.v20190710|esgf3.dkrz.de
   8 | #
   9 | ###############################################################################
  10 | # first be sure it's bash... anything out of bash or sh will break
  11 | # and the test will assure we are not using sh instead of bash
  12 | if [ $BASH ] && [ `basename $BASH` != bash ]; then
  13 |     echo "######## This is a bash script! ##############" 
  14 |     echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh."
  15 |     echo "Trying to recover automatically..."
  16 |     sleep 1
  17 |     /bin/bash $0 $@
  18 |     exit $?
  19 | fi
  20 | 
  21 | version=1.3.2
  22 | CACHE_FILE=.$(basename $0).status
  23 | openId=
  24 | search_url='https://esgf-data.dkrz.de/esg-search/wget/?distrib=false&dataset_id=CMIP6.ScenarioMIP.DKRZ.MPI-ESM1-2-HR.ssp585.r1i1p1f1.CFday.ua.gn.v20190710|esgf3.dkrz.de'
  25 | 
  26 | #These are the embedded files to be downloaded
  27 | download_files="$(cat <<EOF--dataset.file.url.chksum_type.chksum
  28 | 'ua_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20700101-20741231.nc' 'http://esgf3.dkrz.de/thredds/fileServer/cmip6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp585/r1i1p1f1/CFday/ua/gn/v20190710/ua_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20700101-20741231.nc' 'SHA256' 'a9fed04e1c210ed01bbf19cdfa6a71a104abe21ba0398cd49bd0c6310593cdaa'
  29 | 'ua_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20750101-20791231.nc' 'http://esgf3.dkrz.de/thredds/fileServer/cmip6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp585/r1i1p1f1/CFday/ua/gn/v20190710/ua_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20750101-20791231.nc' 'SHA256' '2f93cbd31be8609277c3043493f7828a052be9c32d532f6a29163c2bade9dc57'
  30 | 'ua_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20800101-20841231.nc' 'http://esgf3.dkrz.de/thredds/fileServer/cmip6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp585/r1i1p1f1/CFday/ua/gn/v20190710/ua_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20800101-20841231.nc' 'SHA256' '03e576b0aed976ce698f38c4dc4749ea03bc926f8a3b0babf21372bb81de6109'
  31 | 'ua_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20850101-20891231.nc' 'http://esgf3.dkrz.de/thredds/fileServer/cmip6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp585/r1i1p1f1/CFday/ua/gn/v20190710/ua_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20850101-20891231.nc' 'SHA256' '2255438222e737b35bcdd11a4c0dad0d50bcc25a25f6b59b545063c7f4086d8b'
  32 | 'ua_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20900101-20941231.nc' 'http://esgf3.dkrz.de/thredds/fileServer/cmip6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp585/r1i1p1f1/CFday/ua/gn/v20190710/ua_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20900101-20941231.nc' 'SHA256' '7bed4f0d81fcf545b42a24de2876166aee52a3825a73fc3dbdd22b7cda6b42f4'
  33 | 'ua_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20950101-20991231.nc' 'http://esgf3.dkrz.de/thredds/fileServer/cmip6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp585/r1i1p1f1/CFday/ua/gn/v20190710/ua_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20950101-20991231.nc' 'SHA256' '255c10eeaf3835725b737b054f81766485005900cccacdd871e393bf2585430c'
  34 | EOF--dataset.file.url.chksum_type.chksum
  35 | )"
  36 | 
  37 | # ESG_HOME should point to the directory containing ESG credentials.
  38 | #   Default is $HOME/.esg
  39 | ESG_HOME=${ESG_HOME:-$HOME/.esg}
  40 | [[ -d $ESG_HOME ]] || mkdir -p $ESG_HOME
  41 | 
  42 | ESG_CREDENTIALS=${X509_USER_PROXY:-$ESG_HOME/credentials.pem}
  43 | ESG_CERT_DIR=${X509_CERT_DIR:-$ESG_HOME/certificates}
  44 | MYPROXY_STATUS=$HOME/.MyProxyLogon
  45 | COOKIE_JAR=$ESG_HOME/cookies
  46 | MYPROXY_GETCERT=$ESG_HOME/getcert.jar
  47 | CERT_EXPIRATION_WARNING=$((60 * 60 * 8))   #Eight hour (in seconds)
  48 | 
  49 | WGET_TRUSTED_CERTIFICATES=$ESG_HOME/certificates
  50 | 
  51 | 
  52 | # Configure checking of server SSL certificates.
  53 | #   Disabling server certificate checking can resolve problems with myproxy
  54 | #   servers being out of sync with datanodes.
  55 | CHECK_SERVER_CERT=${CHECK_SERVER_CERT:-Yes}
  56 | 
  57 | check_os() {
  58 |     local os_name=$(uname | awk '{print $1}')
  59 |     case ${os_name} in
  60 |         Linux)
  61 |             ((debug)) && echo "Linux operating system detected"
  62 |             LINUX=1
  63 |             MACOSX=0
  64 |             ;;
  65 |         Darwin)
  66 |             ((debug)) && echo "Mac OS X operating system detected"
  67 |             LINUX=0
  68 |             MACOSX=1
  69 |             ;;
  70 |         *)
  71 |             echo "Unrecognized OS [${os_name}]"
  72 |             return 1
  73 |             ;;
  74 |     esac
  75 |     return 0
  76 | }
  77 | 
  78 | #taken from http://stackoverflow.com/a/4025065/1182464
  79 | vercomp () {
  80 |     if [[ $1 == $2 ]]
  81 |     then
  82 |         return 0
  83 |     fi
  84 |     local IFS=.
  85 |     local i ver1=($1) ver2=($2)
  86 |     # fill empty fields in ver1 with zeros
  87 |     for ((i=${#ver1[@]}; i<${#ver2[@]}; i++))
  88 |     do
  89 |         ver1[i]=0
  90 |     done
  91 |     for ((i=0; i<${#ver1[@]}; i++))
  92 |     do
  93 |         if [[ -z ${ver2[i]} ]]
  94 |         then
  95 |             # fill empty fields in ver2 with zeros
  96 |             ver2[i]=0
  97 |         fi
  98 |         if ((10#${ver1[i]} > 10#${ver2[i]}))
  99 |         then
 100 |             return 1
 101 |         fi
 102 |         if ((10#${ver1[i]} < 10#${ver2[i]}))
 103 |         then
 104 |             return 2
 105 |         fi
 106 |     done
 107 |     return 0
 108 | }
 109 | 
 110 | check_commands() {
 111 |     #check wget
 112 |     local MIN_WGET_VERSION=1.10
 113 |     vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION
 114 |     case $? in
 115 |         2) #lower
 116 |             wget -V
 117 |             echo
 118 |             echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2
 119 |             exit 1
 120 |     esac
 121 | }
 122 | 
 123 | usage() {
 124 |     echo "Usage: $(basename $0) [flags] [openid] [username]"
 125 |     echo "Flags is one of:"
 126 |     sed -n '/^while getopts/,/^done/  s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0
 127 |     echo
 128 |     echo "This command stores the states of the downloads in .$0.status"
 129 |     echo "For more information check the website: http://esgf.org/wiki/ESGF_wget"
 130 | }
 131 | 
 132 | #defaults
 133 | debug=0
 134 | clean_work=1
 135 | 
 136 | #parse flags
 137 | while getopts ':c:pfF:o:w:isuUndvqhHI:T' OPT; do
 138 |     case $OPT in
 139 |         H) skip_security=1 && use_http_sec=1;; #       : Authenticate with OpenID (username,) and password, without the need for a certificate.
 140 |         T) force_TLSv1=1;;              #       : Forces wget to use TLSv1. 
 141 |         c) ESG_CREDENTIALS="$OPTARG";;  #<cert> : use this certificate for authentication.
 142 |         f) force=1;;                    #       : force certificate retrieval (defaults to only once per day); for certificate-less authentication (see -H option), this flag will force login and refresh cookies.
 143 |         F) input_file="$OPTARG";;       #<file> : read input from file instead of the embedded one (use - to read from stdin)
 144 |         o) openId="$OPTARG";;           #<openid>: Provide OpenID instead of interactively asking for it.
 145 |         I) username_supplied="$OPTARG";;    #<user_id> : Explicitly set user ID.  By default, the user ID is extracted from the last component of the OpenID URL.  Use this flag to override this behaviour.                   
 146 |         w) output="$OPTARG";;           #<file> : Write embedded files into a file and exit
 147 |         i) insecure=1;;                 #       : set insecure mode, i.e. don't check server certificate
 148 |         s) skip_security=1 && use_cookies_for_http_basic_auth_start=1;;            #       : completely skip security. It will only work if the accessed data is not secured at all. -- works only if the accessed data is unsecured or a certificate exists or cookies are saved (latter applies to -H option only).
 149 |         u) update=1;;                   #       : Issue the search again and see if something has changed.
 150 |         U) update_files=1;;             #       : Update files from server overwriting local ones (detect with -u)
 151 |         n) dry_run=1;;                  #       : Don't download any files, just report.
 152 |         p) clean_work=0;;               #       : preserve data that failed checksum
 153 |         d) verbose=1;debug=1;;          #       : display debug information
 154 |         v) verbose=1;;                  #       : be more verbose
 155 |         q) quiet=1;;                    #       : be less verbose
 156 |         h) usage && exit 0;;            #       : displays this help
 157 |         \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;;
 158 |         \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;;
 159 |     esac
 160 | done
 161 | shift $(($OPTIND - 1))
 162 | 
 163 | #setup input as desired by the user
 164 | if [[ "$input_file" ]]; then
 165 |     if [[ "$input_file" == '-' ]]; then
 166 |         download_files="$(cat)" #read from STDIN
 167 |         exec 0</dev/tty #reopen STDIN as cat closed it
 168 |     else
 169 |         download_files="$(cat $input_file)" #read from file
 170 |     fi
 171 | fi
 172 | 
 173 | #if -w (output) was selected write file and finish:
 174 | if [[ "$output" ]]; then
 175 |     #check the file
 176 |     if [[ -f "$output" ]]; then
 177 |         read -p "Overwrite existing file $output? (y/N) " answ
 178 |         case $answ in y|Y|yes|Yes);; *) echo "Aborting then..."; exit 0;; esac
 179 |     fi
 180 |     echo "$download_files">$output
 181 |     exit
 182 | fi
 183 | 
 184 | 
 185 | #assure we have everything we need
 186 | check_commands
 187 | 
 188 | if ((update)); then
 189 |     echo "Checking the server for changes..."
 190 |     new_wget="$(wget "$search_url" -qO -)"
 191 |     compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'"
 192 |     if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then
 193 |         echo "No changes detected."
 194 |     else
 195 |         echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)"
 196 |         counter=0
 197 |         while [[ -f $0.old.$counter ]]; do ((counter++)); done
 198 |         mv $0 $0.old.$counter
 199 |         echo "$new_wget" > $0
 200 |     fi
 201 |     exit 0      
 202 | fi
 203 | 
 204 | 
 205 | ##############################################################################
 206 | check_java() {
 207 |     if ! type java >& /dev/null; then
 208 |         echo "Java could not be found." >&2
 209 |         return 1
 210 |     fi
 211 |     if java -version 2>&1|grep openjdk >/dev/null; then
 212 |         openjdk=1;
 213 |     else
 214 |         openjdk=0;
 215 |     fi
 216 |     jversion=($(jversion=$(java -version 2>&1 | awk '/version/ {gsub("\"","");print $3}'); echo ${jversion//./ }))
 217 |     mVer=${jversion[1]}
 218 |     if [ $openjdk -eq 1 ]; then
 219 |         mVer=${jversion[0]}
 220 |         if ((mVer<5)); then
 221 |             echo "Openjdk detected. Version 9+ is required for retrieving the certificate." >&2
 222 |             echo "Current version seems older: $(java -version | head -n1) " >&2
 223 |             return 1
 224 |         fi
 225 |     else
 226 |     
 227 |         if ((mVer<5)); then
 228 |             echo "Java version 1.5+ is required for retrieving the certificate." >&2
 229 |             echo "Current version seems older: $(java -version | head -n1) " >&2
 230 |             return 1
 231 |         fi
 232 |     fi
 233 | }
 234 | 
 235 | check_myproxy_logon() {
 236 |     if ! type myproxy-logon >& /dev/null; then
 237 | 	echo "myproxy-logon could not be found." >&2
 238 | 	return 1
 239 |     fi
 240 |     echo "myproxy-logon found" >&2
 241 | }
 242 | 
 243 | proxy_to_java() {
 244 |     local proxy_user proxy_pass proxy_server proxy_port
 245 |     eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$http_proxy)
 246 |     local JAVA_PROXY=
 247 |     [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyHost=$proxy_server"
 248 |     [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyPort=$proxy_port"
 249 |     eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$https_proxy)
 250 |     [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyHost=$proxy_server"
 251 |     [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyPort=$proxy_port"
 252 |     
 253 |     echo "$JAVA_PROXY"
 254 | }
 255 | 
 256 | # get certificates from github
 257 | get_certificates() {
 258 |     # don't if this was already done today
 259 |     [[ -z $force && "$(find $ESG_CERT_DIR -type d -mtime -1 2>/dev/null)" ]] && return 0
 260 |     echo -n "Retrieving Federation Certificates..." >&2
 261 | 
 262 |     if ! wget -O $ESG_HOME/esg-truststore.ts --no-check-certificate https://github.com/ESGF/esgf-dist/raw/master/installer/certs/esg-truststore.ts; then
 263 |         echo "Could not fetch esg-truststore";
 264 |         return 1
 265 |     fi
 266 |     
 267 |     if ! wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar -O - -q | tar x -C $ESG_HOME; then
 268 |         #certificates tarred into esg_trusted_certificates. (if it breaks, let the user know why
 269 |         wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar
 270 |         echo "Could't update certs!" >&2
 271 |         return 1
 272 |     else
 273 |         #if here everythng went fine. Replace old cert with this ones    
 274 |         [[ -d $ESG_CERT_DIR ]] && rm -r $ESG_CERT_DIR || mkdir -p $(dirname $ESG_CERT_DIR)
 275 |         mv $ESG_HOME/esg_trusted_certificates $ESG_CERT_DIR
 276 |         touch $ESG_CERT_DIR
 277 |         echo "done!" >&2
 278 |     fi
 279 | 
 280 | }
 281 | 
 282 | # Retrieve ESG credentials
 283 | unset pass
 284 | get_credentials() {
 285 |     if check_java
 286 |     then
 287 | 	use_java=1
 288 |     else	
 289 | 	use_java=0
 290 | 	echo "No suitable java for obtaining certificate - checking for myproxy-logon instead" >&2
 291 | 	check_myproxy_logon || exit 1
 292 |     fi
 293 |     #get all certificates
 294 |     get_certificates
 295 | 
 296 |     if [[ -z "$(find $MYPROXY_GETCERT -type f -mtime -1 2>/dev/null)" ]]; then
 297 |         echo -n "(Downloading $MYPROXY_GETCERT... "
 298 |         mkdir -p $(dirname $MYPROXY_GETCERT)
 299 |         if wget -q --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/getcert.jar -O $MYPROXY_GETCERT;then
 300 |             echo 'done)'
 301 |             touch $MYPROXY_GETCERT
 302 |         else
 303 |             echo 'failed)'
 304 |         fi
 305 |     fi
 306 | 
 307 |     #if the user already defined one, use it
 308 |     if [[ -z $openId ]]; then
 309 |         #try to parse the last valid value if any
 310 |         [[ -f "$MYPROXY_STATUS" ]] && openId=$(awk -F= '/^OpenID/ {gsub("\\\\", ""); print $2}' $MYPROXY_STATUS)
 311 |         if [[ -z $openId ]]; then
 312 |             #no OpenID, we need to ask the user
 313 |             echo -n "Please give your OpenID (Example: https://myserver/example/username) ? "
 314 |         else
 315 |             #Allow the user to change it if desired
 316 |             echo -n "Please give your OpenID (hit ENTER to accept default: $openId)? "
 317 |         fi
 318 |         read -e
 319 |         [[ "$REPLY" ]] && openId="$REPLY"
 320 |     else
 321 |         ((verbose)) && echo "Using user defined OpenID $openId (to change use -o <open_id>)"
 322 |     fi
 323 | 
 324 |     if grep -q ceda.ac.uk <<<$openId; then
 325 |         username=${openId##*/}
 326 |         echo -n "Please give your username if different [$username]: "
 327 |         read -e
 328 |         [[ "$REPLY" ]] && username="$REPLY"
 329 |     fi
 330 |     
 331 | 
 332 | 
 333 |     if [ $use_java -eq 1 ]
 334 |     then
 335 |         local args=
 336 |         #get password
 337 | 	[[ ! "$pass" ]] && read -sp "MyProxy Password? " pass
 338 |         [[ "$openId" ]] && args=$args" --oid $openId"
 339 |         [[ "$pass" ]] && args=$args" -P $pass"
 340 |         [[ "$username" ]] && args=$args" -l $username"
 341 |         
 342 |         echo -n $'\nRetrieving Credentials...' >&2
 343 |         if ! java $(proxy_to_java) -jar $MYPROXY_GETCERT $args --ca-directory $ESG_CERT_DIR --output $ESG_CREDENTIALS ; then        
 344 |             echo "Certificate could not be retrieved"
 345 |             exit 1
 346 |         fi
 347 |         echo "done!" >&2
 348 |     else
 349 |         args=`openid_to_myproxy_args $openId $username` || exit 1
 350 |         if ! myproxy-logon $args -b -o $ESG_CREDENTIALS
 351 | 	then
 352 |             echo "Certificate could not be retrieved"
 353 | 	    exit 1
 354 |         fi
 355 | 	cp $HOME/.globus/certificates/* $ESG_CERT_DIR/	
 356 |     fi
 357 | }
 358 | 
 359 | openid_to_myproxy_args() {
 360 |   python - <<EOF || exit 1
 361 | import sys
 362 | import re
 363 | import xml.etree.ElementTree as ET
 364 | import urllib2
 365 | openid = "$1"
 366 | username = "$2" or re.sub(".*/", "", openid)
 367 | e = ET.parse(urllib2.urlopen(openid))
 368 | servs = [el for el in e.getiterator() if el.tag.endswith("Service")]
 369 | for serv in servs:
 370 |     servinfo = dict([(re.sub(".*}", "", c.tag), c.text)
 371 |                      for c in serv.getchildren()])
 372 |     try:
 373 |         if servinfo["Type"].endswith("myproxy-service"):
 374 |             m = re.match("socket://(.*):(.*)", servinfo["URI"])
 375 |             if m:
 376 |                 host = m.group(1)
 377 |                 port = m.group(2)
 378 |                 print "-s %s -p %s -l %s" % (host, port, username)
 379 |                 break
 380 |     except KeyError:
 381 |         continue
 382 | else:
 383 |     sys.stderr.write("myproxy service could not be found\n")
 384 |     sys.exit(1)
 385 | EOF
 386 | }
 387 | 
 388 | # check the certificate validity
 389 | check_cert() {
 390 |     if [[ ! -f "$ESG_CERT" || $force ]]; then
 391 |         #not there, just get it
 392 |         get_credentials
 393 |     elif which openssl &>/dev/null; then
 394 |         #check openssl and certificate
 395 |         if ! openssl x509 -checkend $CERT_EXPIRATION_WARNING -noout -in $ESG_CERT 2>/dev/null; then
 396 |             echo "The certificate expires in less than $((CERT_EXPIRATION_WARNING / 60 / 60)) hour(s). Renewing..."
 397 |             get_credentials
 398 |         else
 399 |             #ok, certificate is fine
 400 |             return 0
 401 |         fi
 402 |     fi
 403 | }
 404 | 
 405 | #
 406 | # Detect ESG credentials
 407 | #
 408 | find_credentials() {
 409 | 
 410 |     #is X509_USER_PROXY or $HOME/.esg/credential.pem
 411 |     if [[ -f "$ESG_CREDENTIALS" ]]; then
 412 |         # file found, proceed.
 413 |         ESG_CERT="$ESG_CREDENTIALS"
 414 |         ESG_KEY="$ESG_CREDENTIALS"
 415 |     elif [[ -f "$X509_USER_CERT" && -f "$X509_USER_KEY" ]]; then
 416 |         # second try, use these certificates.
 417 |         ESG_CERT="$X509_USER_CERT"
 418 |         ESG_KEY="$X509_USER_KEY"
 419 |     else
 420 |         # If credentials are not present, just point to where they should go 
 421 |         echo "No ESG Credentials found in $ESG_CREDENTIALS" >&2
 422 |             ESG_CERT="$ESG_CREDENTIALS"
 423 |             ESG_KEY="$ESG_CREDENTIALS"
 424 |             #they will be retrieved later one
 425 |     fi
 426 | 
 427 | 
 428 |     #chek openssl and certificate
 429 |     if (which openssl &>/dev/null); then
 430 |         if ( openssl version | grep 'OpenSSL 1\.0' ); then
 431 |             echo '** WARNING: ESGF Host certificate checking might not be compatible with OpenSSL 1.0+'
 432 |         fi
 433 |         check_cert || { (($?==1)); exit 1; }
 434 |     fi
 435 |     
 436 |     if [[ $CHECK_SERVER_CERT == "Yes" ]]; then
 437 |         [[ -d "$ESG_CERT_DIR" ]] || { echo "CA certs not found. Aborting."; exit 1; }
 438 |         PKI_WGET_OPTS="--ca-directory=$ESG_CERT_DIR"
 439 |     fi
 440 | 
 441 |     #some wget version complain if there's no file present
 442 |     [[ -f $COOKIE_JAR ]] || touch $COOKIE_JAR
 443 | 
 444 |     PKI_WGET_OPTS="$PKI_WGET_OPTS --certificate=$ESG_CERT --private-key=$ESG_KEY --save-cookies=$COOKIE_JAR --load-cookies=$COOKIE_JAR --ca-certificate=$ESG_CERT"
 445 | 
 446 | }
 447 | 
 448 | check_chksum() {
 449 |     local file="$1"
 450 |     local chk_type=$2
 451 |     local chk_value=$3
 452 |     local local_chksum=Unknown
 453 | 
 454 |     case $chk_type in
 455 |         md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");;
 456 |         sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);;
 457 |         *) echo "Can't verify checksum." && return 0;;
 458 |     esac
 459 | 
 460 |     #verify
 461 |     ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2
 462 |     echo $local_chksum
 463 | }
 464 | 
 465 | #Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x)
 466 | md5sum_() {
 467 |     hash -r
 468 |     if type md5sum >& /dev/null; then
 469 |         echo $(md5sum $@)
 470 |     else
 471 |         echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p')
 472 |     fi
 473 | }
 474 | 
 475 | #Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x)
 476 | sha256sum_() {
 477 |     hash -r
 478 |     if type sha256sum >& /dev/null; then
 479 |         echo $(sha256sum $@)
 480 |     elif type shasum >& /dev/null; then
 481 |         echo $(shasum -a 256 $@)
 482 |     else
 483 |         echo $(sha2 -q -256 $@)
 484 |     fi
 485 | }
 486 | 
 487 | get_mod_time_() {
 488 |     if ((MACOSX)); then
 489 |         #on a mac modtime is stat -f %m <file>
 490 |         echo "$(stat -f %m $@)"
 491 |     else
 492 |         #on linux (cygwin) modtime is stat -c %Y <file>
 493 |         echo "$(stat -c %Y $@)"
 494 |     fi
 495 |     return 0;
 496 | }
 497 | 
 498 | remove_from_cache() {
 499 |     local entry="$1"
 500 |     local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")"
 501 |     echo "$tmp_file" > "$CACHE_FILE"
 502 |     unset cached
 503 | }
 504 | 
 505 | #Download data from node using cookies and not certificates.
 506 | download_http_sec()
 507 | {
 508 |   #The data to be downloaded.
 509 |   data=" $url"
 510 |   filename="$file"  
 511 | 
 512 |   #Wget args.
 513 |   if ((insecure)) 
 514 |   then
 515 |    wget_args=" --no-check-certificate --cookies=on  --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " 
 516 |   else
 517 |    wget_args=" --ca-directory=$WGET_TRUSTED_CERTIFICATES --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt "  
 518 |   fi 
 519 | 
 520 |   if ((use_cookies_for_http_basic_auth_start)) || ((use_cookies_for_http_basic_auth)) 
 521 |   then
 522 |    wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt"    
 523 |   fi
 524 |   
 525 |   if((force_TLSv1))
 526 |   then
 527 |    wget_args=" $wget_args"" --secure-protocol=TLSv1 "
 528 |   fi
 529 |   
 530 |   
 531 |   if [[ ! -z "$ESGF_WGET_OPTS" ]]
 532 |   then
 533 |     wget_args="$wget_args $ESGF_WGET_OPTS"
 534 |   fi  
 535 |   
 536 | 
 537 |   #use cookies for the next downloads
 538 |   use_cookies_for_http_basic_auth=1;
 539 |    
 540 |   #Debug message.
 541 |   if  ((debug))
 542 |   then
 543 |    echo -e "\nExecuting:\n"
 544 |    echo -e "wget $wget_args $data\n"
 545 |   fi
 546 | 
 547 | 
 548 |   #Try to download the data. 
 549 |   command="wget $wget_args -O $filename $data"
 550 |   http_resp=$(eval $command  2>&1) 
 551 |   cmd_exit_status="$?"
 552 |   
 553 |   if ((debug))
 554 |   then
 555 |    echo -e "\nHTTP response:\n $http_resp\n"
 556 |   fi
 557 |       
 558 |   #Extract orp service from url ?
 559 |   #Evaluate response.
 560 |   #redirects=$(echo "$http_resp" | egrep -c ' 302 ')
 561 |   #(( "$redirects" == 1 )) && 
 562 |   if  echo "$http_resp" | grep -q "/esg-orp/"      
 563 |   then
 564 |    urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3)
 565 |    orp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2)
 566 | 
 567 | 
 568 |    #Use cookies for transaction with orp.
 569 |    wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt"    
 570 |    
 571 |    #Download data using either http basic auth or http login form.
 572 |    if [[ "$openid_c" == */openid/  || "$openid_c" == */openid ]]
 573 |    then
 574 |     download_http_sec_open_id
 575 |    else
 576 |     download_http_sec_decide_service
 577 |    fi
 578 |   else  
 579 |    if    echo "$http_resp" | grep -q "401 Unauthorized"  \
 580 |       || echo "$http_resp" | grep -q "403: Forbidden"  \
 581 |       || echo "$http_resp" | grep -q "Connection timed out."  \
 582 |       || echo "$http_resp" | grep -q "no-check-certificate"  \
 583 |       || (( $cmd_exit_status != 0 ))      
 584 |    then 
 585 |     echo "ERROR : http request to OpenID Relying Party service failed."
 586 |     failed=1
 587 |    fi
 588 |   fi
 589 | }
 590 | 
 591 | 
 592 | #Function that decides which implementaion of idp to use.
 593 | download_http_sec_decide_service()
 594 | {
 595 |   #find claimed id
 596 | 
 597 |   pos=$(echo "$openid_c" | egrep -o '/' | wc -l)
 598 |   username_c=$(echo "$openid_c"  | cut -d'/' -f "$(($pos + 1))")
 599 |   esgf_uri=$(echo "$openid_c" | egrep -o '/esgf-idp/openid/')
 600 | 
 601 |   host=$(echo "$openid_c"  | cut -d'/' -f 3)
 602 |   #test ceda first.
 603 | 
 604 |   if [[ -z "$esgf_uri" ]]
 605 |   then
 606 |    openid_c_tmp="https://""$host""/openid/"
 607 |   else
 608 |    openid_c_tmp="https://""$host""/esgf-idp/openid/" 
 609 |   fi
 610 | 
 611 |   command="wget "$openid_c_tmp" --no-check-certificate ${force_TLSv1:+--secure-protocol=TLSv1} -O-"
 612 |         
 613 |   if [[ ! -z "$ESGF_WGET_OPTS" ]]
 614 |   then
 615 |    command="$command $ESGF_WGET_OPTS"
 616 |   fi  
 617 |           
 618 |   #Debug message.
 619 |   if  ((debug))
 620 |   then
 621 |    echo -e "\nExecuting:\n"
 622 |    echo -e "$command\n"
 623 |   fi
 624 |             
 625 | 
 626 |   #Execution of command.
 627 |   http_resp=$(eval $command  2>&1)
 628 |   cmd_exit_status="$?"
 629 |   
 630 |   
 631 |   if ((debug))
 632 |   then
 633 |    echo -e "\nHTTP response:\n $http_resp\n"
 634 |   fi 
 635 |   
 636 | 
 637 |   if    echo "$http_resp" | grep -q "[application/xrds+xml]"  \
 638 |      && echo "$http_resp" | grep -q "200 OK"  \
 639 |      && (( cmd_exit_status == 0 ))       
 640 |   then
 641 |    openid_c=$openid_c_tmp
 642 |    download_http_sec_open_id
 643 |   else
 644 |    if [[ -z "$esgf_uri" ]]
 645 |    then
 646 |     echo "ERROR : HTTP request to OpenID Relying Party service failed."
 647 |     failed=1
 648 |    else
 649 |    download_http_sec_cl_id
 650 |    fi
 651 |   fi
 652 | }
 653 | 
 654 | 
 655 | download_http_sec_retry()
 656 | {
 657 |   echo -e "\nRetrying....\n"
 658 |   #Retry in case that last redirect did not work, this happens with older version of wget.
 659 |   command="wget $wget_args $data"
 660 |       
 661 |   #Debug message.
 662 |   if  ((debug))
 663 |   then
 664 |    echo -e "Executing:\n"
 665 |    echo -e "$command\n"
 666 |   fi   
 667 |    
 668 |   http_resp=$(eval $command  2>&1) 
 669 |   cmd_exit_status="$?"
 670 | 
 671 |   if ((debug))
 672 |   then
 673 |    echo -e "\nHTTP response:\n $http_resp\n"
 674 |   fi
 675 |    
 676 |   if    echo "$http_resp" | grep -q "401 Unauthorized"  \
 677 |      || echo "$http_resp" | grep -q "403: Forbidden"  \
 678 |      || echo "$http_resp" | grep -q "Connection timed out."  \
 679 |      || echo "$http_resp" | grep -q "no-check-certificate"  \
 680 |      || (( $cmd_exit_status != 0 ))      
 681 |   then 
 682 |    echo -e "\nERROR : Retry failed.\n"
 683 |    #rm "$filename"
 684 |    failed=1
 685 |   fi #if retry failed.
 686 | }
 687 | 
 688 | #Function for downloading data using the claimed id.
 689 | download_http_sec_cl_id()
 690 | {
 691 |   #Http request for sending openid to the orp service.
 692 |   command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\"  $wget_args -O- https://$orp_service/esg-orp/j_spring_openid_security_check.htm "
 693 | 
 694 |   #Debug message.
 695 |   if  ((debug))
 696 |   then
 697 |    echo -e "Executing:\n"
 698 |    echo -e "wget $command\n"
 699 |   fi 
 700 |   
 701 |   
 702 |   #Execution of command.
 703 |   http_resp=$(eval $command  2>&1)
 704 |   cmd_exit_status="$?"
 705 | 
 706 |   
 707 |   if ((debug))
 708 |   then
 709 |    echo -e "\nHTTP response:\n $http_resp\n"
 710 |   fi 
 711 |     
 712 |   
 713 |   #Extract orp service from openid ?
 714 |   #Evaluate response.If redirected to idp service send the credentials.
 715 |   #redirects=$(echo "$http_resp" | egrep -c ' 302 ')
 716 |   #(( redirects == 2  )) && 
 717 |   if  echo "$http_resp" | grep -q "login.htm"  && (( cmd_exit_status == 0 ))   
 718 |   then 
 719 |   
 720 |    urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3)
 721 |    idp_service=$(echo "$urls"  | tr '\n' ' ' | cut -d' ' -f 2) 
 722 |       
 723 |    command="wget --post-data  password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$idp_service/esgf-idp/idp/login.htm"
 724 |    
 725 | 
 726 |    #Debug message.
 727 |    if  ((debug))
 728 |    then
 729 |     echo -e "Executing:\n"
 730 |     echo -e "wget $command\n"
 731 |    fi 
 732 | 
 733 |    #Execution of command.
 734 |    http_resp=$(eval $command  2>&1)
 735 |    cmd_exit_status="$?"
 736 |       
 737 |    if ((debug))
 738 |    then
 739 |     echo -e "\nHTTP response:\n $http_resp\n"
 740 |    fi 
 741 |         
 742 |    #Evaluate response. 
 743 |    #redirects=$(echo "$http_resp" | egrep -c ' 302 ')
 744 |    #(( "$redirects" != 5 )) \ 
 745 |    if    echo "$http_resp" | grep -q "text/html"  \
 746 |       || echo "$http_resp" | grep -q "403: Forbidden"  \
 747 |       || (( cmd_exit_status != 0 ))        
 748 |    then 
 749 |     rm "$filename"
 750 |     download_http_sec_retry
 751 |    fi
 752 |  
 753 |   else
 754 |    echo "ERROR : HTTP request to OpenID Provider service failed."
 755 |    failed=1
 756 |   fi #if redirected to idp.  
 757 | }
 758 | 
 759 | 
 760 | 
 761 | download_http_sec_open_id()
 762 | {
 763 |   #Http request for sending openid to the orp web service.
 764 |   command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" --header=\"esgf-idea-agent-type:basic_auth\" --http-user=\"$username_c\" --http-password=\"$password_c\"  $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$orp_service/esg-orp/j_spring_openid_security_check.htm "
 765 | 
 766 | 
 767 |   #Debug message.
 768 |   if  ((debug))
 769 |   then
 770 |    echo -e "Executing:\n"
 771 |    echo -e "$command\n"
 772 |   fi 
 773 | 
 774 |   #Execution of command.
 775 |   http_resp=$(eval $command  2>&1)
 776 |   cmd_exit_status="$?"
 777 |   
 778 |   
 779 |   if ((debug))
 780 |   then
 781 |    echo -e "\nHTTP response:\n $http_resp\n"
 782 |   fi 
 783 | 
 784 |   #Evaluate response.
 785 |   #redirects=$(echo "$http_resp" | egrep -c ' 302 ')
 786 |   #(( "$redirects" != 7 )) ||
 787 |   if   echo "$http_resp" | grep -q "text/html"  ||  (( $cmd_exit_status != 0 ))   
 788 |   then
 789 |    rm "$filename"
 790 |    download_http_sec_retry     
 791 |   fi #if error during http basic authentication. 
 792 |   
 793 | }
 794 | 
 795 | 
 796 | download() {
 797 |     wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v} -c ${force_TLSv1:+--secure-protocol=TLSv1} $PKI_WGET_OPTS"
 798 |     
 799 |     while read line
 800 |     do
 801 |         # read csv here document into proper variables
 802 |         eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) )
 803 | 
 804 |         #Process the file
 805 |         echo -n "$file ..."
 806 | 
 807 |         #get the cached entry if any.
 808 |         cached="$(grep -e "^$file" "$CACHE_FILE")"
 809 |         
 810 |         #if we have the cache entry but no file, clean it.
 811 |         if [[ ! -f $file && "$cached" ]]; then
 812 |             #the file was removed, clean the cache
 813 |             remove_from_cache "$file"
 814 |             unset cached
 815 |         fi
 816 |         
 817 |         #check it wasn't modified
 818 |         if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then
 819 |                     if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then
 820 |                 echo "Already downloaded and verified"
 821 |                 continue
 822 |             elif ((update_files)); then
 823 |                 #user want's to overwrite newer files
 824 |                 rm $file
 825 |                 remove_from_cache "$file"
 826 |                 unset cached
 827 |             else
 828 |                 #file on server is different from what we have. 
 829 |                 echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite"
 830 |                 continue
 831 |             fi
 832 |         fi
 833 |         unset chksum_err_value chksum_err_count
 834 |         
 835 |         while : ; do
 836 |             # (if we had the file size, we could check before trying to complete)
 837 |             echo "Downloading"
 838 |             [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")"
 839 |             if ((dry_run)); then
 840 |                 #all important info was already displayed, if in dry_run mode just abort
 841 |                 #No status will be stored
 842 |                 break
 843 |             else
 844 |                 if ((use_http_sec))
 845 |                 then
 846 |                  download_http_sec
 847 |                  if ((failed))
 848 |                  then
 849 |                   break
 850 |                  fi
 851 |                 else
 852 |                  $wget -O "$file" $url || { failed=1; break; }  
 853 |                 fi                
 854 |             fi
 855 | 
 856 |             #check if file is there
 857 |             if [[ -f $file ]]; then
 858 |                 ((debug)) && echo file found
 859 |                 if [[ ! "$chksum" ]]; then
 860 |                     echo "Checksum not provided, can't verify file integrity"
 861 |                     break
 862 |                 fi
 863 |                 result_chksum=$(check_chksum "$file" $chksum_type $chksum)
 864 |                 if [[ "$result_chksum" != "$chksum" ]]; then
 865 |                     echo "  $chksum_type failed!"
 866 |                     if ((clean_work)); then
 867 |                         if !((chksum_err_count)); then
 868 |                                 chksum_err_value=$result_chksum
 869 |                                 chksum_err_count=2
 870 |                             elif ((checksum_err_count--)); then
 871 |                                 if [[ "$result_chksum" != "$chksum_err_value" ]]; then
 872 |                                     #this is a real transmission problem
 873 |                                     chksum_err_value=$result_chksum
 874 |                                     chksum_err_count=2
 875 |                                 fi
 876 |                             else
 877 |                                 #ok if here we keep getting the same "different" checksum
 878 |                                 echo "The file returns always a different checksum!"
 879 |                                 echo "Contact the data owner to verify what is happening."
 880 |                                 echo
 881 |                                 sleep 1
 882 |                                 break
 883 |                             fi
 884 |                         
 885 |                             rm $file
 886 |                             #try again
 887 |                             echo -n "  re-trying..."
 888 |                             continue
 889 |                     else
 890 |                             echo "  don't use -p or remove manually."
 891 |                     fi
 892 |                 else
 893 |                     echo "  $chksum_type ok. done!"
 894 |                     echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE
 895 |                 fi
 896 |             fi
 897 |             #done!
 898 |             break
 899 |         done
 900 |         
 901 |         if ((failed)); then
 902 |             echo "download failed"
 903 |             # most common failure is certificate expiration, so check this
 904 |             #if we have the pasword we can retrigger download
 905 |             ((!skip_security)) && [[ "$pass" ]] && check_cert
 906 |             unset failed
 907 |         fi
 908 |         
 909 | done <<<"$download_files"
 910 | 
 911 | }
 912 | 
 913 | dedup_cache_() {
 914 |     local file=${1:-${CACHE_FILE}}
 915 |     ((debug)) && echo "dedup'ing cache ${file} ..."
 916 |     local tmp=$(LC_ALL='C' sort  -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2)
 917 |     ((DEBUG)) && echo "$tmp"
 918 |     echo "$tmp" > $file
 919 |     ((debug)) && echo "(cache dedup'ed)"
 920 | }
 921 | 
 922 | http_basic_auth_func_info_message()
 923 | {
 924 |   echo  "********************************************************************************"
 925 |   echo  "*                                                                              *"
 926 |   echo  "* Note that new functionality to allow authentication without the need for     *"
 927 |   echo  "* certificates is available with this version of the wget script.  To enable,  *"
 928 |   echo  "* use the \"-H\" option and enter your OpenID and password when prompted:        *"
 929 |   echo  "*                                                                              *"
 930 |   echo  "* $ "$(basename "$0")" -H [options...]                                     *"
 931 |   echo  "*                                                                              *"
 932 |   echo  "* For a full description of the available options use the help option:         *"
 933 |   echo  "*                                                                              *"
 934 |   echo  "* $ "$(basename "$0")" -h                                                  *"
 935 |   echo  "*                                                                              *"
 936 |   echo  "********************************************************************************"
 937 | }
 938 | 
 939 | #
 940 | # MAIN
 941 | #
 942 | 
 943 | if ((!use_http_sec))
 944 | then 
 945 |  http_basic_auth_func_info_message
 946 | fi
 947 | 
 948 | echo "Running $(basename $0) version: $version"
 949 | ((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly"
 950 | echo "Use $(basename $0) -h for help."$'\n'
 951 | 
 952 | ((debug)) && cat<<EOF
 953 | ** Debug info **
 954 | ESG_HOME=$ESG_HOME
 955 | ESG_CREDENTIALS=$ESG_CREDENTIALS
 956 | ESG_CERT_DIR=$ESG_CERT_DIR
 957 | ** -- ** -- ** -- ** --
 958 | 
 959 | EOF
 960 | 
 961 | 
 962 | cat <<'EOF-MESSAGE'
 963 | Script created for 18 file(s)
 964 | (The count won't match if you manually edit this file!)
 965 | 
 966 | 
 967 | 
 968 | EOF-MESSAGE
 969 | sleep 1
 970 | 
 971 | check_os
 972 | ((!skip_security)) && find_credentials
 973 | 
 974 | if ((use_http_sec))
 975 | then 
 976 |      
 977 |  if (( ! insecure))
 978 |  then 
 979 |   get_certificates
 980 |  fi
 981 | 
 982 |  #Cookies folder.
 983 |  COOKIES_FOLDER="$ESG_HOME/wget_cookies"
 984 |  
 985 |  if (( force ))
 986 |  then
 987 |   if [ -d $COOKIES_FOLDER ] 
 988 |   then
 989 |    rm -rf $COOKIES_FOLDER
 990 |   fi
 991 |  fi
 992 | 
 993 |  #Create cookies folder. 
 994 |  if [[ ! -d $COOKIES_FOLDER ]] 
 995 |  then
 996 |   mkdir $COOKIES_FOLDER
 997 |  fi
 998 |  
 999 |  if((! use_cookies_for_http_basic_auth_start))
1000 |  then
1001 | 
1002 |   #Read openid.
1003 |   if [[ ! -z "$openId" ]]
1004 |   then
1005 |    openid_c="$openId"
1006 |   elif ( (("$#" > 1)) || (("$#" == 1)) ) 
1007 |   then
1008 |    openid_c=$1
1009 |   else
1010 |    read -p    "Enter your openid : " openid_c
1011 |   fi
1012 |   
1013 |   
1014 |   #Read username.
1015 |   if [[ ! -z "$username_supplied" ]]
1016 |   then
1017 |    username_c="$username_supplied"
1018 |   elif (("$#" == 2))
1019 |   then
1020 |    username_c=$2
1021 |   elif [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]]
1022 |   then
1023 |    read -p    "Enter username : " username_c
1024 |   fi
1025 |   
1026 |   #Read password.
1027 |   read -s -p "Enter password : " password_c
1028 |   echo -e "\n"
1029 | 
1030 |  fi #use cookies
1031 | 
1032 | fi #use_http_sec 
1033 | 
1034 | 
1035 | #do we have old results? Create the file if not
1036 | [ ! -f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE
1037 | 
1038 | #clean the force parameter if here (at htis point we already have the certificate)
1039 | unset force
1040 | 
1041 | download
1042 | 
1043 | dedup_cache_
1044 | 
1045 | 
1046 | echo "done"
1047 | 


--------------------------------------------------------------------------------
/step_01_extract_deltas/CFday_wget_scripts/wget_CFday_ssp585_va.sh:
--------------------------------------------------------------------------------
   1 | #!/bin/bash
   2 | ##############################################################################
   3 | # ESG Federation download script
   4 | #
   5 | # Template version: 1.2
   6 | # Generated by esgf-data.dkrz.de - 2022/02/21 20:59:38
   7 | # Search URL: https://esgf-data.dkrz.de/esg-search/wget/?distrib=false&dataset_id=CMIP6.ScenarioMIP.DKRZ.MPI-ESM1-2-HR.ssp585.r1i1p1f1.CFday.va.gn.v20190710|esgf3.dkrz.de
   8 | #
   9 | ###############################################################################
  10 | # first be sure it's bash... anything out of bash or sh will break
  11 | # and the test will assure we are not using sh instead of bash
  12 | if [ $BASH ] && [ `basename $BASH` != bash ]; then
  13 |     echo "######## This is a bash script! ##############" 
  14 |     echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh."
  15 |     echo "Trying to recover automatically..."
  16 |     sleep 1
  17 |     /bin/bash $0 $@
  18 |     exit $?
  19 | fi
  20 | 
  21 | version=1.3.2
  22 | CACHE_FILE=.$(basename $0).status
  23 | openId=
  24 | search_url='https://esgf-data.dkrz.de/esg-search/wget/?distrib=false&dataset_id=CMIP6.ScenarioMIP.DKRZ.MPI-ESM1-2-HR.ssp585.r1i1p1f1.CFday.va.gn.v20190710|esgf3.dkrz.de'
  25 | 
  26 | #These are the embedded files to be downloaded
  27 | download_files="$(cat <<EOF--dataset.file.url.chksum_type.chksum
  28 | 'va_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20700101-20741231.nc' 'http://esgf3.dkrz.de/thredds/fileServer/cmip6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp585/r1i1p1f1/CFday/va/gn/v20190710/va_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20700101-20741231.nc' 'SHA256' '60d0153677d5eafb3d5ad5dcc614128bb31172e6b69bd591bad3f5170ba2ff0f'
  29 | 'va_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20750101-20791231.nc' 'http://esgf3.dkrz.de/thredds/fileServer/cmip6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp585/r1i1p1f1/CFday/va/gn/v20190710/va_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20750101-20791231.nc' 'SHA256' '423c45ff56566ced67233cc54d6cecc63741e15d16192216b2b5ef73e248bc25'
  30 | 'va_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20800101-20841231.nc' 'http://esgf3.dkrz.de/thredds/fileServer/cmip6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp585/r1i1p1f1/CFday/va/gn/v20190710/va_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20800101-20841231.nc' 'SHA256' '6abd9f0fe1b427c7743e6798e939554e752e7294472ba9598a310bc4a7d2c8bd'
  31 | 'va_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20850101-20891231.nc' 'http://esgf3.dkrz.de/thredds/fileServer/cmip6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp585/r1i1p1f1/CFday/va/gn/v20190710/va_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20850101-20891231.nc' 'SHA256' '1368943480130cddfa967591417688ca05ca2f437634e121cc67fc84c49afb8f'
  32 | 'va_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20900101-20941231.nc' 'http://esgf3.dkrz.de/thredds/fileServer/cmip6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp585/r1i1p1f1/CFday/va/gn/v20190710/va_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20900101-20941231.nc' 'SHA256' '1570989f14f14801aa3fcc2d2bd05374ec7cfaa913e2a1cc7ec335c168671725'
  33 | 'va_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20950101-20991231.nc' 'http://esgf3.dkrz.de/thredds/fileServer/cmip6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp585/r1i1p1f1/CFday/va/gn/v20190710/va_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20950101-20991231.nc' 'SHA256' '1ea94f12d5c75eb12e453ecea5e7ee724f816af334bbd4febec818203df75dfb'
  34 | EOF--dataset.file.url.chksum_type.chksum
  35 | )"
  36 | 
  37 | # ESG_HOME should point to the directory containing ESG credentials.
  38 | #   Default is $HOME/.esg
  39 | ESG_HOME=${ESG_HOME:-$HOME/.esg}
  40 | [[ -d $ESG_HOME ]] || mkdir -p $ESG_HOME
  41 | 
  42 | ESG_CREDENTIALS=${X509_USER_PROXY:-$ESG_HOME/credentials.pem}
  43 | ESG_CERT_DIR=${X509_CERT_DIR:-$ESG_HOME/certificates}
  44 | MYPROXY_STATUS=$HOME/.MyProxyLogon
  45 | COOKIE_JAR=$ESG_HOME/cookies
  46 | MYPROXY_GETCERT=$ESG_HOME/getcert.jar
  47 | CERT_EXPIRATION_WARNING=$((60 * 60 * 8))   #Eight hour (in seconds)
  48 | 
  49 | WGET_TRUSTED_CERTIFICATES=$ESG_HOME/certificates
  50 | 
  51 | 
  52 | # Configure checking of server SSL certificates.
  53 | #   Disabling server certificate checking can resolve problems with myproxy
  54 | #   servers being out of sync with datanodes.
  55 | CHECK_SERVER_CERT=${CHECK_SERVER_CERT:-Yes}
  56 | 
  57 | check_os() {
  58 |     local os_name=$(uname | awk '{print $1}')
  59 |     case ${os_name} in
  60 |         Linux)
  61 |             ((debug)) && echo "Linux operating system detected"
  62 |             LINUX=1
  63 |             MACOSX=0
  64 |             ;;
  65 |         Darwin)
  66 |             ((debug)) && echo "Mac OS X operating system detected"
  67 |             LINUX=0
  68 |             MACOSX=1
  69 |             ;;
  70 |         *)
  71 |             echo "Unrecognized OS [${os_name}]"
  72 |             return 1
  73 |             ;;
  74 |     esac
  75 |     return 0
  76 | }
  77 | 
  78 | #taken from http://stackoverflow.com/a/4025065/1182464
  79 | vercomp () {
  80 |     if [[ $1 == $2 ]]
  81 |     then
  82 |         return 0
  83 |     fi
  84 |     local IFS=.
  85 |     local i ver1=($1) ver2=($2)
  86 |     # fill empty fields in ver1 with zeros
  87 |     for ((i=${#ver1[@]}; i<${#ver2[@]}; i++))
  88 |     do
  89 |         ver1[i]=0
  90 |     done
  91 |     for ((i=0; i<${#ver1[@]}; i++))
  92 |     do
  93 |         if [[ -z ${ver2[i]} ]]
  94 |         then
  95 |             # fill empty fields in ver2 with zeros
  96 |             ver2[i]=0
  97 |         fi
  98 |         if ((10#${ver1[i]} > 10#${ver2[i]}))
  99 |         then
 100 |             return 1
 101 |         fi
 102 |         if ((10#${ver1[i]} < 10#${ver2[i]}))
 103 |         then
 104 |             return 2
 105 |         fi
 106 |     done
 107 |     return 0
 108 | }
 109 | 
 110 | check_commands() {
 111 |     #check wget
 112 |     local MIN_WGET_VERSION=1.10
 113 |     vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION
 114 |     case $? in
 115 |         2) #lower
 116 |             wget -V
 117 |             echo
 118 |             echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2
 119 |             exit 1
 120 |     esac
 121 | }
 122 | 
 123 | usage() {
 124 |     echo "Usage: $(basename $0) [flags] [openid] [username]"
 125 |     echo "Flags is one of:"
 126 |     sed -n '/^while getopts/,/^done/  s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0
 127 |     echo
 128 |     echo "This command stores the states of the downloads in .$0.status"
 129 |     echo "For more information check the website: http://esgf.org/wiki/ESGF_wget"
 130 | }
 131 | 
 132 | #defaults
 133 | debug=0
 134 | clean_work=1
 135 | 
 136 | #parse flags
 137 | while getopts ':c:pfF:o:w:isuUndvqhHI:T' OPT; do
 138 |     case $OPT in
 139 |         H) skip_security=1 && use_http_sec=1;; #       : Authenticate with OpenID (username,) and password, without the need for a certificate.
 140 |         T) force_TLSv1=1;;              #       : Forces wget to use TLSv1. 
 141 |         c) ESG_CREDENTIALS="$OPTARG";;  #<cert> : use this certificate for authentication.
 142 |         f) force=1;;                    #       : force certificate retrieval (defaults to only once per day); for certificate-less authentication (see -H option), this flag will force login and refresh cookies.
 143 |         F) input_file="$OPTARG";;       #<file> : read input from file instead of the embedded one (use - to read from stdin)
 144 |         o) openId="$OPTARG";;           #<openid>: Provide OpenID instead of interactively asking for it.
 145 |         I) username_supplied="$OPTARG";;    #<user_id> : Explicitly set user ID.  By default, the user ID is extracted from the last component of the OpenID URL.  Use this flag to override this behaviour.                   
 146 |         w) output="$OPTARG";;           #<file> : Write embedded files into a file and exit
 147 |         i) insecure=1;;                 #       : set insecure mode, i.e. don't check server certificate
 148 |         s) skip_security=1 && use_cookies_for_http_basic_auth_start=1;;            #       : completely skip security. It will only work if the accessed data is not secured at all. -- works only if the accessed data is unsecured or a certificate exists or cookies are saved (latter applies to -H option only).
 149 |         u) update=1;;                   #       : Issue the search again and see if something has changed.
 150 |         U) update_files=1;;             #       : Update files from server overwriting local ones (detect with -u)
 151 |         n) dry_run=1;;                  #       : Don't download any files, just report.
 152 |         p) clean_work=0;;               #       : preserve data that failed checksum
 153 |         d) verbose=1;debug=1;;          #       : display debug information
 154 |         v) verbose=1;;                  #       : be more verbose
 155 |         q) quiet=1;;                    #       : be less verbose
 156 |         h) usage && exit 0;;            #       : displays this help
 157 |         \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;;
 158 |         \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;;
 159 |     esac
 160 | done
 161 | shift $(($OPTIND - 1))
 162 | 
 163 | #setup input as desired by the user
 164 | if [[ "$input_file" ]]; then
 165 |     if [[ "$input_file" == '-' ]]; then
 166 |         download_files="$(cat)" #read from STDIN
 167 |         exec 0</dev/tty #reopen STDIN as cat closed it
 168 |     else
 169 |         download_files="$(cat $input_file)" #read from file
 170 |     fi
 171 | fi
 172 | 
 173 | #if -w (output) was selected write file and finish:
 174 | if [[ "$output" ]]; then
 175 |     #check the file
 176 |     if [[ -f "$output" ]]; then
 177 |         read -p "Overwrite existing file $output? (y/N) " answ
 178 |         case $answ in y|Y|yes|Yes);; *) echo "Aborting then..."; exit 0;; esac
 179 |     fi
 180 |     echo "$download_files">$output
 181 |     exit
 182 | fi
 183 | 
 184 | 
 185 | #assure we have everything we need
 186 | check_commands
 187 | 
 188 | if ((update)); then
 189 |     echo "Checking the server for changes..."
 190 |     new_wget="$(wget "$search_url" -qO -)"
 191 |     compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'"
 192 |     if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then
 193 |         echo "No changes detected."
 194 |     else
 195 |         echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)"
 196 |         counter=0
 197 |         while [[ -f $0.old.$counter ]]; do ((counter++)); done
 198 |         mv $0 $0.old.$counter
 199 |         echo "$new_wget" > $0
 200 |     fi
 201 |     exit 0      
 202 | fi
 203 | 
 204 | 
 205 | ##############################################################################
 206 | check_java() {
 207 |     if ! type java >& /dev/null; then
 208 |         echo "Java could not be found." >&2
 209 |         return 1
 210 |     fi
 211 |     if java -version 2>&1|grep openjdk >/dev/null; then
 212 |         openjdk=1;
 213 |     else
 214 |         openjdk=0;
 215 |     fi
 216 |     jversion=($(jversion=$(java -version 2>&1 | awk '/version/ {gsub("\"","");print $3}'); echo ${jversion//./ }))
 217 |     mVer=${jversion[1]}
 218 |     if [ $openjdk -eq 1 ]; then
 219 |         mVer=${jversion[0]}
 220 |         if ((mVer<5)); then
 221 |             echo "Openjdk detected. Version 9+ is required for retrieving the certificate." >&2
 222 |             echo "Current version seems older: $(java -version | head -n1) " >&2
 223 |             return 1
 224 |         fi
 225 |     else
 226 |     
 227 |         if ((mVer<5)); then
 228 |             echo "Java version 1.5+ is required for retrieving the certificate." >&2
 229 |             echo "Current version seems older: $(java -version | head -n1) " >&2
 230 |             return 1
 231 |         fi
 232 |     fi
 233 | }
 234 | 
 235 | check_myproxy_logon() {
 236 |     if ! type myproxy-logon >& /dev/null; then
 237 | 	echo "myproxy-logon could not be found." >&2
 238 | 	return 1
 239 |     fi
 240 |     echo "myproxy-logon found" >&2
 241 | }
 242 | 
 243 | proxy_to_java() {
 244 |     local proxy_user proxy_pass proxy_server proxy_port
 245 |     eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$http_proxy)
 246 |     local JAVA_PROXY=
 247 |     [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyHost=$proxy_server"
 248 |     [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyPort=$proxy_port"
 249 |     eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$https_proxy)
 250 |     [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyHost=$proxy_server"
 251 |     [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyPort=$proxy_port"
 252 |     
 253 |     echo "$JAVA_PROXY"
 254 | }
 255 | 
 256 | # get certificates from github
 257 | get_certificates() {
 258 |     # don't if this was already done today
 259 |     [[ -z $force && "$(find $ESG_CERT_DIR -type d -mtime -1 2>/dev/null)" ]] && return 0
 260 |     echo -n "Retrieving Federation Certificates..." >&2
 261 | 
 262 |     if ! wget -O $ESG_HOME/esg-truststore.ts --no-check-certificate https://github.com/ESGF/esgf-dist/raw/master/installer/certs/esg-truststore.ts; then
 263 |         echo "Could not fetch esg-truststore";
 264 |         return 1
 265 |     fi
 266 |     
 267 |     if ! wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar -O - -q | tar x -C $ESG_HOME; then
 268 |         #certificates tarred into esg_trusted_certificates. (if it breaks, let the user know why
 269 |         wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar
 270 |         echo "Could't update certs!" >&2
 271 |         return 1
 272 |     else
 273 |         #if here everythng went fine. Replace old cert with this ones    
 274 |         [[ -d $ESG_CERT_DIR ]] && rm -r $ESG_CERT_DIR || mkdir -p $(dirname $ESG_CERT_DIR)
 275 |         mv $ESG_HOME/esg_trusted_certificates $ESG_CERT_DIR
 276 |         touch $ESG_CERT_DIR
 277 |         echo "done!" >&2
 278 |     fi
 279 | 
 280 | }
 281 | 
 282 | # Retrieve ESG credentials
 283 | unset pass
 284 | get_credentials() {
 285 |     if check_java
 286 |     then
 287 | 	use_java=1
 288 |     else	
 289 | 	use_java=0
 290 | 	echo "No suitable java for obtaining certificate - checking for myproxy-logon instead" >&2
 291 | 	check_myproxy_logon || exit 1
 292 |     fi
 293 |     #get all certificates
 294 |     get_certificates
 295 | 
 296 |     if [[ -z "$(find $MYPROXY_GETCERT -type f -mtime -1 2>/dev/null)" ]]; then
 297 |         echo -n "(Downloading $MYPROXY_GETCERT... "
 298 |         mkdir -p $(dirname $MYPROXY_GETCERT)
 299 |         if wget -q --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/getcert.jar -O $MYPROXY_GETCERT;then
 300 |             echo 'done)'
 301 |             touch $MYPROXY_GETCERT
 302 |         else
 303 |             echo 'failed)'
 304 |         fi
 305 |     fi
 306 | 
 307 |     #if the user already defined one, use it
 308 |     if [[ -z $openId ]]; then
 309 |         #try to parse the last valid value if any
 310 |         [[ -f "$MYPROXY_STATUS" ]] && openId=$(awk -F= '/^OpenID/ {gsub("\\\\", ""); print $2}' $MYPROXY_STATUS)
 311 |         if [[ -z $openId ]]; then
 312 |             #no OpenID, we need to ask the user
 313 |             echo -n "Please give your OpenID (Example: https://myserver/example/username) ? "
 314 |         else
 315 |             #Allow the user to change it if desired
 316 |             echo -n "Please give your OpenID (hit ENTER to accept default: $openId)? "
 317 |         fi
 318 |         read -e
 319 |         [[ "$REPLY" ]] && openId="$REPLY"
 320 |     else
 321 |         ((verbose)) && echo "Using user defined OpenID $openId (to change use -o <open_id>)"
 322 |     fi
 323 | 
 324 |     if grep -q ceda.ac.uk <<<$openId; then
 325 |         username=${openId##*/}
 326 |         echo -n "Please give your username if different [$username]: "
 327 |         read -e
 328 |         [[ "$REPLY" ]] && username="$REPLY"
 329 |     fi
 330 |     
 331 | 
 332 | 
 333 |     if [ $use_java -eq 1 ]
 334 |     then
 335 |         local args=
 336 |         #get password
 337 | 	[[ ! "$pass" ]] && read -sp "MyProxy Password? " pass
 338 |         [[ "$openId" ]] && args=$args" --oid $openId"
 339 |         [[ "$pass" ]] && args=$args" -P $pass"
 340 |         [[ "$username" ]] && args=$args" -l $username"
 341 |         
 342 |         echo -n $'\nRetrieving Credentials...' >&2
 343 |         if ! java $(proxy_to_java) -jar $MYPROXY_GETCERT $args --ca-directory $ESG_CERT_DIR --output $ESG_CREDENTIALS ; then        
 344 |             echo "Certificate could not be retrieved"
 345 |             exit 1
 346 |         fi
 347 |         echo "done!" >&2
 348 |     else
 349 |         args=`openid_to_myproxy_args $openId $username` || exit 1
 350 |         if ! myproxy-logon $args -b -o $ESG_CREDENTIALS
 351 | 	then
 352 |             echo "Certificate could not be retrieved"
 353 | 	    exit 1
 354 |         fi
 355 | 	cp $HOME/.globus/certificates/* $ESG_CERT_DIR/	
 356 |     fi
 357 | }
 358 | 
 359 | openid_to_myproxy_args() {
 360 |   python - <<EOF || exit 1
 361 | import sys
 362 | import re
 363 | import xml.etree.ElementTree as ET
 364 | import urllib2
 365 | openid = "$1"
 366 | username = "$2" or re.sub(".*/", "", openid)
 367 | e = ET.parse(urllib2.urlopen(openid))
 368 | servs = [el for el in e.getiterator() if el.tag.endswith("Service")]
 369 | for serv in servs:
 370 |     servinfo = dict([(re.sub(".*}", "", c.tag), c.text)
 371 |                      for c in serv.getchildren()])
 372 |     try:
 373 |         if servinfo["Type"].endswith("myproxy-service"):
 374 |             m = re.match("socket://(.*):(.*)", servinfo["URI"])
 375 |             if m:
 376 |                 host = m.group(1)
 377 |                 port = m.group(2)
 378 |                 print "-s %s -p %s -l %s" % (host, port, username)
 379 |                 break
 380 |     except KeyError:
 381 |         continue
 382 | else:
 383 |     sys.stderr.write("myproxy service could not be found\n")
 384 |     sys.exit(1)
 385 | EOF
 386 | }
 387 | 
 388 | # check the certificate validity
 389 | check_cert() {
 390 |     if [[ ! -f "$ESG_CERT" || $force ]]; then
 391 |         #not there, just get it
 392 |         get_credentials
 393 |     elif which openssl &>/dev/null; then
 394 |         #check openssl and certificate
 395 |         if ! openssl x509 -checkend $CERT_EXPIRATION_WARNING -noout -in $ESG_CERT 2>/dev/null; then
 396 |             echo "The certificate expires in less than $((CERT_EXPIRATION_WARNING / 60 / 60)) hour(s). Renewing..."
 397 |             get_credentials
 398 |         else
 399 |             #ok, certificate is fine
 400 |             return 0
 401 |         fi
 402 |     fi
 403 | }
 404 | 
 405 | #
 406 | # Detect ESG credentials
 407 | #
 408 | find_credentials() {
 409 | 
 410 |     #is X509_USER_PROXY or $HOME/.esg/credential.pem
 411 |     if [[ -f "$ESG_CREDENTIALS" ]]; then
 412 |         # file found, proceed.
 413 |         ESG_CERT="$ESG_CREDENTIALS"
 414 |         ESG_KEY="$ESG_CREDENTIALS"
 415 |     elif [[ -f "$X509_USER_CERT" && -f "$X509_USER_KEY" ]]; then
 416 |         # second try, use these certificates.
 417 |         ESG_CERT="$X509_USER_CERT"
 418 |         ESG_KEY="$X509_USER_KEY"
 419 |     else
 420 |         # If credentials are not present, just point to where they should go 
 421 |         echo "No ESG Credentials found in $ESG_CREDENTIALS" >&2
 422 |             ESG_CERT="$ESG_CREDENTIALS"
 423 |             ESG_KEY="$ESG_CREDENTIALS"
 424 |             #they will be retrieved later one
 425 |     fi
 426 | 
 427 | 
 428 |     #chek openssl and certificate
 429 |     if (which openssl &>/dev/null); then
 430 |         if ( openssl version | grep 'OpenSSL 1\.0' ); then
 431 |             echo '** WARNING: ESGF Host certificate checking might not be compatible with OpenSSL 1.0+'
 432 |         fi
 433 |         check_cert || { (($?==1)); exit 1; }
 434 |     fi
 435 |     
 436 |     if [[ $CHECK_SERVER_CERT == "Yes" ]]; then
 437 |         [[ -d "$ESG_CERT_DIR" ]] || { echo "CA certs not found. Aborting."; exit 1; }
 438 |         PKI_WGET_OPTS="--ca-directory=$ESG_CERT_DIR"
 439 |     fi
 440 | 
 441 |     #some wget version complain if there's no file present
 442 |     [[ -f $COOKIE_JAR ]] || touch $COOKIE_JAR
 443 | 
 444 |     PKI_WGET_OPTS="$PKI_WGET_OPTS --certificate=$ESG_CERT --private-key=$ESG_KEY --save-cookies=$COOKIE_JAR --load-cookies=$COOKIE_JAR --ca-certificate=$ESG_CERT"
 445 | 
 446 | }
 447 | 
 448 | check_chksum() {
 449 |     local file="$1"
 450 |     local chk_type=$2
 451 |     local chk_value=$3
 452 |     local local_chksum=Unknown
 453 | 
 454 |     case $chk_type in
 455 |         md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");;
 456 |         sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);;
 457 |         *) echo "Can't verify checksum." && return 0;;
 458 |     esac
 459 | 
 460 |     #verify
 461 |     ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2
 462 |     echo $local_chksum
 463 | }
 464 | 
 465 | #Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x)
 466 | md5sum_() {
 467 |     hash -r
 468 |     if type md5sum >& /dev/null; then
 469 |         echo $(md5sum $@)
 470 |     else
 471 |         echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p')
 472 |     fi
 473 | }
 474 | 
 475 | #Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x)
 476 | sha256sum_() {
 477 |     hash -r
 478 |     if type sha256sum >& /dev/null; then
 479 |         echo $(sha256sum $@)
 480 |     elif type shasum >& /dev/null; then
 481 |         echo $(shasum -a 256 $@)
 482 |     else
 483 |         echo $(sha2 -q -256 $@)
 484 |     fi
 485 | }
 486 | 
 487 | get_mod_time_() {
 488 |     if ((MACOSX)); then
 489 |         #on a mac modtime is stat -f %m <file>
 490 |         echo "$(stat -f %m $@)"
 491 |     else
 492 |         #on linux (cygwin) modtime is stat -c %Y <file>
 493 |         echo "$(stat -c %Y $@)"
 494 |     fi
 495 |     return 0;
 496 | }
 497 | 
 498 | remove_from_cache() {
 499 |     local entry="$1"
 500 |     local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")"
 501 |     echo "$tmp_file" > "$CACHE_FILE"
 502 |     unset cached
 503 | }
 504 | 
 505 | #Download data from node using cookies and not certificates.
 506 | download_http_sec()
 507 | {
 508 |   #The data to be downloaded.
 509 |   data=" $url"
 510 |   filename="$file"  
 511 | 
 512 |   #Wget args.
 513 |   if ((insecure)) 
 514 |   then
 515 |    wget_args=" --no-check-certificate --cookies=on  --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " 
 516 |   else
 517 |    wget_args=" --ca-directory=$WGET_TRUSTED_CERTIFICATES --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt "  
 518 |   fi 
 519 | 
 520 |   if ((use_cookies_for_http_basic_auth_start)) || ((use_cookies_for_http_basic_auth)) 
 521 |   then
 522 |    wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt"    
 523 |   fi
 524 |   
 525 |   if((force_TLSv1))
 526 |   then
 527 |    wget_args=" $wget_args"" --secure-protocol=TLSv1 "
 528 |   fi
 529 |   
 530 |   
 531 |   if [[ ! -z "$ESGF_WGET_OPTS" ]]
 532 |   then
 533 |     wget_args="$wget_args $ESGF_WGET_OPTS"
 534 |   fi  
 535 |   
 536 | 
 537 |   #use cookies for the next downloads
 538 |   use_cookies_for_http_basic_auth=1;
 539 |    
 540 |   #Debug message.
 541 |   if  ((debug))
 542 |   then
 543 |    echo -e "\nExecuting:\n"
 544 |    echo -e "wget $wget_args $data\n"
 545 |   fi
 546 | 
 547 | 
 548 |   #Try to download the data. 
 549 |   command="wget $wget_args -O $filename $data"
 550 |   http_resp=$(eval $command  2>&1) 
 551 |   cmd_exit_status="$?"
 552 |   
 553 |   if ((debug))
 554 |   then
 555 |    echo -e "\nHTTP response:\n $http_resp\n"
 556 |   fi
 557 |       
 558 |   #Extract orp service from url ?
 559 |   #Evaluate response.
 560 |   #redirects=$(echo "$http_resp" | egrep -c ' 302 ')
 561 |   #(( "$redirects" == 1 )) && 
 562 |   if  echo "$http_resp" | grep -q "/esg-orp/"      
 563 |   then
 564 |    urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3)
 565 |    orp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2)
 566 | 
 567 | 
 568 |    #Use cookies for transaction with orp.
 569 |    wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt"    
 570 |    
 571 |    #Download data using either http basic auth or http login form.
 572 |    if [[ "$openid_c" == */openid/  || "$openid_c" == */openid ]]
 573 |    then
 574 |     download_http_sec_open_id
 575 |    else
 576 |     download_http_sec_decide_service
 577 |    fi
 578 |   else  
 579 |    if    echo "$http_resp" | grep -q "401 Unauthorized"  \
 580 |       || echo "$http_resp" | grep -q "403: Forbidden"  \
 581 |       || echo "$http_resp" | grep -q "Connection timed out."  \
 582 |       || echo "$http_resp" | grep -q "no-check-certificate"  \
 583 |       || (( $cmd_exit_status != 0 ))      
 584 |    then 
 585 |     echo "ERROR : http request to OpenID Relying Party service failed."
 586 |     failed=1
 587 |    fi
 588 |   fi
 589 | }
 590 | 
 591 | 
 592 | #Function that decides which implementaion of idp to use.
 593 | download_http_sec_decide_service()
 594 | {
 595 |   #find claimed id
 596 | 
 597 |   pos=$(echo "$openid_c" | egrep -o '/' | wc -l)
 598 |   username_c=$(echo "$openid_c"  | cut -d'/' -f "$(($pos + 1))")
 599 |   esgf_uri=$(echo "$openid_c" | egrep -o '/esgf-idp/openid/')
 600 | 
 601 |   host=$(echo "$openid_c"  | cut -d'/' -f 3)
 602 |   #test ceda first.
 603 | 
 604 |   if [[ -z "$esgf_uri" ]]
 605 |   then
 606 |    openid_c_tmp="https://""$host""/openid/"
 607 |   else
 608 |    openid_c_tmp="https://""$host""/esgf-idp/openid/" 
 609 |   fi
 610 | 
 611 |   command="wget "$openid_c_tmp" --no-check-certificate ${force_TLSv1:+--secure-protocol=TLSv1} -O-"
 612 |         
 613 |   if [[ ! -z "$ESGF_WGET_OPTS" ]]
 614 |   then
 615 |    command="$command $ESGF_WGET_OPTS"
 616 |   fi  
 617 |           
 618 |   #Debug message.
 619 |   if  ((debug))
 620 |   then
 621 |    echo -e "\nExecuting:\n"
 622 |    echo -e "$command\n"
 623 |   fi
 624 |             
 625 | 
 626 |   #Execution of command.
 627 |   http_resp=$(eval $command  2>&1)
 628 |   cmd_exit_status="$?"
 629 |   
 630 |   
 631 |   if ((debug))
 632 |   then
 633 |    echo -e "\nHTTP response:\n $http_resp\n"
 634 |   fi 
 635 |   
 636 | 
 637 |   if    echo "$http_resp" | grep -q "[application/xrds+xml]"  \
 638 |      && echo "$http_resp" | grep -q "200 OK"  \
 639 |      && (( cmd_exit_status == 0 ))       
 640 |   then
 641 |    openid_c=$openid_c_tmp
 642 |    download_http_sec_open_id
 643 |   else
 644 |    if [[ -z "$esgf_uri" ]]
 645 |    then
 646 |     echo "ERROR : HTTP request to OpenID Relying Party service failed."
 647 |     failed=1
 648 |    else
 649 |    download_http_sec_cl_id
 650 |    fi
 651 |   fi
 652 | }
 653 | 
 654 | 
 655 | download_http_sec_retry()
 656 | {
 657 |   echo -e "\nRetrying....\n"
 658 |   #Retry in case that last redirect did not work, this happens with older version of wget.
 659 |   command="wget $wget_args $data"
 660 |       
 661 |   #Debug message.
 662 |   if  ((debug))
 663 |   then
 664 |    echo -e "Executing:\n"
 665 |    echo -e "$command\n"
 666 |   fi   
 667 |    
 668 |   http_resp=$(eval $command  2>&1) 
 669 |   cmd_exit_status="$?"
 670 | 
 671 |   if ((debug))
 672 |   then
 673 |    echo -e "\nHTTP response:\n $http_resp\n"
 674 |   fi
 675 |    
 676 |   if    echo "$http_resp" | grep -q "401 Unauthorized"  \
 677 |      || echo "$http_resp" | grep -q "403: Forbidden"  \
 678 |      || echo "$http_resp" | grep -q "Connection timed out."  \
 679 |      || echo "$http_resp" | grep -q "no-check-certificate"  \
 680 |      || (( $cmd_exit_status != 0 ))      
 681 |   then 
 682 |    echo -e "\nERROR : Retry failed.\n"
 683 |    #rm "$filename"
 684 |    failed=1
 685 |   fi #if retry failed.
 686 | }
 687 | 
 688 | #Function for downloading data using the claimed id.
 689 | download_http_sec_cl_id()
 690 | {
 691 |   #Http request for sending openid to the orp service.
 692 |   command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\"  $wget_args -O- https://$orp_service/esg-orp/j_spring_openid_security_check.htm "
 693 | 
 694 |   #Debug message.
 695 |   if  ((debug))
 696 |   then
 697 |    echo -e "Executing:\n"
 698 |    echo -e "wget $command\n"
 699 |   fi 
 700 |   
 701 |   
 702 |   #Execution of command.
 703 |   http_resp=$(eval $command  2>&1)
 704 |   cmd_exit_status="$?"
 705 | 
 706 |   
 707 |   if ((debug))
 708 |   then
 709 |    echo -e "\nHTTP response:\n $http_resp\n"
 710 |   fi 
 711 |     
 712 |   
 713 |   #Extract orp service from openid ?
 714 |   #Evaluate response.If redirected to idp service send the credentials.
 715 |   #redirects=$(echo "$http_resp" | egrep -c ' 302 ')
 716 |   #(( redirects == 2  )) && 
 717 |   if  echo "$http_resp" | grep -q "login.htm"  && (( cmd_exit_status == 0 ))   
 718 |   then 
 719 |   
 720 |    urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3)
 721 |    idp_service=$(echo "$urls"  | tr '\n' ' ' | cut -d' ' -f 2) 
 722 |       
 723 |    command="wget --post-data  password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$idp_service/esgf-idp/idp/login.htm"
 724 |    
 725 | 
 726 |    #Debug message.
 727 |    if  ((debug))
 728 |    then
 729 |     echo -e "Executing:\n"
 730 |     echo -e "wget $command\n"
 731 |    fi 
 732 | 
 733 |    #Execution of command.
 734 |    http_resp=$(eval $command  2>&1)
 735 |    cmd_exit_status="$?"
 736 |       
 737 |    if ((debug))
 738 |    then
 739 |     echo -e "\nHTTP response:\n $http_resp\n"
 740 |    fi 
 741 |         
 742 |    #Evaluate response. 
 743 |    #redirects=$(echo "$http_resp" | egrep -c ' 302 ')
 744 |    #(( "$redirects" != 5 )) \ 
 745 |    if    echo "$http_resp" | grep -q "text/html"  \
 746 |       || echo "$http_resp" | grep -q "403: Forbidden"  \
 747 |       || (( cmd_exit_status != 0 ))        
 748 |    then 
 749 |     rm "$filename"
 750 |     download_http_sec_retry
 751 |    fi
 752 |  
 753 |   else
 754 |    echo "ERROR : HTTP request to OpenID Provider service failed."
 755 |    failed=1
 756 |   fi #if redirected to idp.  
 757 | }
 758 | 
 759 | 
 760 | 
 761 | download_http_sec_open_id()
 762 | {
 763 |   #Http request for sending openid to the orp web service.
 764 |   command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" --header=\"esgf-idea-agent-type:basic_auth\" --http-user=\"$username_c\" --http-password=\"$password_c\"  $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$orp_service/esg-orp/j_spring_openid_security_check.htm "
 765 | 
 766 | 
 767 |   #Debug message.
 768 |   if  ((debug))
 769 |   then
 770 |    echo -e "Executing:\n"
 771 |    echo -e "$command\n"
 772 |   fi 
 773 | 
 774 |   #Execution of command.
 775 |   http_resp=$(eval $command  2>&1)
 776 |   cmd_exit_status="$?"
 777 |   
 778 |   
 779 |   if ((debug))
 780 |   then
 781 |    echo -e "\nHTTP response:\n $http_resp\n"
 782 |   fi 
 783 | 
 784 |   #Evaluate response.
 785 |   #redirects=$(echo "$http_resp" | egrep -c ' 302 ')
 786 |   #(( "$redirects" != 7 )) ||
 787 |   if   echo "$http_resp" | grep -q "text/html"  ||  (( $cmd_exit_status != 0 ))   
 788 |   then
 789 |    rm "$filename"
 790 |    download_http_sec_retry     
 791 |   fi #if error during http basic authentication. 
 792 |   
 793 | }
 794 | 
 795 | 
 796 | download() {
 797 |     wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v} -c ${force_TLSv1:+--secure-protocol=TLSv1} $PKI_WGET_OPTS"
 798 |     
 799 |     while read line
 800 |     do
 801 |         # read csv here document into proper variables
 802 |         eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) )
 803 | 
 804 |         #Process the file
 805 |         echo -n "$file ..."
 806 | 
 807 |         #get the cached entry if any.
 808 |         cached="$(grep -e "^$file" "$CACHE_FILE")"
 809 |         
 810 |         #if we have the cache entry but no file, clean it.
 811 |         if [[ ! -f $file && "$cached" ]]; then
 812 |             #the file was removed, clean the cache
 813 |             remove_from_cache "$file"
 814 |             unset cached
 815 |         fi
 816 |         
 817 |         #check it wasn't modified
 818 |         if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then
 819 |                     if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then
 820 |                 echo "Already downloaded and verified"
 821 |                 continue
 822 |             elif ((update_files)); then
 823 |                 #user want's to overwrite newer files
 824 |                 rm $file
 825 |                 remove_from_cache "$file"
 826 |                 unset cached
 827 |             else
 828 |                 #file on server is different from what we have. 
 829 |                 echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite"
 830 |                 continue
 831 |             fi
 832 |         fi
 833 |         unset chksum_err_value chksum_err_count
 834 |         
 835 |         while : ; do
 836 |             # (if we had the file size, we could check before trying to complete)
 837 |             echo "Downloading"
 838 |             [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")"
 839 |             if ((dry_run)); then
 840 |                 #all important info was already displayed, if in dry_run mode just abort
 841 |                 #No status will be stored
 842 |                 break
 843 |             else
 844 |                 if ((use_http_sec))
 845 |                 then
 846 |                  download_http_sec
 847 |                  if ((failed))
 848 |                  then
 849 |                   break
 850 |                  fi
 851 |                 else
 852 |                  $wget -O "$file" $url || { failed=1; break; }  
 853 |                 fi                
 854 |             fi
 855 | 
 856 |             #check if file is there
 857 |             if [[ -f $file ]]; then
 858 |                 ((debug)) && echo file found
 859 |                 if [[ ! "$chksum" ]]; then
 860 |                     echo "Checksum not provided, can't verify file integrity"
 861 |                     break
 862 |                 fi
 863 |                 result_chksum=$(check_chksum "$file" $chksum_type $chksum)
 864 |                 if [[ "$result_chksum" != "$chksum" ]]; then
 865 |                     echo "  $chksum_type failed!"
 866 |                     if ((clean_work)); then
 867 |                         if !((chksum_err_count)); then
 868 |                                 chksum_err_value=$result_chksum
 869 |                                 chksum_err_count=2
 870 |                             elif ((checksum_err_count--)); then
 871 |                                 if [[ "$result_chksum" != "$chksum_err_value" ]]; then
 872 |                                     #this is a real transmission problem
 873 |                                     chksum_err_value=$result_chksum
 874 |                                     chksum_err_count=2
 875 |                                 fi
 876 |                             else
 877 |                                 #ok if here we keep getting the same "different" checksum
 878 |                                 echo "The file returns always a different checksum!"
 879 |                                 echo "Contact the data owner to verify what is happening."
 880 |                                 echo
 881 |                                 sleep 1
 882 |                                 break
 883 |                             fi
 884 |                         
 885 |                             rm $file
 886 |                             #try again
 887 |                             echo -n "  re-trying..."
 888 |                             continue
 889 |                     else
 890 |                             echo "  don't use -p or remove manually."
 891 |                     fi
 892 |                 else
 893 |                     echo "  $chksum_type ok. done!"
 894 |                     echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE
 895 |                 fi
 896 |             fi
 897 |             #done!
 898 |             break
 899 |         done
 900 |         
 901 |         if ((failed)); then
 902 |             echo "download failed"
 903 |             # most common failure is certificate expiration, so check this
 904 |             #if we have the pasword we can retrigger download
 905 |             ((!skip_security)) && [[ "$pass" ]] && check_cert
 906 |             unset failed
 907 |         fi
 908 |         
 909 | done <<<"$download_files"
 910 | 
 911 | }
 912 | 
 913 | dedup_cache_() {
 914 |     local file=${1:-${CACHE_FILE}}
 915 |     ((debug)) && echo "dedup'ing cache ${file} ..."
 916 |     local tmp=$(LC_ALL='C' sort  -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2)
 917 |     ((DEBUG)) && echo "$tmp"
 918 |     echo "$tmp" > $file
 919 |     ((debug)) && echo "(cache dedup'ed)"
 920 | }
 921 | 
 922 | http_basic_auth_func_info_message()
 923 | {
 924 |   echo  "********************************************************************************"
 925 |   echo  "*                                                                              *"
 926 |   echo  "* Note that new functionality to allow authentication without the need for     *"
 927 |   echo  "* certificates is available with this version of the wget script.  To enable,  *"
 928 |   echo  "* use the \"-H\" option and enter your OpenID and password when prompted:        *"
 929 |   echo  "*                                                                              *"
 930 |   echo  "* $ "$(basename "$0")" -H [options...]                                     *"
 931 |   echo  "*                                                                              *"
 932 |   echo  "* For a full description of the available options use the help option:         *"
 933 |   echo  "*                                                                              *"
 934 |   echo  "* $ "$(basename "$0")" -h                                                  *"
 935 |   echo  "*                                                                              *"
 936 |   echo  "********************************************************************************"
 937 | }
 938 | 
 939 | #
 940 | # MAIN
 941 | #
 942 | 
 943 | if ((!use_http_sec))
 944 | then 
 945 |  http_basic_auth_func_info_message
 946 | fi
 947 | 
 948 | echo "Running $(basename $0) version: $version"
 949 | ((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly"
 950 | echo "Use $(basename $0) -h for help."$'\n'
 951 | 
 952 | ((debug)) && cat<<EOF
 953 | ** Debug info **
 954 | ESG_HOME=$ESG_HOME
 955 | ESG_CREDENTIALS=$ESG_CREDENTIALS
 956 | ESG_CERT_DIR=$ESG_CERT_DIR
 957 | ** -- ** -- ** -- ** --
 958 | 
 959 | EOF
 960 | 
 961 | 
 962 | cat <<'EOF-MESSAGE'
 963 | Script created for 18 file(s)
 964 | (The count won't match if you manually edit this file!)
 965 | 
 966 | 
 967 | 
 968 | EOF-MESSAGE
 969 | sleep 1
 970 | 
 971 | check_os
 972 | ((!skip_security)) && find_credentials
 973 | 
 974 | if ((use_http_sec))
 975 | then 
 976 |      
 977 |  if (( ! insecure))
 978 |  then 
 979 |   get_certificates
 980 |  fi
 981 | 
 982 |  #Cookies folder.
 983 |  COOKIES_FOLDER="$ESG_HOME/wget_cookies"
 984 |  
 985 |  if (( force ))
 986 |  then
 987 |   if [ -d $COOKIES_FOLDER ] 
 988 |   then
 989 |    rm -rf $COOKIES_FOLDER
 990 |   fi
 991 |  fi
 992 | 
 993 |  #Create cookies folder. 
 994 |  if [[ ! -d $COOKIES_FOLDER ]] 
 995 |  then
 996 |   mkdir $COOKIES_FOLDER
 997 |  fi
 998 |  
 999 |  if((! use_cookies_for_http_basic_auth_start))
1000 |  then
1001 | 
1002 |   #Read openid.
1003 |   if [[ ! -z "$openId" ]]
1004 |   then
1005 |    openid_c="$openId"
1006 |   elif ( (("$#" > 1)) || (("$#" == 1)) ) 
1007 |   then
1008 |    openid_c=$1
1009 |   else
1010 |    read -p    "Enter your openid : " openid_c
1011 |   fi
1012 |   
1013 |   
1014 |   #Read username.
1015 |   if [[ ! -z "$username_supplied" ]]
1016 |   then
1017 |    username_c="$username_supplied"
1018 |   elif (("$#" == 2))
1019 |   then
1020 |    username_c=$2
1021 |   elif [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]]
1022 |   then
1023 |    read -p    "Enter username : " username_c
1024 |   fi
1025 |   
1026 |   #Read password.
1027 |   read -s -p "Enter password : " password_c
1028 |   echo -e "\n"
1029 | 
1030 |  fi #use cookies
1031 | 
1032 | fi #use_http_sec 
1033 | 
1034 | 
1035 | #do we have old results? Create the file if not
1036 | [ ! -f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE
1037 | 
1038 | #clean the force parameter if here (at htis point we already have the certificate)
1039 | unset force
1040 | 
1041 | download
1042 | 
1043 | dedup_cache_
1044 | 
1045 | 
1046 | echo "done"
1047 | 


--------------------------------------------------------------------------------
/step_01_extract_deltas/CFday_wget_scripts/wget_CFday_ssp585_hur.sh:
--------------------------------------------------------------------------------
   1 | #!/bin/bash
   2 | ##############################################################################
   3 | # ESG Federation download script
   4 | #
   5 | # Template version: 1.2
   6 | # Generated by esgf-data.dkrz.de - 2022/02/21 21:00:30
   7 | # Search URL: https://esgf-data.dkrz.de/esg-search/wget/?distrib=false&dataset_id=CMIP6.ScenarioMIP.DKRZ.MPI-ESM1-2-HR.ssp585.r1i1p1f1.CFday.hur.gn.v20190710|esgf3.dkrz.de
   8 | #
   9 | ###############################################################################
  10 | # first be sure it's bash... anything out of bash or sh will break
  11 | # and the test will assure we are not using sh instead of bash
  12 | if [ $BASH ] && [ `basename $BASH` != bash ]; then
  13 |     echo "######## This is a bash script! ##############" 
  14 |     echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh."
  15 |     echo "Trying to recover automatically..."
  16 |     sleep 1
  17 |     /bin/bash $0 $@
  18 |     exit $?
  19 | fi
  20 | 
  21 | version=1.3.2
  22 | CACHE_FILE=.$(basename $0).status
  23 | openId=
  24 | search_url='https://esgf-data.dkrz.de/esg-search/wget/?distrib=false&dataset_id=CMIP6.ScenarioMIP.DKRZ.MPI-ESM1-2-HR.ssp585.r1i1p1f1.CFday.hur.gn.v20190710|esgf3.dkrz.de'
  25 | 
  26 | #These are the embedded files to be downloaded
  27 | download_files="$(cat <<EOF--dataset.file.url.chksum_type.chksum
  28 | 'hur_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20700101-20741231.nc' 'http://esgf3.dkrz.de/thredds/fileServer/cmip6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp585/r1i1p1f1/CFday/hur/gn/v20190710/hur_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20700101-20741231.nc' 'SHA256' '0ede4b7ec90ef47cb78599fa75e5092780305959c001490eec6f6af7c70e4044'
  29 | 'hur_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20750101-20791231.nc' 'http://esgf3.dkrz.de/thredds/fileServer/cmip6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp585/r1i1p1f1/CFday/hur/gn/v20190710/hur_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20750101-20791231.nc' 'SHA256' '9de17f37a111167d8bfdb251ae89f716b5b9e06d995aba29580e4ca9b330ec60'
  30 | 'hur_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20800101-20841231.nc' 'http://esgf3.dkrz.de/thredds/fileServer/cmip6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp585/r1i1p1f1/CFday/hur/gn/v20190710/hur_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20800101-20841231.nc' 'SHA256' '7d80c39488f3548876082676ce46e6b6b9fc3ba3362c34251a41040a84e566c9'
  31 | 'hur_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20850101-20891231.nc' 'http://esgf3.dkrz.de/thredds/fileServer/cmip6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp585/r1i1p1f1/CFday/hur/gn/v20190710/hur_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20850101-20891231.nc' 'SHA256' '99a6a9b91affeb8ab491a569a9d228346198d97859143b2ce9e0fd2baa5a8bcd'
  32 | 'hur_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20900101-20941231.nc' 'http://esgf3.dkrz.de/thredds/fileServer/cmip6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp585/r1i1p1f1/CFday/hur/gn/v20190710/hur_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20900101-20941231.nc' 'SHA256' '0ff6973439a672d4a2e28fcd6f99b57142ba1ee4a23de72ef94a8d5731d05e45'
  33 | 'hur_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20950101-20991231.nc' 'http://esgf3.dkrz.de/thredds/fileServer/cmip6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp585/r1i1p1f1/CFday/hur/gn/v20190710/hur_CFday_MPI-ESM1-2-HR_ssp585_r1i1p1f1_gn_20950101-20991231.nc' 'SHA256' '24bc222058a3a2eeb6b4d86098c07240b605783e8e6c868cc79bc51058eb68f9'
  34 | EOF--dataset.file.url.chksum_type.chksum
  35 | )"
  36 | 
  37 | # ESG_HOME should point to the directory containing ESG credentials.
  38 | #   Default is $HOME/.esg
  39 | ESG_HOME=${ESG_HOME:-$HOME/.esg}
  40 | [[ -d $ESG_HOME ]] || mkdir -p $ESG_HOME
  41 | 
  42 | ESG_CREDENTIALS=${X509_USER_PROXY:-$ESG_HOME/credentials.pem}
  43 | ESG_CERT_DIR=${X509_CERT_DIR:-$ESG_HOME/certificates}
  44 | MYPROXY_STATUS=$HOME/.MyProxyLogon
  45 | COOKIE_JAR=$ESG_HOME/cookies
  46 | MYPROXY_GETCERT=$ESG_HOME/getcert.jar
  47 | CERT_EXPIRATION_WARNING=$((60 * 60 * 8))   #Eight hour (in seconds)
  48 | 
  49 | WGET_TRUSTED_CERTIFICATES=$ESG_HOME/certificates
  50 | 
  51 | 
  52 | # Configure checking of server SSL certificates.
  53 | #   Disabling server certificate checking can resolve problems with myproxy
  54 | #   servers being out of sync with datanodes.
  55 | CHECK_SERVER_CERT=${CHECK_SERVER_CERT:-Yes}
  56 | 
  57 | check_os() {
  58 |     local os_name=$(uname | awk '{print $1}')
  59 |     case ${os_name} in
  60 |         Linux)
  61 |             ((debug)) && echo "Linux operating system detected"
  62 |             LINUX=1
  63 |             MACOSX=0
  64 |             ;;
  65 |         Darwin)
  66 |             ((debug)) && echo "Mac OS X operating system detected"
  67 |             LINUX=0
  68 |             MACOSX=1
  69 |             ;;
  70 |         *)
  71 |             echo "Unrecognized OS [${os_name}]"
  72 |             return 1
  73 |             ;;
  74 |     esac
  75 |     return 0
  76 | }
  77 | 
  78 | #taken from http://stackoverflow.com/a/4025065/1182464
  79 | vercomp () {
  80 |     if [[ $1 == $2 ]]
  81 |     then
  82 |         return 0
  83 |     fi
  84 |     local IFS=.
  85 |     local i ver1=($1) ver2=($2)
  86 |     # fill empty fields in ver1 with zeros
  87 |     for ((i=${#ver1[@]}; i<${#ver2[@]}; i++))
  88 |     do
  89 |         ver1[i]=0
  90 |     done
  91 |     for ((i=0; i<${#ver1[@]}; i++))
  92 |     do
  93 |         if [[ -z ${ver2[i]} ]]
  94 |         then
  95 |             # fill empty fields in ver2 with zeros
  96 |             ver2[i]=0
  97 |         fi
  98 |         if ((10#${ver1[i]} > 10#${ver2[i]}))
  99 |         then
 100 |             return 1
 101 |         fi
 102 |         if ((10#${ver1[i]} < 10#${ver2[i]}))
 103 |         then
 104 |             return 2
 105 |         fi
 106 |     done
 107 |     return 0
 108 | }
 109 | 
 110 | check_commands() {
 111 |     #check wget
 112 |     local MIN_WGET_VERSION=1.10
 113 |     vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION
 114 |     case $? in
 115 |         2) #lower
 116 |             wget -V
 117 |             echo
 118 |             echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2
 119 |             exit 1
 120 |     esac
 121 | }
 122 | 
 123 | usage() {
 124 |     echo "Usage: $(basename $0) [flags] [openid] [username]"
 125 |     echo "Flags is one of:"
 126 |     sed -n '/^while getopts/,/^done/  s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0
 127 |     echo
 128 |     echo "This command stores the states of the downloads in .$0.status"
 129 |     echo "For more information check the website: http://esgf.org/wiki/ESGF_wget"
 130 | }
 131 | 
 132 | #defaults
 133 | debug=0
 134 | clean_work=1
 135 | 
 136 | #parse flags
 137 | while getopts ':c:pfF:o:w:isuUndvqhHI:T' OPT; do
 138 |     case $OPT in
 139 |         H) skip_security=1 && use_http_sec=1;; #       : Authenticate with OpenID (username,) and password, without the need for a certificate.
 140 |         T) force_TLSv1=1;;              #       : Forces wget to use TLSv1. 
 141 |         c) ESG_CREDENTIALS="$OPTARG";;  #<cert> : use this certificate for authentication.
 142 |         f) force=1;;                    #       : force certificate retrieval (defaults to only once per day); for certificate-less authentication (see -H option), this flag will force login and refresh cookies.
 143 |         F) input_file="$OPTARG";;       #<file> : read input from file instead of the embedded one (use - to read from stdin)
 144 |         o) openId="$OPTARG";;           #<openid>: Provide OpenID instead of interactively asking for it.
 145 |         I) username_supplied="$OPTARG";;    #<user_id> : Explicitly set user ID.  By default, the user ID is extracted from the last component of the OpenID URL.  Use this flag to override this behaviour.                   
 146 |         w) output="$OPTARG";;           #<file> : Write embedded files into a file and exit
 147 |         i) insecure=1;;                 #       : set insecure mode, i.e. don't check server certificate
 148 |         s) skip_security=1 && use_cookies_for_http_basic_auth_start=1;;            #       : completely skip security. It will only work if the accessed data is not secured at all. -- works only if the accessed data is unsecured or a certificate exists or cookies are saved (latter applies to -H option only).
 149 |         u) update=1;;                   #       : Issue the search again and see if something has changed.
 150 |         U) update_files=1;;             #       : Update files from server overwriting local ones (detect with -u)
 151 |         n) dry_run=1;;                  #       : Don't download any files, just report.
 152 |         p) clean_work=0;;               #       : preserve data that failed checksum
 153 |         d) verbose=1;debug=1;;          #       : display debug information
 154 |         v) verbose=1;;                  #       : be more verbose
 155 |         q) quiet=1;;                    #       : be less verbose
 156 |         h) usage && exit 0;;            #       : displays this help
 157 |         \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;;
 158 |         \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;;
 159 |     esac
 160 | done
 161 | shift $(($OPTIND - 1))
 162 | 
 163 | #setup input as desired by the user
 164 | if [[ "$input_file" ]]; then
 165 |     if [[ "$input_file" == '-' ]]; then
 166 |         download_files="$(cat)" #read from STDIN
 167 |         exec 0</dev/tty #reopen STDIN as cat closed it
 168 |     else
 169 |         download_files="$(cat $input_file)" #read from file
 170 |     fi
 171 | fi
 172 | 
 173 | #if -w (output) was selected write file and finish:
 174 | if [[ "$output" ]]; then
 175 |     #check the file
 176 |     if [[ -f "$output" ]]; then
 177 |         read -p "Overwrite existing file $output? (y/N) " answ
 178 |         case $answ in y|Y|yes|Yes);; *) echo "Aborting then..."; exit 0;; esac
 179 |     fi
 180 |     echo "$download_files">$output
 181 |     exit
 182 | fi
 183 | 
 184 | 
 185 | #assure we have everything we need
 186 | check_commands
 187 | 
 188 | if ((update)); then
 189 |     echo "Checking the server for changes..."
 190 |     new_wget="$(wget "$search_url" -qO -)"
 191 |     compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'"
 192 |     if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then
 193 |         echo "No changes detected."
 194 |     else
 195 |         echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)"
 196 |         counter=0
 197 |         while [[ -f $0.old.$counter ]]; do ((counter++)); done
 198 |         mv $0 $0.old.$counter
 199 |         echo "$new_wget" > $0
 200 |     fi
 201 |     exit 0      
 202 | fi
 203 | 
 204 | 
 205 | ##############################################################################
 206 | check_java() {
 207 |     if ! type java >& /dev/null; then
 208 |         echo "Java could not be found." >&2
 209 |         return 1
 210 |     fi
 211 |     if java -version 2>&1|grep openjdk >/dev/null; then
 212 |         openjdk=1;
 213 |     else
 214 |         openjdk=0;
 215 |     fi
 216 |     jversion=($(jversion=$(java -version 2>&1 | awk '/version/ {gsub("\"","");print $3}'); echo ${jversion//./ }))
 217 |     mVer=${jversion[1]}
 218 |     if [ $openjdk -eq 1 ]; then
 219 |         mVer=${jversion[0]}
 220 |         if ((mVer<5)); then
 221 |             echo "Openjdk detected. Version 9+ is required for retrieving the certificate." >&2
 222 |             echo "Current version seems older: $(java -version | head -n1) " >&2
 223 |             return 1
 224 |         fi
 225 |     else
 226 |     
 227 |         if ((mVer<5)); then
 228 |             echo "Java version 1.5+ is required for retrieving the certificate." >&2
 229 |             echo "Current version seems older: $(java -version | head -n1) " >&2
 230 |             return 1
 231 |         fi
 232 |     fi
 233 | }
 234 | 
 235 | check_myproxy_logon() {
 236 |     if ! type myproxy-logon >& /dev/null; then
 237 | 	echo "myproxy-logon could not be found." >&2
 238 | 	return 1
 239 |     fi
 240 |     echo "myproxy-logon found" >&2
 241 | }
 242 | 
 243 | proxy_to_java() {
 244 |     local proxy_user proxy_pass proxy_server proxy_port
 245 |     eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$http_proxy)
 246 |     local JAVA_PROXY=
 247 |     [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyHost=$proxy_server"
 248 |     [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyPort=$proxy_port"
 249 |     eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$https_proxy)
 250 |     [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyHost=$proxy_server"
 251 |     [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyPort=$proxy_port"
 252 |     
 253 |     echo "$JAVA_PROXY"
 254 | }
 255 | 
 256 | # get certificates from github
 257 | get_certificates() {
 258 |     # don't if this was already done today
 259 |     [[ -z $force && "$(find $ESG_CERT_DIR -type d -mtime -1 2>/dev/null)" ]] && return 0
 260 |     echo -n "Retrieving Federation Certificates..." >&2
 261 | 
 262 |     if ! wget -O $ESG_HOME/esg-truststore.ts --no-check-certificate https://github.com/ESGF/esgf-dist/raw/master/installer/certs/esg-truststore.ts; then
 263 |         echo "Could not fetch esg-truststore";
 264 |         return 1
 265 |     fi
 266 |     
 267 |     if ! wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar -O - -q | tar x -C $ESG_HOME; then
 268 |         #certificates tarred into esg_trusted_certificates. (if it breaks, let the user know why
 269 |         wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar
 270 |         echo "Could't update certs!" >&2
 271 |         return 1
 272 |     else
 273 |         #if here everythng went fine. Replace old cert with this ones    
 274 |         [[ -d $ESG_CERT_DIR ]] && rm -r $ESG_CERT_DIR || mkdir -p $(dirname $ESG_CERT_DIR)
 275 |         mv $ESG_HOME/esg_trusted_certificates $ESG_CERT_DIR
 276 |         touch $ESG_CERT_DIR
 277 |         echo "done!" >&2
 278 |     fi
 279 | 
 280 | }
 281 | 
 282 | # Retrieve ESG credentials
 283 | unset pass
 284 | get_credentials() {
 285 |     if check_java
 286 |     then
 287 | 	use_java=1
 288 |     else	
 289 | 	use_java=0
 290 | 	echo "No suitable java for obtaining certificate - checking for myproxy-logon instead" >&2
 291 | 	check_myproxy_logon || exit 1
 292 |     fi
 293 |     #get all certificates
 294 |     get_certificates
 295 | 
 296 |     if [[ -z "$(find $MYPROXY_GETCERT -type f -mtime -1 2>/dev/null)" ]]; then
 297 |         echo -n "(Downloading $MYPROXY_GETCERT... "
 298 |         mkdir -p $(dirname $MYPROXY_GETCERT)
 299 |         if wget -q --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/getcert.jar -O $MYPROXY_GETCERT;then
 300 |             echo 'done)'
 301 |             touch $MYPROXY_GETCERT
 302 |         else
 303 |             echo 'failed)'
 304 |         fi
 305 |     fi
 306 | 
 307 |     #if the user already defined one, use it
 308 |     if [[ -z $openId ]]; then
 309 |         #try to parse the last valid value if any
 310 |         [[ -f "$MYPROXY_STATUS" ]] && openId=$(awk -F= '/^OpenID/ {gsub("\\\\", ""); print $2}' $MYPROXY_STATUS)
 311 |         if [[ -z $openId ]]; then
 312 |             #no OpenID, we need to ask the user
 313 |             echo -n "Please give your OpenID (Example: https://myserver/example/username) ? "
 314 |         else
 315 |             #Allow the user to change it if desired
 316 |             echo -n "Please give your OpenID (hit ENTER to accept default: $openId)? "
 317 |         fi
 318 |         read -e
 319 |         [[ "$REPLY" ]] && openId="$REPLY"
 320 |     else
 321 |         ((verbose)) && echo "Using user defined OpenID $openId (to change use -o <open_id>)"
 322 |     fi
 323 | 
 324 |     if grep -q ceda.ac.uk <<<$openId; then
 325 |         username=${openId##*/}
 326 |         echo -n "Please give your username if different [$username]: "
 327 |         read -e
 328 |         [[ "$REPLY" ]] && username="$REPLY"
 329 |     fi
 330 |     
 331 | 
 332 | 
 333 |     if [ $use_java -eq 1 ]
 334 |     then
 335 |         local args=
 336 |         #get password
 337 | 	[[ ! "$pass" ]] && read -sp "MyProxy Password? " pass
 338 |         [[ "$openId" ]] && args=$args" --oid $openId"
 339 |         [[ "$pass" ]] && args=$args" -P $pass"
 340 |         [[ "$username" ]] && args=$args" -l $username"
 341 |         
 342 |         echo -n $'\nRetrieving Credentials...' >&2
 343 |         if ! java $(proxy_to_java) -jar $MYPROXY_GETCERT $args --ca-directory $ESG_CERT_DIR --output $ESG_CREDENTIALS ; then        
 344 |             echo "Certificate could not be retrieved"
 345 |             exit 1
 346 |         fi
 347 |         echo "done!" >&2
 348 |     else
 349 |         args=`openid_to_myproxy_args $openId $username` || exit 1
 350 |         if ! myproxy-logon $args -b -o $ESG_CREDENTIALS
 351 | 	then
 352 |             echo "Certificate could not be retrieved"
 353 | 	    exit 1
 354 |         fi
 355 | 	cp $HOME/.globus/certificates/* $ESG_CERT_DIR/	
 356 |     fi
 357 | }
 358 | 
 359 | openid_to_myproxy_args() {
 360 |   python - <<EOF || exit 1
 361 | import sys
 362 | import re
 363 | import xml.etree.ElementTree as ET
 364 | import urllib2
 365 | openid = "$1"
 366 | username = "$2" or re.sub(".*/", "", openid)
 367 | e = ET.parse(urllib2.urlopen(openid))
 368 | servs = [el for el in e.getiterator() if el.tag.endswith("Service")]
 369 | for serv in servs:
 370 |     servinfo = dict([(re.sub(".*}", "", c.tag), c.text)
 371 |                      for c in serv.getchildren()])
 372 |     try:
 373 |         if servinfo["Type"].endswith("myproxy-service"):
 374 |             m = re.match("socket://(.*):(.*)", servinfo["URI"])
 375 |             if m:
 376 |                 host = m.group(1)
 377 |                 port = m.group(2)
 378 |                 print "-s %s -p %s -l %s" % (host, port, username)
 379 |                 break
 380 |     except KeyError:
 381 |         continue
 382 | else:
 383 |     sys.stderr.write("myproxy service could not be found\n")
 384 |     sys.exit(1)
 385 | EOF
 386 | }
 387 | 
 388 | # check the certificate validity
 389 | check_cert() {
 390 |     if [[ ! -f "$ESG_CERT" || $force ]]; then
 391 |         #not there, just get it
 392 |         get_credentials
 393 |     elif which openssl &>/dev/null; then
 394 |         #check openssl and certificate
 395 |         if ! openssl x509 -checkend $CERT_EXPIRATION_WARNING -noout -in $ESG_CERT 2>/dev/null; then
 396 |             echo "The certificate expires in less than $((CERT_EXPIRATION_WARNING / 60 / 60)) hour(s). Renewing..."
 397 |             get_credentials
 398 |         else
 399 |             #ok, certificate is fine
 400 |             return 0
 401 |         fi
 402 |     fi
 403 | }
 404 | 
 405 | #
 406 | # Detect ESG credentials
 407 | #
 408 | find_credentials() {
 409 | 
 410 |     #is X509_USER_PROXY or $HOME/.esg/credential.pem
 411 |     if [[ -f "$ESG_CREDENTIALS" ]]; then
 412 |         # file found, proceed.
 413 |         ESG_CERT="$ESG_CREDENTIALS"
 414 |         ESG_KEY="$ESG_CREDENTIALS"
 415 |     elif [[ -f "$X509_USER_CERT" && -f "$X509_USER_KEY" ]]; then
 416 |         # second try, use these certificates.
 417 |         ESG_CERT="$X509_USER_CERT"
 418 |         ESG_KEY="$X509_USER_KEY"
 419 |     else
 420 |         # If credentials are not present, just point to where they should go 
 421 |         echo "No ESG Credentials found in $ESG_CREDENTIALS" >&2
 422 |             ESG_CERT="$ESG_CREDENTIALS"
 423 |             ESG_KEY="$ESG_CREDENTIALS"
 424 |             #they will be retrieved later one
 425 |     fi
 426 | 
 427 | 
 428 |     #chek openssl and certificate
 429 |     if (which openssl &>/dev/null); then
 430 |         if ( openssl version | grep 'OpenSSL 1\.0' ); then
 431 |             echo '** WARNING: ESGF Host certificate checking might not be compatible with OpenSSL 1.0+'
 432 |         fi
 433 |         check_cert || { (($?==1)); exit 1; }
 434 |     fi
 435 |     
 436 |     if [[ $CHECK_SERVER_CERT == "Yes" ]]; then
 437 |         [[ -d "$ESG_CERT_DIR" ]] || { echo "CA certs not found. Aborting."; exit 1; }
 438 |         PKI_WGET_OPTS="--ca-directory=$ESG_CERT_DIR"
 439 |     fi
 440 | 
 441 |     #some wget version complain if there's no file present
 442 |     [[ -f $COOKIE_JAR ]] || touch $COOKIE_JAR
 443 | 
 444 |     PKI_WGET_OPTS="$PKI_WGET_OPTS --certificate=$ESG_CERT --private-key=$ESG_KEY --save-cookies=$COOKIE_JAR --load-cookies=$COOKIE_JAR --ca-certificate=$ESG_CERT"
 445 | 
 446 | }
 447 | 
 448 | check_chksum() {
 449 |     local file="$1"
 450 |     local chk_type=$2
 451 |     local chk_value=$3
 452 |     local local_chksum=Unknown
 453 | 
 454 |     case $chk_type in
 455 |         md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");;
 456 |         sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);;
 457 |         *) echo "Can't verify checksum." && return 0;;
 458 |     esac
 459 | 
 460 |     #verify
 461 |     ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2
 462 |     echo $local_chksum
 463 | }
 464 | 
 465 | #Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x)
 466 | md5sum_() {
 467 |     hash -r
 468 |     if type md5sum >& /dev/null; then
 469 |         echo $(md5sum $@)
 470 |     else
 471 |         echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p')
 472 |     fi
 473 | }
 474 | 
 475 | #Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x)
 476 | sha256sum_() {
 477 |     hash -r
 478 |     if type sha256sum >& /dev/null; then
 479 |         echo $(sha256sum $@)
 480 |     elif type shasum >& /dev/null; then
 481 |         echo $(shasum -a 256 $@)
 482 |     else
 483 |         echo $(sha2 -q -256 $@)
 484 |     fi
 485 | }
 486 | 
 487 | get_mod_time_() {
 488 |     if ((MACOSX)); then
 489 |         #on a mac modtime is stat -f %m <file>
 490 |         echo "$(stat -f %m $@)"
 491 |     else
 492 |         #on linux (cygwin) modtime is stat -c %Y <file>
 493 |         echo "$(stat -c %Y $@)"
 494 |     fi
 495 |     return 0;
 496 | }
 497 | 
 498 | remove_from_cache() {
 499 |     local entry="$1"
 500 |     local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")"
 501 |     echo "$tmp_file" > "$CACHE_FILE"
 502 |     unset cached
 503 | }
 504 | 
 505 | #Download data from node using cookies and not certificates.
 506 | download_http_sec()
 507 | {
 508 |   #The data to be downloaded.
 509 |   data=" $url"
 510 |   filename="$file"  
 511 | 
 512 |   #Wget args.
 513 |   if ((insecure)) 
 514 |   then
 515 |    wget_args=" --no-check-certificate --cookies=on  --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " 
 516 |   else
 517 |    wget_args=" --ca-directory=$WGET_TRUSTED_CERTIFICATES --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt "  
 518 |   fi 
 519 | 
 520 |   if ((use_cookies_for_http_basic_auth_start)) || ((use_cookies_for_http_basic_auth)) 
 521 |   then
 522 |    wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt"    
 523 |   fi
 524 |   
 525 |   if((force_TLSv1))
 526 |   then
 527 |    wget_args=" $wget_args"" --secure-protocol=TLSv1 "
 528 |   fi
 529 |   
 530 |   
 531 |   if [[ ! -z "$ESGF_WGET_OPTS" ]]
 532 |   then
 533 |     wget_args="$wget_args $ESGF_WGET_OPTS"
 534 |   fi  
 535 |   
 536 | 
 537 |   #use cookies for the next downloads
 538 |   use_cookies_for_http_basic_auth=1;
 539 |    
 540 |   #Debug message.
 541 |   if  ((debug))
 542 |   then
 543 |    echo -e "\nExecuting:\n"
 544 |    echo -e "wget $wget_args $data\n"
 545 |   fi
 546 | 
 547 | 
 548 |   #Try to download the data. 
 549 |   command="wget $wget_args -O $filename $data"
 550 |   http_resp=$(eval $command  2>&1) 
 551 |   cmd_exit_status="$?"
 552 |   
 553 |   if ((debug))
 554 |   then
 555 |    echo -e "\nHTTP response:\n $http_resp\n"
 556 |   fi
 557 |       
 558 |   #Extract orp service from url ?
 559 |   #Evaluate response.
 560 |   #redirects=$(echo "$http_resp" | egrep -c ' 302 ')
 561 |   #(( "$redirects" == 1 )) && 
 562 |   if  echo "$http_resp" | grep -q "/esg-orp/"      
 563 |   then
 564 |    urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3)
 565 |    orp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2)
 566 | 
 567 | 
 568 |    #Use cookies for transaction with orp.
 569 |    wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt"    
 570 |    
 571 |    #Download data using either http basic auth or http login form.
 572 |    if [[ "$openid_c" == */openid/  || "$openid_c" == */openid ]]
 573 |    then
 574 |     download_http_sec_open_id
 575 |    else
 576 |     download_http_sec_decide_service
 577 |    fi
 578 |   else  
 579 |    if    echo "$http_resp" | grep -q "401 Unauthorized"  \
 580 |       || echo "$http_resp" | grep -q "403: Forbidden"  \
 581 |       || echo "$http_resp" | grep -q "Connection timed out."  \
 582 |       || echo "$http_resp" | grep -q "no-check-certificate"  \
 583 |       || (( $cmd_exit_status != 0 ))      
 584 |    then 
 585 |     echo "ERROR : http request to OpenID Relying Party service failed."
 586 |     failed=1
 587 |    fi
 588 |   fi
 589 | }
 590 | 
 591 | 
 592 | #Function that decides which implementaion of idp to use.
 593 | download_http_sec_decide_service()
 594 | {
 595 |   #find claimed id
 596 | 
 597 |   pos=$(echo "$openid_c" | egrep -o '/' | wc -l)
 598 |   username_c=$(echo "$openid_c"  | cut -d'/' -f "$(($pos + 1))")
 599 |   esgf_uri=$(echo "$openid_c" | egrep -o '/esgf-idp/openid/')
 600 | 
 601 |   host=$(echo "$openid_c"  | cut -d'/' -f 3)
 602 |   #test ceda first.
 603 | 
 604 |   if [[ -z "$esgf_uri" ]]
 605 |   then
 606 |    openid_c_tmp="https://""$host""/openid/"
 607 |   else
 608 |    openid_c_tmp="https://""$host""/esgf-idp/openid/" 
 609 |   fi
 610 | 
 611 |   command="wget "$openid_c_tmp" --no-check-certificate ${force_TLSv1:+--secure-protocol=TLSv1} -O-"
 612 |         
 613 |   if [[ ! -z "$ESGF_WGET_OPTS" ]]
 614 |   then
 615 |    command="$command $ESGF_WGET_OPTS"
 616 |   fi  
 617 |           
 618 |   #Debug message.
 619 |   if  ((debug))
 620 |   then
 621 |    echo -e "\nExecuting:\n"
 622 |    echo -e "$command\n"
 623 |   fi
 624 |             
 625 | 
 626 |   #Execution of command.
 627 |   http_resp=$(eval $command  2>&1)
 628 |   cmd_exit_status="$?"
 629 |   
 630 |   
 631 |   if ((debug))
 632 |   then
 633 |    echo -e "\nHTTP response:\n $http_resp\n"
 634 |   fi 
 635 |   
 636 | 
 637 |   if    echo "$http_resp" | grep -q "[application/xrds+xml]"  \
 638 |      && echo "$http_resp" | grep -q "200 OK"  \
 639 |      && (( cmd_exit_status == 0 ))       
 640 |   then
 641 |    openid_c=$openid_c_tmp
 642 |    download_http_sec_open_id
 643 |   else
 644 |    if [[ -z "$esgf_uri" ]]
 645 |    then
 646 |     echo "ERROR : HTTP request to OpenID Relying Party service failed."
 647 |     failed=1
 648 |    else
 649 |    download_http_sec_cl_id
 650 |    fi
 651 |   fi
 652 | }
 653 | 
 654 | 
 655 | download_http_sec_retry()
 656 | {
 657 |   echo -e "\nRetrying....\n"
 658 |   #Retry in case that last redirect did not work, this happens with older version of wget.
 659 |   command="wget $wget_args $data"
 660 |       
 661 |   #Debug message.
 662 |   if  ((debug))
 663 |   then
 664 |    echo -e "Executing:\n"
 665 |    echo -e "$command\n"
 666 |   fi   
 667 |    
 668 |   http_resp=$(eval $command  2>&1) 
 669 |   cmd_exit_status="$?"
 670 | 
 671 |   if ((debug))
 672 |   then
 673 |    echo -e "\nHTTP response:\n $http_resp\n"
 674 |   fi
 675 |    
 676 |   if    echo "$http_resp" | grep -q "401 Unauthorized"  \
 677 |      || echo "$http_resp" | grep -q "403: Forbidden"  \
 678 |      || echo "$http_resp" | grep -q "Connection timed out."  \
 679 |      || echo "$http_resp" | grep -q "no-check-certificate"  \
 680 |      || (( $cmd_exit_status != 0 ))      
 681 |   then 
 682 |    echo -e "\nERROR : Retry failed.\n"
 683 |    #rm "$filename"
 684 |    failed=1
 685 |   fi #if retry failed.
 686 | }
 687 | 
 688 | #Function for downloading data using the claimed id.
 689 | download_http_sec_cl_id()
 690 | {
 691 |   #Http request for sending openid to the orp service.
 692 |   command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\"  $wget_args -O- https://$orp_service/esg-orp/j_spring_openid_security_check.htm "
 693 | 
 694 |   #Debug message.
 695 |   if  ((debug))
 696 |   then
 697 |    echo -e "Executing:\n"
 698 |    echo -e "wget $command\n"
 699 |   fi 
 700 |   
 701 |   
 702 |   #Execution of command.
 703 |   http_resp=$(eval $command  2>&1)
 704 |   cmd_exit_status="$?"
 705 | 
 706 |   
 707 |   if ((debug))
 708 |   then
 709 |    echo -e "\nHTTP response:\n $http_resp\n"
 710 |   fi 
 711 |     
 712 |   
 713 |   #Extract orp service from openid ?
 714 |   #Evaluate response.If redirected to idp service send the credentials.
 715 |   #redirects=$(echo "$http_resp" | egrep -c ' 302 ')
 716 |   #(( redirects == 2  )) && 
 717 |   if  echo "$http_resp" | grep -q "login.htm"  && (( cmd_exit_status == 0 ))   
 718 |   then 
 719 |   
 720 |    urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3)
 721 |    idp_service=$(echo "$urls"  | tr '\n' ' ' | cut -d' ' -f 2) 
 722 |       
 723 |    command="wget --post-data  password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$idp_service/esgf-idp/idp/login.htm"
 724 |    
 725 | 
 726 |    #Debug message.
 727 |    if  ((debug))
 728 |    then
 729 |     echo -e "Executing:\n"
 730 |     echo -e "wget $command\n"
 731 |    fi 
 732 | 
 733 |    #Execution of command.
 734 |    http_resp=$(eval $command  2>&1)
 735 |    cmd_exit_status="$?"
 736 |       
 737 |    if ((debug))
 738 |    then
 739 |     echo -e "\nHTTP response:\n $http_resp\n"
 740 |    fi 
 741 |         
 742 |    #Evaluate response. 
 743 |    #redirects=$(echo "$http_resp" | egrep -c ' 302 ')
 744 |    #(( "$redirects" != 5 )) \ 
 745 |    if    echo "$http_resp" | grep -q "text/html"  \
 746 |       || echo "$http_resp" | grep -q "403: Forbidden"  \
 747 |       || (( cmd_exit_status != 0 ))        
 748 |    then 
 749 |     rm "$filename"
 750 |     download_http_sec_retry
 751 |    fi
 752 |  
 753 |   else
 754 |    echo "ERROR : HTTP request to OpenID Provider service failed."
 755 |    failed=1
 756 |   fi #if redirected to idp.  
 757 | }
 758 | 
 759 | 
 760 | 
 761 | download_http_sec_open_id()
 762 | {
 763 |   #Http request for sending openid to the orp web service.
 764 |   command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" --header=\"esgf-idea-agent-type:basic_auth\" --http-user=\"$username_c\" --http-password=\"$password_c\"  $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$orp_service/esg-orp/j_spring_openid_security_check.htm "
 765 | 
 766 | 
 767 |   #Debug message.
 768 |   if  ((debug))
 769 |   then
 770 |    echo -e "Executing:\n"
 771 |    echo -e "$command\n"
 772 |   fi 
 773 | 
 774 |   #Execution of command.
 775 |   http_resp=$(eval $command  2>&1)
 776 |   cmd_exit_status="$?"
 777 |   
 778 |   
 779 |   if ((debug))
 780 |   then
 781 |    echo -e "\nHTTP response:\n $http_resp\n"
 782 |   fi 
 783 | 
 784 |   #Evaluate response.
 785 |   #redirects=$(echo "$http_resp" | egrep -c ' 302 ')
 786 |   #(( "$redirects" != 7 )) ||
 787 |   if   echo "$http_resp" | grep -q "text/html"  ||  (( $cmd_exit_status != 0 ))   
 788 |   then
 789 |    rm "$filename"
 790 |    download_http_sec_retry     
 791 |   fi #if error during http basic authentication. 
 792 |   
 793 | }
 794 | 
 795 | 
 796 | download() {
 797 |     wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v} -c ${force_TLSv1:+--secure-protocol=TLSv1} $PKI_WGET_OPTS"
 798 |     
 799 |     while read line
 800 |     do
 801 |         # read csv here document into proper variables
 802 |         eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) )
 803 | 
 804 |         #Process the file
 805 |         echo -n "$file ..."
 806 | 
 807 |         #get the cached entry if any.
 808 |         cached="$(grep -e "^$file" "$CACHE_FILE")"
 809 |         
 810 |         #if we have the cache entry but no file, clean it.
 811 |         if [[ ! -f $file && "$cached" ]]; then
 812 |             #the file was removed, clean the cache
 813 |             remove_from_cache "$file"
 814 |             unset cached
 815 |         fi
 816 |         
 817 |         #check it wasn't modified
 818 |         if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then
 819 |                     if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then
 820 |                 echo "Already downloaded and verified"
 821 |                 continue
 822 |             elif ((update_files)); then
 823 |                 #user want's to overwrite newer files
 824 |                 rm $file
 825 |                 remove_from_cache "$file"
 826 |                 unset cached
 827 |             else
 828 |                 #file on server is different from what we have. 
 829 |                 echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite"
 830 |                 continue
 831 |             fi
 832 |         fi
 833 |         unset chksum_err_value chksum_err_count
 834 |         
 835 |         while : ; do
 836 |             # (if we had the file size, we could check before trying to complete)
 837 |             echo "Downloading"
 838 |             [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")"
 839 |             if ((dry_run)); then
 840 |                 #all important info was already displayed, if in dry_run mode just abort
 841 |                 #No status will be stored
 842 |                 break
 843 |             else
 844 |                 if ((use_http_sec))
 845 |                 then
 846 |                  download_http_sec
 847 |                  if ((failed))
 848 |                  then
 849 |                   break
 850 |                  fi
 851 |                 else
 852 |                  $wget -O "$file" $url || { failed=1; break; }  
 853 |                 fi                
 854 |             fi
 855 | 
 856 |             #check if file is there
 857 |             if [[ -f $file ]]; then
 858 |                 ((debug)) && echo file found
 859 |                 if [[ ! "$chksum" ]]; then
 860 |                     echo "Checksum not provided, can't verify file integrity"
 861 |                     break
 862 |                 fi
 863 |                 result_chksum=$(check_chksum "$file" $chksum_type $chksum)
 864 |                 if [[ "$result_chksum" != "$chksum" ]]; then
 865 |                     echo "  $chksum_type failed!"
 866 |                     if ((clean_work)); then
 867 |                         if !((chksum_err_count)); then
 868 |                                 chksum_err_value=$result_chksum
 869 |                                 chksum_err_count=2
 870 |                             elif ((checksum_err_count--)); then
 871 |                                 if [[ "$result_chksum" != "$chksum_err_value" ]]; then
 872 |                                     #this is a real transmission problem
 873 |                                     chksum_err_value=$result_chksum
 874 |                                     chksum_err_count=2
 875 |                                 fi
 876 |                             else
 877 |                                 #ok if here we keep getting the same "different" checksum
 878 |                                 echo "The file returns always a different checksum!"
 879 |                                 echo "Contact the data owner to verify what is happening."
 880 |                                 echo
 881 |                                 sleep 1
 882 |                                 break
 883 |                             fi
 884 |                         
 885 |                             rm $file
 886 |                             #try again
 887 |                             echo -n "  re-trying..."
 888 |                             continue
 889 |                     else
 890 |                             echo "  don't use -p or remove manually."
 891 |                     fi
 892 |                 else
 893 |                     echo "  $chksum_type ok. done!"
 894 |                     echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE
 895 |                 fi
 896 |             fi
 897 |             #done!
 898 |             break
 899 |         done
 900 |         
 901 |         if ((failed)); then
 902 |             echo "download failed"
 903 |             # most common failure is certificate expiration, so check this
 904 |             #if we have the pasword we can retrigger download
 905 |             ((!skip_security)) && [[ "$pass" ]] && check_cert
 906 |             unset failed
 907 |         fi
 908 |         
 909 | done <<<"$download_files"
 910 | 
 911 | }
 912 | 
 913 | dedup_cache_() {
 914 |     local file=${1:-${CACHE_FILE}}
 915 |     ((debug)) && echo "dedup'ing cache ${file} ..."
 916 |     local tmp=$(LC_ALL='C' sort  -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2)
 917 |     ((DEBUG)) && echo "$tmp"
 918 |     echo "$tmp" > $file
 919 |     ((debug)) && echo "(cache dedup'ed)"
 920 | }
 921 | 
 922 | http_basic_auth_func_info_message()
 923 | {
 924 |   echo  "********************************************************************************"
 925 |   echo  "*                                                                              *"
 926 |   echo  "* Note that new functionality to allow authentication without the need for     *"
 927 |   echo  "* certificates is available with this version of the wget script.  To enable,  *"
 928 |   echo  "* use the \"-H\" option and enter your OpenID and password when prompted:        *"
 929 |   echo  "*                                                                              *"
 930 |   echo  "* $ "$(basename "$0")" -H [options...]                                     *"
 931 |   echo  "*                                                                              *"
 932 |   echo  "* For a full description of the available options use the help option:         *"
 933 |   echo  "*                                                                              *"
 934 |   echo  "* $ "$(basename "$0")" -h                                                  *"
 935 |   echo  "*                                                                              *"
 936 |   echo  "********************************************************************************"
 937 | }
 938 | 
 939 | #
 940 | # MAIN
 941 | #
 942 | 
 943 | if ((!use_http_sec))
 944 | then 
 945 |  http_basic_auth_func_info_message
 946 | fi
 947 | 
 948 | echo "Running $(basename $0) version: $version"
 949 | ((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly"
 950 | echo "Use $(basename $0) -h for help."$'\n'
 951 | 
 952 | ((debug)) && cat<<EOF
 953 | ** Debug info **
 954 | ESG_HOME=$ESG_HOME
 955 | ESG_CREDENTIALS=$ESG_CREDENTIALS
 956 | ESG_CERT_DIR=$ESG_CERT_DIR
 957 | ** -- ** -- ** -- ** --
 958 | 
 959 | EOF
 960 | 
 961 | 
 962 | cat <<'EOF-MESSAGE'
 963 | Script created for 18 file(s)
 964 | (The count won't match if you manually edit this file!)
 965 | 
 966 | 
 967 | 
 968 | EOF-MESSAGE
 969 | sleep 1
 970 | 
 971 | check_os
 972 | ((!skip_security)) && find_credentials
 973 | 
 974 | if ((use_http_sec))
 975 | then 
 976 |      
 977 |  if (( ! insecure))
 978 |  then 
 979 |   get_certificates
 980 |  fi
 981 | 
 982 |  #Cookies folder.
 983 |  COOKIES_FOLDER="$ESG_HOME/wget_cookies"
 984 |  
 985 |  if (( force ))
 986 |  then
 987 |   if [ -d $COOKIES_FOLDER ] 
 988 |   then
 989 |    rm -rf $COOKIES_FOLDER
 990 |   fi
 991 |  fi
 992 | 
 993 |  #Create cookies folder. 
 994 |  if [[ ! -d $COOKIES_FOLDER ]] 
 995 |  then
 996 |   mkdir $COOKIES_FOLDER
 997 |  fi
 998 |  
 999 |  if((! use_cookies_for_http_basic_auth_start))
1000 |  then
1001 | 
1002 |   #Read openid.
1003 |   if [[ ! -z "$openId" ]]
1004 |   then
1005 |    openid_c="$openId"
1006 |   elif ( (("$#" > 1)) || (("$#" == 1)) ) 
1007 |   then
1008 |    openid_c=$1
1009 |   else
1010 |    read -p    "Enter your openid : " openid_c
1011 |   fi
1012 |   
1013 |   
1014 |   #Read username.
1015 |   if [[ ! -z "$username_supplied" ]]
1016 |   then
1017 |    username_c="$username_supplied"
1018 |   elif (("$#" == 2))
1019 |   then
1020 |    username_c=$2
1021 |   elif [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]]
1022 |   then
1023 |    read -p    "Enter username : " username_c
1024 |   fi
1025 |   
1026 |   #Read password.
1027 |   read -s -p "Enter password : " password_c
1028 |   echo -e "\n"
1029 | 
1030 |  fi #use cookies
1031 | 
1032 | fi #use_http_sec 
1033 | 
1034 | 
1035 | #do we have old results? Create the file if not
1036 | [ ! -f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE
1037 | 
1038 | #clean the force parameter if here (at htis point we already have the certificate)
1039 | unset force
1040 | 
1041 | download
1042 | 
1043 | dedup_cache_
1044 | 
1045 | 
1046 | echo "done"
1047 | 


--------------------------------------------------------------------------------