├── postproc_cosmo
│   ├── constants.py
│   ├── functions.py
│   ├── settings.py
│   └── extpar_adapt.py
├── step_01_extract_deltas
│   ├── settings.py
│   ├── constants.py
│   ├── functions.py
│   ├── CFday_cut_subdomain.sh
│   ├── CFday_target_p_MPI-ESM1-2-HR.dat
│   ├── Emon_add_top_from_Amon.sh
│   ├── CFday_interp_to_plev.py
│   ├── Emon_convert_hus_to_hur.py
│   ├── extract_climate_delta.sh
│   └── CFday_wget_scripts
│       ├── wget_CFday_ssp585_ua.sh
│       ├── wget_CFday_ssp585_va.sh
│       └── wget_CFday_ssp585_hur.sh
├── .gitignore
├── constants.py
├── LICENSE
├── Documentations
│   └── README_CMOR.md
├── parallel.py
├── README.md
├── settings.py
├── step_02_preproc_deltas.py
├── environment.yml
├── fields
│   └── plot.py
└── step_03_apply_to_era.py

/postproc_cosmo/constants.py:
--------------------------------------------------------------------------------
../constants.py
--------------------------------------------------------------------------------
/postproc_cosmo/functions.py:
--------------------------------------------------------------------------------
../functions.py
--------------------------------------------------------------------------------
/postproc_cosmo/settings.py:
--------------------------------------------------------------------------------
../settings.py
--------------------------------------------------------------------------------
/step_01_extract_deltas/settings.py:
--------------------------------------------------------------------------------
../settings.py
--------------------------------------------------------------------------------
/step_01_extract_deltas/constants.py:
--------------------------------------------------------------------------------
../constants.py
--------------------------------------------------------------------------------
/step_01_extract_deltas/functions.py:
--------------------------------------------------------------------------------
../functions.py
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
target_grid*
bOut
__pycache__
.ipynb_checkpoints

Session.vim
.*.swp

*.txt
*.nc

#xesmf
PET*
--------------------------------------------------------------------------------
/constants.py:
--------------------------------------------------------------------------------
### Source: COSMO source code, data_constants.f90
# gas constant for dry air
CON_RD = 287.05 # [J kg-1 K-1]
# gravity constant (assumed constant over the entire profile)
CON_G = 9.80665 # [m s-2]
# ratio of molecular mass between water and dry air
CON_MW_MD = 0.622 # [1]
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019, ETH Zurich

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/step_01_extract_deltas/CFday_cut_subdomain.sh:
--------------------------------------------------------------------------------
download_data_dir=/net/o3/hymet_nobackup/heimc/data/pgw/download
subdom_data_dir=/net/o3/hymet_nobackup/heimc/data/pgw/download/subdomain

box=-73,37,-42,34

# Note: use ps,hurs,tas from Amon and zg from Emon

var_names=(ta hur ua va)
experiments=(historical ssp585)

for var_name in ${var_names[@]}; do
    echo $var_name
    for exp in ${experiments[@]}; do
        echo $exp
        if [[ "$exp" == "historical" ]]; then
            years=(19850101-19891231 19900101-19941231 \
                   19950101-19991231 20000101-20041231 \
                   20050101-20091231 20100101-20141231)
            # uncomment to process only a subset for testing:
            #years=(19850101-19891231 19900101-19941231)
        elif [[ "$exp" == "ssp585" ]]; then
            years=(20700101-20741231 20750101-20791231 \
                   20800101-20841231 20850101-20891231 \
                   20900101-20941231 20950101-20991231)
            # uncomment to process only a subset for testing:
            #years=(20700101-20741231 20750101-20791231)
        fi
        for year in ${years[@]}; do
            echo $year
            cdo sellonlatbox,$box \
                $download_data_dir/${var_name}_CFday_MPI-ESM1-2-HR_${exp}_r1i1p1f1_gn_${year}.nc \
                $subdom_data_dir/${var_name}_CFday_MPI-ESM1-2-HR_${exp}_r1i1p1f1_gn_${year}.nc
        done
    done
done
--------------------------------------------------------------------------------
/step_01_extract_deltas/CFday_target_p_MPI-ESM1-2-HR.dat:
--------------------------------------------------------------------------------
101000.
100000.
99000.
98000.
97000.
96000.
95000.
94000.
93000.
92000.
91000.
90000.
89000.
88000.
87000.
86000.
85000.
84000.
83000.
82000.
81000.
80000.
77500.
75000.
72500.
70000.
67500.
65000.
62500.
60000.
57500.
55000.
52500.
50000.
47500.
45000.
42500.
40000.
37500.
35000.
32500.
30000.
28000.
26000.
24000.
22000.
20000.
19000.
18000.
17000.
16000.
15000.
14000.
13000.
12000.
11000.
10401.23842568
9464.68561727
8611.75611196
7834.96484375
7127.05859375
6480.85351562
5891.58789062
5354.86132812
4865.74804688
4419.8828125
4013.44238281
3642.94140625
3305.20080566
2997.32421875
2716.67102051
2460.83398438
2227.61889648
2015.02539062
1821.22998047
1644.57055664
1483.60742188
1337.09008789
1203.85314941
1082.81188965
972.95776367
873.35437012
783.13342285
701.49108887
627.68395996
561.02514648
500.88085938
446.6673584
397.84741211
353.92700195
314.45275879
279.00915527
247.21544647
218.72327423
193.21455383
170.39893341
150.01178741
131.81220245
115.58110046
--------------------------------------------------------------------------------
/Documentations/README_CMOR.md:
--------------------------------------------------------------------------------
# PGW-Simulation for CMOR input data

This is an attempt at a very practical explanation of how to set up a PGW simulation using global climate model data in the CMOR format as input (for example CMIP5 or CMIP6 data).

**What data to get?**

You will need data for the following variables: hur, ta, ua, va, zg, ps, hurs, tas, ts, tos

**What time resolution should one choose?**

Monthly mean data is the easiest. This is called e.g. Amon in CMOR. tos is part of Omon in CMOR.

**How to preprocess the data?**

For all variables we need to know how they will change under climate change. This needs to be expressed as a mean annual cycle of changes.
In practice we can get a time slice from the "historical" period (HIST) and one from a future period under a certain emission scenario (SCEN) such as "rcp85". A typical example: For the historical period, get data from 1971-2000. Then construct the mean annual cycle for 1971-2000, for example using the [cdo-command "ymonmean"](https://code.mpimet.mpg.de/projects/cdo/embedded/index.html#x1-5370002.8.33). Repeat for 2070-2099 and the rcp85 data.
Lastly, subtract the historical monthly-mean annual cycle from the future monthly-mean annual cycle and save the resulting difference between the two periods as a single netCDF file per variable (e.g. delta_ta.nc, delta_hurs.nc, ...).
These netCDF files are needed as input for [step_02_preproc_deltas.py](/step_02_preproc_deltas.py), and the naming convention can be specified in [settings.py](/settings.py) (look for the dictionary "file_name_bases").
--------------------------------------------------------------------------------
/postproc_cosmo/extpar_adapt.py:
--------------------------------------------------------------------------------
import argparse
from netCDF4 import Dataset
from functions import load_delta


var_name_map = {
    'ts': 'T_CL',
}




def extpar_adapt(ext_file_path, delta_inp_path):

    ext_file = Dataset(ext_file_path, 'a')

    # update T_CL
    print('update deep soil temperature')

    delta_ts = load_delta(delta_inp_path, 'ts', None)

    ## Make sure dimensions are exactly the same.
    ## There are numerical differences between CDO remapped objects
    ## and xarray data...
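    ## (If such round-off differences ever cause problems, uncommenting
    ## the block below forces the delta onto the extpar rlat/rlon
    ## coordinate values.)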
    #delta_ts = delta_ts.assign_coords(
    #        {'rlat':ext_file.rlat.values,
    #         'rlon':ext_file.rlon.values})

    delta_ts_clim = delta_ts.mean(dim=['time'])
    print(delta_ts_clim)

    ext_file['T_CL'][:] += delta_ts_clim.values.squeeze()

    ext_file.close()
    print('Done.')








if __name__ == "__main__":

    ## input arguments
    parser = argparse.ArgumentParser(description =
                'COSMO-specific: Perturb Extpar soil temperature climatology with ts climate delta.')
    # extpar file to modify
    parser.add_argument('extpar_file_path', type=str,
                        help='Path to extpar file to modify T_CL.')

    # climate delta directory (already remapped to the extpar grid)
    parser.add_argument('-d', '--delta_input_dir', type=str, default=None,
                        help='Directory with GCM climate deltas to be used. ' +
                        'This directory should have a climate delta for ts ' +
                        'already horizontally remapped to the grid of ' +
                        'the extpar file, which can for instance be done with ' +
                        'step_02_preproc_deltas.py or otherwise with CDO.')
    args = parser.parse_args()
    print(args)

    extpar_adapt(args.extpar_file_path, args.delta_input_dir)

--------------------------------------------------------------------------------
/step_01_extract_deltas/Emon_add_top_from_Amon.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# base directory where output should be stored
out_base_dir=/net/o3/hymet_nobackup/heimc/data/pgw/deltas/native

# name of the GCM to extract data for
gcm_name=MPI-ESM1-2-HR

## type of CMIP6 model output (e.g. monthly or daily, etc.)
## to use
# high-resolution monthly data, available for only very few GCMs
table_ID=Emon

## select variables for which to add the model top
var_names=(ua va ta zg hur)
#var_names=(hur)

## CMIP experiments to use to compute climate deltas
## --> climate delta = future climatology - ERA climatology
# CMIP experiment to use for ERA climatology
era_climate_experiment=historical
# CMIP experiment to use for future climatology
future_climate_experiment=ssp585

# iterate over both experiments and the climate delta
experiments=($era_climate_experiment $future_climate_experiment delta)


out_dir=$out_base_dir/$table_ID/$gcm_name

for var_name in ${var_names[@]}; do
    echo "#################################################################"
    echo $var_name
    echo "#################################################################"


    # add Amon model top values to Emon
    if [[ "$table_ID" == "Emon" ]]; then
        for experiment in ${experiments[@]}
        do
            echo $experiment

            #mv $out_dir/${var_name}_${experiment}.nc \
            #    $out_dir/Emon_model_bottom_${var_name}_${experiment}.nc
            cdo sellevel,100000,97500,95000,92500,90000,87500,85000,82500,80000,77500,75000,70000,65000,60000,55000,50000,45000,40000,35000,30000,25000,22500,20000,17500,15000,12500,10000 $out_dir/${var_name}_${experiment}.nc \
                $out_dir/Emon_model_bottom_${var_name}_${experiment}.nc

            Amon_out_base_dir=$out_base_dir/Amon
            Amon_out_dir=$Amon_out_base_dir/$gcm_name
            cdo sellevel,7000,5000,3000,2000,1000,500,100 \
                $Amon_out_dir/${var_name}_${experiment}.nc \
                $out_dir/Amon_model_top_${var_name}_${experiment}.nc
            cdo -O merge \
                $out_dir/Emon_model_bottom_${var_name}_${experiment}.nc \
                $out_dir/Amon_model_top_${var_name}_${experiment}.nc \
                $out_dir/${var_name}_${experiment}.nc
        done
    fi


done

--------------------------------------------------------------------------------
/parallel.py:
--------------------------------------------------------------------------------
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
description     Helper class for parallel computing.
author          Christoph Heim
usage           use in another script
"""
###############################################################################
import sys, time
import multiprocessing as mp
###############################################################################

def starmap_helper(tup):
    func = tup['func']
    del tup['func']
    return(func(**tup))


def run_starmap(func, fargs={}, njobs=1, run_async=False):
    outputs = []
    if njobs > 1:
        pool = mp.Pool(processes=njobs)
        if run_async:
            outputs = pool.starmap_async(starmap_helper, fargs).get()
        else:
            outputs = pool.starmap(starmap_helper, fargs)
        pool.close()
        pool.join()
    else:
        for i in range(len(fargs)):
            out = func(**fargs[i])
            outputs.append(out)
    return(outputs)



class IterMP:

    def __init__(self, njobs=None, run_async=False):
        self.run_async = run_async

        if njobs is None:
            if len(sys.argv) > 1:
                self.njobs = int(sys.argv[1])
            else:
                self.njobs = 1
        else:
            self.njobs = njobs
        print('IterMP: njobs = '+str(self.njobs))

        self.output = None


    def run(self, func, fargs={}, step_args=None):
        outputs = []

        input = []
        for tI in range(len(step_args)):
            this_fargs = fargs.copy()
            if step_args is not None:
                this_fargs.update(step_args[tI])

            if self.njobs > 1:
                this_fargs['func'] = func
                this_fargs = (this_fargs,)
            input.append(this_fargs)

        self.output = run_starmap(func, fargs=input,
                        njobs=self.njobs, run_async=self.run_async)





def test_IMP(iter_arg, fixed_arg):
    #print(str(iter_arg) + ' ' + str(fixed_arg))
    work = []
    for i in range(int(1E7)):
        work.append(1)
    return(iter_arg)



if __name__ == '__main__':


    if len(sys.argv) > 1:
        njobs = int(sys.argv[1])
    else:
        njobs = 1

    # testing
    t0 = time.time()
    IMP = IterMP(njobs=njobs, run_async=False)
    fargs = {'fixed_arg':'fixed',}
    step_args = []
    for i in range(20):
        step_args.append({'iter_arg':i})
    IMP.run(test_IMP, fargs, step_args)
    print(IMP.output)
    t1 = time.time()
    print(t1 - t0)

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Repository pgw-python

Software to modify ERA5 files so as to impose a large-scale climate change signal,
as described e.g. here https://iopscience.iop.org/article/10.1088/1748-9326/ab4438 or also
here https://doi.org/10.1175/JCLI-D-18-0431.1

# General Documentation
To modify the ERA5 files, we need a climate change signal obtained from the difference between two GCM climatologies, HIST and SCEN. The climate change signal is thus SCEN-HIST and referred to as the climate delta.
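As a minimal sketch (the file names are placeholders, not a fixed convention), such a climate delta can be computed with CDO from monthly GCM output:

```bash
# mean annual cycles (12 monthly values) for both climatologies
cdo ymonmean ta_HIST_monthly.nc ta_HIST_clim.nc
cdo ymonmean ta_SCEN_monthly.nc ta_SCEN_clim.nc
# climate delta = SCEN - HIST
cdo sub ta_SCEN_clim.nc ta_HIST_clim.nc ta_delta.nc
```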

The structure of the repository is built as follows:

The top level directory contains the central scripts to preprocess the GCM climate change signal [step_02_preproc_deltas.py](/step_02_preproc_deltas.py) and to modify the ERA5 files with the climate change signal [step_03_apply_to_era.py](/step_03_apply_to_era.py).
The subdirectory [step_01_extract_deltas](/step_01_extract_deltas/) contains less generic code that can serve as a template to obtain the GCM climatologies HIST and SCEN, as well as the climate delta SCEN-HIST, from raw CMIP6 output. The starting point here is the script [extract_climate_delta.sh](/step_01_extract_deltas/extract_climate_delta.sh). This script has to be adjusted depending on the specific use case.

Note that essential usage-oriented information can be found by running `python step_02_preproc_deltas.py --help` and `python step_03_apply_to_era.py --help`.

Note that users who feed the processed ERA5 files into Int2lm (to run COSMO or ICON) should also modify the variable T_CL in their external parameter file; see [postproc_cosmo](/postproc_cosmo/) for more information.

# Software Requirements

The software is written in Python 3 and requires multiple Python modules. The environment file **environment.yml** can be used to install a conda environment to run the software. More information about what conda is and how it works: https://docs.conda.io/projects/conda/en/latest/user-guide/index.html#

To install the environment, just execute `conda env create -f environment.yml` once conda is installed.

# Workflows Based on Input Data

**Requirements**

Annual climate deltas (SCEN-HIST) and a historical climatology (HIST) from a global climate model in either daily or monthly steps.
Climate deltas refer to the difference between the fields predicted by the climate model for two different time periods (usually future and present). If climate model data in the CMOR format (e.g. CMIP simulations) will be used to force the PGW simulations, there is a practical [documentation](/Documentations/README_CMOR.md) on which variables are needed.
Template scripts to extract CMIP6 data are given in [step_01_extract_deltas](/step_01_extract_deltas/), e.g. [here](/step_01_extract_deltas/extract_climate_delta.sh).

After computing the raw climate deltas on the GCM grid, run the following scripts:
1) Only if using daily climate deltas instead of monthly ones (note that this is not really recommended), smooth the deltas in time: `python step_02_preproc_deltas.py smoothing [...]`
2) Regrid the deltas to the ERA5 grid: `python step_02_preproc_deltas.py regridding [...]`
3) Modify the ERA5 files: `python step_03_apply_to_era.py [...]`
4) There may be some additional steps required for a specific limited-area model. For instance, in COSMO, the deep soil temperature climatology has to be adjusted in the external parameters file, see [postproc_cosmo](/postproc_cosmo/).
5) After these steps, the limited-area-model-specific routine to convert ERA5 files to model initial and boundary conditions can be run using the modified ERA5 files as input.

# References
To acknowledge this software cite the following article:

Brogli, R., Heim, C., Mensch, J., Sørland, S. L., & Schär, C. (2023). The pseudo-global-warming (PGW) approach: Methodology, software package PGW4ERA5 v1.1, validation and sensitivity analyses. Geoscientific Model Development, preprint. https://doi.org/10.5194/gmd-2022-167
--------------------------------------------------------------------------------
/settings.py:
--------------------------------------------------------------------------------
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
description     Settings namelist for all routines in PGW for ERA5
authors         Before 2022: original developments by Roman Brogli
                Since 2022: upgrade to PGW for ERA5 by Christoph Heim
                2022: updates by Jonas Mensch
"""
##############################################################################
##############################################################################

### GENERAL SETTINGS
##############################################################################
# debug output level
i_debug = 2 # [0-2]

# Input and output file naming convention for the HIST and climate delta
# (SCEN-HIST) files from the GCM.
# ({} is placeholder for variable name).
file_name_bases = {
    'SCEN-HIST':    '{}_delta.nc',
    'HIST':         '{}_historical.nc',
}

# File naming convention for ERA5 files to be read in and written out.
era5_file_name_base = 'cas{:%Y%m%d%H}0000.nc'
#era5_file_name_base = 'caf{:%Y%m%d%H}.nc'

# dimension names in ERA5 file
TIME_ERA = 'time'
LON_ERA = 'lon'
LAT_ERA = 'lat'
LEV_ERA = 'level'
HLEV_ERA = 'level1'
SOIL_HLEV_ERA = 'soil1'

# dimension names in GCM (used for all GCM variables except tos)
TIME_GCM = 'time'
LON_GCM = 'lon'
LAT_GCM = 'lat'
PLEV_GCM = 'plev'
LEV_GCM = 'lev'

# dimension names in GCM ocean model (used for tos)
TIME_GCM_OCEAN = 'time'
LON_GCM_OCEAN = 'longitude'
LAT_GCM_OCEAN = 'latitude'

### VARIABLE LIST
##############################################################################
# The names on the left side (dict keys) are CMOR convention names.
# The names on the right side (dict values) are the names of the
# respective variables in the ERA5 files (please adjust to the ERA5 format used).
# Not all of these variables are required as climate deltas.
# Only zg,ta,hur,ua,va,tas,tos are required as climate delta (SCEN-HIST)
# while ps is required for the HIST climatology.
var_name_map = {

    ##### climate delta (SCEN-HIST) required
    ####################

    # 3D air temperature
    'ta'   :'T',
    # 3D lon-wind speed
    'ua'   :'U',
    # 3D lat-wind speed
    'va'   :'V',
    # 3D air relative humidity
    'hur'  :'RELHUM',

    # geopotential
    'zg'   :'PHI', # used for pressure adjustment only

    # near-surface temperature
    'tas'  :None, # not modified in ERA5 (auxiliary field for computations)
    # near-surface relative humidity
    'hurs' :None, # not modified in ERA5 (auxiliary field for computations)
    # sea-surface temperature (SST)
    'tos'  :None, # not modified in ERA5 (auxiliary field for computations)


    ##### HIST climatology required
    ####################

    # surface pressure
    'ps'   :'PS', # auxiliary field for interpolation and pressure adjustm.
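    # (i.e. in addition to the {}_delta.nc files, the code expects a
    # ps_historical.nc file, following file_name_bases above)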


    ##### no GCM data required but ERA5 variable used by the code
    ####################

    # air specific humidity
    'hus'  :'QV',
    # surface geopotential
    'zgs'  :'FIS', # used for pressure adjustment
    # surface skin temperature
    'ts'   :'T_SKIN',
    # soil layer temperature
    'st'   :'T_SO',
    # land area fraction
    'sftlf':'FR_LAND',
    # sea-ice area fraction
    'sic':  'FR_SEA_ICE',
}


### 02 PREPROCESS DELTAS
##############################################################################

### SMOOTHING
####################################

### REGRIDDING
####################################
# depending on whether the xesmf package is installed, it can be used
# for interpolation. Else, an xarray-based method is used.
# The latter should be identical to xESMF,
# except for tiny differences that appear to originate from
# numerical precision.
i_use_xesmf_regridding = 0

## NaN-ignoring kernel interpolation used for the tos climate delta
# maximum kernel radius
# higher values imply that remote lakes (and bays) without GCM SST data will
# receive data from further remote GCM SST grid points instead of falling
# back to the tas (near surface temperature) climate delta
nan_interp_kernel_radius = 1000000 # m
# sharpness: decrease (increase) for smoother (sharper) interpolation
nan_interp_sharpness = 4


### SURFACE PRESSURE ADJUSTMENT SETTINGS
##########################################################################
# reference pressure level
# if set to None, the reference pressure level is chosen locally.
# if the climate deltas have low vertical resolution (e.g. Amon data
# with only 6 vertical levels between 1000-500 hPa), setting
# p_ref_inp = None may help to improve the accuracy of the
# pressure adjustment. See publication for more information.
p_ref_inp = 30000 # Pa
#p_ref_inp = None
# surface pressure adjustment factor in the iterative routine
adj_factor = 0.95
# convergence threshold (maximum geopotential error)
# if procedure does not converge, raise this value a little bit.
thresh_phi_ref_max_error = 0.15
# maximum number of iterations before an error is raised.
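# (each iteration damps the surface pressure update by adj_factor; if the
# geopotential error at p_ref does not fall below thresh_phi_ref_max_error
# within max_n_iter iterations, the routine aborts)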
max_n_iter = 20
# re-interpolation turned on/off
i_reinterp = 0
##########################################################################

--------------------------------------------------------------------------------
/step_01_extract_deltas/CFday_interp_to_plev.py:
--------------------------------------------------------------------------------
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
description     Interpolate CFday data to pressure levels.
author          Christoph Heim
date created    21.02.2021
usage           python CFday_interp_to_plev.py var_names experiment
"""
###############################################################################
import os, argparse
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
from pathlib import Path
from functions import interp_logp_4d
from settings import (
    TIME_GCM, LEV_GCM, PLEV_GCM, LON_GCM, LAT_GCM,
)
###############################################################################


###############################################################################
###############################################################################
###############################################################################


if __name__ == '__main__':

    parser = argparse.ArgumentParser(description =
                'Interpolate CFday output to pressure levels')
    # variable name(s)
    parser.add_argument('var_names', type=str)
    # CMIP6 experiment
    parser.add_argument('experiment', type=str)
    args = parser.parse_args()

    inp_dir = '/net/o3/hymet_nobackup/heimc/data/pgw/download/subdomain'
    out_base_dir ='/net/o3/hymet_nobackup/heimc/data/pgw/download/interp_plev'

    model_name = 'MPI-ESM1-2-HR'
    #var_name = 'ta'
    #experiment = 'ssp585'
    #experiment = 'historical'

    var_names = args.var_names.split(',')
    experiment = args.experiment

    times = {
        'ssp585': [
            '20700101-20741231',
            '20750101-20791231',
            '20800101-20841231',
            '20850101-20891231',
            '20900101-20941231',
            '20950101-20991231',
        ],
        'historical': [
            '19850101-19891231',
            '19900101-19941231',
            '19950101-19991231',
            '20000101-20041231',
            '20050101-20091231',
            '20100101-20141231',
        ],
    }
    for var_name in var_names:
        for time_ind in range(0,len(times[experiment])):
            print(time_ind)

            inp_file_name = '{}_CFday_{}_{}_r1i1p1f1_gn_{}.nc'.format(
                        var_name, model_name, experiment,
                        times[experiment][time_ind])
            out_dir = os.path.join(out_base_dir, model_name)
            Path(out_dir).mkdir(parents=True, exist_ok=True)
            out_file_name = '{}_CFday_{}_{}_r1i1p1f1_gn_{}.nc'.format(
                        var_name, model_name, experiment,
                        times[experiment][time_ind])

            # create input and output file paths
            out_file_path = os.path.join(out_dir, out_file_name)
            inp_file_path = os.path.join(inp_dir, inp_file_name)

            print('Process input file: \n{}\nto output file: \n{}'.format(
                    inp_file_path, out_file_path))

            ds = xr.open_dataset(inp_file_path)

            # sort pressure ascending
            ds = ds.reindex({LEV_GCM:ds['lev'][::-1]})
            # compute pressure on full levels
            source_P = (ds.ap + ds.b * ds.ps).transpose(
                            TIME_GCM, LEV_GCM, LAT_GCM, LON_GCM)
            var = ds[var_name]



            ### Determine target pressure using tropical ocean-only domain
            #############################################################
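            ## (exploratory code kept for reference; it was presumably used
            ## to derive the target levels stored in
            ## CFday_target_p_MPI-ESM1-2-HR.dat, which is loaded below)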
            #mean_p = p.mean(dim=['lon','lat','time']).values
            #print(np.around(mean_p[30:], -2))
            #p_integ = np.arange(101000, 100000, -1000)
            #p_integ = np.append(p_integ, np.arange(100000, 80000, -1000))
            #p_integ = np.append(p_integ, np.arange(80000, 30000, -2500))
            #p_integ = np.append(p_integ, np.arange(30000, 20000, -2000))
            #p_integ = np.append(p_integ, np.arange(20000, 10000, -1000))
            #p_integ = np.append(p_integ, mean_p[30:-22])
            #print(p_integ)
            ##plt.scatter(np.arange(len(p.lev.values)),
            ##            p.mean(dim=['lon','lat','time']).values)
            ##plt.show()
            #quit()

            # load target pressure levels
            targ_plev = np.sort(np.loadtxt('CFday_target_p_MPI-ESM1-2-HR.dat'))
            targ_P = xr.DataArray(targ_plev, dims=[PLEV_GCM])

            # create 4d target pressure data array
            targ_P = targ_P.expand_dims(
                    dim={LON_GCM:var[LON_GCM],
                         LAT_GCM:var[LAT_GCM],
                         TIME_GCM:var[TIME_GCM]}).transpose(
                                TIME_GCM, PLEV_GCM, LAT_GCM, LON_GCM)

            # run interpolation from GCM model levels to constant pressure levels
            var_out = interp_logp_4d(var, source_P, targ_P, extrapolate='constant',
                                time_key=TIME_GCM, lat_key=LAT_GCM,
                                lon_key=LON_GCM)

            # set pressure levels as coordinate
            var_out = var_out.assign_coords(coords={PLEV_GCM:targ_plev})

            # sort for descending pressure
            var_out = var_out.reindex(
                    {PLEV_GCM:list(reversed(var_out[PLEV_GCM]))})
            #print(var_out)
            #var_out.to_netcdf('test.nc')

            # convert to dataset
            ds_out = var_out.to_dataset(name=var_name)

            ## make sure to keep time encoding identical
            ds_out.time.encoding = ds.time.encoding
            ## make sure to keep attributes identical
            for key,val in ds.time.attrs.items():
                ds_out.time.attrs[key] = val
            for key,val in ds.lon.attrs.items():
                ds_out.lon.attrs[key] = val
            for key,val in ds.lat.attrs.items():
                ds_out.lat.attrs[key] = val
            for key,val in ds[var_name].attrs.items():
                ds_out[var_name].attrs[key] = val

            # save output file
            ds_out.to_netcdf(out_file_path)
--------------------------------------------------------------------------------
/step_01_extract_deltas/Emon_convert_hus_to_hur.py:
--------------------------------------------------------------------------------
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
description     Convert GCM specific humidity to relative humidity.
author          Christoph Heim
date created    08.02.2021
usage           python Emon_convert_hus_to_hur.py hus_file ta_file hur_file
                [-a amon_hur_file]
"""
###############################################################################
import os, argparse
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
###############################################################################

def specific_to_relative_humidity(QV, P, T):
    """
    Compute relative humidity from specific humidity.
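    Uses a Magnus-type approximation (the formula below), with P in Pa,
    T in K and QV in kg kg-1; the result is RH in %.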
19 | """ 20 | RH = 0.263 * P * QV *(np.exp(17.67*(T - 273.15)/(T-29.65)))**(-1) 21 | return(RH) 22 | 23 | 24 | ############################################################################### 25 | ############################################################################### 26 | ############################################################################### 27 | 28 | 29 | if __name__ == '__main__': 30 | 31 | parser = argparse.ArgumentParser(description = 32 | 'Convert GCM specific humidity to relative humidity') 33 | # input specific humidity file 34 | parser.add_argument('hus_file', type=str) 35 | # input temperature file 36 | parser.add_argument('ta_file', type=str) 37 | # output relative humidity file 38 | parser.add_argument('hur_file', type=str) 39 | # Amon relative humidity file 40 | parser.add_argument('-a', '--amon_hur_file', type=None) 41 | args = parser.parse_args() 42 | 43 | #xr.set_options(keep_attrs=True) 44 | 45 | # temperature 46 | var_name = 'ta' 47 | ta = xr.open_dataset(args.ta_file, decode_cf=False).ta 48 | # specific humidity 49 | var_name = 'hus' 50 | ds = xr.open_dataset(args.hus_file, decode_cf=False) 51 | hus = ds.hus 52 | # pressure 53 | pa = ds.plev.expand_dims( 54 | dim={'lon':ds.lon, 'lat':ds.lat, 'time':ds.time}) 55 | pa = pa.transpose('time','plev','lat','lon') 56 | 57 | if hus.shape != ta.shape: 58 | print(hus.shape) 59 | print(ta.shape) 60 | raise ValueError() 61 | 62 | hur = specific_to_relative_humidity(hus, pa, ta) 63 | 64 | ## If hur is given from the coarser dataset Amon 65 | ## it is here used to interpolate hur to the higher 66 | ## resolution using information from the high-resolved hur. 67 | ## The reason is that the high-resolved hur is computed 68 | ## based on monthly mean values and deviates a lot from 69 | ## the true hur in the coarse data set on the coarse levels. 70 | ## Nevertheless, it contains information about the vertical 71 | ## variability and this is exploited here for a better 72 | ## informed vertical interpolation of the coarse hur 73 | ## to high resolution. 74 | ## This means that the above computed high-resolved hur 75 | ## is only indirectly used for the final hur output. 
    amon_hur = xr.open_dataset(args.amon_hur_file, decode_cf=False).hur

    hur_interp = hur.copy()
    #print(amon_hur.plev.values)
    #print(hur.plev.values)

    for plev in hur.plev.values:
        if plev not in amon_hur.plev.values:
            print('{}: interpolate'.format(plev))
            plev_below = amon_hur.plev.where((amon_hur.plev-plev) > 0, np.nan)
            plev_below = amon_hur.plev.isel(plev=plev_below.argmin(dim='plev').values).values

            plev_above = amon_hur.plev.where((amon_hur.plev-plev) < 0, np.nan)
            plev_above = amon_hur.plev.isel(plev=plev_above.argmax(dim='plev').values).values

            #print(plev_above)
            #print(plev_below)

            hur_plev = hur.sel(plev=plev)
            hur_above = hur.sel(plev=plev_above)
            hur_below = hur.sel(plev=plev_below)

            #print(hur_above.isel(time=0,lon=10,lat=10).values)
            #print(hur_plev.isel(time=0,lon=10,lat=10).values)
            #print(hur_below.isel(time=0,lon=10,lat=10).values)

            weight_above = 1 - np.abs(hur_plev - hur_above)/(
                            np.abs(hur_plev - hur_above) +
                            np.abs(hur_plev - hur_below))
            weight_below = 1 - np.abs(hur_plev - hur_below)/(
                            np.abs(hur_plev - hur_above) +
                            np.abs(hur_plev - hur_below))

            #print(weight_above.isel(time=0,lon=10,lat=10).values)
            #print(weight_below.isel(time=0,lon=10,lat=10).values)

            interp = (
                amon_hur.sel(plev=plev_above) * weight_above +
                amon_hur.sel(plev=plev_below) * weight_below
            )
            #print(interp.isel(time=0,lon=10,lat=10).values)
            #print()
            hur_interp.loc[dict(plev=plev)] = interp
            #quit()
        else:
            print('{}: take from Amon'.format(plev))
            hur_interp.loc[dict(plev=plev)] = amon_hur.sel(plev=plev)
            #quit()


    handles = []
    handle, = plt.plot(hur.mean(dim=['time','lon','lat']),
                    hur.plev, label='Emon hur=f(hus,ta)')
    handles.append(handle)
    handle, = plt.plot(amon_hur.mean(dim=['time','lon','lat']),
                    amon_hur.plev, label='Amon hur')
    handles.append(handle)
    handle, = plt.plot(hur_interp.mean(dim=['time','lon','lat']),
                    hur_interp.plev, label='Amon hur interpolated using Emon hur')
    handles.append(handle)
    plt.legend(handles=handles)
    plt.ylim((100000,5000))
    plt.ylabel('p [Pa]')
    plt.xlabel('RH [%]')
    plt.show()
    ##quit()

    ds_out = ds.copy()
    ds_out['hur'] = hur_interp
    del ds_out['hus']
    ds_out.attrs['variable_id'] = 'hur'

    ## make sure to keep time encoding identical
    for key,val in ds.time.attrs.items():
        ds_out.time.attrs[key] = val
    for key,val in ds.lon.attrs.items():
        ds_out.lon.attrs[key] = val
    for key,val in ds.lat.attrs.items():
        ds_out.lat.attrs[key] = val
    for key,val in ds.hus.attrs.items():
        if key == 'standard_name':
            ds_out.hur.attrs[key] = 'relative_humidity'
        elif key == 'long_name':
            ds_out.hur.attrs[key] = 'Relative Humidity'
        else:
            ds_out.hur.attrs[key] = val


    ds_out.to_netcdf(args.hur_file)

--------------------------------------------------------------------------------
/step_02_preproc_deltas.py:
--------------------------------------------------------------------------------
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
description     PGW for ERA5 preprocessing of climate deltas
authors         Before 2022: original developments by Roman Brogli
                Since 2022: upgrade to PGW for ERA5 by Christoph Heim
                2022: updates by Jonas Mensch
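usage           python step_02_preproc_deltas.py {smoothing,regridding}
                [-i input_dir] [-o output_dir] [-e era5_file_path]
                [-v var_names]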
"""
##############################################################################
import os, argparse
import xarray as xr
import numpy as np
from pathlib import Path
from functions import filter_data, regrid_lat_lon, interp_wrapper
from settings import (
    i_debug,
    file_name_bases,
    LON_ERA, LAT_ERA,
    TIME_GCM, PLEV_GCM, LON_GCM, LAT_GCM,
    i_use_xesmf_regridding,
    nan_interp_kernel_radius,
    nan_interp_sharpness,
)
##############################################################################

## input arguments
parser = argparse.ArgumentParser(description =
            'PGW for ERA5: Preprocess GCM data before modifying ' +
            'the ERA5 files. The main PGW routine (step_03_apply_to_era.py) ' +
            'requires GCM climate delta files (SCEN-HIST) for ' +
            'ta,hur,ua,va,zg,hurs,tas,tos as well ' +
            'as the GCM HIST climatology file for ps. By default, this ' +
            'script preprocesses both the SCEN-HIST and HIST files ' +
            'for the variables it is run for. Both are thus required for ' +
            'every variable being processed. The script ' +
            'looks for the input files using the naming convention ' +
            '${var_name}_${file_name_base}.nc, where the ${file_name_base} ' +
            'for the SCEN-HIST and the HIST files ' +
            'can be set in settings.py. If this script ' +
            'is used to preprocess daily GCM data, one can run it twice and '+
            'store the intermediate results: once '+
            'for processing_step "smoothing" and once for "regridding". ' +
            'More details are given below.')

# processing step to perform during script execution
parser.add_argument('processing_step', type=str,
            choices=['smoothing','regridding'],
            help='Possible processing steps are: ' +
            'smoothing: [For daily climate deltas, a smoothing of ' +
            'the annual cycle should be applied. For monthly ' +
            'climate deltas this is not necessary.] ' +
            'regridding: [If the climate deltas are not on the same ' +
            'horizontal grid as ERA5, they can be regridded here. '+
            'WARNING: The default interpolation routine ' +
            '(i_use_xesmf_regridding = 0) assumes a regular '+
            '(thus non-rotated) lat/lon grid for ' +
            'input (GCM data) and output (ERA5 data) grids! ' +
            'If this is not the case for the GCM data, using the ' +
            'xESMF package for regridding may help ' +
            '(i_use_xesmf_regridding = 1). However, such cases have not ' +
            'been tested in detail and may require code adjustments in the ' +
            'function "regrid_lat_lon" in "functions.py".]')

# input directory
parser.add_argument('-i', '--input_dir', type=str,
            help='Directory with input GCM data files (SCEN-HIST, HIST) ' +
            'for selected processing step.')

# output directory
parser.add_argument('-o', '--output_dir', type=str,
            help='Directory where the preprocessed output GCM data files ' +
            'for the selected processing step should be stored.')

# target ERA5 example file to take grid information
parser.add_argument('-e', '--era5_file_path', type=str, default=None,
            help='Path to example ERA5 file ' +
            'from which to take grid information for regridding.')

# variable(s) to process
parser.add_argument('-v', '--var_names', type=str,
            help='Variable names (e.g. ta) to process. Separate ' +
            'multiple variable names with "," (e.g. tas,ta). Default is ' +
            'to process all required variables ta,hur,ua,va,zg,hurs,tas,ps,tos,ts,siconc.',
            default='ta,hur,ua,va,zg,hurs,tas,ps,tos,ts,siconc')


args = parser.parse_args()
print(args)
##############################################################################

# make sure required input arguments are set.
if args.input_dir is None:
    raise ValueError('Input directory (-i) is required.')
if args.output_dir is None:
    raise ValueError('Output directory (-o) is required.')
if (args.processing_step == 'regridding') and (args.era5_file_path is None):
    raise ValueError('era5_file_path is required for regridding step.')

# create output directory
Path(args.output_dir).mkdir(exist_ok=True, parents=True)

# set up list of variable names
var_names = args.var_names.split(',')
print('Run {} for variable names {}.'.format(
        args.processing_step, var_names))


##############################################################################
# iterate over all variables to preprocess
for var_name in var_names:
    print(var_name)
    # if var_name == 'ps':
    #     clim_periods = ['HIST','SCEN-HIST']
    # else:
    #     clim_periods = ['SCEN-HIST']
    clim_periods = ['HIST', 'SCEN-HIST']
    # iterate over the two types of GCM data files
    # (HIST and SCEN-HIST)
    for clim_period in clim_periods:

        var_file_name = file_name_bases[clim_period].format(var_name)

        inp_file = os.path.join(args.input_dir, var_file_name)
        out_file = os.path.join(args.output_dir, var_file_name)

        # smoothing
        if args.processing_step == 'smoothing':

            filter_data(inp_file, var_name, out_file)

        # regridding
        elif args.processing_step == 'regridding':
            # open ERA5 file with target grid
            # (only needed for this step, hence opened here)
            ds_era5 = xr.open_dataset(args.era5_file_path)

            try:
                ds_gcm = xr.open_dataset(inp_file)
            except FileNotFoundError:
                raise FileNotFoundError(
                    'Files for variable ' + var_name + ' are missing')

            ds_gcm = interp_wrapper(
                ds_gcm,
                ds_era5,
                var_name,
                i_use_xesmf=i_use_xesmf_regridding,
                nan_interp_kernel_radius=nan_interp_kernel_radius,
                nan_interp_sharpness=nan_interp_sharpness,
            )

            ds_gcm.to_netcdf(out_file)
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
name: pgw
channels:
  - conda-forge
  - defaults
dependencies:
  - _libgcc_mutex=0.1=conda_forge
  - _openmp_mutex=4.5=2_gnu
  - alsa-lib=1.2.8=h166bdaf_0
  - attr=2.5.1=h166bdaf_1
  - bokeh=3.0.2=pyhd8ed1ab_0
  - brotli=1.0.9=h166bdaf_8
  - brotli-bin=1.0.9=h166bdaf_8
  - bzip2=1.0.8=h7f98852_4
  - c-ares=1.18.1=h7f98852_0
  - ca-certificates=2022.9.24=ha878542_0
  - certifi=2022.9.24=pyhd8ed1ab_0
  - cf_xarray=0.7.5=pyhd8ed1ab_0
  - cftime=1.6.2=py39h2ae25f5_1
  - click=8.0.4=py39hf3d152e_0
  - cloudpickle=2.2.0=pyhd8ed1ab_0
  - contourpy=1.0.6=py39hf939315_0
  - curl=7.86.0=h2283fc2_1
  - cycler=0.11.0=pyhd8ed1ab_0
  - cytoolz=0.12.0=py39hb9d737c_1
  - dask=2022.2.0=pyhd8ed1ab_0
  - dask-core=2022.2.0=pyhd8ed1ab_0
  - dbus=1.13.6=h5008d03_3
  - distributed=2022.2.0=py39hf3d152e_0
  - esmf=8.2.0=nompi_h61edca3_0
  - esmpy=8.2.0=nompi_py39hc8aa433_1
  - expat=2.5.0=h27087fc_0
  - fftw=3.3.10=nompi_hf0379b8_106
  - font-ttf-dejavu-sans-mono=2.37=hab24e00_0
  - font-ttf-inconsolata=3.000=h77eed37_0
  - font-ttf-source-code-pro=2.038=h77eed37_0
  - font-ttf-ubuntu=0.83=hab24e00_0
  - fontconfig=2.14.1=hc2a2eb6_0
  - fonts-conda-ecosystem=1=0
  - fonts-conda-forge=1=0
  - fonttools=4.38.0=py39hb9d737c_1
  - freetype=2.12.1=hca18f0e_1
  - fsspec=2022.11.0=pyhd8ed1ab_0
  - geos=3.11.1=h27087fc_0
  - gettext=0.21.1=h27087fc_0
  - glib=2.74.1=h6239696_1
  - glib-tools=2.74.1=h6239696_1
  - gst-plugins-base=1.21.2=h3e40eee_0
  - gstreamer=1.21.2=hd4edc92_0
  - gstreamer-orc=0.4.33=h166bdaf_0
  - hdf4=4.2.15=h9772cbc_5
  - hdf5=1.12.1=nompi_h4df4325_104
  - heapdict=1.0.1=py_0
  - icu=70.1=h27087fc_0
  - jack=1.9.21=h583fa2b_2
  - jinja2=3.1.2=pyhd8ed1ab_1
  - jpeg=9e=h166bdaf_2
  - keyutils=1.6.1=h166bdaf_0
  - kiwisolver=1.4.4=py39hf939315_1
  - krb5=1.19.3=h08a2579_0
  - lame=3.100=h166bdaf_1003
  - lcms2=2.14=h6ed2654_0
  - ld_impl_linux-64=2.39=hcc3a1bd_1
  - lerc=4.0.0=h27087fc_0
  - libblas=3.9.0=16_linux64_openblas
  - libbrotlicommon=1.0.9=h166bdaf_8
  - libbrotlidec=1.0.9=h166bdaf_8
  - libbrotlienc=1.0.9=h166bdaf_8
  - libcap=2.66=ha37c62d_0
  - libcblas=3.9.0=16_linux64_openblas
  - libclang=15.0.6=default_h2e3cab8_0
  - libclang13=15.0.6=default_h3a83d3e_0
  - libcups=2.3.3=h3e49a29_2
  - libcurl=7.86.0=h2283fc2_1
  - libdb=6.2.32=h9c3ff4c_0
  - libdeflate=1.14=h166bdaf_0
  - libedit=3.1.20191231=he28a2e2_2
  - libev=4.33=h516909a_1
  - libevent=2.1.10=h28343ad_4
  - libffi=3.4.2=h7f98852_5
  - libflac=1.4.2=h27087fc_0
  - libgcc-ng=12.2.0=h65d4601_19
  - libgcrypt=1.10.1=h166bdaf_0
  - libgfortran-ng=12.2.0=h69a702a_19
  - libgfortran5=12.2.0=h337968e_19
  - libglib=2.74.1=h606061b_1
  - libgomp=12.2.0=h65d4601_19
  - libgpg-error=1.45=hc0c96e0_0
  - libiconv=1.17=h166bdaf_0
  - liblapack=3.9.0=16_linux64_openblas
  - libllvm11=11.1.0=he0ac6c6_5
  - libllvm15=15.0.6=h63197d8_0
  - libnetcdf=4.8.1=nompi_h329d8a1_102
  - libnghttp2=1.47.0=hff17c54_1
  - libnsl=2.0.0=h7f98852_0
  - libogg=1.3.4=h7f98852_1
  - libopenblas=0.3.21=pthreads_h78a6416_3
  - libopus=1.3.1=h7f98852_1
  - libpng=1.6.39=h753d276_0
  - libpq=15.1=h67c24c5_1
  - libsndfile=1.1.0=hcb278e6_1
  - libsqlite=3.40.0=h753d276_0
  - libssh2=1.10.0=hf14f497_3
  - libstdcxx-ng=12.2.0=h46fd767_19
  - libsystemd0=252=h2a991cd_0
  - libtiff=4.4.0=h55922b4_4
  - libtool=2.4.6=h9c3ff4c_1008
  - libudev1=252=h166bdaf_0
  - libuuid=2.32.1=h7f98852_1000
  - libvorbis=1.3.7=h9c3ff4c_0
  - libwebp-base=1.2.4=h166bdaf_0
  - libxcb=1.13=h7f98852_1004
  - libxkbcommon=1.0.3=he3ba5ed_0
  - libxml2=2.10.3=h7463322_0
  - libzip=1.9.2=hc929e4a_1
  - libzlib=1.2.13=h166bdaf_4
  - llvmlite=0.39.1=py39h7d9a04d_1
  - locket=1.0.0=pyhd8ed1ab_0
  - lz4-c=1.9.3=h9c3ff4c_1
  - markupsafe=2.1.1=py39hb9d737c_2
  - matplotlib=3.5.1=py39hf3d152e_0
  - matplotlib-base=3.5.1=py39h2fa2bec_0
  - mpg123=1.31.1=h27087fc_0
  - msgpack-python=1.0.4=py39hf939315_1
  - munkres=1.1.4=pyh9f0ad1d_0
  - mysql-common=8.0.31=h26416b9_0
  - mysql-libs=8.0.31=hbc51c84_0
  - ncurses=6.3=h27087fc_1
  - netcdf-fortran=4.5.4=nompi_h2b6e579_100
  - netcdf4=1.5.8=nompi_py39h64b754b_101
  - nspr=4.35=h27087fc_0
  - nss=3.82=he02c5a1_0
  - numba=0.56.4=py39h61ddf18_0
  - numpy=1.23.5=py39h3d75532_0
  - openjpeg=2.5.0=h7d73246_1
  - openssl=3.0.7=h0b41bf4_1
  - packaging=21.3=pyhd8ed1ab_0
  - pandas=1.5.2=py39h4661b88_0
  - partd=1.3.0=pyhd8ed1ab_0
  - pcre2=10.40=hc3806b6_0
  - pillow=9.2.0=py39hf3a2cdf_3
  - pip=22.3.1=pyhd8ed1ab_0
  - ply=3.11=py_1
  - psutil=5.9.4=py39hb9d737c_0
  - pthread-stubs=0.4=h36c2ea0_1001
  - pulseaudio=16.1=h126f2b6_0
  - pyparsing=3.0.9=pyhd8ed1ab_0
  - pyqt=5.15.7=py39h18e9c17_2
  - pyqt5-sip=12.11.0=py39h5a03fae_2
  - python=3.9.15=hba424b6_0_cpython
  - python-dateutil=2.8.2=pyhd8ed1ab_0
  - python_abi=3.9=3_cp39
  - pytz=2022.6=pyhd8ed1ab_0
  - pyyaml=6.0=py39hb9d737c_5
  - qt-main=5.15.6=he99da89_3
  - readline=8.1.2=h0f457ee_0
  - scipy=1.9.3=py39hddc5342_2
  - setuptools=59.8.0=py39hf3d152e_1
  - shapely=1.8.5=py39h76a96b7_2
  - sip=6.7.5=py39h5a03fae_0
  - six=1.16.0=pyh6c4a22f_0
  - sortedcontainers=2.4.0=pyhd8ed1ab_0
  - sparse=0.13.0=pyhd8ed1ab_0
  - tblib=1.7.0=pyhd8ed1ab_0
  - tk=8.6.12=h27826a3_0
  - toml=0.10.2=pyhd8ed1ab_0
  - toolz=0.12.0=pyhd8ed1ab_0
  - tornado=6.1=py39hb9d737c_3
  - tzdata=2022g=h191b570_0
  - unicodedata2=15.0.0=py39hb9d737c_0
  - wheel=0.38.4=pyhd8ed1ab_0
  - xarray=2022.12.0=pyhd8ed1ab_0
  - xcb-util=0.4.0=h166bdaf_0
  - xcb-util-image=0.4.0=h166bdaf_0
  - xcb-util-keysyms=0.4.0=h166bdaf_0
  - xcb-util-renderutil=0.3.9=h166bdaf_0
  - xcb-util-wm=0.4.1=h166bdaf_0
  - xesmf=0.6.2=pyhd8ed1ab_0
  - xorg-libxau=1.0.9=h7f98852_0
  - xorg-libxdmcp=1.1.3=h7f98852_0
  - xyzservices=2022.9.0=pyhd8ed1ab_0
  - xz=5.2.6=h166bdaf_0
  - yaml=0.2.5=h7f98852_2
  - zict=2.2.0=pyhd8ed1ab_0
  - zlib=1.2.13=h166bdaf_4
  - zstd=1.5.2=h6239696_4
  - pip:
    - aiohttp==3.8.3
    - aiosignal==1.3.1
    - appdirs==1.4.4
    - async-timeout==4.0.2
    - attrs==22.1.0
    - charset-normalizer==2.1.1
    - frozenlist==1.3.3
    - idna==3.4
    - imageio==2.22.4
    - multidict==6.0.3
    - pooch==1.6.0
    - pyproj==3.4.0
    - pyvista==0.37.0
    - requests==2.28.1
    - scooby==0.7.0
    - urllib3==1.26.13
    - vtk==9.2.2
    - wslink==1.9.1
    - yarl==1.8.2
prefix: /home/heimc/miniconda3/envs/pgw
--------------------------------------------------------------------------------
/step_01_extract_deltas/extract_climate_delta.sh:
--------------------------------------------------------------------------------
#!/bin/bash
##############################################################################
## Template script to extract climate deltas from CMIP GCMs.
## Computes climatologies and the climate deltas for specific CMIP
## experiments and members.
## The script only serves as inspiration for the extraction of the climate
## deltas; it is not generally valid and has to be adjusted
## for the specific use case.

##### IMPORTANT NOTES FOR Emon DATA USERS:
## FIRST
## hur is not available in the Emon output and has to be computed from hus.
## This is not exact because it matters whether hur is computed from hus
## on the model levels every time the GCM writes an output or whether it is
## computed on the monthly-aggregated output on pressure levels.
## Therefore, the hur is used from the coarse-resolution output group Amon.
## However, the Amon output is vertically interpolated to the higher resolution
## of Emon using information from the computed Emon hur=f(hus,ta)
## data. This helps to do a "better informed" vertical interpolation of the
## coarse Amon hur to the higher resolved Emon grid.
## These steps are done by the Emon_convert_hus_to_hur.py script, which is
## called automatically in the current implementation.
## Consequently, Amon must have been extracted before Emon can be!
## SECOND
## After computing hur for Emon, the delta still has to be computed
## by running the script again only for var_names=(hur).
## (turn off i_extract_vars but keep i_compute_delta)
## THIRD
## The Emon data does not reach up as far as the Amon data.
## Thus, after completing this script, run Emon_add_top_from_Amon.sh
## to add the model top data from Amon to the Emon fields.
## FOURTH
## These Emon fixes are still a bit experimental and not very user-friendly.
## Sorry for that!
##############################################################################
Usage="
##############################################################################
## Template script to extract climate deltas from CMIP GCMs.
## Computes climatologies and the climate deltas for specific CMIP
## experiments and members.
## The script only serves as inspiration for the extraction of the climate
## deltas; it is not generally valid and has to be adjusted
## for the specific use case.

-h to see this message
<table_ID> to get the desired delta from this dataset
Currently supported are: Amon, Omon, Emon, day, CFday and SImon"

while getopts 'h' flag; do
    case "${flag}" in
        h) echo "$Usage" ; exit 0;
    esac
done

# USER SETTINGS
##############################################################################
# base directory where cmip6 data is stored
cmip_data_dir=/net/atmos/data/cmip6/
# base directory where output should be stored
#
out_base_dir=.

# name of the GCM to extract data for
gcm_name=MPI-ESM1-2-HR

## CMIP experiments to use to compute climate deltas
## --> climate delta = future climatology - ERA climatology
# CMIP experiment to use for ERA climatology
era_climate_experiment=historical
# CMIP experiment to use for future climatology
future_climate_experiment=ssp585


## type of CMIP6 model output (e.g. monthly or daily, etc.)
## to use
# standard monthly output
#table_ID=Amon
## high-resolution monthly data for only very few GCMs
#table_ID=Emon
## standard daily output
#table_ID=day
## CFMIP daily output
#table_ID=CFday
## ocean monthly output
#table_ID=Omon

##Experimental addition to use command-line args to select type
##Can (should) be done in a for loop so that it runs for multiple datasets at the same time
if [[ "$1" == "Amon" ]]; then
    table_ID=$1
elif [[ "$1" == "Omon" ]]; then
    table_ID=$1
elif [[ "$1" == "Emon" ]]; then
    table_ID=$1
elif [[ "$1" == "day" ]]; then
    table_ID=$1
elif [[ "$1" == "CFday" ]]; then
    table_ID=$1
elif [[ "$1" == "SImon" ]]; then
    table_ID=$1
elif [ $# -eq 0 ]; then
    echo "Please specify which dataset the deltas should be extracted from.
See -h for more info"
    exit 0
else
    txt="This dataset is currently not supported
Check -h for all supported datasets"
    echo "$txt"
    exit 0
fi

## select variables to extract
if [[ "$table_ID" == "Amon" ]]; then
    var_names=(ts tas hurs ps ua va ta hur zg)
elif [[ "$table_ID" == "day" ]]; then
    var_names=(tas hurs ps ua va ta hur zg)
elif [[ "$table_ID" == "Emon" ]]; then
    var_names=(ua va ta hus zg)
elif [[ "$table_ID" == "CFday" ]]; then
    var_names=(ua va ta hur)
elif [[ "$table_ID" == "Omon" ]]; then
    var_names=(tos)
elif [[ "$table_ID" == "SImon" ]]; then
    var_names=(siconc)
fi
## for Emon, compute the hur delta separately after
## it has been derived from hus
## (turn off i_extract_vars but keep i_compute_delta)
#var_names=(hur)


# should variables be extracted for the two climatologies?
i_extract_vars=1
# should climate deltas be computed?
i_compute_delta=1
# for Emon, hur is not available and needs to be approximated
# using the high-resolution hus climatology, as well as the hur climatology
if [[ "$table_ID" == "Emon" ]]; then
    i_convert_hus_to_hur=1
else
    i_convert_hus_to_hur=0
fi

## subdomain for which to extract GCM data
## should be either global (0,360,-90,90)
## or anything larger than the ERA5 subdomain
## except for storage and performance reasons, there is no benefit of
## using a subdomain.
box=0,360,-90,90
# subdomain
#box=-74,40,-45,35
#box=-73,37,-42,34

# select appropriate cdo time aggregation command
# depending on whether the input data is monthly or daily.
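# (ymonmean yields a 12-step mean annual cycle of monthly means,
# ydaymean a 365/366-step mean annual cycle of daily means)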
155 | if [[ "$table_ID" == "day" ]]; then 156 | cdo_agg_command=ydaymean 157 | else 158 | cdo_agg_command=ymonmean 159 | fi 160 | 161 | # iterate over both experiments to extract data 162 | experiments=($era_climate_experiment $future_climate_experiment) 163 | 164 | ############################################################################## 165 | 166 | out_dir=$out_base_dir/$table_ID/$gcm_name 167 | echo $out_dir 168 | mkdir -p $out_dir 169 | 170 | for var_name in ${var_names[@]}; do 171 | echo "#################################################################" 172 | echo $var_name 173 | echo "#################################################################" 174 | 175 | for experiment in ${experiments[@]}; do 176 | echo "#######################################" 177 | echo $experiment 178 | echo "#######################################" 179 | 180 | if [[ $i_extract_vars == 1 ]]; then 181 | 182 | # data folder hierarchy for CMIP6 183 | inp_dir=$cmip_data_dir/$experiment/$table_ID/$var_name/$gcm_name/r1i1p1f1/gn 184 | 185 | # start of the CMIP6 file names 186 | file_name_base=${table_ID}_${gcm_name}_${experiment}_r1i1p1f1_gn 187 | 188 | ## overwrite old data 189 | #rm $out_dir/${var_name}_${experiment}.nc 190 | 191 | ## compute ERA climatology 192 | if [[ "$experiment" == "$era_climate_experiment" ]]; then 193 | # extract full time series 194 | cdo -L -sellonlatbox,$box \ 195 | -selyear,1985/2014 \ 196 | -cat \ 197 | $inp_dir/${var_name}_${file_name_base}_19[8-9]*.nc \ 198 | $inp_dir/${var_name}_${file_name_base}_20[0-1]*.nc \ 199 | $out_dir/${var_name}_${experiment}_full.nc 200 | 201 | ## compute future experiment climatology 202 | elif [[ "$experiment" == "$future_climate_experiment" ]]; then 203 | # extract full time series 204 | cdo -L -sellonlatbox,$box \ 205 | -selyear,2070/2099 \ 206 | -cat \ 207 | $inp_dir/${var_name}_${file_name_base}_20[6-9]*.nc \ 208 | $out_dir/${var_name}_${experiment}_full.nc 209 | fi 210 | 211 | # aggregate to yearly monthly/daily means 212 | # in principal this could be done directly during extraction 213 | # step above. However, for Emon computation of hur from hus 214 | # this should be done on the basis of monthly values not 215 | # with the mean annual cycle. Due to this, the full time 216 | # series is stored as well. 217 | cdo -L -$cdo_agg_command \ 218 | $out_dir/${var_name}_${experiment}_full.nc \ 219 | $out_dir/${var_name}_${experiment}.nc 220 | fi 221 | 222 | ## convert hus to hur if required 223 | if [[ $i_convert_hus_to_hur == 1 ]]; then 224 | if [[ "$var_name" == "hus" ]]; then 225 | echo Convert Emon hus to hur using Amon hur data. 
226 | 227 | Amon_out_dir=$out_base_dir/Amon/$gcm_name 228 | 229 | python Emon_convert_hus_to_hur.py \ 230 | $out_dir/hus_${experiment}_full.nc \ 231 | $out_dir/ta_${experiment}_full.nc \ 232 | $out_dir/hur_${experiment}_full.nc \ 233 | -a $Amon_out_dir/hur_${experiment}_full.nc 234 | 235 | # aggregate to multi-year monthly/daily means 236 | cdo -L -$cdo_agg_command \ 237 | $out_dir/hur_${experiment}_full.nc \ 238 | $out_dir/hur_${experiment}.nc 239 | fi 240 | fi 241 | 242 | done 243 | 244 | ## compute delta (future climatology - ERA climatology) 245 | if [[ $i_compute_delta == 1 ]]; then 246 | cdo -L -sub $out_dir/${var_name}_$future_climate_experiment.nc \ 247 | $out_dir/${var_name}_$era_climate_experiment.nc \ 248 | $out_dir/${var_name}_delta.nc 249 | fi 250 | 251 | done 252 | 253 | # link surface fields from Amon because they are not available 254 | # in Emon (use a relative target so this also works if out_base_dir 255 | # is a relative path; out_dir = $out_base_dir/Emon/$gcm_name) 256 | if [[ "$table_ID" == "Emon" ]]; then 256 | cd $out_dir 257 | ln -s ../../Amon/$gcm_name/*s_*.nc . 258 | cd - 259 | fi 260 | 261 | -------------------------------------------------------------------------------- /fields/plot.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------- 2 | # modules 3 | # 4 | import xarray as xr 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import cartopy.crs as ccrs 8 | import matplotlib.gridspec as gridspec 9 | import cmcrameri.cm as cmc 10 | from matplotlib.colors import BoundaryNorm, LinearSegmentedColormap 11 | import matplotlib 12 | 13 | font = {'size': 12} 14 | matplotlib.rc('font', **font) 15 | 16 | def drywet(numcolors, colormap): 17 | 18 | colors_blue = colormap(np.linspace(0.5, 1, 5)) 19 | colors_white = np.array([1, 1, 1, 1]) 20 | colors_brown = [[84, 48, 5, 255], 21 | [140, 81, 10, 255], 22 | [191, 129, 45, 255], 23 | [223, 194, 125, 255], 24 | [246, 232, 195, 255]] 25 | rgb = [] 26 | for i in range(len(colors_brown)): 27 | z = [x / 255 for x in colors_brown[i]] 28 | rgb.append(z) 29 | colors = np.vstack((rgb, colors_white, colors_blue)) 30 | 31 | cmap = LinearSegmentedColormap.from_list(name=colormap, colors=colors, N=numcolors) 32 | 33 | return cmap 34 | 35 | # ------------------------------------------------------------------------------- 36 | # read data 37 | # %% 38 | var_name = 'FR_SEA_ICE' 39 | 40 | sims = ['new', 'old', 'diff'] 41 | friac = {} 42 | labels = {'new': 'Sea Ice update', 'old': 'Sea Ice static', 'diff': 'Difference between versions'} 43 | 44 | for s in range(len(sims)): 45 | sim = sims[s] 46 | friac[sim] = {} 47 | friac[sim]['label'] = labels[sim] 48 | data = xr.open_dataset(f'{sim}_version.nc') 49 | dt = data[var_name].values[0, :, :] 50 | friac[sim][var_name] = dt 51 | # %% 52 | lat = xr.open_dataset('old_version.nc')['lat'].values[:] 53 | lon = xr.open_dataset('old_version.nc')['lon'].values[:] 54 | lon_, lat_ = np.meshgrid(lon, lat) 55 | print("load done") 56 | # ------------------------------------------------------------------------------- 57 | # plot 58 | # %% 59 | ar = 1.0 # initial aspect ratio for first trial 60 | wi = 12 # width in inches #15 61 | hi = 2.5 # height in inches #10 62 | ncol = 3 # edit here 63 | nrow = 1 64 | axs, cs, gl = np.empty(shape=(nrow, ncol), dtype='object'), np.empty(shape=(nrow, ncol), dtype='object'), np.empty(shape=(nrow, ncol), dtype='object') 65 | 66 | cmap1 = cmc.davos_r 67 | levels1 = np.linspace(0, 100, 21, endpoint=True) 68 | norm1 = BoundaryNorm(levels1, ncolors=cmap1.N, clip=True) 69 | 70
| cmap2 = drywet(25, cmc.vik_r) 71 | levels2 = np.linspace(0, 40, 11, endpoint=True) 72 | norm2 = BoundaryNorm(levels2, ncolors=cmap2.N, clip=True) 73 | 74 | # map extent: [lon_min, lon_max, lat_min, lat_max] 75 | map_ext = [-50, 50, 40, 90] 76 | 77 | fig = plt.figure(figsize=(wi, hi)) 78 | left, bottom, right, top = 0.07, 0.01, 0.94, 0.95 79 | gs = gridspec.GridSpec(nrows=1, ncols=3, left=left, bottom=bottom, right=right, top=top, 80 | wspace=0.1, hspace=0.15) 81 | 82 | for i in range(3): 83 | sim = sims[i] 84 | label = friac[sim]['label'] 85 | axs[0, i] = fig.add_subplot(gs[0, i], projection=ccrs.PlateCarree()) 86 | axs[0, i].set_extent(map_ext, crs=ccrs.PlateCarree()) 87 | axs[0, i].coastlines(zorder=3) 88 | axs[0, i].stock_img() 89 | gl[0, i] = axs[0, i].gridlines(crs=ccrs.PlateCarree(), draw_labels=True, x_inline=False, y_inline=False, linewidth=1, color='grey', alpha=0.5, linestyle='--') 90 | gl[0, i].right_labels = False 91 | gl[0, i].top_labels = False 92 | gl[0, i].left_labels = False 93 | cs[0, i] = axs[0, i].pcolormesh(lon, lat, friac[sim][var_name], cmap=cmap1, norm=norm1, shading="auto", 94 | transform=ccrs.PlateCarree()) 95 | axs[0, i].set_title(f'{label}', fontweight='bold', pad=6, fontsize=14, loc='center') 96 | 97 | gl[0, 0].left_labels = True 98 | 99 | cax = fig.add_axes( 100 | [axs[0, 2].get_position().x1 + 0.01, axs[0, 2].get_position().y0, 0.01, axs[0, 2].get_position().height]) 101 | cbar = fig.colorbar(cs[0, 1], cax=cax, orientation='vertical', 102 | ticks=np.linspace(0, 100, 6, endpoint=True)) 103 | cbar.ax.tick_params(labelsize=14) 104 | 105 | 106 | axs[0, 0].text(-0.2, 0.5, 'Sea ice', ha='center', va='center', rotation='vertical', 107 | transform=axs[0, 0].transAxes, fontsize=14, fontweight='bold') 108 | axs[0, 2].text(1.07, 1.09, '[%]', ha='center', va='center', rotation='horizontal', 109 | transform=axs[0, 2].transAxes, fontsize=12) 110 | 111 | fig.show() 112 | # plotpath = "/project/pr133/rxiang/figure/echam5/" 113 | # fig.savefig(plotpath + 'friac' + f'{mon}.png', dpi=500) 114 | plt.close(fig) 115 | """ 116 | import cartopy.crs as ccrs 117 | import cartopy.feature as cfeature 118 | import matplotlib.pyplot as plt 119 | from matplotlib import cm 120 | import matplotlib.ticker as mticker 121 | import numpy as np 122 | from netCDF4 import Dataset 123 | import mplotutils as mpu 124 | import xarray as xr 125 | from matplotlib.colors import TwoSlopeNorm 126 | 127 | 128 | 129 | def plot(filename, field, title, metric, lon, lat, max=None, min=None, 130 | lat_pole=90, lon_pole=-180, coastline=True, colormap="RdBu_r", 131 | centered_bar=False): 132 | 133 | rotated_pole = ccrs.RotatedPole(pole_latitude=lat_pole, pole_longitude=lon_pole) 134 | data_crs = ccrs.PlateCarree() 135 | 136 | # create the plot and set the size 137 | plt.figure(figsize=(20,10)) 138 | axes = plt.axes(projection= rotated_pole) 139 | 140 | 141 | # create country's borders and landsea mask 142 | #land_50m = cfeature.NaturalEarthFeature('cultural', 'admin_0_countries', '50m', edgecolor='black', facecolor='none', linewidth=0.2) 143 | broder_50m = cfeature.NaturalEarthFeature('physical', 'coastline', '50m', edgecolor='black', facecolor='none', linewidth=0.8) 144 | if max is None: 145 | max = np.nanmax(field) 146 | if min is None: 147 | min = np.nanmin(field) 148 | # activate the labels and set the contour levels for the contourf 149 | draw_labels = True 150 | reversed_cmap = False 151 | levels = np.arange(min, max, 0.05) 152 | color_map = plt.cm.get_cmap(colormap) 153 | if reversed_cmap: 154 | reversed_color_map = color_map.reversed()
155 | else: 156 | reversed_color_map = color_map 157 | plt.gca().set_facecolor("dimgrey") 158 | 159 | if centered_bar: 160 | norm = TwoSlopeNorm(vmin=min, vmax=max, vcenter=0.0001) 161 | # plot in each subplot 162 | h = plt.contourf(lon, lat, field[:,:], levels=levels, cmap=reversed_color_map, extend='both', norm=norm) 163 | else: 164 | h = plt.contourf(lon, lat, field[:,:], levels=levels, cmap=reversed_color_map , extend='both') 165 | axes.set_title(title, fontsize=25, weight="bold") 166 | 167 | ## add borders and landsea mask 168 | #axes.add_feature(land_50m) 169 | if coastline: 170 | axes.add_feature(broder_50m) 171 | 172 | gl = axes.gridlines(color='black', linestyle='--', linewidth=1., alpha=0.35, draw_labels=draw_labels, dms=True, x_inline=False, y_inline=False) 173 | gl.ylocator = mticker.FixedLocator(np.arange(-60, 80, 10)) 174 | gl.xlocator = mticker.FixedLocator(np.arange(-100, 90 ,10)) 175 | gl.xlabel_style = {'size':12, 'rotation': 0, 'rotation_mode': 'anchor'} 176 | gl.ylabel_style = {'size':12, 'rotation': 0, 'rotation_mode': 'anchor'} 177 | gl.top_labels = False 178 | gl.right_labels = False 179 | gl.left_labels = False 180 | gl.bottom_labels = True 181 | gl.left_labels = True 182 | 183 | #set colorbar 184 | cb = plt.colorbar(orientation="horizontal", shrink=0.4, pad=0.07, format="%.2f") 185 | cb.ax.tick_params(labelsize=14) 186 | cb.set_label(label= str(metric),fontsize=20) 187 | 188 | plt.tight_layout() 189 | plt.savefig(str(filename) + ".png") 190 | 191 | # create var wherre to store others 192 | result = np.zeros((1,224,544)) 193 | 194 | 195 | era5_old = xr.open_dataset("old_version.nc") 196 | era5_new = xr.open_dataset("new_version.nc") 197 | tas = xr.open_dataset("ts_delta.nc") 198 | tos = xr.open_dataset("tos_delta.nc") 199 | 200 | 201 | lon = tos.variables['lon'][:] 202 | lat = tos.variables['lat'][:] 203 | month = 0 204 | print("loads done") 205 | plot("old_final_temp", era5_old.variables['T_SKIN'][0,:,:].values, "Final PGW Temperature w/o sea ice update", 206 | "T_SKIN [K]", lon, lat) 207 | print("1 done") 208 | plot("new_final_temp", era5_new.variables['T_SKIN'][0,:,:].values, "Final PGW Temperature with sea ice update", 209 | "T_SKIN [K]", lon, lat) 210 | print("2 done") 211 | plot("old_final_sic", era5_old.variables['FR_SEA_ICE'][0,:,:].values, "ERA5 Sea Ice", 212 | "Sea Ice frac [1]", lon, lat) 213 | print("3 done") 214 | plot("new_final_sic", era5_new.variables['FR_SEA_ICE'][0,:,:].values, "PGW Sea Ice", 215 | "Sea Ice frac [1]", lon, lat) 216 | print("4 done") 217 | 218 | plot("diff_winter", result - tas.variables['tas'][month,:,:].values,"Differences between new and previous PGW versions for January", 219 | "TAS delta [K]",lon,lat ) 220 | plot("tas_winter"+addon, tas.variables['tas'][month,:,:].values, "TAS field from previous PGW version for January", 221 | "TAS delta [K]",lon,lat ) 222 | 223 | plot ("cdo_winter"+addon, cdo.variables['tos'][month,:,:].values, "SST field from bi-linear interpolation for January", 224 | "SST delta [K]",lon,lat ) 225 | plot ("sst"+addon, tos.variables['sst'][month,:,:].values, "SST field from NaN-ignoring interpolation using kernel interp for January", 226 | "SST delta [K]" ,lon,lat) 227 | 228 | plot ("sst_tas_diff"+addon, tos.variables['sst'][month,:,:].values- tas.variables['tas'][month,:,:].values, "Differences between SST and TAS for January", 229 | "SST delta [K]",lon,lat) 230 | plot ("ice"+addon, era5.variables['FR_SEA_ICE'][0,:,:].values, "Sea ice fraction from ERA5 for January", 231 | "Ice fraction [%]",lon,lat, 
colormap="Blues") 232 | lon = christoph.variables['lon'][:] 233 | lat = christoph.variables['lat'][:] 234 | print(np.sum(christoph.variables['ts'][0,:,:].values)) 235 | print(np.sum(christoph.variables['ts'][0,:,:].values)/ (len(lon)*len(lat))) 236 | plot ("heim", christoph.variables['ts'][0,:,:].values, "Differences between TS and SST for January", 237 | "Temperature [K]",lon,lat) 238 | 239 | 240 | def plot_paper(filename, field, title, metric, max=None, min=None, lat_pole=90, lon_pole=-180, 241 | coastline=True, colormap="RdBu_r"): 242 | 243 | rotated_pole = ccrs.RotatedPole(pole_latitude = lat_pole, pole_longitude = lon_pole) 244 | data_crs = ccrs.PlateCarree() 245 | 246 | # create the plot and set the size 247 | fig, axs = plt.subplots(1,3, sharex=True, sharey=True , subplot_kw=dict(projection= rotated_pole), figsize = (20*3,10)) 248 | fig.subplots_adjust( wspace=0.05, left=0.05, right=0.99, bottom=0.12, top=0.92) 249 | #fig.suptitle(r'$\Delta$'+ "SST comparison between native GCM data, bi-linear interpolation \n and NaN-ignoring interpolation", fontsize=30, weight="bold") 250 | 251 | # create country's borders and landsea mask 252 | #land_50m = cfeature.NaturalEarthFeature('cultural', 'admin_0_countries', '50m', edgecolor='black', facecolor='none', linewidth=0.2) 253 | broder_50m = cfeature.NaturalEarthFeature('physical', 'coastline', '50m', edgecolor='black', facecolor='none', linewidth=0.8) 254 | if max == None: 255 | max = np.nanmax(field) 256 | if min == None: 257 | min = np.nanmin(field) 258 | # activate the labels and set countour of the countourf 259 | draw_labels = True 260 | reversed_cmap = False 261 | levels = np.arange(min, max, 0.05) 262 | color_map = plt.cm.get_cmap("Reds") 263 | if reversed_cmap == True: 264 | reversed_color_map = color_map.reversed() 265 | else: 266 | reversed_color_map = color_map 267 | #plt.gca().set_facecolor("dimgrey") 268 | #norm = TwoSlopeNorm(vmin=min, vmax = max, vcenter=0.0001) 269 | origin_dim = raw_sst.coords['longitude'].values.shape 270 | lon_raw = raw_sst.coords['longitude'].values.reshape(-1) 271 | for i in range(len(lon_raw)): 272 | if lon_raw[i] > 180: 273 | lon_raw[i] -= 360 274 | # plot in each subplot 275 | h1 = axs[0].contourf(lon_raw.reshape(origin_dim), raw_sst.coords['latitude'].values, raw_sst.variables['tos'][month,:,:].values, levels=levels, cmap=reversed_color_map , extend='both') 276 | axs[0].set_title(r'$\Delta$SST on GCM ocean model grid', fontsize=25, weight="bold") 277 | ## add borders and landsea mask 278 | #axes.add_feature(land_50m) 279 | if coastline: 280 | axs[0].add_feature(broder_50m) 281 | 282 | gl = axs[0].gridlines(color='black', linestyle='--', linewidth=1., alpha=0.35, draw_labels=draw_labels, dms=True, x_inline=False, y_inline=False) 283 | gl.ylocator = mticker.FixedLocator(np.arange(-60, 80, 10)) 284 | gl.xlocator = mticker.FixedLocator(np.arange(-100, 90 ,10)) 285 | gl.xlabel_style = {'size':12, 'rotation': 0, 'rotation_mode': 'anchor'} 286 | gl.ylabel_style = {'size':12, 'rotation': 0, 'rotation_mode': 'anchor'} 287 | gl.top_labels = False 288 | gl.right_labels = False 289 | gl.left_labels = False 290 | gl.bottom_labels = True 291 | gl.left_labels = True 292 | 293 | # plot in each subplot 294 | h = axs[1].contourf(lon[210:-150], lat[30:-115], cdo.variables['tos'][month,:,:].values[30:-115,210:-150], levels=levels, cmap=reversed_color_map , extend='both') 295 | axs[1].set_title(r'$\Delta$'+ "SST using bi-linear interpolation", fontsize=25, weight="bold") 296 | ## add borders and landsea mask 297 | 
#axes.add_feature(land_50m) 298 | if coastline: 299 | axs[1].add_feature(broder_50m) 300 | 301 | gl = axs[1].gridlines(color='black', linestyle='--', linewidth=1., alpha=0.35, draw_labels=draw_labels, dms=True, x_inline=False, y_inline=False) 302 | gl.ylocator = mticker.FixedLocator(np.arange(-60, 80, 10)) 303 | gl.xlocator = mticker.FixedLocator(np.arange(-100, 90 ,10)) 304 | gl.xlabel_style = {'size':12, 'rotation': 0, 'rotation_mode': 'anchor'} 305 | gl.ylabel_style = {'size':12, 'rotation': 0, 'rotation_mode': 'anchor'} 306 | gl.top_labels = False 307 | gl.right_labels = False 308 | gl.left_labels = False 309 | gl.bottom_labels = True 310 | gl.left_labels = True 311 | 312 | # plot in each subplot 313 | h = axs[2].contourf(lon[210:-150], lat[30:-115], tos.variables['sst'][month,:,:].values[30:-115,210:-150], levels=levels, cmap=reversed_color_map , extend='both') 314 | axs[2].set_title(r'$\Delta$'+ "SST using NaN-ignoring interpolation", fontsize=25, weight="bold") 315 | ## add borders and landsea mask 316 | #axes.add_feature(land_50m) 317 | if coastline: 318 | axs[2].add_feature(broder_50m) 319 | 320 | gl = axs[2].gridlines(color='black', linestyle='--', linewidth=1., alpha=0.35, draw_labels=draw_labels, dms=True, x_inline=False, y_inline=False) 321 | gl.ylocator = mticker.FixedLocator(np.arange(-60, 80, 10)) 322 | gl.xlocator = mticker.FixedLocator(np.arange(-100, 90 ,10)) 323 | gl.xlabel_style = {'size':12, 'rotation': 0, 'rotation_mode': 'anchor'} 324 | gl.ylabel_style = {'size':12, 'rotation': 0, 'rotation_mode': 'anchor'} 325 | gl.top_labels = False 326 | gl.right_labels = False 327 | gl.left_labels = False 328 | gl.bottom_labels = True 329 | gl.left_labels = True 330 | 331 | #set colorbar 332 | cb = mpu.colorbar(h1, axs[1], orientation = 'horizontal', pad = 0.15, aspect=50, format='%.1f') 333 | cb.ax.tick_params(labelsize=14) 334 | cb.set_label(label="SST delta [K]",fontsize=20) 335 | 336 | #plt.tight_layout() 337 | plt.savefig(str(filename) + ".png") 338 | 339 | #plot_paper("jonas_figure"+addon, raw_sst.variables['tos'][month,:,:].values, "Combined TAS field from NaN-ignoring interpolation for January", 340 | # "TAS delta [K]") 341 | """ 342 | -------------------------------------------------------------------------------- /step_03_apply_to_era.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | description PGW for ERA5 main routine to update ERA5 files with climate 5 | deltas to transition from ERA climate to PGW climate. 
6 | authors Before 2022: original developments by Roman Brogli 7 | Since 2022: upgrade to PGW for ERA5 by Christoph Heim 8 | 2022: updates by Jonas Mensch 9 | """ 10 | ############################################################################## 11 | import argparse, os 12 | import xarray as xr 13 | import numpy as np 14 | from argparse import RawDescriptionHelpFormatter 15 | from pathlib import Path 16 | from datetime import datetime, timedelta 17 | from functions import ( 18 | specific_to_relative_humidity, 19 | relative_to_specific_humidity, 20 | load_delta, 21 | load_delta_interp, 22 | integ_geopot, 23 | interp_logp_4d, 24 | determine_p_ref, 25 | integrate_tos 26 | ) 27 | from constants import CON_G, CON_RD 28 | from parallel import IterMP 29 | from settings import ( 30 | i_debug, 31 | era5_file_name_base, 32 | var_name_map, 33 | TIME_ERA, LEV_ERA, HLEV_ERA, LON_ERA, LAT_ERA, SOIL_HLEV_ERA, 34 | TIME_GCM, PLEV_GCM, 35 | i_reinterp, 36 | p_ref_inp, 37 | thresh_phi_ref_max_error, 38 | max_n_iter, 39 | adj_factor, 40 | file_name_bases, 41 | ) 42 | ############################################################################## 43 | 44 | def pgw_for_era5(inp_era_file_path, out_era_file_path, 45 | delta_input_dir, era_step_dt, 46 | ignore_top_pressure_error, 47 | debug_mode=None): 48 | if i_debug >= 0: 49 | print('Start working on input file {}'.format(inp_era_file_path)) 50 | 51 | ######################################################################### 52 | ### PREPARATION STEPS 53 | ######################################################################### 54 | # containers for variable computation 55 | vars_era = {} 56 | vars_pgw = {} 57 | deltas = {} 58 | 59 | # open data set 60 | era_file = xr.open_dataset(inp_era_file_path, decode_cf=False) 61 | 62 | ## compute pressure on ERA5 full levels and half levels 63 | # pressure on half levels 64 | pa_hl_era = (era_file.ak + 65 | era_file[var_name_map['ps']] * era_file.bk).transpose( 66 | TIME_ERA, HLEV_ERA, LAT_ERA, LON_ERA) 67 | # if akm and bkm coefficients (for full levels) exist, use them 68 | if 'akm' in era_file: 69 | akm = era_file.akm 70 | bkm = era_file.bkm 71 | # if akm and bkm coefficients do not exist, compute them 72 | # with the average of the half-level coefficients above and below 73 | else: 74 | akm = ( 75 | 0.5 * era_file.ak.diff( 76 | dim=HLEV_ERA, 77 | label='lower').rename({HLEV_ERA:LEV_ERA}) + 78 | era_file.ak.isel({HLEV_ERA:range(len(era_file.level1)-1)}).values 79 | ) 80 | bkm = ( 81 | 0.5 * era_file.bk.diff( 82 | dim=HLEV_ERA, 83 | label='lower').rename({HLEV_ERA:LEV_ERA}) + 84 | era_file.bk.isel({HLEV_ERA:range(len(era_file.level1)-1)}).values 85 | ) 86 | # pressure on full levels 87 | pa_era = (akm + era_file[var_name_map['ps']] * bkm).transpose( 88 | TIME_ERA, LEV_ERA, LAT_ERA, LON_ERA) 89 | 90 | # compute relative humidity in ERA climate state 91 | era_file[var_name_map['hur']] = specific_to_relative_humidity( 92 | era_file[var_name_map['hus']], 93 | pa_era, era_file[var_name_map['ta']]).transpose( 94 | TIME_ERA, LEV_ERA, LAT_ERA, LON_ERA) 95 | 96 | ######################################################################### 97 | ### UPDATE SURFACE AND SOIL TEMPERATURE 98 | ######################################################################### 99 | # update surface skin temperature using SST delta over open sea and 100 | # surface skin temperature delta over land and over sea ice 101 | if i_debug >= 2: 102 | print('update surface skin temperature (ts)') 103 | delta_siconc = load_delta(delta_input_dir, 'siconc',
104 | era_file[TIME_ERA], era_step_dt) 105 | era_file[var_name_map['sic']].values += delta_siconc.values/100 # siconc delta is in %, ERA5 sea-ice fraction in [0,1] 106 | era_file[var_name_map['sic']].values = np.clip( 107 | era_file[var_name_map['sic']].values, 0, 1) 108 | if i_debug >= 2: print('min sic: {}'.format(np.nanmin(era_file[var_name_map['sic']].values))) 109 | #deltas['siconc'] = delta_siconc 110 | # load surface temperature climate delta 111 | # (for grid points over land and sea ice) 112 | delta_ts = load_delta(delta_input_dir, 'ts', 113 | era_file[TIME_ERA], era_step_dt) 114 | # load SST climate delta (for grid points over open water) 115 | delta_tos = load_delta(delta_input_dir, 'tos', 116 | era_file[TIME_ERA], era_step_dt) 117 | # combine using land and sea-ice mask in ERA5 118 | delta_ts_combined = integrate_tos( 119 | delta_tos.values, 120 | delta_ts.values, 121 | era_file[var_name_map['sftlf']].isel({TIME_ERA:0}).values, 122 | era_file[var_name_map['sic']].isel({TIME_ERA:0}).values 123 | ) 124 | era_file[var_name_map['ts']].values += delta_ts_combined 125 | delta_ts.values = delta_ts_combined 126 | # store delta for output in case of --debug_mode = interpolate_full 127 | deltas['ts'] = delta_ts 128 | if i_debug >= 2: print('min sic: {}'.format(np.nanmin(era_file[var_name_map['sic']].values))) 129 | # update temperature of soil layers 130 | if i_debug >= 2: 131 | print('update soil layer temperature (st)') 132 | # set climatological lower soil temperature delta to annual mean 133 | # climate delta of surface skin temperature. 134 | delta_st_clim = load_delta(delta_input_dir, 'ts', 135 | era_file[TIME_ERA], 136 | target_date_time=None).mean(dim=[TIME_GCM]) 137 | # interpolate between surface temperature and deep soil temperature 138 | # using exponential decay of the annual cycle signal (assumed e-folding depth: 2.8 m) 139 | delta_soilt = ( 140 | delta_st_clim + np.exp(-era_file.soil1/2.8) * 141 | (delta_ts - delta_st_clim) 142 | ) 143 | delta_soilt = delta_soilt.transpose(TIME_ERA, SOIL_HLEV_ERA, LAT_ERA, LON_ERA) 144 | era_file[var_name_map['st']].values += delta_soilt 145 | # store delta for output in case of --debug_mode = interpolate_full 146 | deltas['st'] = delta_soilt 147 | 148 | ######################################################################### 149 | ### START UPDATING 3D FIELDS 150 | ######################################################################### 151 | # If no re-interpolation is done, the final PGW climate state 152 | # variables can be computed already now, before updating the 153 | # surface pressure. This means that the climate deltas are 154 | # interpolated onto the ERA5 model levels of the ERA climate state.
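# (Note: with i_reinterp = 0, the climate deltas are interpolated at the
# pressure of the ERA climate state, pa_era, and are therefore not
# affected by the surface pressure adjustment below; they can be added
# once here, outside the iteration.)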
155 | if not i_reinterp: 156 | 157 | ### interpolate climate deltas onto ERA5 grid 158 | for var_name in ['ta','hur','ua','va']: 159 | if i_debug >= 2: 160 | print('update {}'.format(var_name)) 161 | 162 | ## interpolate climate deltas to ERA5 model levels 163 | ## use ERA climate state 164 | delta_var = load_delta_interp(delta_input_dir, 165 | var_name, pa_era, era_file[TIME_ERA], era_step_dt, 166 | ignore_top_pressure_error) 167 | deltas[var_name] = delta_var 168 | 169 | ## compute PGW climate state variables 170 | vars_pgw[var_name] = ( 171 | era_file[var_name_map[var_name]] + 172 | deltas[var_name] 173 | ) 174 | 175 | 176 | ######################################################################### 177 | ### UPDATE SURFACE PRESSURE USING ITERATIVE PROCEDURE 178 | ######################################################################### 179 | if i_debug >= 2: 180 | print('###### Start with iterative surface pressure adjustment.') 181 | # change in surface pressure between ERA and PGW climate states 182 | delta_ps = xr.zeros_like(era_file[var_name_map['ps']]) 183 | # increment to adjust delta_ps with each iteration 184 | adj_ps = xr.zeros_like(era_file[var_name_map['ps']]) 185 | # maximum error in geopotential (used in iteration) 186 | phi_ref_max_error = np.inf 187 | 188 | it = 1 189 | while phi_ref_max_error > thresh_phi_ref_max_error: 190 | 191 | # update surface pressure 192 | delta_ps += adj_ps 193 | ps_pgw = era_file[var_name_map['ps']] + delta_ps 194 | 195 | # recompute pressure on full and half levels 196 | pa_pgw = (akm + ps_pgw * bkm).transpose( 197 | TIME_ERA, LEV_ERA, LAT_ERA, LON_ERA) 198 | pa_hl_pgw = (era_file.ak + ps_pgw * era_file.bk).transpose( 199 | TIME_ERA, HLEV_ERA, LAT_ERA, LON_ERA) 200 | 201 | 202 | if i_reinterp: 203 | # interpolate ERA climate state variables as well as 204 | # climate deltas onto updated model levels, and 205 | # compute PGW climate state variables 206 | if i_debug >= 2: 207 | print('reinterpolate ta and hur') 208 | for var_name in ['ta', 'hur']: 209 | vars_era[var_name] = interp_logp_4d( 210 | era_file[var_name_map[var_name]], 211 | pa_era, pa_pgw, extrapolate='constant') 212 | deltas[var_name] = load_delta_interp(delta_input_dir, 213 | var_name, pa_pgw, 214 | era_file[TIME_ERA], era_step_dt, 215 | ignore_top_pressure_error) 216 | vars_pgw[var_name] = vars_era[var_name] + deltas[var_name] 217 | 218 | # Determine current reference pressure (p_ref) 219 | if p_ref_inp is None: 220 | # get GCM pressure levels as candidates for reference pressure 221 | p_ref_opts = load_delta(delta_input_dir, 'zg', 222 | era_file[TIME_ERA], era_step_dt)[PLEV_GCM] 223 | # maximum reference pressure in ERA and PGW climate states 224 | # (take 95% of surface pressure to ensure that a few model 225 | # levels are located in between which makes the solution 226 | # smoother). 227 | p_min_era = pa_hl_era.isel( 228 | {HLEV_ERA:len(pa_hl_era[HLEV_ERA])-1}) * 0.95 229 | p_min_pgw = pa_hl_pgw.isel( 230 | {HLEV_ERA:len(pa_hl_era[HLEV_ERA])-1}) * 0.95 231 | # reference pressure from a former iteration already set? 
232 | try: 233 | p_ref_last = p_ref 234 | except UnboundLocalError: 235 | p_ref_last = None 236 | # determine local reference pressure 237 | p_ref = xr.apply_ufunc(determine_p_ref, p_min_era, p_min_pgw, 238 | p_ref_opts, p_ref_last, 239 | input_core_dims=[[],[],[PLEV_GCM],[]], 240 | vectorize=True) 241 | if HLEV_ERA in p_ref.coords: 242 | del p_ref[HLEV_ERA] 243 | # make sure a reference pressure above the required model 244 | # level could be found everywhere 245 | if np.any(np.isnan(p_ref)): 246 | raise ValueError('A reference pressure level above the ' + 247 | 'required local minimum pressure level could not ' + 248 | 'be found everywhere. ' + 249 | 'This is likely the case because your geopotential ' + 250 | 'data set does not reach up high enough (e.g. only to ' + 251 | '500 hPa instead of e.g. 300 hPa?)') 252 | else: 253 | p_ref = p_ref_inp 254 | 258 | 259 | # convert relative humidity to specific humidity in pgw: 260 | # take PGW climate state temperature and relative humidity 261 | # and pressure of current iteration 262 | vars_pgw['hus'] = relative_to_specific_humidity( 263 | vars_pgw['hur'], 264 | pa_pgw, 265 | vars_pgw['ta'] 266 | ) 267 | 268 | # compute updated geopotential at reference pressure 269 | phi_ref_pgw = integ_geopot( 270 | pa_hl_pgw, 271 | era_file[var_name_map['zgs']], 272 | vars_pgw['ta'], 273 | vars_pgw['hus'], 274 | era_file[HLEV_ERA], 275 | p_ref 276 | ) 277 | 278 | # recompute original geopotential at currently used 279 | # reference pressure level 280 | phi_ref_era = integ_geopot( 281 | pa_hl_era, 282 | era_file[var_name_map['zgs']], 283 | era_file[var_name_map['ta']], 284 | era_file[var_name_map['hus']], 285 | era_file[HLEV_ERA], 286 | p_ref 287 | ) 288 | 289 | delta_phi_ref = phi_ref_pgw - phi_ref_era 290 | 291 | ## load climate delta at currently used reference pressure level 292 | climate_delta_phi_ref = load_delta(delta_input_dir, 'zg', 293 | era_file[TIME_ERA], era_step_dt) * CON_G 294 | climate_delta_phi_ref = climate_delta_phi_ref.sel({PLEV_GCM:p_ref}) 295 | del climate_delta_phi_ref[PLEV_GCM] 296 | 297 | # error in future geopotential 298 | phi_ref_error = delta_phi_ref - climate_delta_phi_ref 299 | 300 | # adjust surface pressure by some amount in the right direction (from hydrostatic balance, dphi = -Rd*T*dln(p), a geopotential error dphi maps to a surface pressure change of roughly -ps/(Rd*T_sfc)*dphi) 301 | adj_ps = - adj_factor * ps_pgw / ( 302 | CON_RD * 303 | vars_pgw['ta'].sel({LEV_ERA:np.max(era_file[LEV_ERA])}) 304 | ) * phi_ref_error 305 | if LEV_ERA in adj_ps.coords: 306 | del adj_ps[LEV_ERA] 307 | 308 | phi_ref_max_error = np.abs(phi_ref_error).max().values 309 | if i_debug >= 2: 310 | print('### iteration {:03d}, phi max error: {}'. 311 | format(it, phi_ref_max_error)) 312 | 313 | it += 1 314 | 315 | if it > max_n_iter: 316 | raise ValueError('ERROR! Pressure adjustment did not converge '+ 317 | 'for file {}. '.format(inp_era_file_path) + 318 | 'Consider increasing the value for "max_n_iter" in ' + 319 | 'settings.py') 320 | 321 | ######################################################################### 322 | ### FINISH UPDATING 3D FIELDS 323 | ######################################################################### 324 | # store computed delta ps for output in case of 325 | # --debug_mode = interpolate_full 326 | deltas['ps'] = ps_pgw - era_file[var_name_map['ps']] 327 | 328 | ## If re-interpolation is enabled, interpolate climate deltas for 329 | ## ua and va onto final PGW climate state ERA5 model levels.
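# (Note: ua and va do not enter the geopotential integration used in the
# surface pressure iteration above, so it is sufficient to interpolate
# and add their climate deltas once, after the iteration has converged.)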
330 | if i_reinterp: 331 | for var_name in ['ua', 'va']: 332 | if i_debug >= 2: 333 | print('add {}'.format(var_name)) 334 | var_era = interp_logp_4d(era_file[var_name_map[var_name]], 335 | pa_era, pa_pgw, extrapolate='constant') 336 | delta_var = load_delta_interp(delta_input_dir, 337 | var_name, pa_pgw, 338 | era_file[TIME_ERA], era_step_dt, 339 | ignore_top_pressure_error) 340 | vars_pgw[var_name] = var_era + delta_var 341 | # store delta for output in case of 342 | # --debug_mode = interpolate_full 343 | deltas[var_name] = delta_var 344 | 345 | ######################################################################### 346 | ### DEBUG MODE 347 | ######################################################################### 348 | ## for debug_mode == interpolate_full, write final climate deltas 349 | ## to output directory 350 | if debug_mode == 'interpolate_full': 351 | var_names = ['ps','ta','hur','ua','va','st','ts'] 352 | for var_name in var_names: 353 | print(var_name) 354 | # create output file name 355 | out_file_path = os.path.join(Path(out_era_file_path).parents[0], 356 | '{}_delta_{}'.format(var_name_map[var_name], 357 | Path(out_era_file_path).name)) 358 | # convert to dataset 359 | delta = deltas[var_name].to_dataset(name=var_name_map[var_name]) 360 | # save climate delta 361 | delta.to_netcdf(out_file_path, mode='w') 362 | 363 | ######################################################################### 364 | ### SAVE PGW ERA5 FILE 365 | ######################################################################### 366 | ## for production mode, modify ERA5 file and save 367 | else: 368 | ## update 3D fields in ERA file 369 | era_file[var_name_map['ps']] = ps_pgw 370 | for var_name in ['ta','hus','ua','va']: 371 | era_file[var_name_map[var_name]] = vars_pgw[var_name] 372 | ## remove manually computed RH field in ERA5 file 373 | del era_file[var_name_map['hur']] 374 | 375 | 376 | ## save updated ERA5 file 377 | if i_debug >= 2: print('min sic: {}'.format(np.nanmin(era_file[var_name_map['sic']].values))) 378 | era_file.to_netcdf(out_era_file_path, mode='w') 379 | era_file.close() 380 | if i_debug >= 1: 381 | print('Done. Saved to file {}.'.format(out_era_file_path)) 382 | 383 | 384 | 385 | ############################################################################## 386 | 387 | def debug_interpolate_time( 388 | inp_era_file_path, out_era_file_path, 389 | delta_input_dir, era_step_dt, 390 | ignore_top_pressure_error, 391 | debug_mode=None): 392 | """ 393 | Debugging function to test time interpolation. Is called if the 394 | input argument --debug_mode is set to "interpolate_time".
395 | """ 396 | # load input ERA5 file 397 | # in this debugging function, the only purpose of this is to obtain 398 | # the time format of the ERA5 file 399 | era_file = xr.open_dataset(inp_era_file_path, decode_cf=False) 400 | 401 | var_names = ['tos','tas','hurs','ps','ta','hur','ua','va','zg'] 402 | for var_name in var_names: 403 | print(var_name) 404 | # creat output file name 405 | out_file_path = os.path.join(Path(out_era_file_path).parents[0], 406 | '{}_{}_{}'.format("delta",var_name, 407 | Path(out_era_file_path).name)) 408 | # load climate delta interpolated in time only 409 | delta = load_delta(delta_input_dir, var_name, era_file[TIME_ERA], 410 | target_date_time=era_step_dt) 411 | # convert to dataset 412 | delta = delta.to_dataset(name=var_name) 413 | delta.to_netcdf(out_file_path, mode='w') 414 | era_file.close() 415 | 416 | 417 | 418 | 419 | 420 | 421 | ############################################################################## 422 | if __name__ == "__main__": 423 | ## input arguments 424 | parser = argparse.ArgumentParser(description = 425 | """ 426 | Perturb ERA5 with PGW climate deltas. Settings can be made in 427 | "settings.py". 428 | ########################################################################## 429 | 430 | Main function to update ERA5 files with the PGW signal. 431 | The terminology used is HIST referring to the historical (or reference) 432 | climatology, SCEN referring to the future (climate change scenario) 433 | climatology, and SCEN-HIST (a.k.a. climate delta) referring to the 434 | PGW signal which should be applied to the ERA5 files. 435 | The script computes and adds a climate change signal for: 436 | - ua 437 | - va 438 | - ta (including tas for interpolation near the surface) 439 | - hus (computed using a hur and hurs climate delta) 440 | - surface skin temperature (including SST) and soil temperature 441 | and consequently iteratively updates ps to maintain hydrostatic 442 | balance. During this, the climate delta for zg is additionally required. 443 | A list of all climate deltas required is shown in settings.py. 444 | 445 | ########################################################################## 446 | 447 | If the variable names in the ERA5 files to be processed deviate from 448 | the CMOR convention, the dict 'var_name_map' in the file 449 | settings.py allows to map between the CMOR names and the names in the ERA5 450 | file. Also the coordinate names in the ERA5 or the GCM climate 451 | delta files can be changed in settings.py, if required. 452 | 453 | ########################################################################## 454 | 455 | The code can be run in parallel on multiple ERA5 files at the same time. 456 | See input arguments. 457 | 458 | ########################################################################## 459 | 460 | Note that epxloring the option --debug_mode (-D) can provide a lot of 461 | insight into what the code does and can help gain confidence using the 462 | code (see argument documentation below for more information). 463 | 464 | ########################################################################## 465 | 466 | Some more information about the iterative surface pressure 467 | adjustment: 468 | 469 | - The procedure requires a reference pressure level (e.g. 500 hPa) for 470 | which the geopotential is computed. Based on the deviation between the 471 | computed and the GCM reference pressure geopotential, the surface pressure 472 | is adjusted. 
While the climate delta for the geopotential 473 | on a specific pressure level is computed by the GCM using data 474 | from all of its model levels, the climate deltas used here may only be 475 | available at a coarser vertical resolution; this mismatch introduces an 476 | error in the surface pressure adjustment used here. See the publication for more details. 477 | The higher (in terms of altitude) the reference pressure is chosen, 478 | the larger this error may get. 479 | Alternatively, the reference pressure can be determined locally 480 | as the lowest possible pressure above the surface for which a climate 481 | delta for the geopotential is available (see settings.py). 482 | 483 | - If the iteration does not converge, 'thresh_phi_ref_max_error' in 484 | the file settings.py may have to be raised a little bit. Setting 485 | i_debug = 2 may help to diagnose if this helps. 486 | 487 | - As a default option, the climate deltas are interpolated to 488 | the ERA5 model levels of the ERA climate state before the surface 489 | pressure is adjusted (i_reinterp = 0). 490 | There is an option implemented (i_reinterp = 1) in which the 491 | deltas are re-interpolated onto the updated ERA5 model levels 492 | with each iteration of surface pressure adjustment. However, this 493 | implies that the ERA5 fields are extrapolated at the surface 494 | (if the surface pressure increases), the effect of which was not 495 | tested in detail. The extrapolation is done assuming that the 496 | boundary values are constant, which is not ideal for height-dependent 497 | variables like e.g. temperature. As a default, it is recommended to set 498 | i_reinterp = 0. 499 | 500 | ########################################################################## 501 | 502 | """, formatter_class=RawDescriptionHelpFormatter) 503 | 504 | # input era5 directory 505 | parser.add_argument('-i', '--input_dir', type=str, default=None, 506 | help='Directory with ERA5 input files to process. ' + 507 | 'These files are not overwritten but copies will ' + 508 | 'be saved in --output_dir.') 509 | 510 | # output era5 directory 511 | parser.add_argument('-o', '--output_dir', type=str, default=None, 512 | help='Directory to store processed ERA5 files.') 513 | 514 | # first bc step to compute 515 | parser.add_argument('-f', '--first_era_step', type=str, 516 | default='2006080200', 517 | help='Date of first ERA5 time step to process. Format should ' + 518 | 'be YYYYMMDDHH.') 519 | 520 | # last bc step to compute 521 | parser.add_argument('-l', '--last_era_step', type=str, 522 | default='2006080300', 523 | help='Date of last ERA5 time step to process. Format should ' + 524 | 'be YYYYMMDDHH.') 525 | 526 | # delta hour increments 527 | parser.add_argument('-H', '--hour_inc_step', type=int, default=3, 528 | help='Hourly increment of the ERA5 time steps to process '+ 529 | 'between --first_era_step and --last_era_step. Default value ' + 530 | 'is 3-hourly, i.e. (00, 03, 06, 09, 12, 15, 18, 21 UTC).') 531 | 532 | # climate delta directory (already remapped to ERA5 grid) 533 | parser.add_argument('-d', '--delta_input_dir', type=str, default=None, 534 | help='Directory with GCM climate deltas (SCEN-HIST) to be used. ' + 535 | 'This directory should have a climate delta for ta,hur,' + 536 | 'ua,va,zg,tas,hurs,ts,tos (e.g. ta_delta.nc), as well as the ' + 537 | 'HIST climatology value for ps (e.g. ps_historical.nc).
' + 538 | 'All files have to be horizontally remapped to the grid of ' + 539 | 'the ERA5 files used (see step_02_preproc_deltas.py).') 540 | 541 | # number of parallel jobs 542 | parser.add_argument('-p', '--n_par', type=int, default=1, 543 | help='Number of parallel tasks. Parallelization is done ' + 544 | 'on the level of individual ERA5 files being processed at ' + 545 | 'the same time.') 546 | 547 | # flag to ignore the error from the pressure extrapolation at the model top 548 | parser.add_argument('-t', '--ignore_top_pressure_error', 549 | action='store_true', 550 | help='Flag to ignore an error due to pressure ' + 551 | 'extrapolation at the model top if GCM climate deltas reach ' + 552 | 'up less far than ERA5. This can only be done if ERA5 data ' + 553 | 'is not used by the limited-area model '+ 554 | 'beyond the upper-most level of the GCM climate ' + 555 | 'deltas!!') 556 | 557 | # debug mode 558 | parser.add_argument('-D', '--debug_mode', type=str, default=None, 559 | help='If this flag is set, the ERA5 files will not be ' + 560 | 'modified but instead the processed climate deltas ' 561 | 'are written to the output directory. There are two ' + 562 | 'options: for "-D interpolate_time", the climate deltas ' + 563 | 'are only interpolated to the time of the ERA5 files ' + 564 | 'and then stored. For "-D interpolate_full", the ' + 565 | 'full routine is run but instead of the processed ERA5 ' + 566 | 'files, only the difference between the processed and ' + 567 | 'the unprocessed ERA5 files is stored (i.e. the climate ' + 568 | 'deltas after full interpolation to the ERA5 grid).') 569 | 570 | 571 | args = parser.parse_args() 572 | ########################################################################## 573 | 574 | # make sure required input arguments are set. 575 | if args.input_dir is None: 576 | raise ValueError('Input directory (-i) is required.') 577 | if args.output_dir is None: 578 | raise ValueError('Output directory (-o) is required.') 579 | if args.delta_input_dir is None: 580 | raise ValueError('Delta input directory (-d) is required.') 581 | 582 | # check for debug mode 583 | if args.debug_mode is not None: 584 | if args.debug_mode not in ['interpolate_time', 'interpolate_full']: 585 | raise ValueError('Invalid input for argument --debug_mode!
' + 586 | 'Valid arguments are: ' + 587 | '"interpolate_time" or "interpolate_full"') 588 | 589 | # first date and last date to datetime object 590 | first_era_step = datetime.strptime(args.first_era_step, '%Y%m%d%H') 591 | last_era_step = datetime.strptime(args.last_era_step, '%Y%m%d%H') 592 | 593 | # time steps to process 594 | era_step_dts = np.arange(first_era_step, 595 | last_era_step+timedelta(hours=args.hour_inc_step), 596 | timedelta(hours=args.hour_inc_step)).tolist() 597 | 598 | # if output directory doesn't exist create it 599 | Path(args.output_dir).mkdir(parents=True, exist_ok=True) 600 | 601 | IMP = IterMP(njobs=args.n_par, run_async=True) 602 | fargs = dict( 603 | delta_input_dir = args.delta_input_dir, 604 | ignore_top_pressure_error = args.ignore_top_pressure_error, 605 | debug_mode = args.debug_mode, 606 | ) 607 | step_args = [] 608 | 609 | ########################################################################## 610 | # iterate over time steps and prepare function arguments 611 | for era_step_dt in era_step_dts: 612 | print(era_step_dt) 613 | 614 | # set output and input ERA5 file 615 | inp_era_file_path = os.path.join(args.input_dir, 616 | era5_file_name_base.format(era_step_dt)) 617 | out_era_file_path = os.path.join(args.output_dir, 618 | era5_file_name_base.format(era_step_dt)) 619 | 620 | step_args.append(dict( 621 | inp_era_file_path = inp_era_file_path, 622 | out_era_file_path = out_era_file_path, 623 | era_step_dt = era_step_dt 624 | ) 625 | ) 626 | 627 | # choose either the main function (pgw_for_era5) for production mode and 628 | # debug mode "interpolate_full", or the function debug_interpolate_time 629 | # for debug mode "interpolate_time" 630 | if (args.debug_mode is None) or (args.debug_mode == 'interpolate_full'): 631 | run_function = pgw_for_era5 632 | elif args.debug_mode == 'interpolate_time': 633 | run_function = debug_interpolate_time 634 | else: 635 | raise NotImplementedError() 636 | 637 | # run in parallel if args.n_par > 1 638 | IMP.run(run_function, fargs, step_args) 639 | 640 | -------------------------------------------------------------------------------- /step_01_extract_deltas/CFday_wget_scripts/wget_CFday_ssp585_ua.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ############################################################################## 3 | # ESG Federation download script 4 | # 5 | # Template version: 1.2 6 | # Generated by esgf-data.dkrz.de - 2022/02/21 20:59:57 7 | # Search URL: https://esgf-data.dkrz.de/esg-search/wget/?distrib=false&dataset_id=CMIP6.ScenarioMIP.DKRZ.MPI-ESM1-2-HR.ssp585.r1i1p1f1.CFday.ua.gn.v20190710|esgf3.dkrz.de 8 | # 9 | ############################################################################### 10 | # first be sure it's bash... anything out of bash or sh will break 11 | # and the test will assure we are not using sh instead of bash 12 | if [ $BASH ] && [ `basename $BASH` != bash ]; then 13 | echo "######## This is a bash script! ##############" 14 | echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." 15 | echo "Trying to recover automatically..." 16 | sleep 1 17 | /bin/bash $0 $@ 18 | exit $?
19 | fi 20 | 21 | version=1.3.2 22 | CACHE_FILE=.$(basename $0).status 23 | openId= 24 | search_url='https://esgf-data.dkrz.de/esg-search/wget/?distrib=false&dataset_id=CMIP6.ScenarioMIP.DKRZ.MPI-ESM1-2-HR.ssp585.r1i1p1f1.CFday.ua.gn.v20190710|esgf3.dkrz.de' 25 | 26 | #These are the embedded files to be downloaded 27 | download_files="$(cat < 10#${ver2[i]})) 99 | then 100 | return 1 101 | fi 102 | if ((10#${ver1[i]} < 10#${ver2[i]})) 103 | then 104 | return 2 105 | fi 106 | done 107 | return 0 108 | } 109 | 110 | check_commands() { 111 | #check wget 112 | local MIN_WGET_VERSION=1.10 113 | vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION 114 | case $? in 115 | 2) #lower 116 | wget -V 117 | echo 118 | echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 119 | exit 1 120 | esac 121 | } 122 | 123 | usage() { 124 | echo "Usage: $(basename $0) [flags] [openid] [username]" 125 | echo "Flags is one of:" 126 | sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 127 | echo 128 | echo "This command stores the states of the downloads in .$0.status" 129 | echo "For more information check the website: http://esgf.org/wiki/ESGF_wget" 130 | } 131 | 132 | #defaults 133 | debug=0 134 | clean_work=1 135 | 136 | #parse flags 137 | while getopts ':c:pfF:o:w:isuUndvqhHI:T' OPT; do 138 | case $OPT in 139 | H) skip_security=1 && use_http_sec=1;; # : Authenticate with OpenID (username,) and password, without the need for a certificate. 140 | T) force_TLSv1=1;; # : Forces wget to use TLSv1. 141 | c) ESG_CREDENTIALS="$OPTARG";; # : use this certificate for authentication. 142 | f) force=1;; # : force certificate retrieval (defaults to only once per day); for certificate-less authentication (see -H option), this flag will force login and refresh cookies. 143 | F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) 144 | o) openId="$OPTARG";; #: Provide OpenID instead of interactively asking for it. 145 | I) username_supplied="$OPTARG";; # : Explicitly set user ID. By default, the user ID is extracted from the last component of the OpenID URL. Use this flag to override this behaviour. 146 | w) output="$OPTARG";; # : Write embedded files into a file and exit 147 | i) insecure=1;; # : set insecure mode, i.e. don't check server certificate 148 | s) skip_security=1 && use_cookies_for_http_basic_auth_start=1;; # : completely skip security. It will only work if the accessed data is not secured at all. -- works only if the accessed data is unsecured or a certificate exists or cookies are saved (latter applies to -H option only). 149 | u) update=1;; # : Issue the search again and see if something has changed. 150 | U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) 151 | n) dry_run=1;; # : Don't download any files, just report. 152 | p) clean_work=0;; # : preserve data that failed checksum 153 | d) verbose=1;debug=1;; # : display debug information 154 | v) verbose=1;; # : be more verbose 155 | q) quiet=1;; # : be less verbose 156 | h) usage && exit 0;; # : displays this help 157 | \?) 
echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; 158 | \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; 159 | esac 160 | done 161 | shift $(($OPTIND - 1)) 162 | 163 | #setup input as desired by the user 164 | if [[ "$input_file" ]]; then 165 | if [[ "$input_file" == '-' ]]; then 166 | download_files="$(cat)" #read from STDIN 167 | exec 0$output 181 | exit 182 | fi 183 | 184 | 185 | #assure we have everything we need 186 | check_commands 187 | 188 | if ((update)); then 189 | echo "Checking the server for changes..." 190 | new_wget="$(wget "$search_url" -qO -)" 191 | compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" 192 | if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then 193 | echo "No changes detected." 194 | else 195 | echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" 196 | counter=0 197 | while [[ -f $0.old.$counter ]]; do ((counter++)); done 198 | mv $0 $0.old.$counter 199 | echo "$new_wget" > $0 200 | fi 201 | exit 0 202 | fi 203 | 204 | 205 | ############################################################################## 206 | check_java() { 207 | if ! type java >& /dev/null; then 208 | echo "Java could not be found." >&2 209 | return 1 210 | fi 211 | if java -version 2>&1|grep openjdk >/dev/null; then 212 | openjdk=1; 213 | else 214 | openjdk=0; 215 | fi 216 | jversion=($(jversion=$(java -version 2>&1 | awk '/version/ {gsub("\"","");print $3}'); echo ${jversion//./ })) 217 | mVer=${jversion[1]} 218 | if [ $openjdk -eq 1 ]; then 219 | mVer=${jversion[0]} 220 | if ((mVer<5)); then 221 | echo "Openjdk detected. Version 9+ is required for retrieving the certificate." >&2 222 | echo "Current version seems older: $(java -version | head -n1) " >&2 223 | return 1 224 | fi 225 | else 226 | 227 | if ((mVer<5)); then 228 | echo "Java version 1.5+ is required for retrieving the certificate." >&2 229 | echo "Current version seems older: $(java -version | head -n1) " >&2 230 | return 1 231 | fi 232 | fi 233 | } 234 | 235 | check_myproxy_logon() { 236 | if ! type myproxy-logon >& /dev/null; then 237 | echo "myproxy-logon could not be found." >&2 238 | return 1 239 | fi 240 | echo "myproxy-logon found" >&2 241 | } 242 | 243 | proxy_to_java() { 244 | local proxy_user proxy_pass proxy_server proxy_port 245 | eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$http_proxy) 246 | local JAVA_PROXY= 247 | [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyHost=$proxy_server" 248 | [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyPort=$proxy_port" 249 | eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$https_proxy) 250 | [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyHost=$proxy_server" 251 | [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyPort=$proxy_port" 252 | 253 | echo "$JAVA_PROXY" 254 | } 255 | 256 | # get certificates from github 257 | get_certificates() { 258 | # don't if this was already done today 259 | [[ -z $force && "$(find $ESG_CERT_DIR -type d -mtime -1 2>/dev/null)" ]] && return 0 260 | echo -n "Retrieving Federation Certificates..." >&2 261 | 262 | if ! 
wget -O $ESG_HOME/esg-truststore.ts --no-check-certificate https://github.com/ESGF/esgf-dist/raw/master/installer/certs/esg-truststore.ts; then 263 | echo "Could not fetch esg-truststore"; 264 | return 1 265 | fi 266 | 267 | if ! wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar -O - -q | tar x -C $ESG_HOME; then 268 | #certificates tarred into esg_trusted_certificates. (if it breaks, let the user know why 269 | wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar 270 | echo "Could't update certs!" >&2 271 | return 1 272 | else 273 | #if here everythng went fine. Replace old cert with this ones 274 | [[ -d $ESG_CERT_DIR ]] && rm -r $ESG_CERT_DIR || mkdir -p $(dirname $ESG_CERT_DIR) 275 | mv $ESG_HOME/esg_trusted_certificates $ESG_CERT_DIR 276 | touch $ESG_CERT_DIR 277 | echo "done!" >&2 278 | fi 279 | 280 | } 281 | 282 | # Retrieve ESG credentials 283 | unset pass 284 | get_credentials() { 285 | if check_java 286 | then 287 | use_java=1 288 | else 289 | use_java=0 290 | echo "No suitable java for obtaining certificate - checking for myproxy-logon instead" >&2 291 | check_myproxy_logon || exit 1 292 | fi 293 | #get all certificates 294 | get_certificates 295 | 296 | if [[ -z "$(find $MYPROXY_GETCERT -type f -mtime -1 2>/dev/null)" ]]; then 297 | echo -n "(Downloading $MYPROXY_GETCERT... " 298 | mkdir -p $(dirname $MYPROXY_GETCERT) 299 | if wget -q --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/getcert.jar -O $MYPROXY_GETCERT;then 300 | echo 'done)' 301 | touch $MYPROXY_GETCERT 302 | else 303 | echo 'failed)' 304 | fi 305 | fi 306 | 307 | #if the user already defined one, use it 308 | if [[ -z $openId ]]; then 309 | #try to parse the last valid value if any 310 | [[ -f "$MYPROXY_STATUS" ]] && openId=$(awk -F= '/^OpenID/ {gsub("\\\\", ""); print $2}' $MYPROXY_STATUS) 311 | if [[ -z $openId ]]; then 312 | #no OpenID, we need to ask the user 313 | echo -n "Please give your OpenID (Example: https://myserver/example/username) ? " 314 | else 315 | #Allow the user to change it if desired 316 | echo -n "Please give your OpenID (hit ENTER to accept default: $openId)? " 317 | fi 318 | read -e 319 | [[ "$REPLY" ]] && openId="$REPLY" 320 | else 321 | ((verbose)) && echo "Using user defined OpenID $openId (to change use -o )" 322 | fi 323 | 324 | if grep -q ceda.ac.uk <<<$openId; then 325 | username=${openId##*/} 326 | echo -n "Please give your username if different [$username]: " 327 | read -e 328 | [[ "$REPLY" ]] && username="$REPLY" 329 | fi 330 | 331 | 332 | 333 | if [ $use_java -eq 1 ] 334 | then 335 | local args= 336 | #get password 337 | [[ ! "$pass" ]] && read -sp "MyProxy Password? " pass 338 | [[ "$openId" ]] && args=$args" --oid $openId" 339 | [[ "$pass" ]] && args=$args" -P $pass" 340 | [[ "$username" ]] && args=$args" -l $username" 341 | 342 | echo -n $'\nRetrieving Credentials...' >&2 343 | if ! java $(proxy_to_java) -jar $MYPROXY_GETCERT $args --ca-directory $ESG_CERT_DIR --output $ESG_CREDENTIALS ; then 344 | echo "Certificate could not be retrieved" 345 | exit 1 346 | fi 347 | echo "done!" >&2 348 | else 349 | args=`openid_to_myproxy_args $openId $username` || exit 1 350 | if ! 
myproxy-logon $args -b -o $ESG_CREDENTIALS 351 | then 352 | echo "Certificate could not be retrieved" 353 | exit 1 354 | fi 355 | cp $HOME/.globus/certificates/* $ESG_CERT_DIR/ 356 | fi 357 | } 358 | 359 | openid_to_myproxy_args() { 360 | python - </dev/null; then 394 | #check openssl and certificate 395 | if ! openssl x509 -checkend $CERT_EXPIRATION_WARNING -noout -in $ESG_CERT 2>/dev/null; then 396 | echo "The certificate expires in less than $((CERT_EXPIRATION_WARNING / 60 / 60)) hour(s). Renewing..." 397 | get_credentials 398 | else 399 | #ok, certificate is fine 400 | return 0 401 | fi 402 | fi 403 | } 404 | 405 | # 406 | # Detect ESG credentials 407 | # 408 | find_credentials() { 409 | 410 | #is X509_USER_PROXY or $HOME/.esg/credential.pem 411 | if [[ -f "$ESG_CREDENTIALS" ]]; then 412 | # file found, proceed. 413 | ESG_CERT="$ESG_CREDENTIALS" 414 | ESG_KEY="$ESG_CREDENTIALS" 415 | elif [[ -f "$X509_USER_CERT" && -f "$X509_USER_KEY" ]]; then 416 | # second try, use these certificates. 417 | ESG_CERT="$X509_USER_CERT" 418 | ESG_KEY="$X509_USER_KEY" 419 | else 420 | # If credentials are not present, just point to where they should go 421 | echo "No ESG Credentials found in $ESG_CREDENTIALS" >&2 422 | ESG_CERT="$ESG_CREDENTIALS" 423 | ESG_KEY="$ESG_CREDENTIALS" 424 | #they will be retrieved later one 425 | fi 426 | 427 | 428 | #chek openssl and certificate 429 | if (which openssl &>/dev/null); then 430 | if ( openssl version | grep 'OpenSSL 1\.0' ); then 431 | echo '** WARNING: ESGF Host certificate checking might not be compatible with OpenSSL 1.0+' 432 | fi 433 | check_cert || { (($?==1)); exit 1; } 434 | fi 435 | 436 | if [[ $CHECK_SERVER_CERT == "Yes" ]]; then 437 | [[ -d "$ESG_CERT_DIR" ]] || { echo "CA certs not found. Aborting."; exit 1; } 438 | PKI_WGET_OPTS="--ca-directory=$ESG_CERT_DIR" 439 | fi 440 | 441 | #some wget version complain if there's no file present 442 | [[ -f $COOKIE_JAR ]] || touch $COOKIE_JAR 443 | 444 | PKI_WGET_OPTS="$PKI_WGET_OPTS --certificate=$ESG_CERT --private-key=$ESG_KEY --save-cookies=$COOKIE_JAR --load-cookies=$COOKIE_JAR --ca-certificate=$ESG_CERT" 445 | 446 | } 447 | 448 | check_chksum() { 449 | local file="$1" 450 | local chk_type=$2 451 | local chk_value=$3 452 | local local_chksum=Unknown 453 | 454 | case $chk_type in 455 | md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; 456 | sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; 457 | *) echo "Can't verify checksum." && return 0;; 458 | esac 459 | 460 | #verify 461 | ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 462 | echo $local_chksum 463 | } 464 | 465 | #Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) 466 | md5sum_() { 467 | hash -r 468 | if type md5sum >& /dev/null; then 469 | echo $(md5sum $@) 470 | else 471 | echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') 472 | fi 473 | } 474 | 475 | #Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. 
mac os x) 476 | sha256sum_() { 477 | hash -r 478 | if type sha256sum >& /dev/null; then 479 | echo $(sha256sum $@) 480 | elif type shasum >& /dev/null; then 481 | echo $(shasum -a 256 $@) 482 | else 483 | echo $(sha2 -q -256 $@) 484 | fi 485 | } 486 | 487 | get_mod_time_() { 488 | if ((MACOSX)); then 489 | #on a mac modtime is stat -f %m 490 | echo "$(stat -f %m $@)" 491 | else 492 | #on linux (cygwin) modtime is stat -c %Y 493 | echo "$(stat -c %Y $@)" 494 | fi 495 | return 0; 496 | } 497 | 498 | remove_from_cache() { 499 | local entry="$1" 500 | local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" 501 | echo "$tmp_file" > "$CACHE_FILE" 502 | unset cached 503 | } 504 | 505 | #Download data from node using cookies and not certificates. 506 | download_http_sec() 507 | { 508 | #The data to be downloaded. 509 | data=" $url" 510 | filename="$file" 511 | 512 | #Wget args. 513 | if ((insecure)) 514 | then 515 | wget_args=" --no-check-certificate --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " 516 | else 517 | wget_args=" --ca-directory=$WGET_TRUSTED_CERTIFICATES --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " 518 | fi 519 | 520 | if ((use_cookies_for_http_basic_auth_start)) || ((use_cookies_for_http_basic_auth)) 521 | then 522 | wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" 523 | fi 524 | 525 | if((force_TLSv1)) 526 | then 527 | wget_args=" $wget_args"" --secure-protocol=TLSv1 " 528 | fi 529 | 530 | 531 | if [[ ! -z "$ESGF_WGET_OPTS" ]] 532 | then 533 | wget_args="$wget_args $ESGF_WGET_OPTS" 534 | fi 535 | 536 | 537 | #use cookies for the next downloads 538 | use_cookies_for_http_basic_auth=1; 539 | 540 | #Debug message. 541 | if ((debug)) 542 | then 543 | echo -e "\nExecuting:\n" 544 | echo -e "wget $wget_args $data\n" 545 | fi 546 | 547 | 548 | #Try to download the data. 549 | command="wget $wget_args -O $filename $data" 550 | http_resp=$(eval $command 2>&1) 551 | cmd_exit_status="$?" 552 | 553 | if ((debug)) 554 | then 555 | echo -e "\nHTTP response:\n $http_resp\n" 556 | fi 557 | 558 | #Extract orp service from url ? 559 | #Evaluate response. 560 | #redirects=$(echo "$http_resp" | egrep -c ' 302 ') 561 | #(( "$redirects" == 1 )) && 562 | if echo "$http_resp" | grep -q "/esg-orp/" 563 | then 564 | urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) 565 | orp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) 566 | 567 | 568 | #Use cookies for transaction with orp. 569 | wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" 570 | 571 | #Download data using either http basic auth or http login form. 572 | if [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] 573 | then 574 | download_http_sec_open_id 575 | else 576 | download_http_sec_decide_service 577 | fi 578 | else 579 | if echo "$http_resp" | grep -q "401 Unauthorized" \ 580 | || echo "$http_resp" | grep -q "403: Forbidden" \ 581 | || echo "$http_resp" | grep -q "Connection timed out." \ 582 | || echo "$http_resp" | grep -q "no-check-certificate" \ 583 | || (( $cmd_exit_status != 0 )) 584 | then 585 | echo "ERROR : http request to OpenID Relying Party service failed." 586 | failed=1 587 | fi 588 | fi 589 | } 590 | 591 | 592 | #Function that decides which implementaion of idp to use. 
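# (Illustrative summary, not part of the generated template: the function below
# first derives the username from the last path component of the OpenID, then
# probes the bare provider endpoint -- https://HOST/openid/ or
# https://HOST/esgf-idp/openid/ -- and, if the probe answers with an XRDS
# document, switches to HTTP basic auth; otherwise it falls back to the
# claimed-id login flow. A hypothetical claimed id would look like
# https://esgf-data.dkrz.de/esgf-idp/openid/USERNAME.)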
593 | download_http_sec_decide_service() 594 | { 595 | #find claimed id 596 | 597 | pos=$(echo "$openid_c" | egrep -o '/' | wc -l) 598 | username_c=$(echo "$openid_c" | cut -d'/' -f "$(($pos + 1))") 599 | esgf_uri=$(echo "$openid_c" | egrep -o '/esgf-idp/openid/') 600 | 601 | host=$(echo "$openid_c" | cut -d'/' -f 3) 602 | #test ceda first. 603 | 604 | if [[ -z "$esgf_uri" ]] 605 | then 606 | openid_c_tmp="https://""$host""/openid/" 607 | else 608 | openid_c_tmp="https://""$host""/esgf-idp/openid/" 609 | fi 610 | 611 | command="wget "$openid_c_tmp" --no-check-certificate ${force_TLSv1:+--secure-protocol=TLSv1} -O-" 612 | 613 | if [[ ! -z "$ESGF_WGET_OPTS" ]] 614 | then 615 | command="$command $ESGF_WGET_OPTS" 616 | fi 617 | 618 | #Debug message. 619 | if ((debug)) 620 | then 621 | echo -e "\nExecuting:\n" 622 | echo -e "$command\n" 623 | fi 624 | 625 | 626 | #Execution of command. 627 | http_resp=$(eval $command 2>&1) 628 | cmd_exit_status="$?" 629 | 630 | 631 | if ((debug)) 632 | then 633 | echo -e "\nHTTP response:\n $http_resp\n" 634 | fi 635 | 636 | 637 | if echo "$http_resp" | grep -q "[application/xrds+xml]" \ 638 | && echo "$http_resp" | grep -q "200 OK" \ 639 | && (( cmd_exit_status == 0 )) 640 | then 641 | openid_c=$openid_c_tmp 642 | download_http_sec_open_id 643 | else 644 | if [[ -z "$esgf_uri" ]] 645 | then 646 | echo "ERROR : HTTP request to OpenID Relying Party service failed." 647 | failed=1 648 | else 649 | download_http_sec_cl_id 650 | fi 651 | fi 652 | } 653 | 654 | 655 | download_http_sec_retry() 656 | { 657 | echo -e "\nRetrying....\n" 658 | #Retry in case that last redirect did not work, this happens with older version of wget. 659 | command="wget $wget_args $data" 660 | 661 | #Debug message. 662 | if ((debug)) 663 | then 664 | echo -e "Executing:\n" 665 | echo -e "$command\n" 666 | fi 667 | 668 | http_resp=$(eval $command 2>&1) 669 | cmd_exit_status="$?" 670 | 671 | if ((debug)) 672 | then 673 | echo -e "\nHTTP response:\n $http_resp\n" 674 | fi 675 | 676 | if echo "$http_resp" | grep -q "401 Unauthorized" \ 677 | || echo "$http_resp" | grep -q "403: Forbidden" \ 678 | || echo "$http_resp" | grep -q "Connection timed out." \ 679 | || echo "$http_resp" | grep -q "no-check-certificate" \ 680 | || (( $cmd_exit_status != 0 )) 681 | then 682 | echo -e "\nERROR : Retry failed.\n" 683 | #rm "$filename" 684 | failed=1 685 | fi #if retry failed. 686 | } 687 | 688 | #Function for downloading data using the claimed id. 689 | download_http_sec_cl_id() 690 | { 691 | #Http request for sending openid to the orp service. 692 | command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" $wget_args -O- https://$orp_service/esg-orp/j_spring_openid_security_check.htm " 693 | 694 | #Debug message. 695 | if ((debug)) 696 | then 697 | echo -e "Executing:\n" 698 | echo -e "wget $command\n" 699 | fi 700 | 701 | 702 | #Execution of command. 703 | http_resp=$(eval $command 2>&1) 704 | cmd_exit_status="$?" 705 | 706 | 707 | if ((debug)) 708 | then 709 | echo -e "\nHTTP response:\n $http_resp\n" 710 | fi 711 | 712 | 713 | #Extract orp service from openid ? 714 | #Evaluate response.If redirected to idp service send the credentials. 
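# (Context: in practice this is a two-step exchange -- the ORP answers the
# OpenID POST with a redirect to the provider's login.htm, and the block below
# re-posts the password there; the session cookies obtained this way are what
# authorize the actual file transfer.)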
715 | #redirects=$(echo "$http_resp" | egrep -c ' 302 ') 716 | #(( redirects == 2 )) && 717 | if echo "$http_resp" | grep -q "login.htm" && (( cmd_exit_status == 0 )) 718 | then 719 | 720 | urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) 721 | idp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) 722 | 723 | command="wget --post-data password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$idp_service/esgf-idp/idp/login.htm" 724 | 725 | 726 | #Debug message. 727 | if ((debug)) 728 | then 729 | echo -e "Executing:\n" 730 | echo -e "wget $command\n" 731 | fi 732 | 733 | #Execution of command. 734 | http_resp=$(eval $command 2>&1) 735 | cmd_exit_status="$?" 736 | 737 | if ((debug)) 738 | then 739 | echo -e "\nHTTP response:\n $http_resp\n" 740 | fi 741 | 742 | #Evaluate response. 743 | #redirects=$(echo "$http_resp" | egrep -c ' 302 ') 744 | #(( "$redirects" != 5 )) \ 745 | if echo "$http_resp" | grep -q "text/html" \ 746 | || echo "$http_resp" | grep -q "403: Forbidden" \ 747 | || (( cmd_exit_status != 0 )) 748 | then 749 | rm "$filename" 750 | download_http_sec_retry 751 | fi 752 | 753 | else 754 | echo "ERROR : HTTP request to OpenID Provider service failed." 755 | failed=1 756 | fi #if redirected to idp. 757 | } 758 | 759 | 760 | 761 | download_http_sec_open_id() 762 | { 763 | #Http request for sending openid to the orp web service. 764 | command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" --header=\"esgf-idea-agent-type:basic_auth\" --http-user=\"$username_c\" --http-password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$orp_service/esg-orp/j_spring_openid_security_check.htm " 765 | 766 | 767 | #Debug message. 768 | if ((debug)) 769 | then 770 | echo -e "Executing:\n" 771 | echo -e "$command\n" 772 | fi 773 | 774 | #Execution of command. 775 | http_resp=$(eval $command 2>&1) 776 | cmd_exit_status="$?" 777 | 778 | 779 | if ((debug)) 780 | then 781 | echo -e "\nHTTP response:\n $http_resp\n" 782 | fi 783 | 784 | #Evaluate response. 785 | #redirects=$(echo "$http_resp" | egrep -c ' 302 ') 786 | #(( "$redirects" != 7 )) || 787 | if echo "$http_resp" | grep -q "text/html" || (( $cmd_exit_status != 0 )) 788 | then 789 | rm "$filename" 790 | download_http_sec_retry 791 | fi #if error during http basic authentication. 792 | 793 | } 794 | 795 | 796 | download() { 797 | wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v} -c ${force_TLSv1:+--secure-protocol=TLSv1} $PKI_WGET_OPTS" 798 | 799 | while read line 800 | do 801 | # read csv here document into proper variables 802 | eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) 803 | 804 | #Process the file 805 | echo -n "$file ..." 806 | 807 | #get the cached entry if any. 808 | cached="$(grep -e "^$file" "$CACHE_FILE")" 809 | 810 | #if we have the cache entry but no file, clean it. 811 | if [[ ! 
-f $file && "$cached" ]]; then 812 | #the file was removed, clean the cache 813 | remove_from_cache "$file" 814 | unset cached 815 | fi 816 | 817 | #check it wasn't modified 818 | if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then 819 | if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then 820 | echo "Already downloaded and verified" 821 | continue 822 | elif ((update_files)); then 823 | #user wants to overwrite newer files 824 | rm $file 825 | remove_from_cache "$file" 826 | unset cached 827 | else 828 | #file on server is different from what we have. 829 | echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" 830 | continue 831 | fi 832 | fi 833 | unset chksum_err_value chksum_err_count 834 | 835 | while : ; do 836 | # (if we had the file size, we could check before trying to complete) 837 | echo "Downloading" 838 | [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" 839 | if ((dry_run)); then 840 | #all important info was already displayed, if in dry_run mode just abort 841 | #No status will be stored 842 | break 843 | else 844 | if ((use_http_sec)) 845 | then 846 | download_http_sec 847 | if ((failed)) 848 | then 849 | break 850 | fi 851 | else 852 | $wget -O "$file" $url || { failed=1; break; } 853 | fi 854 | fi 855 | 856 | #check if file is there 857 | if [[ -f $file ]]; then 858 | ((debug)) && echo file found 859 | if [[ ! "$chksum" ]]; then 860 | echo "Checksum not provided, can't verify file integrity" 861 | break 862 | fi 863 | result_chksum=$(check_chksum "$file" $chksum_type $chksum) 864 | if [[ "$result_chksum" != "$chksum" ]]; then 865 | echo " $chksum_type failed!" 866 | if ((clean_work)); then 867 | if ! ((chksum_err_count)); then 868 | chksum_err_value=$result_chksum 869 | chksum_err_count=2 870 | elif ((chksum_err_count--)); then 871 | if [[ "$result_chksum" != "$chksum_err_value" ]]; then 872 | #this is a real transmission problem 873 | chksum_err_value=$result_chksum 874 | chksum_err_count=2 875 | fi 876 | else 877 | #ok, if we get here we keep getting the same "different" checksum 878 | echo "The file always returns a different checksum!" 879 | echo "Contact the data owner to verify what is happening." 880 | echo 881 | sleep 1 882 | break 883 | fi 884 | 885 | rm $file 886 | #try again 887 | echo -n " re-trying..." 888 | continue 889 | else 890 | echo " don't use -p or remove manually." 891 | fi 892 | else 893 | echo " $chksum_type ok. done!" 894 | echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE 895 | fi 896 | fi 897 | #done! 898 | break 899 | done 900 | 901 | if ((failed)); then 902 | echo "download failed" 903 | # most common failure is certificate expiration, so check this 904 | #if we have the password we can retrigger the download 905 | ((!skip_security)) && [[ "$pass" ]] && check_cert 906 | unset failed 907 | fi 908 | 909 | done <<<"$download_files" 910 | 911 | } 912 | 913 | dedup_cache_() { 914 | local file=${1:-${CACHE_FILE}} 915 | ((debug)) && echo "dedup'ing cache ${file} ..." 
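# The pipeline below keeps exactly one cache record per filename: the reverse
# sort on (filename, mtime) puts the newest record first, awk's '!($1 in a)'
# keeps only the first record seen per filename, and the final sort restores
# mtime order. Sketch with hypothetical records ("name mtime checksum"):
#   data.nc 100 aaa
#   data.nc 200 bbb   ->   only "data.nc 200 bbb" survives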
916 | local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) 917 | ((DEBUG)) && echo "$tmp" 918 | echo "$tmp" > $file 919 | ((debug)) && echo "(cache dedup'ed)" 920 | } 921 | 922 | http_basic_auth_func_info_message() 923 | { 924 | echo "********************************************************************************" 925 | echo "* *" 926 | echo "* Note that new functionality to allow authentication without the need for *" 927 | echo "* certificates is available with this version of the wget script. To enable, *" 928 | echo "* use the \"-H\" option and enter your OpenID and password when prompted: *" 929 | echo "* *" 930 | echo "* $ "$(basename "$0")" -H [options...] *" 931 | echo "* *" 932 | echo "* For a full description of the available options use the help option: *" 933 | echo "* *" 934 | echo "* $ "$(basename "$0")" -h *" 935 | echo "* *" 936 | echo "********************************************************************************" 937 | } 938 | 939 | # 940 | # MAIN 941 | # 942 | 943 | if ((!use_http_sec)) 944 | then 945 | http_basic_auth_func_info_message 946 | fi 947 | 948 | echo "Running $(basename $0) version: $version" 949 | ((verbose)) && echo "we use other tools in here, don't try to use their proposed 'options' directly" 950 | echo "Use $(basename $0) -h for help."$'\n' 951 | 952 | ((debug)) && cat< 1)) || (("$#" == 1)) ) 1007 | then 1008 | openid_c=$1 1009 | else 1010 | read -p "Enter your openid : " openid_c 1011 | fi 1012 | 1013 | 1014 | #Read username. 1015 | if [[ ! -z "$username_supplied" ]] 1016 | then 1017 | username_c="$username_supplied" 1018 | elif (("$#" == 2)) 1019 | then 1020 | username_c=$2 1021 | elif [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] 1022 | then 1023 | read -p "Enter username : " username_c 1024 | fi 1025 | 1026 | #Read password. 1027 | read -s -p "Enter password : " password_c 1028 | echo -e "\n" 1029 | 1030 | fi #use cookies 1031 | 1032 | fi #use_http_sec 1033 | 1034 | 1035 | #do we have old results? Create the file if not 1036 | [ ! -f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE 1037 | 1038 | #clean the force parameter if here (at this point we already have the certificate) 1039 | unset force 1040 | 1041 | download 1042 | 1043 | dedup_cache_ 1044 | 1045 | 1046 | echo "done" 1047 | -------------------------------------------------------------------------------- /step_01_extract_deltas/CFday_wget_scripts/wget_CFday_ssp585_va.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ############################################################################## 3 | # ESG Federation download script 4 | # 5 | # Template version: 1.2 6 | # Generated by esgf-data.dkrz.de - 2022/02/21 20:59:38 7 | # Search URL: https://esgf-data.dkrz.de/esg-search/wget/?distrib=false&dataset_id=CMIP6.ScenarioMIP.DKRZ.MPI-ESM1-2-HR.ssp585.r1i1p1f1.CFday.va.gn.v20190710|esgf3.dkrz.de 8 | # 9 | ############################################################################### 10 | # first be sure it's bash... anything out of bash or sh will break 11 | # and the test will assure we are not using sh instead of bash 12 | if [ $BASH ] && [ `basename $BASH` != bash ]; then 13 | echo "######## This is a bash script! ##############" 14 | echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." 15 | echo "Trying to recover automatically..." 16 | sleep 1 17 | /bin/bash $0 $@ 18 | exit $? 
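# Typical invocations (illustrative only; the OpenID below is a placeholder):
#   bash wget_CFday_ssp585_va.sh -n   # dry run: just report what would be downloaded
#   bash wget_CFday_ssp585_va.sh -H   # certificate-free login with OpenID and password
#   bash wget_CFday_ssp585_va.sh -H -o https://esgf-data.dkrz.de/esgf-idp/openid/USERNAME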
19 | fi 20 | 21 | version=1.3.2 22 | CACHE_FILE=.$(basename $0).status 23 | openId= 24 | search_url='https://esgf-data.dkrz.de/esg-search/wget/?distrib=false&dataset_id=CMIP6.ScenarioMIP.DKRZ.MPI-ESM1-2-HR.ssp585.r1i1p1f1.CFday.va.gn.v20190710|esgf3.dkrz.de' 25 | 26 | #These are the embedded files to be downloaded 27 | download_files="$(cat < 10#${ver2[i]})) 99 | then 100 | return 1 101 | fi 102 | if ((10#${ver1[i]} < 10#${ver2[i]})) 103 | then 104 | return 2 105 | fi 106 | done 107 | return 0 108 | } 109 | 110 | check_commands() { 111 | #check wget 112 | local MIN_WGET_VERSION=1.10 113 | vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION 114 | case $? in 115 | 2) #lower 116 | wget -V 117 | echo 118 | echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 119 | exit 1 120 | esac 121 | } 122 | 123 | usage() { 124 | echo "Usage: $(basename $0) [flags] [openid] [username]" 125 | echo "Flags is one of:" 126 | sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 127 | echo 128 | echo "This command stores the states of the downloads in .$0.status" 129 | echo "For more information check the website: http://esgf.org/wiki/ESGF_wget" 130 | } 131 | 132 | #defaults 133 | debug=0 134 | clean_work=1 135 | 136 | #parse flags 137 | while getopts ':c:pfF:o:w:isuUndvqhHI:T' OPT; do 138 | case $OPT in 139 | H) skip_security=1 && use_http_sec=1;; # : Authenticate with OpenID (username,) and password, without the need for a certificate. 140 | T) force_TLSv1=1;; # : Forces wget to use TLSv1. 141 | c) ESG_CREDENTIALS="$OPTARG";; # : use this certificate for authentication. 142 | f) force=1;; # : force certificate retrieval (defaults to only once per day); for certificate-less authentication (see -H option), this flag will force login and refresh cookies. 143 | F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) 144 | o) openId="$OPTARG";; #: Provide OpenID instead of interactively asking for it. 145 | I) username_supplied="$OPTARG";; # : Explicitly set user ID. By default, the user ID is extracted from the last component of the OpenID URL. Use this flag to override this behaviour. 146 | w) output="$OPTARG";; # : Write embedded files into a file and exit 147 | i) insecure=1;; # : set insecure mode, i.e. don't check server certificate 148 | s) skip_security=1 && use_cookies_for_http_basic_auth_start=1;; # : completely skip security. It will only work if the accessed data is not secured at all. -- works only if the accessed data is unsecured or a certificate exists or cookies are saved (latter applies to -H option only). 149 | u) update=1;; # : Issue the search again and see if something has changed. 150 | U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) 151 | n) dry_run=1;; # : Don't download any files, just report. 152 | p) clean_work=0;; # : preserve data that failed checksum 153 | d) verbose=1;debug=1;; # : display debug information 154 | v) verbose=1;; # : be more verbose 155 | q) quiet=1;; # : be less verbose 156 | h) usage && exit 0;; # : displays this help 157 | \?) 
echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; 158 | \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; 159 | esac 160 | done 161 | shift $(($OPTIND - 1)) 162 | 163 | #setup input as desired by the user 164 | if [[ "$input_file" ]]; then 165 | if [[ "$input_file" == '-' ]]; then 166 | download_files="$(cat)" #read from STDIN 167 | exec 0$output 181 | exit 182 | fi 183 | 184 | 185 | #assure we have everything we need 186 | check_commands 187 | 188 | if ((update)); then 189 | echo "Checking the server for changes..." 190 | new_wget="$(wget "$search_url" -qO -)" 191 | compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" 192 | if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then 193 | echo "No changes detected." 194 | else 195 | echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" 196 | counter=0 197 | while [[ -f $0.old.$counter ]]; do ((counter++)); done 198 | mv $0 $0.old.$counter 199 | echo "$new_wget" > $0 200 | fi 201 | exit 0 202 | fi 203 | 204 | 205 | ############################################################################## 206 | check_java() { 207 | if ! type java >& /dev/null; then 208 | echo "Java could not be found." >&2 209 | return 1 210 | fi 211 | if java -version 2>&1|grep openjdk >/dev/null; then 212 | openjdk=1; 213 | else 214 | openjdk=0; 215 | fi 216 | jversion=($(jversion=$(java -version 2>&1 | awk '/version/ {gsub("\"","");print $3}'); echo ${jversion//./ })) 217 | mVer=${jversion[1]} 218 | if [ $openjdk -eq 1 ]; then 219 | mVer=${jversion[0]} 220 | if ((mVer<5)); then 221 | echo "Openjdk detected. Version 9+ is required for retrieving the certificate." >&2 222 | echo "Current version seems older: $(java -version | head -n1) " >&2 223 | return 1 224 | fi 225 | else 226 | 227 | if ((mVer<5)); then 228 | echo "Java version 1.5+ is required for retrieving the certificate." >&2 229 | echo "Current version seems older: $(java -version | head -n1) " >&2 230 | return 1 231 | fi 232 | fi 233 | } 234 | 235 | check_myproxy_logon() { 236 | if ! type myproxy-logon >& /dev/null; then 237 | echo "myproxy-logon could not be found." >&2 238 | return 1 239 | fi 240 | echo "myproxy-logon found" >&2 241 | } 242 | 243 | proxy_to_java() { 244 | local proxy_user proxy_pass proxy_server proxy_port 245 | eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$http_proxy) 246 | local JAVA_PROXY= 247 | [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyHost=$proxy_server" 248 | [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyPort=$proxy_port" 249 | eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$https_proxy) 250 | [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyHost=$proxy_server" 251 | [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyPort=$proxy_port" 252 | 253 | echo "$JAVA_PROXY" 254 | } 255 | 256 | # get certificates from github 257 | get_certificates() { 258 | # don't if this was already done today 259 | [[ -z $force && "$(find $ESG_CERT_DIR -type d -mtime -1 2>/dev/null)" ]] && return 0 260 | echo -n "Retrieving Federation Certificates..." >&2 261 | 262 | if ! 
wget -O $ESG_HOME/esg-truststore.ts --no-check-certificate https://github.com/ESGF/esgf-dist/raw/master/installer/certs/esg-truststore.ts; then 263 | echo "Could not fetch esg-truststore"; 264 | return 1 265 | fi 266 | 267 | if ! wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar -O - -q | tar x -C $ESG_HOME; then 268 | #certificates tarred into esg_trusted_certificates. (if it breaks, let the user know why) 269 | wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar 270 | echo "Couldn't update certs!" >&2 271 | return 1 272 | else 273 | #if we get here everything went fine. Replace the old certs with these ones 274 | [[ -d $ESG_CERT_DIR ]] && rm -r $ESG_CERT_DIR || mkdir -p $(dirname $ESG_CERT_DIR) 275 | mv $ESG_HOME/esg_trusted_certificates $ESG_CERT_DIR 276 | touch $ESG_CERT_DIR 277 | echo "done!" >&2 278 | fi 279 | 280 | } 281 | 282 | # Retrieve ESG credentials 283 | unset pass 284 | get_credentials() { 285 | if check_java 286 | then 287 | use_java=1 288 | else 289 | use_java=0 290 | echo "No suitable java for obtaining certificate - checking for myproxy-logon instead" >&2 291 | check_myproxy_logon || exit 1 292 | fi 293 | #get all certificates 294 | get_certificates 295 | 296 | if [[ -z "$(find $MYPROXY_GETCERT -type f -mtime -1 2>/dev/null)" ]]; then 297 | echo -n "(Downloading $MYPROXY_GETCERT... " 298 | mkdir -p $(dirname $MYPROXY_GETCERT) 299 | if wget -q --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/getcert.jar -O $MYPROXY_GETCERT;then 300 | echo 'done)' 301 | touch $MYPROXY_GETCERT 302 | else 303 | echo 'failed)' 304 | fi 305 | fi 306 | 307 | #if the user already defined one, use it 308 | if [[ -z $openId ]]; then 309 | #try to parse the last valid value if any 310 | [[ -f "$MYPROXY_STATUS" ]] && openId=$(awk -F= '/^OpenID/ {gsub("\\\\", ""); print $2}' $MYPROXY_STATUS) 311 | if [[ -z $openId ]]; then 312 | #no OpenID, we need to ask the user 313 | echo -n "Please give your OpenID (Example: https://myserver/example/username) ? " 314 | else 315 | #Allow the user to change it if desired 316 | echo -n "Please give your OpenID (hit ENTER to accept default: $openId)? " 317 | fi 318 | read -e 319 | [[ "$REPLY" ]] && openId="$REPLY" 320 | else 321 | ((verbose)) && echo "Using user defined OpenID $openId (to change use -o )" 322 | fi 323 | 324 | if grep -q ceda.ac.uk <<<$openId; then 325 | username=${openId##*/} 326 | echo -n "Please give your username if different [$username]: " 327 | read -e 328 | [[ "$REPLY" ]] && username="$REPLY" 329 | fi 330 | 331 | 332 | 333 | if [ $use_java -eq 1 ] 334 | then 335 | local args= 336 | #get password 337 | [[ ! "$pass" ]] && read -sp "MyProxy Password? " pass 338 | [[ "$openId" ]] && args=$args" --oid $openId" 339 | [[ "$pass" ]] && args=$args" -P $pass" 340 | [[ "$username" ]] && args=$args" -l $username" 341 | 342 | echo -n $'\nRetrieving Credentials...' >&2 343 | if ! java $(proxy_to_java) -jar $MYPROXY_GETCERT $args --ca-directory $ESG_CERT_DIR --output $ESG_CREDENTIALS ; then 344 | echo "Certificate could not be retrieved" 345 | exit 1 346 | fi 347 | echo "done!" >&2 348 | else 349 | args=`openid_to_myproxy_args $openId $username` || exit 1 350 | if ! 
myproxy-logon $args -b -o $ESG_CREDENTIALS 351 | then 352 | echo "Certificate could not be retrieved" 353 | exit 1 354 | fi 355 | cp $HOME/.globus/certificates/* $ESG_CERT_DIR/ 356 | fi 357 | } 358 | 359 | openid_to_myproxy_args() { 360 | python - </dev/null; then 394 | #check openssl and certificate 395 | if ! openssl x509 -checkend $CERT_EXPIRATION_WARNING -noout -in $ESG_CERT 2>/dev/null; then 396 | echo "The certificate expires in less than $((CERT_EXPIRATION_WARNING / 60 / 60)) hour(s). Renewing..." 397 | get_credentials 398 | else 399 | #ok, certificate is fine 400 | return 0 401 | fi 402 | fi 403 | } 404 | 405 | # 406 | # Detect ESG credentials 407 | # 408 | find_credentials() { 409 | 410 | #is X509_USER_PROXY or $HOME/.esg/credential.pem 411 | if [[ -f "$ESG_CREDENTIALS" ]]; then 412 | # file found, proceed. 413 | ESG_CERT="$ESG_CREDENTIALS" 414 | ESG_KEY="$ESG_CREDENTIALS" 415 | elif [[ -f "$X509_USER_CERT" && -f "$X509_USER_KEY" ]]; then 416 | # second try, use these certificates. 417 | ESG_CERT="$X509_USER_CERT" 418 | ESG_KEY="$X509_USER_KEY" 419 | else 420 | # If credentials are not present, just point to where they should go 421 | echo "No ESG Credentials found in $ESG_CREDENTIALS" >&2 422 | ESG_CERT="$ESG_CREDENTIALS" 423 | ESG_KEY="$ESG_CREDENTIALS" 424 | #they will be retrieved later one 425 | fi 426 | 427 | 428 | #chek openssl and certificate 429 | if (which openssl &>/dev/null); then 430 | if ( openssl version | grep 'OpenSSL 1\.0' ); then 431 | echo '** WARNING: ESGF Host certificate checking might not be compatible with OpenSSL 1.0+' 432 | fi 433 | check_cert || { (($?==1)); exit 1; } 434 | fi 435 | 436 | if [[ $CHECK_SERVER_CERT == "Yes" ]]; then 437 | [[ -d "$ESG_CERT_DIR" ]] || { echo "CA certs not found. Aborting."; exit 1; } 438 | PKI_WGET_OPTS="--ca-directory=$ESG_CERT_DIR" 439 | fi 440 | 441 | #some wget version complain if there's no file present 442 | [[ -f $COOKIE_JAR ]] || touch $COOKIE_JAR 443 | 444 | PKI_WGET_OPTS="$PKI_WGET_OPTS --certificate=$ESG_CERT --private-key=$ESG_KEY --save-cookies=$COOKIE_JAR --load-cookies=$COOKIE_JAR --ca-certificate=$ESG_CERT" 445 | 446 | } 447 | 448 | check_chksum() { 449 | local file="$1" 450 | local chk_type=$2 451 | local chk_value=$3 452 | local local_chksum=Unknown 453 | 454 | case $chk_type in 455 | md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; 456 | sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; 457 | *) echo "Can't verify checksum." && return 0;; 458 | esac 459 | 460 | #verify 461 | ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 462 | echo $local_chksum 463 | } 464 | 465 | #Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) 466 | md5sum_() { 467 | hash -r 468 | if type md5sum >& /dev/null; then 469 | echo $(md5sum $@) 470 | else 471 | echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') 472 | fi 473 | } 474 | 475 | #Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. 
mac os x) 476 | sha256sum_() { 477 | hash -r 478 | if type sha256sum >& /dev/null; then 479 | echo $(sha256sum $@) 480 | elif type shasum >& /dev/null; then 481 | echo $(shasum -a 256 $@) 482 | else 483 | echo $(sha2 -q -256 $@) 484 | fi 485 | } 486 | 487 | get_mod_time_() { 488 | if ((MACOSX)); then 489 | #on a mac modtime is stat -f %m 490 | echo "$(stat -f %m $@)" 491 | else 492 | #on linux (cygwin) modtime is stat -c %Y 493 | echo "$(stat -c %Y $@)" 494 | fi 495 | return 0; 496 | } 497 | 498 | remove_from_cache() { 499 | local entry="$1" 500 | local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" 501 | echo "$tmp_file" > "$CACHE_FILE" 502 | unset cached 503 | } 504 | 505 | #Download data from node using cookies and not certificates. 506 | download_http_sec() 507 | { 508 | #The data to be downloaded. 509 | data=" $url" 510 | filename="$file" 511 | 512 | #Wget args. 513 | if ((insecure)) 514 | then 515 | wget_args=" --no-check-certificate --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " 516 | else 517 | wget_args=" --ca-directory=$WGET_TRUSTED_CERTIFICATES --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " 518 | fi 519 | 520 | if ((use_cookies_for_http_basic_auth_start)) || ((use_cookies_for_http_basic_auth)) 521 | then 522 | wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" 523 | fi 524 | 525 | if((force_TLSv1)) 526 | then 527 | wget_args=" $wget_args"" --secure-protocol=TLSv1 " 528 | fi 529 | 530 | 531 | if [[ ! -z "$ESGF_WGET_OPTS" ]] 532 | then 533 | wget_args="$wget_args $ESGF_WGET_OPTS" 534 | fi 535 | 536 | 537 | #use cookies for the next downloads 538 | use_cookies_for_http_basic_auth=1; 539 | 540 | #Debug message. 541 | if ((debug)) 542 | then 543 | echo -e "\nExecuting:\n" 544 | echo -e "wget $wget_args $data\n" 545 | fi 546 | 547 | 548 | #Try to download the data. 549 | command="wget $wget_args -O $filename $data" 550 | http_resp=$(eval $command 2>&1) 551 | cmd_exit_status="$?" 552 | 553 | if ((debug)) 554 | then 555 | echo -e "\nHTTP response:\n $http_resp\n" 556 | fi 557 | 558 | #Extract orp service from url ? 559 | #Evaluate response. 560 | #redirects=$(echo "$http_resp" | egrep -c ' 302 ') 561 | #(( "$redirects" == 1 )) && 562 | if echo "$http_resp" | grep -q "/esg-orp/" 563 | then 564 | urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) 565 | orp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) 566 | 567 | 568 | #Use cookies for transaction with orp. 569 | wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" 570 | 571 | #Download data using either http basic auth or http login form. 572 | if [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] 573 | then 574 | download_http_sec_open_id 575 | else 576 | download_http_sec_decide_service 577 | fi 578 | else 579 | if echo "$http_resp" | grep -q "401 Unauthorized" \ 580 | || echo "$http_resp" | grep -q "403: Forbidden" \ 581 | || echo "$http_resp" | grep -q "Connection timed out." \ 582 | || echo "$http_resp" | grep -q "no-check-certificate" \ 583 | || (( $cmd_exit_status != 0 )) 584 | then 585 | echo "ERROR : http request to OpenID Relying Party service failed." 586 | failed=1 587 | fi 588 | fi 589 | } 590 | 591 | 592 | #Function that decides which implementaion of idp to use. 
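# (Context, summarizing download_http_sec above: cookies are saved to
# $COOKIES_FOLDER/wcookies.txt and use_cookies_for_http_basic_auth is set to 1
# after the first request, so the remaining files in the embedded list reuse
# the established session instead of re-authenticating.)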
593 | download_http_sec_decide_service() 594 | { 595 | #find claimed id 596 | 597 | pos=$(echo "$openid_c" | egrep -o '/' | wc -l) 598 | username_c=$(echo "$openid_c" | cut -d'/' -f "$(($pos + 1))") 599 | esgf_uri=$(echo "$openid_c" | egrep -o '/esgf-idp/openid/') 600 | 601 | host=$(echo "$openid_c" | cut -d'/' -f 3) 602 | #test ceda first. 603 | 604 | if [[ -z "$esgf_uri" ]] 605 | then 606 | openid_c_tmp="https://""$host""/openid/" 607 | else 608 | openid_c_tmp="https://""$host""/esgf-idp/openid/" 609 | fi 610 | 611 | command="wget "$openid_c_tmp" --no-check-certificate ${force_TLSv1:+--secure-protocol=TLSv1} -O-" 612 | 613 | if [[ ! -z "$ESGF_WGET_OPTS" ]] 614 | then 615 | command="$command $ESGF_WGET_OPTS" 616 | fi 617 | 618 | #Debug message. 619 | if ((debug)) 620 | then 621 | echo -e "\nExecuting:\n" 622 | echo -e "$command\n" 623 | fi 624 | 625 | 626 | #Execution of command. 627 | http_resp=$(eval $command 2>&1) 628 | cmd_exit_status="$?" 629 | 630 | 631 | if ((debug)) 632 | then 633 | echo -e "\nHTTP response:\n $http_resp\n" 634 | fi 635 | 636 | 637 | if echo "$http_resp" | grep -q "[application/xrds+xml]" \ 638 | && echo "$http_resp" | grep -q "200 OK" \ 639 | && (( cmd_exit_status == 0 )) 640 | then 641 | openid_c=$openid_c_tmp 642 | download_http_sec_open_id 643 | else 644 | if [[ -z "$esgf_uri" ]] 645 | then 646 | echo "ERROR : HTTP request to OpenID Relying Party service failed." 647 | failed=1 648 | else 649 | download_http_sec_cl_id 650 | fi 651 | fi 652 | } 653 | 654 | 655 | download_http_sec_retry() 656 | { 657 | echo -e "\nRetrying....\n" 658 | #Retry in case that last redirect did not work, this happens with older version of wget. 659 | command="wget $wget_args $data" 660 | 661 | #Debug message. 662 | if ((debug)) 663 | then 664 | echo -e "Executing:\n" 665 | echo -e "$command\n" 666 | fi 667 | 668 | http_resp=$(eval $command 2>&1) 669 | cmd_exit_status="$?" 670 | 671 | if ((debug)) 672 | then 673 | echo -e "\nHTTP response:\n $http_resp\n" 674 | fi 675 | 676 | if echo "$http_resp" | grep -q "401 Unauthorized" \ 677 | || echo "$http_resp" | grep -q "403: Forbidden" \ 678 | || echo "$http_resp" | grep -q "Connection timed out." \ 679 | || echo "$http_resp" | grep -q "no-check-certificate" \ 680 | || (( $cmd_exit_status != 0 )) 681 | then 682 | echo -e "\nERROR : Retry failed.\n" 683 | #rm "$filename" 684 | failed=1 685 | fi #if retry failed. 686 | } 687 | 688 | #Function for downloading data using the claimed id. 689 | download_http_sec_cl_id() 690 | { 691 | #Http request for sending openid to the orp service. 692 | command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" $wget_args -O- https://$orp_service/esg-orp/j_spring_openid_security_check.htm " 693 | 694 | #Debug message. 695 | if ((debug)) 696 | then 697 | echo -e "Executing:\n" 698 | echo -e "wget $command\n" 699 | fi 700 | 701 | 702 | #Execution of command. 703 | http_resp=$(eval $command 2>&1) 704 | cmd_exit_status="$?" 705 | 706 | 707 | if ((debug)) 708 | then 709 | echo -e "\nHTTP response:\n $http_resp\n" 710 | fi 711 | 712 | 713 | #Extract orp service from openid ? 714 | #Evaluate response.If redirected to idp service send the credentials. 
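# (Context: if the final redirect of this exchange is not followed -- seen with
# older wget releases -- download_http_sec_retry above simply re-issues the
# plain download, relying on the session cookies that were just stored.)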
715 | #redirects=$(echo "$http_resp" | egrep -c ' 302 ') 716 | #(( redirects == 2 )) && 717 | if echo "$http_resp" | grep -q "login.htm" && (( cmd_exit_status == 0 )) 718 | then 719 | 720 | urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) 721 | idp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) 722 | 723 | command="wget --post-data password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$idp_service/esgf-idp/idp/login.htm" 724 | 725 | 726 | #Debug message. 727 | if ((debug)) 728 | then 729 | echo -e "Executing:\n" 730 | echo -e "wget $command\n" 731 | fi 732 | 733 | #Execution of command. 734 | http_resp=$(eval $command 2>&1) 735 | cmd_exit_status="$?" 736 | 737 | if ((debug)) 738 | then 739 | echo -e "\nHTTP response:\n $http_resp\n" 740 | fi 741 | 742 | #Evaluate response. 743 | #redirects=$(echo "$http_resp" | egrep -c ' 302 ') 744 | #(( "$redirects" != 5 )) \ 745 | if echo "$http_resp" | grep -q "text/html" \ 746 | || echo "$http_resp" | grep -q "403: Forbidden" \ 747 | || (( cmd_exit_status != 0 )) 748 | then 749 | rm "$filename" 750 | download_http_sec_retry 751 | fi 752 | 753 | else 754 | echo "ERROR : HTTP request to OpenID Provider service failed." 755 | failed=1 756 | fi #if redirected to idp. 757 | } 758 | 759 | 760 | 761 | download_http_sec_open_id() 762 | { 763 | #Http request for sending openid to the orp web service. 764 | command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" --header=\"esgf-idea-agent-type:basic_auth\" --http-user=\"$username_c\" --http-password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$orp_service/esg-orp/j_spring_openid_security_check.htm " 765 | 766 | 767 | #Debug message. 768 | if ((debug)) 769 | then 770 | echo -e "Executing:\n" 771 | echo -e "$command\n" 772 | fi 773 | 774 | #Execution of command. 775 | http_resp=$(eval $command 2>&1) 776 | cmd_exit_status="$?" 777 | 778 | 779 | if ((debug)) 780 | then 781 | echo -e "\nHTTP response:\n $http_resp\n" 782 | fi 783 | 784 | #Evaluate response. 785 | #redirects=$(echo "$http_resp" | egrep -c ' 302 ') 786 | #(( "$redirects" != 7 )) || 787 | if echo "$http_resp" | grep -q "text/html" || (( $cmd_exit_status != 0 )) 788 | then 789 | rm "$filename" 790 | download_http_sec_retry 791 | fi #if error during http basic authentication. 792 | 793 | } 794 | 795 | 796 | download() { 797 | wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v} -c ${force_TLSv1:+--secure-protocol=TLSv1} $PKI_WGET_OPTS" 798 | 799 | while read line 800 | do 801 | # read csv here document into proper variables 802 | eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) 803 | 804 | #Process the file 805 | echo -n "$file ..." 806 | 807 | #get the cached entry if any. 808 | cached="$(grep -e "^$file" "$CACHE_FILE")" 809 | 810 | #if we have the cache entry but no file, clean it. 811 | if [[ ! 
-f $file && "$cached" ]]; then 812 | #the file was removed, clean the cache 813 | remove_from_cache "$file" 814 | unset cached 815 | fi 816 | 817 | #check it wasn't modified 818 | if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then 819 | if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then 820 | echo "Already downloaded and verified" 821 | continue 822 | elif ((update_files)); then 823 | #user wants to overwrite newer files 824 | rm $file 825 | remove_from_cache "$file" 826 | unset cached 827 | else 828 | #file on server is different from what we have. 829 | echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" 830 | continue 831 | fi 832 | fi 833 | unset chksum_err_value chksum_err_count 834 | 835 | while : ; do 836 | # (if we had the file size, we could check before trying to complete) 837 | echo "Downloading" 838 | [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" 839 | if ((dry_run)); then 840 | #all important info was already displayed, if in dry_run mode just abort 841 | #No status will be stored 842 | break 843 | else 844 | if ((use_http_sec)) 845 | then 846 | download_http_sec 847 | if ((failed)) 848 | then 849 | break 850 | fi 851 | else 852 | $wget -O "$file" $url || { failed=1; break; } 853 | fi 854 | fi 855 | 856 | #check if file is there 857 | if [[ -f $file ]]; then 858 | ((debug)) && echo file found 859 | if [[ ! "$chksum" ]]; then 860 | echo "Checksum not provided, can't verify file integrity" 861 | break 862 | fi 863 | result_chksum=$(check_chksum "$file" $chksum_type $chksum) 864 | if [[ "$result_chksum" != "$chksum" ]]; then 865 | echo " $chksum_type failed!" 866 | if ((clean_work)); then 867 | if ! ((chksum_err_count)); then 868 | chksum_err_value=$result_chksum 869 | chksum_err_count=2 870 | elif ((chksum_err_count--)); then 871 | if [[ "$result_chksum" != "$chksum_err_value" ]]; then 872 | #this is a real transmission problem 873 | chksum_err_value=$result_chksum 874 | chksum_err_count=2 875 | fi 876 | else 877 | #ok, if we get here we keep getting the same "different" checksum 878 | echo "The file always returns a different checksum!" 879 | echo "Contact the data owner to verify what is happening." 880 | echo 881 | sleep 1 882 | break 883 | fi 884 | 885 | rm $file 886 | #try again 887 | echo -n " re-trying..." 888 | continue 889 | else 890 | echo " don't use -p or remove manually." 891 | fi 892 | else 893 | echo " $chksum_type ok. done!" 894 | echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE 895 | fi 896 | fi 897 | #done! 898 | break 899 | done 900 | 901 | if ((failed)); then 902 | echo "download failed" 903 | # most common failure is certificate expiration, so check this 904 | #if we have the password we can retrigger the download 905 | ((!skip_security)) && [[ "$pass" ]] && check_cert 906 | unset failed 907 | fi 908 | 909 | done <<<"$download_files" 910 | 911 | } 912 | 913 | dedup_cache_() { 914 | local file=${1:-${CACHE_FILE}} 915 | ((debug)) && echo "dedup'ing cache ${file} ..." 
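# Cache format reminder ("filename mtime checksum" per line; values below are
# hypothetical):
#   somefile.nc 1645455600 ab12cd...ef
# download() appends one such line after every verified file; this function
# prunes entries superseded by a newer download of the same file.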
916 | local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) 917 | ((DEBUG)) && echo "$tmp" 918 | echo "$tmp" > $file 919 | ((debug)) && echo "(cache dedup'ed)" 920 | } 921 | 922 | http_basic_auth_func_info_message() 923 | { 924 | echo "********************************************************************************" 925 | echo "* *" 926 | echo "* Note that new functionality to allow authentication without the need for *" 927 | echo "* certificates is available with this version of the wget script. To enable, *" 928 | echo "* use the \"-H\" option and enter your OpenID and password when prompted: *" 929 | echo "* *" 930 | echo "* $ "$(basename "$0")" -H [options...] *" 931 | echo "* *" 932 | echo "* For a full description of the available options use the help option: *" 933 | echo "* *" 934 | echo "* $ "$(basename "$0")" -h *" 935 | echo "* *" 936 | echo "********************************************************************************" 937 | } 938 | 939 | # 940 | # MAIN 941 | # 942 | 943 | if ((!use_http_sec)) 944 | then 945 | http_basic_auth_func_info_message 946 | fi 947 | 948 | echo "Running $(basename $0) version: $version" 949 | ((verbose)) && echo "we use other tools in here, don't try to use their proposed 'options' directly" 950 | echo "Use $(basename $0) -h for help."$'\n' 951 | 952 | ((debug)) && cat< 1)) || (("$#" == 1)) ) 1007 | then 1008 | openid_c=$1 1009 | else 1010 | read -p "Enter your openid : " openid_c 1011 | fi 1012 | 1013 | 1014 | #Read username. 1015 | if [[ ! -z "$username_supplied" ]] 1016 | then 1017 | username_c="$username_supplied" 1018 | elif (("$#" == 2)) 1019 | then 1020 | username_c=$2 1021 | elif [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] 1022 | then 1023 | read -p "Enter username : " username_c 1024 | fi 1025 | 1026 | #Read password. 1027 | read -s -p "Enter password : " password_c 1028 | echo -e "\n" 1029 | 1030 | fi #use cookies 1031 | 1032 | fi #use_http_sec 1033 | 1034 | 1035 | #do we have old results? Create the file if not 1036 | [ ! -f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE 1037 | 1038 | #clean the force parameter if here (at this point we already have the certificate) 1039 | unset force 1040 | 1041 | download 1042 | 1043 | dedup_cache_ 1044 | 1045 | 1046 | echo "done" 1047 | -------------------------------------------------------------------------------- /step_01_extract_deltas/CFday_wget_scripts/wget_CFday_ssp585_hur.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ############################################################################## 3 | # ESG Federation download script 4 | # 5 | # Template version: 1.2 6 | # Generated by esgf-data.dkrz.de - 2022/02/21 21:00:30 7 | # Search URL: https://esgf-data.dkrz.de/esg-search/wget/?distrib=false&dataset_id=CMIP6.ScenarioMIP.DKRZ.MPI-ESM1-2-HR.ssp585.r1i1p1f1.CFday.hur.gn.v20190710|esgf3.dkrz.de 8 | # 9 | ############################################################################### 10 | # first be sure it's bash... anything out of bash or sh will break 11 | # and the test will assure we are not using sh instead of bash 12 | if [ $BASH ] && [ `basename $BASH` != bash ]; then 13 | echo "######## This is a bash script! ##############" 14 | echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." 15 | echo "Trying to recover automatically..." 16 | sleep 1 17 | /bin/bash $0 $@ 18 | exit $? 
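# The embedded file list can also be exported, edited, and fed back in
# (illustrative; file name is a placeholder):
#   bash wget_CFday_ssp585_hur.sh -w hur_files.txt    # write the file list and exit
#   bash wget_CFday_ssp585_hur.sh -H -F hur_files.txt # download the edited list
# (use "-F -" to read the list from stdin instead)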
19 | fi 20 | 21 | version=1.3.2 22 | CACHE_FILE=.$(basename $0).status 23 | openId= 24 | search_url='https://esgf-data.dkrz.de/esg-search/wget/?distrib=false&dataset_id=CMIP6.ScenarioMIP.DKRZ.MPI-ESM1-2-HR.ssp585.r1i1p1f1.CFday.hur.gn.v20190710|esgf3.dkrz.de' 25 | 26 | #These are the embedded files to be downloaded 27 | download_files="$(cat < 10#${ver2[i]})) 99 | then 100 | return 1 101 | fi 102 | if ((10#${ver1[i]} < 10#${ver2[i]})) 103 | then 104 | return 2 105 | fi 106 | done 107 | return 0 108 | } 109 | 110 | check_commands() { 111 | #check wget 112 | local MIN_WGET_VERSION=1.10 113 | vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION 114 | case $? in 115 | 2) #lower 116 | wget -V 117 | echo 118 | echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 119 | exit 1 120 | esac 121 | } 122 | 123 | usage() { 124 | echo "Usage: $(basename $0) [flags] [openid] [username]" 125 | echo "Flags is one of:" 126 | sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 127 | echo 128 | echo "This command stores the states of the downloads in .$0.status" 129 | echo "For more information check the website: http://esgf.org/wiki/ESGF_wget" 130 | } 131 | 132 | #defaults 133 | debug=0 134 | clean_work=1 135 | 136 | #parse flags 137 | while getopts ':c:pfF:o:w:isuUndvqhHI:T' OPT; do 138 | case $OPT in 139 | H) skip_security=1 && use_http_sec=1;; # : Authenticate with OpenID (username,) and password, without the need for a certificate. 140 | T) force_TLSv1=1;; # : Forces wget to use TLSv1. 141 | c) ESG_CREDENTIALS="$OPTARG";; # : use this certificate for authentication. 142 | f) force=1;; # : force certificate retrieval (defaults to only once per day); for certificate-less authentication (see -H option), this flag will force login and refresh cookies. 143 | F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) 144 | o) openId="$OPTARG";; #: Provide OpenID instead of interactively asking for it. 145 | I) username_supplied="$OPTARG";; # : Explicitly set user ID. By default, the user ID is extracted from the last component of the OpenID URL. Use this flag to override this behaviour. 146 | w) output="$OPTARG";; # : Write embedded files into a file and exit 147 | i) insecure=1;; # : set insecure mode, i.e. don't check server certificate 148 | s) skip_security=1 && use_cookies_for_http_basic_auth_start=1;; # : completely skip security. It will only work if the accessed data is not secured at all. -- works only if the accessed data is unsecured or a certificate exists or cookies are saved (latter applies to -H option only). 149 | u) update=1;; # : Issue the search again and see if something has changed. 150 | U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) 151 | n) dry_run=1;; # : Don't download any files, just report. 152 | p) clean_work=0;; # : preserve data that failed checksum 153 | d) verbose=1;debug=1;; # : display debug information 154 | v) verbose=1;; # : be more verbose 155 | q) quiet=1;; # : be less verbose 156 | h) usage && exit 0;; # : displays this help 157 | \?) 
echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; 158 | \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; 159 | esac 160 | done 161 | shift $(($OPTIND - 1)) 162 | 163 | #setup input as desired by the user 164 | if [[ "$input_file" ]]; then 165 | if [[ "$input_file" == '-' ]]; then 166 | download_files="$(cat)" #read from STDIN 167 | exec 0$output 181 | exit 182 | fi 183 | 184 | 185 | #assure we have everything we need 186 | check_commands 187 | 188 | if ((update)); then 189 | echo "Checking the server for changes..." 190 | new_wget="$(wget "$search_url" -qO -)" 191 | compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" 192 | if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then 193 | echo "No changes detected." 194 | else 195 | echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" 196 | counter=0 197 | while [[ -f $0.old.$counter ]]; do ((counter++)); done 198 | mv $0 $0.old.$counter 199 | echo "$new_wget" > $0 200 | fi 201 | exit 0 202 | fi 203 | 204 | 205 | ############################################################################## 206 | check_java() { 207 | if ! type java >& /dev/null; then 208 | echo "Java could not be found." >&2 209 | return 1 210 | fi 211 | if java -version 2>&1|grep openjdk >/dev/null; then 212 | openjdk=1; 213 | else 214 | openjdk=0; 215 | fi 216 | jversion=($(jversion=$(java -version 2>&1 | awk '/version/ {gsub("\"","");print $3}'); echo ${jversion//./ })) 217 | mVer=${jversion[1]} 218 | if [ $openjdk -eq 1 ]; then 219 | mVer=${jversion[0]} 220 | if ((mVer<5)); then 221 | echo "Openjdk detected. Version 9+ is required for retrieving the certificate." >&2 222 | echo "Current version seems older: $(java -version | head -n1) " >&2 223 | return 1 224 | fi 225 | else 226 | 227 | if ((mVer<5)); then 228 | echo "Java version 1.5+ is required for retrieving the certificate." >&2 229 | echo "Current version seems older: $(java -version | head -n1) " >&2 230 | return 1 231 | fi 232 | fi 233 | } 234 | 235 | check_myproxy_logon() { 236 | if ! type myproxy-logon >& /dev/null; then 237 | echo "myproxy-logon could not be found." >&2 238 | return 1 239 | fi 240 | echo "myproxy-logon found" >&2 241 | } 242 | 243 | proxy_to_java() { 244 | local proxy_user proxy_pass proxy_server proxy_port 245 | eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$http_proxy) 246 | local JAVA_PROXY= 247 | [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyHost=$proxy_server" 248 | [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyPort=$proxy_port" 249 | eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$https_proxy) 250 | [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyHost=$proxy_server" 251 | [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyPort=$proxy_port" 252 | 253 | echo "$JAVA_PROXY" 254 | } 255 | 256 | # get certificates from github 257 | get_certificates() { 258 | # don't if this was already done today 259 | [[ -z $force && "$(find $ESG_CERT_DIR -type d -mtime -1 2>/dev/null)" ]] && return 0 260 | echo -n "Retrieving Federation Certificates..." >&2 261 | 262 | if ! 
wget -O $ESG_HOME/esg-truststore.ts --no-check-certificate https://github.com/ESGF/esgf-dist/raw/master/installer/certs/esg-truststore.ts; then 263 | echo "Could not fetch esg-truststore"; 264 | return 1 265 | fi 266 | 267 | if ! wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar -O - -q | tar x -C $ESG_HOME; then 268 | #certificates tarred into esg_trusted_certificates. (if it breaks, let the user know why) 269 | wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar 270 | echo "Couldn't update certs!" >&2 271 | return 1 272 | else 273 | #if we get here everything went fine. Replace the old certs with these ones 274 | [[ -d $ESG_CERT_DIR ]] && rm -r $ESG_CERT_DIR || mkdir -p $(dirname $ESG_CERT_DIR) 275 | mv $ESG_HOME/esg_trusted_certificates $ESG_CERT_DIR 276 | touch $ESG_CERT_DIR 277 | echo "done!" >&2 278 | fi 279 | 280 | } 281 | 282 | # Retrieve ESG credentials 283 | unset pass 284 | get_credentials() { 285 | if check_java 286 | then 287 | use_java=1 288 | else 289 | use_java=0 290 | echo "No suitable java for obtaining certificate - checking for myproxy-logon instead" >&2 291 | check_myproxy_logon || exit 1 292 | fi 293 | #get all certificates 294 | get_certificates 295 | 296 | if [[ -z "$(find $MYPROXY_GETCERT -type f -mtime -1 2>/dev/null)" ]]; then 297 | echo -n "(Downloading $MYPROXY_GETCERT... " 298 | mkdir -p $(dirname $MYPROXY_GETCERT) 299 | if wget -q --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/getcert.jar -O $MYPROXY_GETCERT;then 300 | echo 'done)' 301 | touch $MYPROXY_GETCERT 302 | else 303 | echo 'failed)' 304 | fi 305 | fi 306 | 307 | #if the user already defined one, use it 308 | if [[ -z $openId ]]; then 309 | #try to parse the last valid value if any 310 | [[ -f "$MYPROXY_STATUS" ]] && openId=$(awk -F= '/^OpenID/ {gsub("\\\\", ""); print $2}' $MYPROXY_STATUS) 311 | if [[ -z $openId ]]; then 312 | #no OpenID, we need to ask the user 313 | echo -n "Please give your OpenID (Example: https://myserver/example/username) ? " 314 | else 315 | #Allow the user to change it if desired 316 | echo -n "Please give your OpenID (hit ENTER to accept default: $openId)? " 317 | fi 318 | read -e 319 | [[ "$REPLY" ]] && openId="$REPLY" 320 | else 321 | ((verbose)) && echo "Using user defined OpenID $openId (to change use -o )" 322 | fi 323 | 324 | if grep -q ceda.ac.uk <<<$openId; then 325 | username=${openId##*/} 326 | echo -n "Please give your username if different [$username]: " 327 | read -e 328 | [[ "$REPLY" ]] && username="$REPLY" 329 | fi 330 | 331 | 332 | 333 | if [ $use_java -eq 1 ] 334 | then 335 | local args= 336 | #get password 337 | [[ ! "$pass" ]] && read -sp "MyProxy Password? " pass 338 | [[ "$openId" ]] && args=$args" --oid $openId" 339 | [[ "$pass" ]] && args=$args" -P $pass" 340 | [[ "$username" ]] && args=$args" -l $username" 341 | 342 | echo -n $'\nRetrieving Credentials...' >&2 343 | if ! java $(proxy_to_java) -jar $MYPROXY_GETCERT $args --ca-directory $ESG_CERT_DIR --output $ESG_CREDENTIALS ; then 344 | echo "Certificate could not be retrieved" 345 | exit 1 346 | fi 347 | echo "done!" >&2 348 | else 349 | args=`openid_to_myproxy_args $openId $username` || exit 1 350 | if ! 
350 |         if ! myproxy-logon $args -b -o $ESG_CREDENTIALS
351 |         then
352 |             echo "Certificate could not be retrieved"
353 |             exit 1
354 |         fi
355 |         cp $HOME/.globus/certificates/* $ESG_CERT_DIR/
356 |     fi
357 | }
358 | 
359 | openid_to_myproxy_args() {
360 |     python - <<EOF
392 | check_cert() {
393 |     if type openssl >&/dev/null; then
394 |         #check openssl and certificate
395 |         if ! openssl x509 -checkend $CERT_EXPIRATION_WARNING -noout -in $ESG_CERT 2>/dev/null; then
396 |             echo "The certificate expires in less than $((CERT_EXPIRATION_WARNING / 60 / 60)) hour(s). Renewing..."
397 |             get_credentials
398 |         else
399 |             #ok, certificate is fine
400 |             return 0
401 |         fi
402 |     fi
403 | }
404 | 
405 | #
406 | # Detect ESG credentials
407 | #
408 | find_credentials() {
409 | 
410 |     #is X509_USER_PROXY or $HOME/.esg/credential.pem
411 |     if [[ -f "$ESG_CREDENTIALS" ]]; then
412 |         # file found, proceed.
413 |         ESG_CERT="$ESG_CREDENTIALS"
414 |         ESG_KEY="$ESG_CREDENTIALS"
415 |     elif [[ -f "$X509_USER_CERT" && -f "$X509_USER_KEY" ]]; then
416 |         # second try, use these certificates.
417 |         ESG_CERT="$X509_USER_CERT"
418 |         ESG_KEY="$X509_USER_KEY"
419 |     else
420 |         # If credentials are not present, just point to where they should go
421 |         echo "No ESG Credentials found in $ESG_CREDENTIALS" >&2
422 |         ESG_CERT="$ESG_CREDENTIALS"
423 |         ESG_KEY="$ESG_CREDENTIALS"
424 |         #they will be retrieved later on
425 |     fi
426 | 
427 | 
428 |     #check openssl and certificate
429 |     if (which openssl &>/dev/null); then
430 |         if ( openssl version | grep 'OpenSSL 1\.0' ); then
431 |             echo '** WARNING: ESGF Host certificate checking might not be compatible with OpenSSL 1.0+'
432 |         fi
433 |         check_cert || { (($?==1)); exit 1; }
434 |     fi
435 | 
436 |     if [[ $CHECK_SERVER_CERT == "Yes" ]]; then
437 |         [[ -d "$ESG_CERT_DIR" ]] || { echo "CA certs not found. Aborting."; exit 1; }
438 |         PKI_WGET_OPTS="--ca-directory=$ESG_CERT_DIR"
439 |     fi
440 | 
441 |     #some wget versions complain if there's no file present
442 |     [[ -f $COOKIE_JAR ]] || touch $COOKIE_JAR
443 | 
444 |     PKI_WGET_OPTS="$PKI_WGET_OPTS --certificate=$ESG_CERT --private-key=$ESG_KEY --save-cookies=$COOKIE_JAR --load-cookies=$COOKIE_JAR --ca-certificate=$ESG_CERT"
445 | 
446 | }
447 | 
448 | check_chksum() {
449 |     local file="$1"
450 |     local chk_type=$2
451 |     local chk_value=$3
452 |     local local_chksum=Unknown
453 | 
454 |     case $chk_type in
455 |         md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");;
456 |         sha256) local_chksum=$(sha256sum_ $file | awk '{print $1}' | cut -d ' ' -f1);;
457 |         *) echo "Can't verify checksum." && return 0;;
458 |     esac
459 | 
460 |     #verify
461 |     ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2
462 |     echo $local_chksum
463 | }
464 | 
465 | #Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (e.g. Mac OS X)
466 | md5sum_() {
467 |     hash -r
468 |     if type md5sum >& /dev/null; then
469 |         echo $(md5sum $@)
470 |     else
471 |         echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p')
472 |     fi
473 | }
474 | 
475 | #Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (e.g. Mac OS X)
476 | sha256sum_() {
477 |     hash -r
478 |     if type sha256sum >& /dev/null; then
479 |         echo $(sha256sum $@)
480 |     elif type shasum >& /dev/null; then
481 |         echo $(shasum -a 256 $@)
482 |     else
483 |         echo $(sha2 -q -256 $@)
484 |     fi
485 | }
486 | 
487 | get_mod_time_() {
488 |     if ((MACOSX)); then
489 |         #on a Mac, modtime is stat -f %m
490 |         echo "$(stat -f %m $@)"
491 |     else
492 |         #on Linux (and Cygwin), modtime is stat -c %Y
493 |         echo "$(stat -c %Y $@)"
494 |     fi
495 |     return 0;
496 | }
497 | 
498 | remove_from_cache() {
499 |     local entry="$1"
500 |     local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")"
501 |     echo "$tmp_file" > "$CACHE_FILE"
502 |     unset cached
503 | }
504 | 
505 | #Download data from the node using cookies rather than certificates.
506 | download_http_sec()
507 | {
508 |     #The data to be downloaded.
509 |     data=" $url"
510 |     filename="$file"
511 | 
512 |     #Wget args.
513 |     if ((insecure))
514 |     then
515 |         wget_args=" --no-check-certificate --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt "
516 |     else
517 |         wget_args=" --ca-directory=$WGET_TRUSTED_CERTIFICATES --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt "
518 |     fi
519 | 
520 |     if ((use_cookies_for_http_basic_auth_start)) || ((use_cookies_for_http_basic_auth))
521 |     then
522 |         wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt"
523 |     fi
524 | 
525 |     if ((force_TLSv1))
526 |     then
527 |         wget_args=" $wget_args"" --secure-protocol=TLSv1 "
528 |     fi
529 | 
530 | 
531 |     if [[ ! -z "$ESGF_WGET_OPTS" ]]
532 |     then
533 |         wget_args="$wget_args $ESGF_WGET_OPTS"
534 |     fi
535 | 
536 | 
537 |     #use cookies for the next downloads
538 |     use_cookies_for_http_basic_auth=1;
539 | 
540 |     #Debug message.
541 |     if ((debug))
542 |     then
543 |         echo -e "\nExecuting:\n"
544 |         echo -e "wget $wget_args $data\n"
545 |     fi
546 | 
547 | 
548 |     #Try to download the data.
549 |     command="wget $wget_args -O $filename $data"
550 |     http_resp=$(eval $command 2>&1)
551 |     cmd_exit_status="$?"
552 | 
553 |     if ((debug))
554 |     then
555 |         echo -e "\nHTTP response:\n $http_resp\n"
556 |     fi
557 | 
558 |     #Extract the ORP service from the URL?
559 |     #Evaluate response.
560 |     #redirects=$(echo "$http_resp" | egrep -c ' 302 ')
561 |     #(( "$redirects" == 1 )) &&
562 |     if echo "$http_resp" | grep -q "/esg-orp/"
563 |     then
564 |         urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3)
565 |         orp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2)
566 | 
567 | 
568 |         #Use cookies for the transaction with the ORP.
569 |         wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt"
570 | 
571 |         #Download data using either HTTP basic auth or the HTTP login form.
572 |         if [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]]
573 |         then
574 |             download_http_sec_open_id
575 |         else
576 |             download_http_sec_decide_service
577 |         fi
578 |     else
579 |         if echo "$http_resp" | grep -q "401 Unauthorized" \
580 |             || echo "$http_resp" | grep -q "403: Forbidden" \
581 |             || echo "$http_resp" | grep -q "Connection timed out." \
582 |             || echo "$http_resp" | grep -q "no-check-certificate" \
583 |             || (( $cmd_exit_status != 0 ))
584 |         then
585 |             echo "ERROR : HTTP request to OpenID Relying Party service failed."
586 |             failed=1
587 |         fi
588 |     fi
589 | }
590 | 
591 | 
592 | #Function that decides which implementation of IdP to use.
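#It probes the IdP discovery endpoint before committing to a login flow: the
#claimed OpenID is rewritten to "https://<host>/openid/" (CEDA-style) or
#"https://<host>/esgf-idp/openid/" (stock ESGF IdP) and fetched; a "200 OK"
#XRDS answer means HTTP basic auth against the OpenID server is possible,
#otherwise the claimed id itself is used (download_http_sec_cl_id).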
593 | download_http_sec_decide_service()
594 | {
595 |     #find the claimed id
596 | 
597 |     pos=$(echo "$openid_c" | egrep -o '/' | wc -l)
598 |     username_c=$(echo "$openid_c" | cut -d'/' -f "$(($pos + 1))")
599 |     esgf_uri=$(echo "$openid_c" | egrep -o '/esgf-idp/openid/')
600 | 
601 |     host=$(echo "$openid_c" | cut -d'/' -f 3)
602 |     #test ceda first.
603 | 
604 |     if [[ -z "$esgf_uri" ]]
605 |     then
606 |         openid_c_tmp="https://""$host""/openid/"
607 |     else
608 |         openid_c_tmp="https://""$host""/esgf-idp/openid/"
609 |     fi
610 | 
611 |     command="wget "$openid_c_tmp" --no-check-certificate ${force_TLSv1:+--secure-protocol=TLSv1} -O-"
612 | 
613 |     if [[ ! -z "$ESGF_WGET_OPTS" ]]
614 |     then
615 |         command="$command $ESGF_WGET_OPTS"
616 |     fi
617 | 
618 |     #Debug message.
619 |     if ((debug))
620 |     then
621 |         echo -e "\nExecuting:\n"
622 |         echo -e "$command\n"
623 |     fi
624 | 
625 | 
626 |     #Execution of command.
627 |     http_resp=$(eval $command 2>&1)
628 |     cmd_exit_status="$?"
629 | 
630 | 
631 |     if ((debug))
632 |     then
633 |         echo -e "\nHTTP response:\n $http_resp\n"
634 |     fi
635 | 
636 | 
637 |     if echo "$http_resp" | grep -q "[application/xrds+xml]" \
638 |         && echo "$http_resp" | grep -q "200 OK" \
639 |         && (( cmd_exit_status == 0 ))
640 |     then
641 |         openid_c=$openid_c_tmp
642 |         download_http_sec_open_id
643 |     else
644 |         if [[ -z "$esgf_uri" ]]
645 |         then
646 |             echo "ERROR : HTTP request to OpenID Relying Party service failed."
647 |             failed=1
648 |         else
649 |             download_http_sec_cl_id
650 |         fi
651 |     fi
652 | }
653 | 
654 | 
655 | download_http_sec_retry()
656 | {
657 |     echo -e "\nRetrying....\n"
658 |     #Retry in case the last redirect did not work; this happens with older versions of wget.
659 |     command="wget $wget_args $data"
660 | 
661 |     #Debug message.
662 |     if ((debug))
663 |     then
664 |         echo -e "Executing:\n"
665 |         echo -e "$command\n"
666 |     fi
667 | 
668 |     http_resp=$(eval $command 2>&1)
669 |     cmd_exit_status="$?"
670 | 
671 |     if ((debug))
672 |     then
673 |         echo -e "\nHTTP response:\n $http_resp\n"
674 |     fi
675 | 
676 |     if echo "$http_resp" | grep -q "401 Unauthorized" \
677 |         || echo "$http_resp" | grep -q "403: Forbidden" \
678 |         || echo "$http_resp" | grep -q "Connection timed out." \
679 |         || echo "$http_resp" | grep -q "no-check-certificate" \
680 |         || (( $cmd_exit_status != 0 ))
681 |     then
682 |         echo -e "\nERROR : Retry failed.\n"
683 |         #rm "$filename"
684 |         failed=1
685 |     fi #if retry failed.
686 | }
687 | 
688 | #Function for downloading data using the claimed id.
689 | download_http_sec_cl_id()
690 | {
691 |     #HTTP request sending the openid to the ORP service.
692 |     command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" $wget_args -O- https://$orp_service/esg-orp/j_spring_openid_security_check.htm "
693 | 
694 |     #Debug message.
695 |     if ((debug))
696 |     then
697 |         echo -e "Executing:\n"
698 |         echo -e "$command\n"
699 |     fi
700 | 
701 | 
702 |     #Execution of command.
703 |     http_resp=$(eval $command 2>&1)
704 |     cmd_exit_status="$?"
705 | 
706 | 
707 |     if ((debug))
708 |     then
709 |         echo -e "\nHTTP response:\n $http_resp\n"
710 |     fi
711 | 
712 | 
713 |     #Extract the ORP service from the openid?
714 |     #Evaluate response. If redirected to the IdP service, send the credentials.
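    #(the ORP answers a known OpenID with a redirect to the IdP's login.htm;
    # only then is it safe to POST the password there)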
715 |     #redirects=$(echo "$http_resp" | egrep -c ' 302 ')
716 |     #(( redirects == 2 )) &&
717 |     if echo "$http_resp" | grep -q "login.htm" && (( cmd_exit_status == 0 ))
718 |     then
719 | 
720 |         urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3)
721 |         idp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2)
722 | 
723 |         command="wget --post-data password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$idp_service/esgf-idp/idp/login.htm"
724 | 
725 | 
726 |         #Debug message.
727 |         if ((debug))
728 |         then
729 |             echo -e "Executing:\n"
730 |             echo -e "$command\n"
731 |         fi
732 | 
733 |         #Execution of command.
734 |         http_resp=$(eval $command 2>&1)
735 |         cmd_exit_status="$?"
736 | 
737 |         if ((debug))
738 |         then
739 |             echo -e "\nHTTP response:\n $http_resp\n"
740 |         fi
741 | 
742 |         #Evaluate response.
743 |         #redirects=$(echo "$http_resp" | egrep -c ' 302 ')
744 |         #(( "$redirects" != 5 )) \
745 |         if echo "$http_resp" | grep -q "text/html" \
746 |             || echo "$http_resp" | grep -q "403: Forbidden" \
747 |             || (( cmd_exit_status != 0 ))
748 |         then
749 |             rm "$filename"
750 |             download_http_sec_retry
751 |         fi
752 | 
753 |     else
754 |         echo "ERROR : HTTP request to OpenID Provider service failed."
755 |         failed=1
756 |     fi #if redirected to idp.
757 | }
758 | 
759 | 
760 | 
761 | download_http_sec_open_id()
762 | {
763 |     #HTTP request sending the openid to the ORP web service.
764 |     command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" --header=\"esgf-idea-agent-type:basic_auth\" --http-user=\"$username_c\" --http-password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$orp_service/esg-orp/j_spring_openid_security_check.htm "
765 | 
766 | 
767 |     #Debug message.
768 |     if ((debug))
769 |     then
770 |         echo -e "Executing:\n"
771 |         echo -e "$command\n"
772 |     fi
773 | 
774 |     #Execution of command.
775 |     http_resp=$(eval $command 2>&1)
776 |     cmd_exit_status="$?"
777 | 
778 | 
779 |     if ((debug))
780 |     then
781 |         echo -e "\nHTTP response:\n $http_resp\n"
782 |     fi
783 | 
784 |     #Evaluate response.
785 |     #redirects=$(echo "$http_resp" | egrep -c ' 302 ')
786 |     #(( "$redirects" != 7 )) ||
787 |     if echo "$http_resp" | grep -q "text/html" || (( $cmd_exit_status != 0 ))
788 |     then
789 |         rm "$filename"
790 |         download_http_sec_retry
791 |     fi #if error during http basic authentication.
792 | 
793 | }
794 | 
795 | 
796 | download() {
797 |     wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v} -c ${force_TLSv1:+--secure-protocol=TLSv1} $PKI_WGET_OPTS"
798 | 
799 |     while read line
800 |     do
801 |         # read the csv here-document into proper variables
802 |         eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) )
803 | 
804 |         #Process the file
805 |         echo -n "$file ..."
806 | 
807 |         #get the cached entry if any.
808 |         cached="$(grep -e "^$file" "$CACHE_FILE")"
809 | 
810 |         #if we have the cache entry but no file, clean it.
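        #cache entries are single lines of the form "<filename> <mtime> <checksum>"
        #(the order in which they are appended further down), hence fields 2 and 3
        #in the mtime/checksum comparisons that follow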
-f $file && "$cached" ]]; then 812 | #the file was removed, clean the cache 813 | remove_from_cache "$file" 814 | unset cached 815 | fi 816 | 817 | #check it wasn't modified 818 | if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then 819 | if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then 820 | echo "Already downloaded and verified" 821 | continue 822 | elif ((update_files)); then 823 | #user want's to overwrite newer files 824 | rm $file 825 | remove_from_cache "$file" 826 | unset cached 827 | else 828 | #file on server is different from what we have. 829 | echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" 830 | continue 831 | fi 832 | fi 833 | unset chksum_err_value chksum_err_count 834 | 835 | while : ; do 836 | # (if we had the file size, we could check before trying to complete) 837 | echo "Downloading" 838 | [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" 839 | if ((dry_run)); then 840 | #all important info was already displayed, if in dry_run mode just abort 841 | #No status will be stored 842 | break 843 | else 844 | if ((use_http_sec)) 845 | then 846 | download_http_sec 847 | if ((failed)) 848 | then 849 | break 850 | fi 851 | else 852 | $wget -O "$file" $url || { failed=1; break; } 853 | fi 854 | fi 855 | 856 | #check if file is there 857 | if [[ -f $file ]]; then 858 | ((debug)) && echo file found 859 | if [[ ! "$chksum" ]]; then 860 | echo "Checksum not provided, can't verify file integrity" 861 | break 862 | fi 863 | result_chksum=$(check_chksum "$file" $chksum_type $chksum) 864 | if [[ "$result_chksum" != "$chksum" ]]; then 865 | echo " $chksum_type failed!" 866 | if ((clean_work)); then 867 | if !((chksum_err_count)); then 868 | chksum_err_value=$result_chksum 869 | chksum_err_count=2 870 | elif ((checksum_err_count--)); then 871 | if [[ "$result_chksum" != "$chksum_err_value" ]]; then 872 | #this is a real transmission problem 873 | chksum_err_value=$result_chksum 874 | chksum_err_count=2 875 | fi 876 | else 877 | #ok if here we keep getting the same "different" checksum 878 | echo "The file returns always a different checksum!" 879 | echo "Contact the data owner to verify what is happening." 880 | echo 881 | sleep 1 882 | break 883 | fi 884 | 885 | rm $file 886 | #try again 887 | echo -n " re-trying..." 888 | continue 889 | else 890 | echo " don't use -p or remove manually." 891 | fi 892 | else 893 | echo " $chksum_type ok. done!" 894 | echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE 895 | fi 896 | fi 897 | #done! 898 | break 899 | done 900 | 901 | if ((failed)); then 902 | echo "download failed" 903 | # most common failure is certificate expiration, so check this 904 | #if we have the pasword we can retrigger download 905 | ((!skip_security)) && [[ "$pass" ]] && check_cert 906 | unset failed 907 | fi 908 | 909 | done <<<"$download_files" 910 | 911 | } 912 | 913 | dedup_cache_() { 914 | local file=${1:-${CACHE_FILE}} 915 | ((debug)) && echo "dedup'ing cache ${file} ..." 
916 |     local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2)
917 |     ((debug)) && echo "$tmp"
918 |     echo "$tmp" > $file
919 |     ((debug)) && echo "(cache dedup'ed)"
920 | }
921 | 
922 | http_basic_auth_func_info_message()
923 | {
924 |     echo "********************************************************************************"
925 |     echo "*                                                                              *"
926 |     echo "* Note that new functionality to allow authentication without the need for    *"
927 |     echo "* certificates is available with this version of the wget script. To enable,  *"
928 |     echo "* use the \"-H\" option and enter your OpenID and password when prompted:      *"
929 |     echo "*                                                                              *"
930 |     echo "*   $ "$(basename "$0")" -H [options...]                                       *"
931 |     echo "*                                                                              *"
932 |     echo "* For a full description of the available options use the help option:        *"
933 |     echo "*                                                                              *"
934 |     echo "*   $ "$(basename "$0")" -h                                                    *"
935 |     echo "*                                                                              *"
936 |     echo "********************************************************************************"
937 | }
938 | 
939 | #
940 | # MAIN
941 | #
942 | 
943 | if ((!use_http_sec))
944 | then
945 |     http_basic_auth_func_info_message
946 | fi
947 | 
948 | echo "Running $(basename $0) version: $version"
949 | ((verbose)) && echo "we use other tools in here, don't try to use their proposed 'options' directly"
950 | echo "Use $(basename $0) -h for help."$'\n'
951 | 
952 | ((debug)) && cat <<EOF
1006 |     if ( (("$#" > 1)) || (("$#" == 1)) )
1007 |     then
1008 |         openid_c=$1
1009 |     else
1010 |         read -p "Enter your openid : " openid_c
1011 |     fi
1012 | 
1013 | 
1014 |     #Read username.
1015 |     if [[ ! -z "$username_supplied" ]]
1016 |     then
1017 |         username_c="$username_supplied"
1018 |     elif (("$#" == 2))
1019 |     then
1020 |         username_c=$2
1021 |     elif [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]]
1022 |     then
1023 |         read -p "Enter username : " username_c
1024 |     fi
1025 | 
1026 |     #Read password.
1027 |     read -s -p "Enter password : " password_c
1028 |     echo -e "\n"
1029 | 
1030 |     fi #use cookies
1031 | 
1032 | fi #use_http_sec
1033 | 
1034 | 
1035 | #do we have old results? Create the file if not
1036 | [ ! -f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE
1037 | 
1038 | #clean the force parameter if here (at this point we already have the certificate)
1039 | unset force
1040 | 
1041 | download
1042 | 
1043 | dedup_cache_
1044 | 
1045 | 
1046 | echo "done"
1047 | 
--------------------------------------------------------------------------------