├── ANN-CalculateEnsembleStatistics.py
├── ANN-CalculateObsCategories.py
├── ANN-CalculateVerificationMetrics.py
├── ANN-FindTuningParameters.py
├── ANN-GenerateProbabilityForecasts.py
├── CNN-CalculateLargeScalePredictors.py
├── CNN-CalculateVerificationMetrics.py
├── CNN-FindTuningParameters.py
├── CNN-FitConvolutionalNetworkModel.py
├── CNN-GenerateProbabilityForecasts.py
├── CSGD-FitClimatologicalDistributions.py
├── CSGD-GenerateForecastDistributions.py
├── CodeForGraphics.py
├── README.md
├── S-ANN-GenerateProbabilityForecasts.py
├── S-CNN-FindTuningParameters.py
├── S-CNN-FitConvolutionalNetworkModel.py
├── S-CSGD-GenerateForecastDistributions.py
├── S-CalculateVerificationMetrics.py
└── S-CodeForGraphics.py

/ANN-CalculateEnsembleStatistics.py:
--------------------------------------------------------------------------------

import numpy as np
import scipy as sp
import math
import os, sys
import datetime
import time
import matplotlib.path as path
import matplotlib.patches as patches
import matplotlib.pyplot as plt

from netCDF4 import Dataset
from numpy import ma
from numpy import loadtxt
from scipy.interpolate import interp1d


#plt.ion()

r = 300.    # neighborhood radius (kilometers)
R = 6373.   # earth radius (kilometers)


leadDay = 6        # leadDay=d works out to being a d+0.5 day forecast
accumulation = 7   # Precipitation accumulation period

clead = 'week'+str((leadDay+8)//7)


f1 = np.load("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz")
obs_lat = f1['obs_lat']
obs_lon = f1['obs_lon']
obs_1week_dates_ord = f1['obs_dates_ord']
obs_1week_dates = f1['obs_dates']
f1.close()

nxy = len(obs_lat)


f3 = np.load("/home/michael/Desktop/CalifAPCP/data/mod_precip_calplus.npz")
### Modeled precip is (reforecast time, member, year, lead time, lat, lon)
mod_precip = f3['precip']
#mod_dates_ord = f3['datesOrd']
mod_lon = f3['lon']
mod_lat = f3['lat']
f3.close()

f3 = np.load("/home/michael/Desktop/CalifAPCP/data/mod_precip_cal.npz")
mod_dates_ord = f3['dates_ord']
f3.close()

ndts, nmem, nyrs, nlts, nlat, nlon = mod_precip.shape

### Modeled precip 7-day accumulation is (reforecast time, year, ensembles, space)
mod_precip_week = np.sum(mod_precip[:,:,:,leadDay:leadDay+accumulation,:,:],axis=3).reshape((ndts,nmem,nyrs,nlon*nlat))
mod_dates_week = mod_dates_ord[:,:,leadDay]


### Calculate day of the year ('doy') for each reforecast date
doy = np.zeros(ndts,dtype=np.int32)
for idt in range(ndts):
    yyyy = datetime.date.fromordinal(int(mod_dates_week[idt,0])).year
    doy[idt] = min(364,(datetime.date.fromordinal(int(mod_dates_week[idt,0]))-datetime.date(yyyy,1,1)).days)


### Calculate spatially smoothed ensemble forecasts at analysis grid locations
mod_precip_week_sm = np.zeros((ndts,nmem,nyrs,nxy),dtype=np.float32)
for ixy in range(0,nxy):
    lat1 = np.deg2rad(obs_lat[ixy])
    lon1 = np.deg2rad(obs_lon[ixy])
    lat2 = np.deg2rad(mod_lat)
    lon2 = np.deg2rad(mod_lon)
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = (np.sin(dlat/2)**2)[:,None] + np.cos(lat1) * np.outer(np.cos(lat2),np.sin(dlon/2)**2)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1-a))
    gcdst = (R*c).reshape(nlat*nlon)   # great circle distances between forecast and analysis grid points
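    # The three lines above are the haversine formula: a = sin^2(dlat/2) + cos(lat1)*cos(lat2)*sin^2(dlon/2)
    # and c = 2*atan2(sqrt(a),sqrt(1-a)) give the central angle, so R*c is the great-circle distance
    # (in km) from analysis point ixy to every forecast grid point at once.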
    uselocs = np.nonzero(gcdst<r)[0]

# ...

            izmax = np.nonzero(chf>0.0)[0][0] - 1
            if izmax<0:
                itp_fct = interp1d(np.append(0.0,x), np.append(0.0,chf), kind='linear',fill_value='extrapolate')
            else:
                itp_fct = interp1d(x[izmax:], chf[izmax:], kind='linear',fill_value='extrapolate')
            apcp_ens_pit_train[idt,:,ixy,:] = np.transpose(1.-np.exp(-itp_fct(fcst_train[idt,:,:,ixy])))
            apcp_ens_pit_verif[idt,ixy,:] = 1.-np.exp(-itp_fct(fcst_verif[idt,:,ixy]))
            apcp_fcst_p0_cl[idt,ixy] = np.mean(fcst_train[wnd_ind,:,:,ixy]==0.0)
    ### Save out to file
    outfilename = "/home/michael/Desktop/CalifAPCP/stats/ensemble_stats_"+clead+"_ANN_yr"+str(iyr)
    np.savez(outfilename, doy_dts=doy, \
        apcp_obs_ind_train=apcp_obs_ind_train, \
        apcp_obs_ind_verif=apcp_obs_ind_verif, \
        apcp_ens_pit_train=apcp_ens_pit_train, \
        apcp_ens_pit_verif=apcp_ens_pit_verif, \
        apcp_fcst_p0_cl=apcp_fcst_p0_cl)


--------------------------------------------------------------------------------
/ANN-CalculateObsCategories.py:
--------------------------------------------------------------------------------

import numpy as np
import scipy as sp
import math
import os, sys
#import matplotlib.pyplot as plt
import matplotlib.path as path
import datetime
import time
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

from netCDF4 import Dataset
from numpy import ma
from numpy import loadtxt
from scipy import stats

#plt.ion()

ncat = 30
qtlv_eval = [.333,.667,.85,.95]


#==============================================================================
# Load PRISM data set, aggregate to 1-week accumulations and calculate doy
#==============================================================================

f1 = np.load("/home/michael/Desktop/CalifAPCP/data/precip_PRISM_cal_19810101_20171231.npz")
#list(f1)
obs_precip = f1['precip']
obs_lat = f1['lat']
obs_lon = f1['lon']
obs_dates_ord = f1['dates_ord']
obs_dates = f1['dates']
f1.close()

ndts, nxy = obs_precip.shape

obs_precip_week = np.zeros((ndts-6,nxy), dtype=np.float32)
for iday in range(7):
    obs_precip_week += obs_precip[iday:(ndts-6+iday),:]

ndts, nxy = obs_precip_week.shape

obs_dates_ord = obs_dates_ord[:ndts]
obs_dates = obs_dates[:ndts]


doy = np.zeros(ndts, dtype=np.int32)
for idt in range(ndts):
    doy[idt] = (datetime.date.fromordinal(int(obs_dates_ord[idt]))-datetime.date(obs_dates[idt,0],1,1)).days
    if datetime.date(obs_dates[idt,0],1,1).year%4==0 and doy[idt]>58:
        doy[idt] -= 1   # in leap year, do not count Feb 29


#==============================================================================
# Estimate climatological PoP and 'hybrid' quantiles using a moving window
#==============================================================================

pop_doy = np.zeros((365,nxy), dtype=np.float32)
thr_doy = np.zeros((365,nxy,ncat-1), dtype=np.float32)
qtev_doy = np.zeros((365,nxy,len(qtlv_eval)), dtype=np.float32)
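# The first class threshold is fixed at 0.254 mm (0.01 in, the trace threshold); the
# remaining ncat-2 thresholds are climatological quantiles chosen so that the wet part
# of the distribution is split into ncat-1 equally likely classes.  For example, with
# pop_doy[idd,ixy] = 0.6 and ncat = 30, qtlv below runs over the 28 levels
# 1.-0.6*(1.-k/29.), k=1,...,28, i.e. from about 0.42 up to about 0.98.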
for idd in range(365):
    print(idd)
    ind_doy = np.where(doy==idd)[0]
    ind_doy_ext = np.append(np.append(ind_doy[0]-366,ind_doy),ind_doy[-1]+365)
    wnd_ind = np.add.outer(ind_doy_ext,np.arange(-30,31)).flatten()
    imin = np.where(wnd_ind>=0)[0][0]
    imax = np.where(wnd_ind<ndts)[0][-1] + 1
    for ixy in range(nxy):
        y = obs_precip_week[wnd_ind[imin:imax],ixy]
        pop_doy[idd,ixy] = np.mean(y>0.254)
        thr_doy[idd,ixy,0] = 0.254
        qtlv = 1. + pop_doy[idd,ixy]*((np.arange(1,ncat-1)/float(ncat-1))-1.)
        thr_doy[idd,ixy,1:] = np.quantile(y,qtlv)
        qtev_doy[idd,ixy,:] = np.maximum(0.254,np.quantile(y,qtlv_eval))


#==============================================================================
# Assign observations to classes (multiple assignments allowed if ambiguous)
#==============================================================================

apcp_obs_cat = np.zeros((ndts,nxy,ncat),dtype=np.bool_)

for idt in range(ndts):
    for ixy in range(0,nxy):
        lower = np.append(-np.Inf,thr_doy[doy[idt],ixy,:])
        upper = np.append(thr_doy[doy[idt],ixy,:],np.Inf)
        apcp_obs_cat[idt,ixy,:] = np.logical_and(obs_precip_week[idt,ixy]>=lower,obs_precip_week[idt,ixy]<=upper)

np.savez("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_"+str(ncat)+"cl",
    obs_lat = obs_lat,
    obs_lon = obs_lon,
    obs_dates_ord = obs_dates_ord,
    obs_dates = obs_dates,
    apcp_obs_cat = apcp_obs_cat,
    apcp_obs = obs_precip_week,
    pop_doy = pop_doy,
    thr_doy = thr_doy,
    qtev_doy = qtev_doy)


--------------------------------------------------------------------------------
/ANN-CalculateVerificationMetrics.py:
--------------------------------------------------------------------------------

import numpy as np
import scipy.stats as stats
import math
import os, sys
import matplotlib.pyplot as plt
import datetime
import time
import pdb
import pickle

from scipy.stats import gamma
from scipy.interpolate import interp1d

plt.ion()


leadDay = 20       # d works out to being a d+0.5 day forecast
accumulation = 7   # Precipitation accumulation period

clead = 'week'+str((leadDay+8)//7)


## Load PRISM data

f1 = np.load("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz")
#list(f1)
obs_lat = f1['obs_lat']
obs_lon = f1['obs_lon']
obs_dates_ord = f1['obs_dates_ord']
pop_doy = f1['pop_doy']
thr_doy = f1['thr_doy']
qtev_doy = f1['qtev_doy']
apcp_obs_cat = f1['apcp_obs_cat']
obs_precip_week = f1['apcp_obs']
f1.close()

ndts, nxy, ncat = apcp_obs_cat.shape


## Load IFS ensemble forecasts

f2 = np.load("/home/michael/Desktop/CalifAPCP/data/mod_precip_cal.npz")
mod_dates = f2['dates_ord'][:,:,leadDay]
mod_lon = f2['lon']
mod_lat = f2['lat']
mod_precip = np.sum(f2['precip'][:,:,leadDay:leadDay+accumulation,:,:],axis=2)
f2.close()

ndts, nyrs = mod_dates.shape

ecmwf_ind = np.zeros(len(mod_lon),dtype=np.bool_)

for ixy in range(nxy):
    dst = abs(mod_lon-obs_lon[ixy]) + abs(mod_lat-obs_lat[ixy])   # Nearest neighbor interpolation
    ecmwf_ind[np.argmin(dst)] = True

ecmwf_precip = mod_precip[:,:,:,ecmwf_ind]


obs_precip_vdate = np.zeros((ndts,nyrs,nxy),dtype=np.float32)
for idt in range(ndts):
    for iyr in range(nyrs):
        fnd = np.nonzero(obs_dates_ord==mod_dates[idt,iyr])[0][0]
        obs_precip_vdate[idt,iyr,:] = obs_precip_week[fnd,:]   # PRISM data on the verification days
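# Scores computed below: for a threshold q, the Brier score of a forecast probability p
# against an observation y is (1{y>q}-p)^2; the RPS at the end of this script sums these
# Brier scores over the 33rd/67th/85th percentile thresholds, and the CRPS integrates
# them over the evaluation points x defined below.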

### Calculate skill scores

exc33p = np.zeros(obs_precip_vdate.shape)
brier33pClm = np.zeros(obs_precip_vdate.shape)
pot33pANN = np.zeros(obs_precip_vdate.shape)
brier33pANN = np.zeros(obs_precip_vdate.shape)
pot33pCSGD = np.zeros(obs_precip_vdate.shape)
brier33pCSGD = np.zeros(obs_precip_vdate.shape)
pot33pENS = np.zeros(obs_precip_vdate.shape)
brier33pENS = np.zeros(obs_precip_vdate.shape)

exc67p = np.zeros(obs_precip_vdate.shape)
brier67pClm = np.zeros(obs_precip_vdate.shape)
pot67pANN = np.zeros(obs_precip_vdate.shape)
brier67pANN = np.zeros(obs_precip_vdate.shape)
pot67pCSGD = np.zeros(obs_precip_vdate.shape)
brier67pCSGD = np.zeros(obs_precip_vdate.shape)
pot67pENS = np.zeros(obs_precip_vdate.shape)
brier67pENS = np.zeros(obs_precip_vdate.shape)

exc85p = np.zeros(obs_precip_vdate.shape)
brier85pClm = np.zeros(obs_precip_vdate.shape)
pot85pANN = np.zeros(obs_precip_vdate.shape)
brier85pANN = np.zeros(obs_precip_vdate.shape)
pot85pCSGD = np.zeros(obs_precip_vdate.shape)
brier85pCSGD = np.zeros(obs_precip_vdate.shape)
pot85pENS = np.zeros(obs_precip_vdate.shape)
brier85pENS = np.zeros(obs_precip_vdate.shape)

rpsClm = np.zeros(obs_precip_vdate.shape)
rpsANN = np.zeros(obs_precip_vdate.shape)
rpsCSGD = np.zeros(obs_precip_vdate.shape)
rpsENS = np.zeros(obs_precip_vdate.shape)

crpsClm = np.zeros(obs_precip_vdate.shape)
crpsANN = np.zeros(obs_precip_vdate.shape)
crpsCSGD = np.zeros(obs_precip_vdate.shape)


f3 = np.load("/home/michael/Desktop/CalifAPCP/forecasts/csgd_fcsts_params_"+clead+".npz")
csgd_pars_fcst = f3['csgd_pars_fcst']
f3.close()


wwCl = 15   # number of dates around the date of interest used to create a sample representing climatology


x = (np.arange(0,101)/5)**2   # evaluation points for numerical approximation of the CRPS
dx = np.diff(x)

for iyr in range(nyrs):
    print(iyr)
    f4 = np.load("/home/michael/Desktop/CalifAPCP/stats/ensemble_stats_"+clead+"_ANN_yr"+str(iyr)+".npz")
    doy_dts = f4['doy_dts']
    f4.close()
    f5 = np.load("/home/michael/Desktop/CalifAPCP/forecasts/ANN-efi/probfcst_10-l1_"+clead+"_yr"+str(iyr)+".npz")
    prob_fcst_cat = f5['prob_fcst_cat']
    f5.close()
    prob_fcst_chf = -np.log(1.-np.cumsum(prob_fcst_cat,axis=2)[:,:,:(ncat-1)])
    prob_over_thr = np.zeros((ndts,nxy,qtev_doy.shape[2]),dtype=np.float32)
    for idt in range(ndts):
        ### Calculate exceedance ANN probabilities from interpolated cumulative hazard function
        for ixy in range(nxy):
            itp_fct = interp1d(thr_doy[doy_dts[idt],ixy,:], prob_fcst_chf[idt,ixy,:], kind='linear',fill_value='extrapolate')
            prob_over_thr = np.exp(-itp_fct(qtev_doy[doy_dts[idt],ixy,:]))
            pot33pANN[idt,iyr,ixy] = prob_over_thr[0]
            pot67pANN[idt,iyr,ixy] = prob_over_thr[1]
            pot85pANN[idt,iyr,ixy] = prob_over_thr[2]
            ## Calculate CRPS for ANN
            bs = (1.-np.exp(-itp_fct(x))-1.*(obs_precip_vdate[idt,iyr,ixy]<=x))**2
            crpsANN[idt,iyr,ixy] = 0.5*np.sum((bs[1:]+bs[:len(dx)])*dx)
        ### Calculate climatological exceedances based on analyzed values within a time window around the forecast date
        windowClm = np.argsort(np.abs(idt-np.arange(ndts)))[:wwCl]
        obsClm = obs_precip_vdate[windowClm,:,:].reshape((wwCl*nyrs,nxy))
        ### Calculate threshold exceedances for the Brier scores used to approximate the CRPS
        crps_exc = 1.*np.less_equal.outer(obs_precip_vdate[idt,iyr,:],x)
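        # The CRPS values are computed as the integral over thresholds x of the Brier
        # scores (F(x)-1{y<=x})^2, approximated with the trapezoidal rule on the
        # stretched grid x=(i/5)^2, i=0,...,100, which is denser near zero where most
        # precipitation amounts fall.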
        ### Compose sample to represent ECMWF model climatology
        modClm = ecmwf_precip[windowClm,:,:,:].reshape((wwCl*nyrs*11,nxy))
        ## Calculate CRPS for Clm
        clm_cdf = np.mean(obsClm[:,:,None]<=x[None,None,:],axis=0)
        bs = (clm_cdf-crps_exc)**2
        crpsClm[idt,iyr,:] = 0.5*np.sum((bs[:,1:]+bs[:,:len(dx)])*dx[None,:],axis=1)
        ## Calculate CRPS for CSGD
        shape = np.square(csgd_pars_fcst[idt,iyr,:,0]/csgd_pars_fcst[idt,iyr,:,1])
        scale = np.square(csgd_pars_fcst[idt,iyr,:,1])/csgd_pars_fcst[idt,iyr,:,0]
        shift = csgd_pars_fcst[idt,iyr,:,2]
        csgd_cdf = gamma.cdf((x[None,:]-shift[:,None])/scale[:,None],shape[:,None])
        bs = (csgd_cdf-crps_exc)**2
        crpsCSGD[idt,iyr,:] = 0.5*np.sum((bs[:,1:]+bs[:,:len(dx)])*dx[None,:],axis=1)
        ## Calculate Brier scores for different thresholds
        p33 = qtev_doy[doy_dts[idt],:,0]
        exc33p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p33)
        brier33pClm[idt,iyr,:] = (exc33p[idt,iyr,:]-np.mean(obsClm>p33[None,:],axis=0))**2
        brier33pANN[idt,iyr,:] = (exc33p[idt,iyr,:]-pot33pANN[idt,iyr,:])**2
        pot33pCSGD[idt,iyr,:] = 1.-gamma.cdf((p33-shift)/scale,shape)
        brier33pCSGD[idt,iyr,:] = (exc33p[idt,iyr,:]-pot33pCSGD[idt,iyr,:])**2
        p33mod = np.maximum(0.254,np.quantile(modClm,0.333,axis=0))
        #pot33pENS[idt,iyr,:] = np.mean(ecmwf_precip[idt,iyr,:,:]>p33mod[None,:],axis=0)
        pot33pENS[idt,iyr,:] = np.mean(ecmwf_precip[idt,iyr,:,:]>p33[None,:],axis=0)
        brier33pENS[idt,iyr,:] = (exc33p[idt,iyr,:]-pot33pENS[idt,iyr,:])**2
        p67 = qtev_doy[doy_dts[idt],:,1]
        exc67p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p67)
        brier67pClm[idt,iyr,:] = (exc67p[idt,iyr,:]-np.mean(obsClm>p67[None,:],axis=0))**2
        brier67pANN[idt,iyr,:] = (exc67p[idt,iyr,:]-pot67pANN[idt,iyr,:])**2
        pot67pCSGD[idt,iyr,:] = 1.-gamma.cdf((p67-shift)/scale,shape)
        brier67pCSGD[idt,iyr,:] = (exc67p[idt,iyr,:]-pot67pCSGD[idt,iyr,:])**2
        p67mod = np.maximum(0.254,np.quantile(modClm,0.667,axis=0))
        #pot67pENS[idt,iyr,:] = np.mean(ecmwf_precip[idt,iyr,:,:]>p67mod[None,:],axis=0)
        pot67pENS[idt,iyr,:] = np.mean(ecmwf_precip[idt,iyr,:,:]>p67[None,:],axis=0)
        brier67pENS[idt,iyr,:] = (exc67p[idt,iyr,:]-pot67pENS[idt,iyr,:])**2
        p85 = qtev_doy[doy_dts[idt],:,2]
        exc85p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p85)
        brier85pClm[idt,iyr,:] = (exc85p[idt,iyr,:]-np.mean(obsClm>p85[None,:],axis=0))**2
        brier85pANN[idt,iyr,:] = (exc85p[idt,iyr,:]-pot85pANN[idt,iyr,:])**2
        pot85pCSGD[idt,iyr,:] = 1.-gamma.cdf((p85-shift)/scale,shape)
        brier85pCSGD[idt,iyr,:] = (exc85p[idt,iyr,:]-pot85pCSGD[idt,iyr,:])**2
        p85mod = np.maximum(0.254,np.quantile(modClm,0.85,axis=0))
        #pot85pENS[idt,iyr,:] = np.mean(ecmwf_precip[idt,iyr,:,:]>p85mod[None,:],axis=0)
        pot85pENS[idt,iyr,:] = np.mean(ecmwf_precip[idt,iyr,:,:]>p85[None,:],axis=0)
        brier85pENS[idt,iyr,:] = (exc85p[idt,iyr,:]-pot85pENS[idt,iyr,:])**2
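# Brier skill scores could be computed from the saved arrays in the same way as the
# ranked probability skill scores below, e.g. (a sketch, not used elsewhere in the repo):
#   bss33ANN = 1.-np.sum(brier33pANN)/np.sum(brier33pClm)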

outfilename = "/home/michael/Desktop/CalifAPCP/results/scores-ann_"+clead
np.savez(outfilename, crpsClm=crpsClm, crpsANN=crpsANN, crpsCSGD=crpsCSGD, \
    exc33p=exc33p, pot33pANN=pot33pANN, pot33pCSGD=pot33pCSGD, pot33pENS=pot33pENS, Bs33pClm=brier33pClm, Bs33pANN=brier33pANN, Bs33pCSGD=brier33pCSGD, Bs33pENS=brier33pENS, \
    exc67p=exc67p, pot67pANN=pot67pANN, pot67pCSGD=pot67pCSGD, pot67pENS=pot67pENS, Bs67pClm=brier67pClm, Bs67pANN=brier67pANN, Bs67pCSGD=brier67pCSGD, Bs67pENS=brier67pENS, \
    exc85p=exc85p, pot85pANN=pot85pANN, pot85pCSGD=pot85pCSGD, pot85pENS=pot85pENS, Bs85pClm=brier85pClm, Bs85pANN=brier85pANN, Bs85pCSGD=brier85pCSGD, Bs85pENS=brier85pENS)


# calculate ranked probability score
rpsClm = brier33pClm + brier67pClm + brier85pClm
rpsANN = brier33pANN + brier67pANN + brier85pANN
rpsCSGD = brier33pCSGD + brier67pCSGD + brier85pCSGD
rpsENS = brier33pENS + brier67pENS + brier85pENS

# rpssAvgANN
round(1.-np.sum(rpsANN)/np.sum(rpsClm),4)

# rpssAvgCSGD
round(1.-np.sum(rpsCSGD)/np.sum(rpsClm),4)

# rpssAvgENS
round(1.-np.sum(rpsENS)/np.sum(rpsClm),4)


--------------------------------------------------------------------------------
/ANN-FindTuningParameters.py:
--------------------------------------------------------------------------------

import numpy as np
import scipy as sp
import math
import os, sys
#import matplotlib.pyplot as plt
import datetime
import time
import keras
import keras.backend as K

from netCDF4 import Dataset
from numpy import ma

from scipy import stats
from scipy.interpolate import interp1d

from keras import models
from keras import layers
from keras import regularizers

from keras.layers import Input, Dense, Add, Activation
from keras.models import Model
from keras.optimizers import Adam

#plt.ion()


ncl = '20'
clead = 'week2'
imod = 0


def build_cat_model(n_features, hidden_nodes, n_bins, par_reg):
    inp1 = Input(shape=(n_features,))
    inp2 = Input(shape=(n_bins,))
    x = Dense(hidden_nodes[0], activation='elu', kernel_regularizer=regularizers.l1(par_reg))(inp1)
    if len(hidden_nodes) > 1:
        for h in hidden_nodes[1:]:
            x = Dense(h, activation='elu', kernel_regularizer=regularizers.l1(par_reg))(x)
    x = Dense(n_bins, activation='elu', kernel_regularizer=regularizers.l1(par_reg))(x)
    x = Add()([x, inp2])
    out = Activation('softmax')(x)
    return Model(inputs=[inp1, inp2], outputs=out)


def modified_categorical_crossentropy(y_mat, prob_fcst):
    prob_obs_cat = K.sum(y_mat*prob_fcst,axis=1)
    return -K.mean(K.log(prob_obs_cat))


f1 = np.load("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_"+ncl+"cl.npz")
#list(f1)
lat = f1['obs_lat']
lon = f1['obs_lon']
obs_dates_ord = f1['obs_dates_ord']
pop_doy = f1['pop_doy']
thr_doy = f1['thr_doy']
qtev_doy = f1['qtev_doy']
apcp_obs_cat = f1['apcp_obs_cat']
apcp_obs = f1['apcp_obs']
f1.close()

ndts, nxy, ncat = apcp_obs_cat.shape


nyrs = 20

reg = 10.**np.arange(-6,-2)
nreg = len(reg)

mod = [[10],[20],[10,10]]


x = (np.arange(0,101)/5)**2   # evaluation points for numerical approximation of the CRPS
dx = np.diff(x)

opt_reg_param = np.zeros(nyrs, dtype=np.float32)
opt_valid_scores = np.zeros((nyrs,5), dtype=np.float32)
opt_valid_crps = np.zeros((nyrs,5), dtype=np.float32)


for iyr in range(nyrs):
    print('year: ',iyr)
    # Load smoothed ensemble forecast PIT values
    f4 = np.load("/home/michael/Desktop/CalifAPCP/stats/ensemble_stats_"+clead+"_ANN_yr"+str(iyr)+".npz")
    doy_dts = f4['doy_dts']
    apcp_obs_ind = f4['apcp_obs_ind_train']
    apcp_ens_pit = f4['apcp_ens_pit_train']
    f4.close()
    ndts, nyrs_cv, nxy, nmem = apcp_ens_pit.shape
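    # apcp_efi below maps the ensemble PIT values to an EFI-like index in [-1,1]:
    # -1+(2/pi)*mean(arccos(1-2*pit)) is 0 for a climatological ensemble and approaches
    # -1/+1 when all members sit in the extreme dry/wet tail of the climatology.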
    # Calculate normalized coordinates and climatological probability of precipitation
    lon_nml = np.repeat(-1.+2.*(lon[np.newaxis,:]-min(lon))/(max(lon)-min(lon)),ndts*nyrs_cv,axis=0).reshape((ndts*nyrs_cv,nxy,1))
    lat_nml = np.repeat(-1.+2.*(lat[np.newaxis,:]-min(lat))/(max(lat)-min(lat)),ndts*nyrs_cv,axis=0).reshape((ndts*nyrs_cv,nxy,1))
    apcp_pop_cl = np.repeat(pop_doy[doy_dts,np.newaxis,:],nyrs_cv,axis=1).reshape((ndts*nyrs_cv,nxy,1))
    # Calculate predictors and classification targets
    apcp_efi = -1.+(2./np.pi)*np.mean(np.arccos(1.-2.*apcp_ens_pit),axis=3).reshape((ndts*nyrs_cv,nxy,1))
    predictors = np.concatenate((lon_nml,lat_nml,-1.+2.*apcp_pop_cl,apcp_efi),axis=2)
    logp_cl = np.concatenate((np.log(1.-apcp_pop_cl),np.repeat(np.log(apcp_pop_cl),ncat-1,axis=2)-np.log(ncat-1)),axis=2)
    # perform 5-fold cross validation to find optimal regularization
    date_order = np.arange(ndts*nyrs_cv).reshape(ndts,nyrs_cv).T.flatten()
    cv_ind = date_order[np.arange(ndts*nyrs_cv)%232<231]   # remove the date between the 5 cross-validated blocks
    valid_score = np.zeros((nreg,5), dtype=np.float32)
    valid_crps = np.zeros((nreg,5), dtype=np.float32)
    for cvi in range(5):
        train_ind = cv_ind[np.arange(len(cv_ind))//(len(cv_ind)//5)!=cvi]
        valid_ind = cv_ind[np.arange(len(cv_ind))//(len(cv_ind)//5)==cvi]
        predictors_train = predictors[train_ind,:,:].reshape((-1,predictors.shape[-1]))
        logp_cl_train = logp_cl[train_ind,:,:].reshape((-1,ncat))
        cat_targets_train = apcp_obs_cat[apcp_obs_ind.flatten()[train_ind],:,:].reshape((-1,ncat)).astype(float)
        predictors_valid = predictors[valid_ind,:,:].reshape((-1,predictors.shape[-1]))
        logp_cl_valid = logp_cl[valid_ind,:,:].reshape((-1,ncat))
        cat_targets_valid = apcp_obs_cat[apcp_obs_ind.flatten()[valid_ind],:,:].reshape((-1,ncat)).astype(float)
        doy_valid = np.repeat(doy_dts[:,np.newaxis],nyrs_cv,axis=1).flatten()[valid_ind]
        for ireg in range(nreg):
            # Define and fit ANN model (using batch gradient descent)
            keras.backend.clear_session()
            model = build_cat_model(predictors.shape[-1], mod[imod], ncat, reg[ireg])
            model.compile(optimizer=Adam(0.05), loss=modified_categorical_crossentropy)
            model.fit([predictors_train,logp_cl_train], cat_targets_train, epochs=100, batch_size=len(train_ind)*nxy, verbose=0)
            valid_score[ireg,cvi] = model.evaluate([predictors_valid,logp_cl_valid], cat_targets_valid, batch_size=len(train_ind)*nxy, verbose=0)
            # Calculate CRPS for each cross-validation fold
            prob_fcst_cat = model.predict([predictors_valid,logp_cl_valid]).reshape((len(valid_ind),nxy,ncat))
            prob_fcst_chf = -np.log(1.-np.cumsum(prob_fcst_cat,axis=2)[:,:,:(ncat-1)])
            crps_fold = np.zeros((len(valid_ind),nxy),dtype=np.float32)
            for ivdt in range(len(valid_ind)):
                for ixy in range(nxy):
                    itp_fct = interp1d(thr_doy[doy_valid[ivdt],ixy,:], prob_fcst_chf[ivdt,ixy,:], kind='linear',fill_value='extrapolate')
                    bs = (1.-np.exp(-itp_fct(x))-1.*(apcp_obs[apcp_obs_ind.flatten()[valid_ind[ivdt]],ixy]<=x))**2
                    crps_fold[ivdt,ixy] = 0.5*np.sum((bs[1:]+bs[:len(dx)])*dx)
            valid_crps[ireg,cvi] = np.mean(crps_fold)
    opt_reg_ind = np.argmin(np.mean(valid_score,axis=1))
    opt_reg_param[iyr] = reg[opt_reg_ind]
    opt_valid_scores[iyr,:] = valid_score[opt_reg_ind,:]
    opt_valid_crps[iyr,:] = valid_crps[opt_reg_ind,:]
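# opt_reg_param now holds, for each left-out year, the l1 weight with the smallest mean
# validation loss; ANN-GenerateProbabilityForecasts.py reads these values back when
# fitting the final models.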

### Save out to file
outfilename = "/home/michael/Desktop/CalifAPCP/tuning/efi-"+ncl+"cl-m"+str(imod)+"-l1_"+clead
np.savez(outfilename, opt_reg_param=opt_reg_param, opt_valid_scores=opt_valid_scores, opt_valid_crps=opt_valid_crps)


--------------------------------------------------------------------------------
/ANN-GenerateProbabilityForecasts.py:
--------------------------------------------------------------------------------

import numpy as np
import scipy as sp
import math
import os, sys
#import matplotlib.pyplot as plt
import datetime
import time
import keras
import keras.backend as K

from netCDF4 import Dataset
from numpy import ma

from scipy import stats

from keras import models
from keras import layers
from keras import regularizers

from keras.layers import Input, Dense, Add, Activation, Dropout
from keras.layers.merge import Concatenate
from keras.models import Model
from keras.optimizers import Adam

#plt.ion()


def build_cat_model(n_features, hidden_nodes, n_bins, par_reg):
    inp1 = Input(shape=(n_features,))
    inp2 = Input(shape=(n_bins,))
    x = Dense(hidden_nodes[0], activation='elu', kernel_regularizer=regularizers.l1(par_reg))(inp1)
    if len(hidden_nodes) > 1:
        for h in hidden_nodes[1:]:
            x = Dense(h, activation='elu', kernel_regularizer=regularizers.l1(par_reg))(x)
    x = Dense(n_bins, activation='elu', kernel_regularizer=regularizers.l1(par_reg))(x)
    x = Add()([x, inp2])
    out = Activation('softmax')(x)
    return Model(inputs=[inp1, inp2], outputs=out)


def modified_categorical_crossentropy(y_mat, prob_fcst):
    prob_obs_cat = K.sum(y_mat*prob_fcst,axis=1)
    return -K.mean(K.log(prob_obs_cat))
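# How the Add + softmax composition in build_cat_model behaves (a tiny numpy sketch, not
# part of the pipeline; the numbers are made up): softmax(f + log(p_cl)) is proportional
# to p_cl*exp(f), so the network output f acts as a multiplicative adjustment of the
# climatological class probabilities, and f = 0 returns climatology exactly.
p_cl_demo = np.array([0.4, 0.3, 0.2, 0.1])     # made-up climatological class probabilities
f_demo = np.zeros(4)                           # zero network output
z_demo = f_demo + np.log(p_cl_demo)
print(np.exp(z_demo)/np.sum(np.exp(z_demo)))   # -> [0.4 0.3 0.2 0.1]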

f1 = np.load("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz")
#list(f1)
lat = f1['obs_lat']
lon = f1['obs_lon']
obs_dates_ord = f1['obs_dates_ord']
pop_doy = f1['pop_doy']
thr_doy = f1['thr_doy']
qtev_doy = f1['qtev_doy']
apcp_obs_cat = f1['apcp_obs_cat']
f1.close()

ncat = apcp_obs_cat.shape[2]


clead = 'week2'

f3 = np.load("/home/michael/Desktop/CalifAPCP/tuning/efi-20cl-m0-l1_"+clead+".npz")
opt_reg_param = f3['opt_reg_param']
f3.close()


for iyr in range(20):
    print(iyr)
    # Load smoothed ensemble forecast PIT values
    f2 = np.load("/home/michael/Desktop/CalifAPCP/stats/ensemble_stats_"+clead+"_ANN_yr"+str(iyr)+".npz")
    doy_dts = f2['doy_dts']
    apcp_obs_ind_train = f2['apcp_obs_ind_train']
    apcp_obs_ind_verif = f2['apcp_obs_ind_verif']
    apcp_ens_pit_train = f2['apcp_ens_pit_train']
    apcp_ens_pit_verif = f2['apcp_ens_pit_verif']
    f2.close()
    ndts, nyrs_tr, nxy, nmem = apcp_ens_pit_train.shape
    # Calculate normalized coordinates and climatological probability of precipitation
    lon_train = np.repeat(-1.+2.*(lon[np.newaxis,:]-lon[0])/(lon[-1]-lon[0]),ndts*nyrs_tr,axis=0).reshape((ndts,nyrs_tr,nxy,1))
    lon_verif = np.repeat(-1.+2.*(lon[np.newaxis,:]-lon[0])/(lon[-1]-lon[0]),ndts,axis=0).reshape((ndts,nxy,1))
    lat_train = np.repeat(-1.+2.*(lat[np.newaxis,:]-lat[-1])/(lat[0]-lat[-1]),ndts*nyrs_tr,axis=0).reshape((ndts,nyrs_tr,nxy,1))
    lat_verif = np.repeat(-1.+2.*(lat[np.newaxis,:]-lat[-1])/(lat[0]-lat[-1]),ndts,axis=0).reshape((ndts,nxy,1))
    apcp_pop_cl_train = np.repeat(pop_doy[doy_dts,np.newaxis,:,None],nyrs_tr,axis=1)
    apcp_pop_cl_verif = pop_doy[doy_dts,:,None]
    # Calculate predictors and classification targets
    apcp_efi_train = -1.+(2./np.pi)*np.mean(np.arccos(1.-2.*apcp_ens_pit_train),axis=3)[:,:,:,None]
    apcp_efi_verif = -1.+(2./np.pi)*np.mean(np.arccos(1.-2.*apcp_ens_pit_verif),axis=2)[:,:,None]
    train_predictors = np.concatenate((lon_train,lat_train,apcp_efi_train),axis=3).reshape((-1,3))
    train_logp_cl = np.concatenate((np.log(1.-apcp_pop_cl_train),np.repeat(np.log(apcp_pop_cl_train),ncat-1,axis=3)-np.log(ncat-1)),axis=3).reshape((-1,ncat))
    train_cat_targets = apcp_obs_cat[apcp_obs_ind_train.flatten(),:,:].reshape((-1,ncat)).astype(float)
    # Define and fit ANN model
    keras.backend.clear_session()
    model = build_cat_model(train_predictors.shape[-1], [10], ncat, opt_reg_param[iyr])
    model.compile(optimizer=Adam(0.05), loss=modified_categorical_crossentropy)
    model.fit([train_predictors,train_logp_cl], train_cat_targets, epochs=100, batch_size=ndts*nyrs_tr*nxy, verbose=0)
    # Calculate probability forecasts
    verif_predictors = np.concatenate((lon_verif,lat_verif,apcp_efi_verif),axis=2).reshape((-1,3))
    verif_logp_cl = np.concatenate((np.log(1.-apcp_pop_cl_verif),np.repeat(np.log(apcp_pop_cl_verif),ncat-1,axis=2)-np.log(ncat-1)),axis=2).reshape((-1,ncat))
    prob_fcst_cat = model.predict([verif_predictors,verif_logp_cl]).reshape((ndts,nxy,ncat))
    ### Save out to file
    outfilename = "/home/michael/Desktop/CalifAPCP/forecasts/ANN-efi/probfcst_10-l1_"+clead+"_yr"+str(iyr)
    np.savez(outfilename, prob_fcst_cat=prob_fcst_cat)
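# A minimal self-contained sketch (not part of the pipeline; all numbers made up) of how
# the saved class probabilities are turned into exceedance probabilities downstream (see
# ANN-CalculateVerificationMetrics.py): interpolate the cumulative hazard function
# -log(1-CDF) between the class thresholds and transform back.
from scipy.interpolate import interp1d
thr_demo = np.array([0.254, 2., 5., 10.])          # hypothetical class thresholds (mm)
pcat_demo = np.array([0.5, 0.2, 0.15, 0.1, 0.05])  # hypothetical class probabilities
chf_demo = -np.log(1.-np.cumsum(pcat_demo)[:-1])   # cumulative hazard at the thresholds
itp_demo = interp1d(thr_demo, chf_demo, kind='linear', fill_value='extrapolate')
print(np.exp(-itp_demo(8.)))                       # P(precip > 8 mm) ~ 0.078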

--------------------------------------------------------------------------------
/CNN-CalculateLargeScalePredictors.py:
--------------------------------------------------------------------------------

import numpy as np
import scipy as sp
import math
import os, sys
import datetime
import time
#import matplotlib.path as path
#import matplotlib.patches as patches
import matplotlib.pyplot as plt

from netCDF4 import Dataset
from numpy import ma
from numpy.linalg import solve
from numpy.linalg import svd

#plt.ion()

data_path = '/Volumes/ExtMichael/Michael/ECMWF-subseasonal/'


### Load geopotential height forecast fields and aggregate to week-2, week-3, and week-4 averages

f1 = np.load("/Users/mscheuerer/Desktop/CalifAPCP/data/mod_precip_cal.npz")
mod_dates_ord = f1['dates_ord']
f1.close()

ndts, nyrs, nlts = mod_dates_ord.shape

ixl = 71    # -144
ixu = 147   # -107
jyl = 55    #   52
jyu = 115   #   23

nxf = len(range(ixl,ixu+1))
nyf = len(range(jyl,jyu+1))

nens = 11

z500_week2 = np.zeros((ndts,nyrs,nens,nyf,nxf), dtype=np.float32)
z500_week3 = np.zeros((ndts,nyrs,nens,nyf,nxf), dtype=np.float32)
z500_week4 = np.zeros((ndts,nyrs,nens,nyf,nxf), dtype=np.float32)

wgt12h = np.r_[0.5,np.ones(13,dtype=np.float32),0.5]

for idt in range(ndts):
    date_init = datetime.date.fromordinal(int(mod_dates_ord[idt,-1,0]-1.5))   # Initialization date of ECMWF reforecast
    cyear = format(date_init.year+1)
    cmonth = format(date_init.month,'02')
    cday = format(date_init.day,'02')
    infile = data_path+'ControlLargeDomain/geopotential/'+cyear+'-'+cmonth+'-'+cday+'cntrl_12hrpress_start0hr.nc'
    nc = Dataset(infile)
    z = nc.variables['z'][:,:,:,jyl:(jyu+1),ixl:(ixu+1)]
    nc.close()
    z500_week2[idt,:,0,:,:] = np.average(z[:,13:28,0,:,:],axis=1,weights=wgt12h)
    z500_week3[idt,:,0,:,:] = np.average(z[:,27:42,0,:,:],axis=1,weights=wgt12h)
    z500_week4[idt,:,0,:,:] = np.average(z[:,41:56,0,:,:],axis=1,weights=wgt12h)
    print(infile)
    infile = data_path+'EnsembleLargeDomain/geopotential/'+cyear+'-'+cmonth+'-'+cday+'ens_12hrpress_start0hr.z.nc'
    nc = Dataset(infile)
    z = nc.variables['z'][:,:,:,:,jyl:(jyu+1),ixl:(ixu+1)]
    nc.close()
    z500_week2[idt,:,1:,:,:] = np.average(z[:,13:28,:,0,:,:],axis=1,weights=wgt12h)
    z500_week3[idt,:,1:,:,:] = np.average(z[:,27:42,:,0,:,:],axis=1,weights=wgt12h)
    z500_week4[idt,:,1:,:,:] = np.average(z[:,41:56,:,0,:,:],axis=1,weights=wgt12h)
    print(infile)


# Upscale to 1-deg grid

nxfu = (nxf-1)//2
nyfu = (nyf-1)//2

z500_week2_1deg = np.zeros((ndts,nyrs,nens,nyfu,nxfu), dtype=np.float32)
z500_week3_1deg = np.zeros((ndts,nyrs,nens,nyfu,nxfu), dtype=np.float32)
z500_week4_1deg = np.zeros((ndts,nyrs,nens,nyfu,nxfu), dtype=np.float32)

for ixd in range(-1,2):
    wx = 0.5**(1+abs(ixd))
    for jyd in range(-1,2):
        wy = 0.5**(1+abs(jyd))
        w = wx*wy
        z500_week2_1deg += z500_week2[:,:,:,(1+jyd):(nyf-1+jyd):2,(1+ixd):(nxf-1+ixd):2]*w
        z500_week3_1deg += z500_week3[:,:,:,(1+jyd):(nyf-1+jyd):2,(1+ixd):(nxf-1+ixd):2]*w
        z500_week4_1deg += z500_week4[:,:,:,(1+jyd):(nyf-1+jyd):2,(1+ixd):(nxf-1+ixd):2]*w


### Save out to file
outfilename = "/Users/mscheuerer/Desktop/CalifAPCP/data/z500_predictor_cnn"
np.savez(outfilename, mod_dates_ord=mod_dates_ord,
    longitude=lon.data[(ixl+1):ixu:2],
    latitude=lat.data[(jyl+1):jyu:2],
    z500_week2=z500_week2_1deg,
    z500_week3=z500_week3_1deg,
    z500_week4=z500_week4_1deg)
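# The upscaling above averages each 1-deg target point over a 3x3 neighborhood of
# half-deg points with separable weights [0.25, 0.5, 0.25] per axis (wx*wy), i.e. a
# simple smoothing before subsampling every other grid point.  Note that lon and lat in
# the savez call are the full-resolution coordinate vectors of the forecast files (cf.
# the commented-out read in the tcw section below); they are not defined earlier in this
# script as it stands.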

### Load total column water forecast fields and aggregate to week-2, week-3, and week-4 averages

f1 = np.load("/Users/mscheuerer/Desktop/CalifAPCP/data/mod_precip_cal.npz")
mod_dates_ord = f1['dates_ord']
f1.close()

ndts, nyrs, nlts = mod_dates_ord.shape

ixl = 71    # -144
ixu = 147   # -107
jyl = 55    #   52
jyu = 115   #   23

nxf = len(range(ixl,ixu+1))
nyf = len(range(jyl,jyu+1))

nens = 11

tcw_week2 = np.zeros((ndts,nyrs,nens,nyf,nxf), dtype=np.float32)
tcw_week3 = np.zeros((ndts,nyrs,nens,nyf,nxf), dtype=np.float32)
tcw_week4 = np.zeros((ndts,nyrs,nens,nyf,nxf), dtype=np.float32)

wgt6h = np.r_[0.5,np.ones(27,dtype=np.float32),0.5]

for idt in range(ndts):
    date_init = datetime.date.fromordinal(int(mod_dates_ord[idt,-1,0]-1.5))   # Initialization date of ECMWF reforecast
    cyear = format(date_init.year+1)
    cmonth = format(date_init.month,'02')
    cday = format(date_init.day,'02')
    infile = data_path+'ControlLargeDomain/tcw/'+cyear+'-'+cmonth+'-'+cday+'cntrl_6hrsfc_start0hr.nc'
    nc = Dataset(infile)
    twc = nc.variables['tcw'][:,:,jyl:(jyu+1),ixl:(ixu+1)]
    nc.close()
    tcw_week2[idt,:,0,:,:] = np.average(twc[:,26:55,:,:],axis=1,weights=wgt6h)
    tcw_week3[idt,:,0,:,:] = np.average(twc[:,54:83,:,:],axis=1,weights=wgt6h)
    tcw_week4[idt,:,0,:,:] = np.average(twc[:,82:111,:,:],axis=1,weights=wgt6h)
    print(infile)
    infile = data_path+'EnsembleLargeDomain/tcw/'+cyear+'-'+cmonth+'-'+cday+'ens_6hrsfc_start0hr.tcw.nc'
    nc = Dataset(infile)
    twc = nc.variables['tcw'][:,:,:,jyl:(jyu+1),ixl:(ixu+1)]
    nc.close()
    tcw_week2[idt,:,1:,:,:] = np.average(twc[:,26:55,:,:],axis=1,weights=wgt6h)
    tcw_week3[idt,:,1:,:,:] = np.average(twc[:,54:83,:,:],axis=1,weights=wgt6h)
    tcw_week4[idt,:,1:,:,:] = np.average(twc[:,82:111,:,:],axis=1,weights=wgt6h)
    print(infile)

#nc = Dataset(infile)
#lons = nc.variables['longitude'][ixl:(ixu+1)]
#lats = nc.variables['latitude'][jyl:(jyu+1)]
#nc.close()


# Upscale to 1-deg grid

nxfu = (nxf-1)//2
nyfu = (nyf-1)//2

tcw_week2_1deg = np.zeros((ndts,nyrs,nens,nyfu,nxfu), dtype=np.float32)
tcw_week3_1deg = np.zeros((ndts,nyrs,nens,nyfu,nxfu), dtype=np.float32)
tcw_week4_1deg = np.zeros((ndts,nyrs,nens,nyfu,nxfu), dtype=np.float32)

for ixd in range(-1,2):
    wx = 0.5**(1+abs(ixd))
    for jyd in range(-1,2):
        wy = 0.5**(1+abs(jyd))
        w = wx*wy
        tcw_week2_1deg += tcw_week2[:,:,:,(1+jyd):(nyf-1+jyd):2,(1+ixd):(nxf-1+ixd):2]*w
        tcw_week3_1deg += tcw_week3[:,:,:,(1+jyd):(nyf-1+jyd):2,(1+ixd):(nxf-1+ixd):2]*w
        tcw_week4_1deg += tcw_week4[:,:,:,(1+jyd):(nyf-1+jyd):2,(1+ixd):(nxf-1+ixd):2]*w


### Save out to file
outfilename = "/Users/mscheuerer/Desktop/CalifAPCP/data/tcw_predictor_cnn"
np.savez(outfilename, mod_dates_ord=mod_dates_ord, tcw_week2=tcw_week2_1deg, tcw_week3=tcw_week3_1deg, tcw_week4=tcw_week4_1deg)


f1 = np.load("/Users/mscheuerer/Desktop/CalifAPCP/data/tcw_predictor.npz")
tcw_week2 = f1['tcw_week2']
tcw_week3 = f1['tcw_week3']
tcw_week4 = f1['tcw_week4']
f1.close()


# Load ERA-5 reanalyses for z500

data_path = '/Projects/ClimateAnalysis/OBS/ERA5/'

infile = data_path+'GEOPOT500.1981.4x.nc'
nc = Dataset(infile)
lon = nc.variables['lon'][:]
lat = nc.variables['lat'][:]
nc.close()

lon = np.where(lon>180,lon-360,lon)

idx_lon = np.logical_and(np.greater_equal(lon,-144.5),np.less_equal(lon,-106.5))
idx_lat = np.logical_and(np.greater_equal(lat,22.5),np.less_equal(lat,52.5))

nx = sum(idx_lon)
ny = sum(idx_lat)
ntimes = 4*(737059-723181+1)

dates_ord = np.zeros(ntimes,dtype=np.float32)
z500 = np.zeros((ntimes,ny,nx),dtype=np.float32)

idtb = 0

for iyr in range(38):
    infile = data_path+'GEOPOT500.'+str(1981+iyr)+'.4x.nc'
    print(infile)
    nc = Dataset(infile)
    ntyr = len(nc.dimensions['time'])
    idte = idtb + ntyr
    dates_ord[idtb:idte] = 657072 + nc.variables['time'][:]/24.
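    # 657072 is datetime.date(1800,1,1).toordinal(): these files store time as hours
    # since 1800-01-01, so dividing by 24 and adding this offset yields proleptic ordinals.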
    z500[idtb:idte,:,:] = nc.variables['GEOPOT'][:,0,idx_lat,idx_lon]/9.806
    nc.close()
    idtb = idte

# Upscale to 1-deg grid

nxu = (nx-1)//4
nyu = (ny-1)//4

z500_1deg = np.zeros((ntimes,nyu,nxu), dtype=np.float32)

for ixd in range(-2,3):
    wx = 0.125*min(3-abs(ixd),2)
    for jyd in range(-2,3):
        wy = 0.125*min(3-abs(jyd),2)
        w = wx*wy
        z500_1deg += z500[:,(2+jyd):(ny-2+jyd):4,(2+ixd):(nx-2+ixd):4]*w


# Accumulate to 7-day averages

f1 = np.load("/Users/mscheuerer/Desktop/CalifAPCP/data/mod_precip_cal.npz")
mod_dates_ord = f1['dates_ord']
f1.close()

ndts, nyrs, nlts = mod_dates_ord.shape

wgt6h = np.r_[0.5,np.ones(27,dtype=np.float32),0.5]

z500_acc1wk = np.zeros((ndts,nyrs,nyu,nxu), dtype=np.float32)

for idt in range(ndts):
    for iyr in range(nyrs):
        date_init_ord = mod_dates_ord[idt,iyr,0]-1.   # Initialization date of ECMWF reforecast
        era5_ind = np.where(dates_ord==date_init_ord)[0]
        if len(era5_ind)<1:
            print('Warning! No match found for idt='+str(idt)+', iyr='+str(iyr)+'.\n')
            continue
        idtl = era5_ind[0]
        idtu = era5_ind[0] + 29
        if idtu>ntimes:
            print('Warning! Aggregation period outside the data range for idt='+str(idt)+', iyr='+str(iyr)+'.\n')
            continue
        z500_acc1wk[idt,iyr,:,:] = np.average(z500_1deg[idtl:idtu,:,:],axis=0,weights=wgt6h)


# Load ERA-5 reanalyses for tcw

data_path = '/Projects/ClimateAnalysis/OBS/ERA5/'

infile = data_path+'TCW.1981.nc'
nc = Dataset(infile)
lon = nc.variables['lon'][:]
lat = nc.variables['lat'][:]
nc.close()

lon = np.where(lon>180,lon-360,lon)

idx_lon = np.logical_and(np.greater_equal(lon,-144.5),np.less_equal(lon,-106.5))
idx_lat = np.logical_and(np.greater_equal(lat,22.5),np.less_equal(lat,52.5))

nx = sum(idx_lon)
ny = sum(idx_lat)
ntimes = 4*(737059-723181+1)

dates_ord = np.zeros(ntimes,dtype=np.float32)
tcw = np.zeros((ntimes,ny,nx),dtype=np.float32)

idtb = 0

for iyr in range(38):
    infile = data_path+'TCW.'+str(1981+iyr)+'.nc'
    print(infile)
    nc = Dataset(infile)
    ntyr = len(nc.dimensions['time'])
    idte = idtb + ntyr
    dates_ord[(idtb//6):(idte//6)] = 657072 + nc.variables['time'][::6]/24.
    tcw[(idtb//6):(idte//6),:,:] = nc.variables['TCW'][::6,idx_lat,idx_lon]/9.806
    nc.close()
    idtb = idte


# Upscale to 1-deg grid

nxu = (nx-1)//4
nyu = (ny-1)//4

tcw_1deg = np.zeros((ntimes,nyu,nxu), dtype=np.float32)

for ixd in range(-2,3):
    wx = 0.125*min(3-abs(ixd),2)
    for jyd in range(-2,3):
        wy = 0.125*min(3-abs(jyd),2)
        w = wx*wy
        tcw_1deg += tcw[:,(2+jyd):(ny-2+jyd):4,(2+ixd):(nx-2+ixd):4]*w

lon_1deg = lon[idx_lon][2:nx-2:4]
lat_1deg = lat[idx_lat][2:ny-2:4]


# Accumulate to 7-day averages

f1 = np.load("/Users/mscheuerer/Desktop/CalifAPCP/data/mod_precip_cal.npz")
mod_dates_ord = f1['dates_ord']
f1.close()

ndts, nyrs, nlts = mod_dates_ord.shape

era5_dates_ord = mod_dates_ord[:,:,0]-1.
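# mod_dates_ord[:,:,0] is the valid date of forecast day 1, so subtracting one day gives
# the reforecast initialization date; the ERA5 predictor weeks are anchored to these dates.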
wgt6h = np.r_[0.5,np.ones(27,dtype=np.float32),0.5]

tcw_acc1wk = np.zeros((ndts,nyrs,nyu,nxu), dtype=np.float32)

for idt in range(ndts):
    for iyr in range(nyrs):
        date_init_ord = era5_dates_ord[idt,iyr]   # Initialization date of ECMWF reforecast
        era5_ind = np.where(dates_ord==date_init_ord)[0]
        if len(era5_ind)<1:
            print('Warning! No match found for idt='+str(idt)+', iyr='+str(iyr)+'.\n')
            continue
        idtl = era5_ind[0]
        idtu = era5_ind[0] + 29
        if idtu>ntimes:
            print('Warning! Aggregation period outside the data range for idt='+str(idt)+', iyr='+str(iyr)+'.\n')
            continue
        tcw_acc1wk[idt,iyr,:,:] = np.average(tcw_1deg[idtl:idtu,:,:],axis=0,weights=wgt6h)


### Save out to file
outfilename = "/Users/mscheuerer/Desktop/CalifAPCP/data/z500_tcw_predictors_era5"
np.savez(outfilename, dates_ord=era5_dates_ord,
    longitude=lon_1deg,
    latitude=lat_1deg.data,
    z500_1wk=z500_acc1wk,
    tcw_1wk=tcw_acc1wk)


--------------------------------------------------------------------------------
/CNN-CalculateVerificationMetrics.py:
--------------------------------------------------------------------------------

import numpy as np
import numpy.ma as ma
import scipy.stats as stats
import math
import os, sys
import matplotlib.pyplot as plt
import datetime
import time
import pdb
import pickle

from scipy.stats import gamma
from scipy.interpolate import interp1d

plt.ion()


leadDay = 20       # d works out to being a d+0.5 day forecast
accumulation = 7   # Precipitation accumulation period

clead = 'week'+str((leadDay+8)//7)


f1 = np.load("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz")
#list(f1)
obs_lat = f1['obs_lat']
obs_lon = f1['obs_lon']
obs_dates_ord = f1['obs_dates_ord']
thr_doy = f1['thr_doy']
qtev_doy = f1['qtev_doy']
apcp_obs_cat = f1['apcp_obs_cat']
obs_precip_week = f1['apcp_obs']
f1.close()

ndts, nxy, ncat = apcp_obs_cat.shape


#f2 = np.load("/home/michael/Desktop/CalifAPCP/data/z500_tcw_predictors_era5.npz")
#mod_dates = f2['dates_ord']
#f2.close()

f2 = np.load("/home/michael/Desktop/CalifAPCP/data/mod_precip_cal.npz")
mod_dates = f2['dates_ord'][:,:,leadDay]
f2.close()

ndts, nyrs = mod_dates.shape


doy_dts = np.zeros(ndts,dtype=np.int32)
obs_precip_vdate = np.zeros((ndts,nyrs,nxy),dtype=np.float32)
for idt in range(ndts):
    for iyr in range(nyrs):
        fnd = np.nonzero(obs_dates_ord==mod_dates[idt,iyr])[0][0]
        obs_precip_vdate[idt,iyr,:] = obs_precip_week[fnd,:]
    date_ord = int(mod_dates[idt,-1]-0.5)
    doy_dts[idt] = min(364,(datetime.date.fromordinal(date_ord)-datetime.date(datetime.date.fromordinal(date_ord).year,1,1)).days)


### Calculate skill scores

exc33p = np.zeros(obs_precip_vdate.shape)
brier33pClm = np.zeros(obs_precip_vdate.shape)
pot33pCNN = np.zeros(obs_precip_vdate.shape)
brier33pCNN = np.zeros(obs_precip_vdate.shape)

exc67p = np.zeros(obs_precip_vdate.shape)
brier67pClm = np.zeros(obs_precip_vdate.shape)
pot67pCNN = np.zeros(obs_precip_vdate.shape)
brier67pCNN = np.zeros(obs_precip_vdate.shape)

exc85p = np.zeros(obs_precip_vdate.shape)
brier85pClm = np.zeros(obs_precip_vdate.shape)
pot85pCNN = np.zeros(obs_precip_vdate.shape)
brier85pCNN = np.zeros(obs_precip_vdate.shape)

rpsClm = np.zeros(obs_precip_vdate.shape)
rpsCNN = np.zeros(obs_precip_vdate.shape)

crpsClm = np.zeros(obs_precip_vdate.shape)
crpsCNN = np.zeros(obs_precip_vdate.shape)


wwCl = 15

x = (np.arange(0,101)/5)**2   # evaluation points for numerical approximation of the CRPS
dx = np.diff(x)


imod = 0

for iyr in range(nyrs):
    print(iyr)
    #f5 = np.load("/home/michael/Desktop/CalifAPCP/forecasts/CNN/probfcst_cnn-m"+str(imod)+"-drpt-f48_yr"+str(iyr)+".npz")
    #prob_fcst_cat = f5['prob_fcst_cat_era5']
    #f5.close()
    f5 = np.load("/home/michael/Desktop/CalifAPCP/forecasts/CNN/probfcst_cnn-m"+str(imod)+"-drpt-f48_"+clead+"_yr"+str(iyr)+".npz")
    prob_fcst_cat = f5['prob_fcst_cat']
    f5.close()
    prob_fcst_chf = -np.log(1.-np.cumsum(prob_fcst_cat,axis=2)[:,:,:(ncat-1)])
    prob_over_thr = np.zeros((ndts,nxy,qtev_doy.shape[2]),dtype=np.float32)
    for idt in range(ndts):
        windowClm = np.argsort(np.abs(idt-np.arange(ndts)))[:wwCl]
        ### Calculate exceedance CNN probabilities from interpolated cumulative hazard function
        for ixy in range(nxy):
            itp_fct = interp1d(thr_doy[doy_dts[idt],ixy,:], prob_fcst_chf[idt,ixy,:], kind='linear',fill_value='extrapolate')
            prob_over_thr = np.exp(-itp_fct(qtev_doy[doy_dts[idt],ixy,:]))
            pot33pCNN[idt,iyr,ixy] = prob_over_thr[0]
            pot67pCNN[idt,iyr,ixy] = prob_over_thr[1]
            pot85pCNN[idt,iyr,ixy] = prob_over_thr[2]
            ## Calculate CRPS for CNN
            bs = (1.-np.exp(-itp_fct(x))-1.*(obs_precip_vdate[idt,iyr,ixy]<=x))**2
            crpsCNN[idt,iyr,ixy] = 0.5*np.sum((bs[1:]+bs[:len(dx)])*dx)
        ### Get current year and julian day to use to select climatological percentiles
        currentYear = datetime.date.fromordinal(int(mod_dates[idt,iyr])).year
        currentDay = (datetime.date.fromordinal(int(mod_dates[idt,iyr]))-datetime.date(currentYear,1,1)).days
        obsClm = obs_precip_vdate[windowClm,:,:].reshape((wwCl*nyrs,nxy))
        crps_exc = 1.*np.less_equal.outer(obs_precip_vdate[idt,iyr,:],x)
        ## Calculate CRPS for Clm
        clm_cdf = np.mean(obsClm[:,:,None]<=x[None,None,:],axis=0)
        bs = (clm_cdf-crps_exc)**2
        crpsClm[idt,iyr,:] = 0.5*np.sum((bs[:,1:]+bs[:,:len(dx)])*dx[None,:],axis=1)
        ## Calculate Brier scores for different thresholds
        p33 = qtev_doy[doy_dts[idt],:,0]
        exc33p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p33)
        brier33pClm[idt,iyr,:] = (exc33p[idt,iyr,:]-np.mean(obsClm>p33[None,:],axis=0))**2
        brier33pCNN[idt,iyr,:] = (exc33p[idt,iyr,:]-pot33pCNN[idt,iyr,:])**2
        p67 = qtev_doy[doy_dts[idt],:,1]
        exc67p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p67)
        brier67pClm[idt,iyr,:] = (exc67p[idt,iyr,:]-np.mean(obsClm>p67[None,:],axis=0))**2
        brier67pCNN[idt,iyr,:] = (exc67p[idt,iyr,:]-pot67pCNN[idt,iyr,:])**2
        p85 = qtev_doy[doy_dts[idt],:,2]
        exc85p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p85)
        brier85pClm[idt,iyr,:] = (exc85p[idt,iyr,:]-np.mean(obsClm>p85[None,:],axis=0))**2
        brier85pCNN[idt,iyr,:] = (exc85p[idt,iyr,:]-pot85pCNN[idt,iyr,:])**2

outfilename = "/home/michael/Desktop/CalifAPCP/results/scores-cnn_"+clead
np.savez(outfilename, crpsClm=crpsClm, crpsCNN=crpsCNN, \
    exc33p=exc33p, pot33pCNN=pot33pCNN, Bs33pClm=brier33pClm, Bs33pCNN=brier33pCNN, \
    exc67p=exc67p, pot67pCNN=pot67pCNN, Bs67pClm=brier67pClm, Bs67pCNN=brier67pCNN, \
    exc85p=exc85p, pot85pCNN=pot85pCNN, Bs85pClm=brier85pClm, Bs85pCNN=brier85pCNN)


# calculate ranked probability score
rpsClm = brier33pClm + brier67pClm + brier85pClm
rpsCNN = brier33pCNN + brier67pCNN + brier85pCNN

# rpssAvgCNN
round(1.-np.sum(rpsCNN)/np.sum(rpsClm),4)


--------------------------------------------------------------------------------
/CNN-FindTuningParameters.py:
--------------------------------------------------------------------------------

import numpy as np
import scipy as sp
import math
import os, sys
#import matplotlib.pyplot as plt
import datetime
import time
import keras
import keras.backend as K

from netCDF4 import Dataset
from numpy import ma
from numpy.linalg import solve
from scipy import stats
from scipy.interpolate import interp1d

from keras import models
from keras import layers
from keras import regularizers

from keras.layers import Input, Dense, Dot, Add, Activation, Conv2D, MaxPooling2D, Flatten, Reshape, Dropout
from keras.models import Model
from keras.optimizers import Adam

#plt.ion()


## Load categorical analysis data

f1 = np.load("/Users/mscheuerer/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz")
lat = f1['obs_lat']
lon = f1['obs_lon']
obs_dates_ord = f1['obs_dates_ord']
pop_doy = f1['pop_doy']
thr_doy = f1['thr_doy']
qtev_doy = f1['qtev_doy']
apcp_obs_cat = f1['apcp_obs_cat']
apcp_obs = f1['apcp_obs']
f1.close()

ndts, nxy, ncat = apcp_obs_cat.shape


## Load ERA5 z500 and tcw fields, subset to 22 x 18 image, same for the ensemble forecast fields

ixl = 10
ixu = -6
jyl = 6
jyu = -6

f2 = np.load("/Users/mscheuerer/Desktop/CalifAPCP/data/z500_tcw_predictors_era5.npz")
era5_dates_ord = f2['dates_ord']
era5_lon = f2['longitude'][ixl:ixu]
era5_lat = f2['latitude'][jyl:jyu]
z500 = f2['z500_1wk'][:,:,jyl:jyu,ixl:ixu]
tcw = f2['tcw_1wk'][:,:,jyl:jyu,ixl:ixu]
f2.close()

ndts, nyrs, ny, nx = z500.shape


## Calculate doy for each analysis date

doy_dts = np.zeros(ndts,dtype=np.int32)
apcp_obs_ind = np.zeros((ndts,nyrs),dtype=np.int32)
for idt in range(ndts):
    for iyr in range(nyrs):
        apcp_obs_ind[idt,iyr] = np.where(obs_dates_ord==era5_dates_ord[idt,iyr])[0][0]
    date_ord = int(era5_dates_ord[idt,0]-0.5)
    doy_dts[idt] = min(364,(datetime.date.fromordinal(date_ord)-datetime.date(datetime.date.fromordinal(date_ord).year,1,1)).days)


## Normalize tcw to 10th/90th climatological percentiles at each grid point

tcw_q10 = np.percentile(tcw,10,axis=1)
tcw_q90 = np.percentile(tcw,90,axis=1)
tcw_q10_sm = np.zeros(tcw_q10.shape, dtype=np.float32)
tcw_q90_sm = np.zeros(tcw_q90.shape, dtype=np.float32)

X = np.ones((ndts,3), dtype=np.float32)   # Fit harmonic function to annual cycle of tcw climatology
X[:,1] = np.sin(2.*np.pi*era5_dates_ord[:,0]/365.25)
X[:,2] = np.cos(2.*np.pi*era5_dates_ord[:,0]/365.25)
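# The loop below fits, separately at each grid point, the first-order harmonic model
# q(t) ~ a + b*sin(2*pi*t/365.25) + c*cos(2*pi*t/365.25) to the annual cycle of the
# climatological percentiles by least squares: solve(X^T X, X^T y) solves the normal
# equations, and matmul(X, coef) evaluates the smoothed cycle.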
for ix in range(nx):
    for jy in range(ny):
        coef_q10 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_q10[:,jy,ix]))
        tcw_q10_sm[:,jy,ix] = np.matmul(X,coef_q10)
        coef_q90 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_q90[:,jy,ix]))
        tcw_q90_sm[:,jy,ix] = np.matmul(X,coef_q90)

tcw_ano = -1.+2.*(tcw-tcw_q10_sm[:,None,:,:])/(tcw_q90_sm-tcw_q10_sm)[:,None,:,:]


## Normalize z500 to 1st/99th climatological percentiles across all grid points

z500_q01 = np.percentile(z500,1,axis=(1,2,3))
z500_q99 = np.percentile(z500,99,axis=(1,2,3))

coef_q01 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_q01))
z500_q01_sm = np.matmul(X,coef_q01)
coef_q99 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_q99))
z500_q99_sm = np.matmul(X,coef_q99)

z500_ano = -1.+2.*(z500-z500_q01_sm[:,None,None,None])/(z500_q99_sm-z500_q01_sm)[:,None,None,None]


# Define basis functions

r_basis = 7.
lon_ctr = np.outer(np.arange(-124,-115,3.5),np.ones(3)).reshape(9)[[2,4,5,6,7]]
lat_ctr = np.outer(np.ones(3),np.arange(33,42,3.5)).reshape(9)[[2,4,5,6,7]]

dst_lon = np.abs(np.subtract.outer(lon,lon_ctr))
dst_lat = np.abs(np.subtract.outer(lat,lat_ctr))
dst = np.sqrt(dst_lon**2+dst_lat**2)
basis = np.where(dst>r_basis,0.,(1.-(dst/r_basis)**3)**3)
basis = basis/np.sum(basis,axis=1)[:,None]
nbs = basis.shape[1]
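# The five basis functions are compactly supported bumps (1-(d/r)^3)^3 that vanish
# beyond r_basis = 7 degrees and are renormalized so the weights at each analysis grid
# point sum to one; the CNN below predicts ncat coefficients per basis function, and the
# Dot layer blends them into smoothly varying class probabilities across the domain.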

## Define functions for building a CNN

#def build_cat_model(n_xy, n_bins, n_basis, hidden_nodes, par_reg):
#    inp_imgs = Input(shape=(18,22,2,))
#    inp_basis = Input(shape=(n_xy,n_basis,))
#    inp_cl = Input(shape=(n_xy,n_bins,))
#    c = Conv2D(4, (3,3), activation='elu')(inp_imgs)
#    c = MaxPooling2D((2,2))(c)
#    c = Conv2D(8, (3,3), activation='elu')(c)
#    c = MaxPooling2D((2,2))(c)
#    x = Flatten()(c)
#    #x = Concatenate()([c,inp_aux])
#    for h in hidden_nodes:
#        x = Dense(h, activation='elu', kernel_regularizer=regularizers.l1(par_reg))(x)
#    x = Dense(n_bins*n_basis, activation='elu', kernel_regularizer=regularizers.l1(par_reg))(x)
#    x = Reshape((n_bins,n_basis))(x)
#    z = Dot(axes=2)([inp_basis, x])
#    z = Add()([z, inp_cl])
#    out = Activation('softmax')(z)
#    return Model(inputs=[inp_imgs, inp_basis, inp_cl], outputs=out)


def build_cat_model(n_xy, n_bins, n_basis, hidden_nodes, dropout_rate):
    inp_imgs = Input(shape=(18,22,2,))
    inp_basis = Input(shape=(n_xy,n_basis,))
    inp_cl = Input(shape=(n_xy,n_bins,))
    c = Conv2D(4, (3,3), activation='elu')(inp_imgs)
    c = MaxPooling2D((2,2))(c)
    c = Conv2D(8, (3,3), activation='elu')(c)
    c = MaxPooling2D((2,2))(c)
    x = Flatten()(c)
    for h in hidden_nodes:
        x = Dropout(dropout_rate)(x)
        x = Dense(h, activation='elu')(x)
    x = Dense(n_bins*n_basis, activation='elu')(x)
    x = Reshape((n_bins,n_basis))(x)
    z = Dot(axes=2)([inp_basis, x])
    z = Add()([z, inp_cl])
    out = Activation('softmax')(z)
    return Model(inputs=[inp_imgs, inp_basis, inp_cl], outputs=out)


def modified_categorical_crossentropy(y_mat, prob_fcst):
    prob_obs_cat = K.sum(y_mat*prob_fcst,axis=2)
    return -K.mean(K.log(prob_obs_cat))


nyrs = 20

#reg = 10.**np.arange(-6,-2)
reg = np.arange(0.1,0.6,0.1)
nreg = len(reg)

imod = 0

mod = [[10],[20],[10,10]]


x = (np.arange(0,101)/5)**2   # evaluation points for numerical calculation of the CRPS
dx = np.diff(x)

opt_reg_param = np.zeros(nyrs, dtype=np.float32)
opt_valid_scores = np.zeros((nyrs,5), dtype=np.float32)
opt_valid_crps = np.zeros((nyrs,5), dtype=np.float32)


for iyr in range(nyrs):
    print('year: ',iyr)
    # Calculate image predictors and basis functions
    apcp_obs_ind_cv = np.delete(apcp_obs_ind,iyr,axis=1)
    z500_pred_cv = np.delete(z500_ano,iyr,axis=1).reshape((ndts*(nyrs-1),ny,nx,1))
    tcw_pred_cv = np.delete(tcw_ano,iyr,axis=1).reshape((ndts*(nyrs-1),ny,nx,1))
    pred_imgs_cv = np.concatenate((z500_pred_cv,tcw_pred_cv),axis=3)
    basis_cv = np.repeat(basis[np.newaxis,:,:],ndts*(nyrs-1),axis=0)
    # Calculate climatological log probabilities for each class
    apcp_pop_cl = np.repeat(pop_doy[doy_dts,np.newaxis,:],nyrs-1,axis=1).reshape((ndts*(nyrs-1),nxy,1))
    logp_cl_cv = np.concatenate((np.log(1.-apcp_pop_cl),np.repeat(np.log(apcp_pop_cl),ncat-1,axis=2)-np.log(ncat-1)),axis=2)
    # perform 5-fold cross validation to find optimal regularization
    date_order = np.arange(ndts*(nyrs-1)).reshape(ndts,nyrs-1).T.flatten()
    cv_ind = date_order[np.arange(ndts*(nyrs-1))%232<231]   # remove the date between the 5 cross-validated blocks
    valid_score = np.zeros((nreg,5), dtype=np.float32)
    valid_crps = np.zeros((nreg,5), dtype=np.float32)
    for cvi in range(5):
        train_ind = cv_ind[np.arange(len(cv_ind))//(len(cv_ind)//5)!=cvi]
        valid_ind = cv_ind[np.arange(len(cv_ind))//(len(cv_ind)//5)==cvi]
        pred_imgs_train = pred_imgs_cv[train_ind,:,:,:]
        basis_train = basis_cv[train_ind,:,:]
        logp_cl_train = logp_cl_cv[train_ind,:,:]
        cat_targets_train = apcp_obs_cat[apcp_obs_ind_cv.flatten()[train_ind],:,:].astype(float)
        pred_imgs_valid = pred_imgs_cv[valid_ind,:,:]
        basis_valid = basis_cv[valid_ind,:,:]
        logp_cl_valid = logp_cl_cv[valid_ind,:,:]
        cat_targets_valid = apcp_obs_cat[apcp_obs_ind_cv.flatten()[valid_ind],:,:].astype(float)
        doy_valid = np.repeat(doy_dts[:,np.newaxis],nyrs-1,axis=1).flatten()[valid_ind]
        for ireg in range(nreg):
            # Define and fit CNN model (using batch gradient descent)
            keras.backend.clear_session()
            model = build_cat_model(nxy, ncat, nbs, mod[imod], reg[ireg])
            model.compile(optimizer=Adam(0.01), loss=modified_categorical_crossentropy)
            model.fit([pred_imgs_train,basis_train,logp_cl_train], cat_targets_train, epochs=150, batch_size=len(train_ind), verbose=0)
            valid_score[ireg,cvi] = model.evaluate([pred_imgs_valid,basis_valid,logp_cl_valid], cat_targets_valid, batch_size=len(valid_ind), verbose=0)
            # Calculate CRPS for each cross-validation fold
            prob_fcst_cat = model.predict([pred_imgs_valid,basis_valid,logp_cl_valid])
            prob_fcst_chf = -np.log(np.maximum(1.-np.cumsum(prob_fcst_cat,axis=2)[:,:,:(ncat-1)],1.e-10))
            crps_fold = np.zeros((len(valid_ind),nxy),dtype=np.float32)
            for ivdt in range(len(valid_ind)):
                for ixy in range(nxy):
                    itp_fct = interp1d(thr_doy[doy_valid[ivdt],ixy,:], prob_fcst_chf[ivdt,ixy,:], kind='linear',fill_value='extrapolate')
                    bs = (1.-np.exp(-itp_fct(x))-1.*(apcp_obs[apcp_obs_ind_cv.flatten()[valid_ind[ivdt]],ixy]<=x))**2
                    crps_fold[ivdt,ixy] = 0.5*np.sum((bs[1:]+bs[:len(dx)])*dx)
            valid_crps[ireg,cvi] = np.mean(crps_fold)
    opt_reg_ind = np.argmin(np.mean(valid_score,axis=1))
    opt_reg_param[iyr] = reg[opt_reg_ind]
    opt_valid_scores[iyr,:] = valid_score[opt_reg_ind,:]
    opt_valid_crps[iyr,:] = valid_crps[opt_reg_ind,:]
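# Architecture recap: two Conv2D/MaxPooling2D stages extract features from the 18x22x2
# (z500, tcw) anomaly images, the dense head outputs ncat x nbs coefficients, Dot blends
# them with the spatial basis functions, and Add + softmax applies the climatological
# log-probabilities, as in the ANN scripts.  Note that with the dropout variant of
# build_cat_model, reg holds candidate dropout rates (0.1-0.5), so opt_reg_param stores
# a dropout rate rather than an l1 weight.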
245 | ### Save out to file 246 | outfilename = "/Users/mscheuerer/Desktop/CalifAPCP/tuning/cnn-m"+str(imod)+"-drpt-f48" 247 | np.savez(outfilename, opt_reg_param=opt_reg_param, opt_valid_scores=opt_valid_scores, opt_valid_crps=opt_valid_crps) 248 | 249 | 250 | 251 | 252 | 253 | -------------------------------------------------------------------------------- /CNN-FitConvolutionalNetworkModel.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import scipy as sp 4 | import math 5 | import os, sys 6 | #import matplotlib.pyplot as plt 7 | import datetime 8 | import time 9 | import keras 10 | import keras.backend as K 11 | 12 | from netCDF4 import Dataset 13 | from numpy import ma 14 | from numpy.linalg import solve 15 | from scipy import stats 16 | 17 | from keras import models 18 | from keras import layers 19 | from keras import regularizers 20 | 21 | from keras.layers import Input, Dense, Dot, Add, Activation, Conv2D, MaxPooling2D, Flatten, Reshape, Dropout 22 | from keras.models import Model 23 | from keras.optimizers import Adam 24 | 25 | #plt.ion() 26 | 27 | 28 | ## Load categorical analysis data 29 | 30 | f1 = np.load("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz") 31 | #list(f1) 32 | lat = f1['obs_lat'] 33 | lon = f1['obs_lon'] 34 | obs_dates_ord = f1['obs_dates_ord'] 35 | pop_doy = f1['pop_doy'] 36 | apcp_obs_cat = f1['apcp_obs_cat'] 37 | f1.close() 38 | 39 | ndts, nxy, ncat = apcp_obs_cat.shape 40 | 41 | 42 | 43 | ## Load ERA5 z500 and tcw fields, subset to 22 x 18 image, same for the ensemble forecast fields 44 | 45 | ixl = 10 46 | ixu = -6 47 | jyl = 6 48 | jyu = -6 49 | 50 | f2 = np.load("/home/michael/Desktop/CalifAPCP/data/z500_tcw_predictors_era5.npz") 51 | era5_dates_ord = f2['dates_ord'] 52 | era5_lon = f2['longitude'][ixl:ixu] 53 | era5_lat = f2['latitude'][jyl:jyu] 54 | z500 = f2['z500_1wk'][:,:,jyl:jyu,ixl:ixu] 55 | tcw = f2['tcw_1wk'][:,:,jyl:jyu,ixl:ixu] 56 | f2.close() 57 | 58 | ndts, nyrs, ny, nx = z500.shape 59 | 60 | 61 | z500_fcst = np.zeros((3,ndts,nyrs,11,ny,nx),dtype=np.float32) 62 | tcw_fcst = np.zeros((3,ndts,nyrs,11,ny,nx),dtype=np.float32) 63 | 64 | f3 = np.load("/home/michael/Desktop/CalifAPCP/data/z500_predictor_cnn.npz") 65 | mod_dates_ord = f3['mod_dates_ord'][:,:,6:21:7] 66 | 67 | f4 = np.load("/home/michael/Desktop/CalifAPCP/data/tcw_predictor_cnn.npz") 68 | 69 | for ilt in range(3): 70 | clead = 'week'+str(ilt+2) 71 | z500_fcst[ilt,:,:,:,:,:] = f3['z500_'+clead][:,:,:,jyl:jyu,ixl:ixu] # subset to 22 x 18 image 72 | tcw_fcst[ilt,:,:,:,:,:] = f4['tcw_'+clead][:,:,:,jyl:jyu,ixl:ixu] 73 | 74 | f3.close() 75 | f4.close() 76 | 77 | 78 | 79 | ## Calculate doy for each analysis date and for each forecast valid date 80 | 81 | doy_dts = np.zeros(ndts,dtype=np.int32) 82 | apcp_obs_ind = np.zeros((ndts,nyrs),dtype=np.int32) 83 | for idt in range(ndts): 84 | for iyr in range(nyrs): 85 | apcp_obs_ind[idt,iyr] = np.where(obs_dates_ord==era5_dates_ord[idt,iyr])[0][0] 86 | date_ord = int(era5_dates_ord[idt,0]-0.5) 87 | doy_dts[idt] = min(364,(datetime.date.fromordinal(date_ord)-datetime.date(datetime.date.fromordinal(date_ord).year,1,1)).days) 88 | 89 | doy_fcst = np.zeros((3,ndts),dtype=np.int32) 90 | for idt in range(ndts): 91 | for ilt in range(3): 92 | date_ord = int(int(mod_dates_ord[idt,0,ilt])-0.5) 93 | doy_fcst[ilt,idt] = min(364,(datetime.date.fromordinal(date_ord)-datetime.date(datetime.date.fromordinal(date_ord).year,1,1)).days) 94 | 95 | 96 | 97 | ## Normalize tcw to 
10th/90th climatological percentiles at each grid point 98 | 99 | tcw_q10 = np.percentile(tcw,10,axis=1) 100 | tcw_q90 = np.percentile(tcw,90,axis=1) 101 | tcw_q10_sm = np.zeros(tcw_q10.shape, dtype=np.float32) 102 | tcw_q90_sm = np.zeros(tcw_q90.shape, dtype=np.float32) 103 | 104 | tcw_fcst_q10 = np.percentile(tcw_fcst,10,axis=(2,3)) 105 | tcw_fcst_q90 = np.percentile(tcw_fcst,90,axis=(2,3)) 106 | tcw_fcst_q10_sm = np.zeros(tcw_fcst_q10.shape, dtype=np.float32) 107 | tcw_fcst_q90_sm = np.zeros(tcw_fcst_q90.shape, dtype=np.float32) 108 | 109 | X = np.ones((ndts,3), dtype=np.float32) # Fit harmonic function to annual cycle of tcw climatology 110 | X[:,1] = np.sin(2.*np.pi*era5_dates_ord[:,0]/365.25) 111 | X[:,2] = np.cos(2.*np.pi*era5_dates_ord[:,0]/365.25) 112 | 113 | for ix in range(nx): 114 | for jy in range(ny): 115 | coef_q10 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_q10[:,jy,ix])) 116 | tcw_q10_sm[:,jy,ix] = np.matmul(X,coef_q10) 117 | coef_q90 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_q90[:,jy,ix])) 118 | tcw_q90_sm[:,jy,ix] = np.matmul(X,coef_q90) 119 | for ilt in range(3): 120 | coef_q10 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_fcst_q10[ilt,:,jy,ix])) 121 | tcw_fcst_q10_sm[ilt,:,jy,ix] = np.matmul(X,coef_q10) 122 | coef_q90 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_fcst_q90[ilt,:,jy,ix])) 123 | tcw_fcst_q90_sm[ilt,:,jy,ix] = np.matmul(X,coef_q90) 124 | 125 | tcw_ano = -1.+2.*(tcw-tcw_q10_sm[:,None,:,:])/(tcw_q90_sm-tcw_q10_sm)[:,None,:,:] 126 | tcw_fcst_ano = -1.+2.*(tcw_fcst-tcw_fcst_q10_sm[:,:,None,None,:,:])/(tcw_fcst_q90_sm-tcw_fcst_q10_sm)[:,:,None,None,:,:] 127 | 128 | 129 | 130 | ## Normalize z500 to 1st/99th climatological percentiles across all grid points 131 | 132 | z500_q01 = np.percentile(z500,1,axis=(1,2,3)) 133 | z500_q99 = np.percentile(z500,99,axis=(1,2,3)) 134 | z500_fcst_q01 = np.percentile(z500_fcst,1,axis=(2,3,4,5)) 135 | z500_fcst_q99 = np.percentile(z500_fcst,99,axis=(2,3,4,5)) 136 | 137 | coef_q01 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_q01)) 138 | z500_q01_sm = np.matmul(X,coef_q01) 139 | coef_q99 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_q99)) 140 | z500_q99_sm = np.matmul(X,coef_q99) 141 | 142 | z500_fcst_q01_sm = np.zeros(z500_fcst_q01.shape, dtype=np.float32) 143 | z500_fcst_q99_sm = np.zeros(z500_fcst_q99.shape, dtype=np.float32) 144 | 145 | for ilt in range(3): 146 | coef_q01 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_fcst_q01[ilt,:])) 147 | z500_fcst_q01_sm[ilt,:] = np.matmul(X,coef_q01) 148 | coef_q99 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_fcst_q99[ilt,:])) 149 | z500_fcst_q99_sm[ilt,:] = np.matmul(X,coef_q99) 150 | 151 | z500_ano = -1.+2.*(z500-z500_q01_sm[:,None,None,None])/(z500_q99_sm-z500_q01_sm)[:,None,None,None] 152 | z500_fcst_ano = -1.+2.*(z500_fcst-z500_fcst_q01_sm[:,:,None,None,None,None])/(z500_fcst_q99_sm-z500_fcst_q01_sm)[:,:,None,None,None,None] 153 | 154 | 155 | # Define basis functions 156 | 157 | r_basis = 7. 
158 | lon_ctr = np.outer(np.arange(-124,-115,3.5),np.ones(3)).reshape(9)[[2,4,5,6,7]] 159 | lat_ctr = np.outer(np.ones(3),np.arange(33,42,3.5)).reshape(9)[[2,4,5,6,7]] 160 | 161 | dst_lon = np.abs(np.subtract.outer(lon,lon_ctr)) 162 | dst_lat = np.abs(np.subtract.outer(lat,lat_ctr)) 163 | dst = np.sqrt(dst_lon**2+dst_lat**2) 164 | basis = np.where(dst>r_basis,0.,(1.-(dst/r_basis)**3)**3) 165 | basis = basis/np.sum(basis,axis=1)[:,None] 166 | nbs = basis.shape[1] 167 | 168 | 169 | ## Define functions for building a CNN 170 | 171 | def build_cat_model(n_xy, n_bins, n_basis, hidden_nodes, dropout_rate): 172 | inp_imgs = Input(shape=(18,22,2,)) 173 | #inp_imgs = Input(shape=(18,22,1,)) 174 | inp_basis = Input(shape=(n_xy,n_basis,)) 175 | inp_cl = Input(shape=(n_xy,n_bins,)) 176 | c = Conv2D(4, (3,3), activation='elu')(inp_imgs) 177 | c = MaxPooling2D((2,2))(c) 178 | c = Conv2D(8, (3,3), activation='elu')(c) 179 | c = MaxPooling2D((2,2))(c) 180 | x = Flatten()(c) 181 | for h in hidden_nodes: 182 | x = Dropout(dropout_rate)(x) 183 | x = Dense(h, activation='elu')(x) 184 | x = Dense(n_bins*n_basis, activation='elu')(x) 185 | x = Reshape((n_bins,n_basis))(x) 186 | z = Dot(axes=2)([inp_basis, x]) # Tensor product with basis functions 187 | z = Add()([z, inp_cl]) # Add (log) probability anomalies to log climatological probabilities 188 | out = Activation('softmax')(z) 189 | return Model(inputs=[inp_imgs, inp_basis, inp_cl], outputs=out) 190 | 191 | 192 | def modified_categorical_crossentropy(y_mat, prob_fcst): 193 | prob_obs_cat = K.sum(y_mat*prob_fcst,axis=2) 194 | return -K.mean(K.log(prob_obs_cat)) 195 | 196 | 197 | 198 | imod = 0 199 | 200 | mod = [[10],[20],[10,10]] 201 | 202 | f5 = np.load("/home/michael/Desktop/CalifAPCP/tuning/cnn-m"+str(imod)+"-drpt-f48.npz") 203 | opt_reg_param = f5['opt_reg_param'] 204 | f5.close() 205 | 206 | 207 | for iyr in range(0,20): 208 | print(iyr) 209 | # Split data into training and verification data set 210 | apcp_obs_ind_train = np.delete(apcp_obs_ind,iyr,axis=1) 211 | apcp_obs_ind_verif = apcp_obs_ind[:,iyr] 212 | z500_pred_train = np.delete(z500_ano,iyr,axis=1).reshape((ndts*(nyrs-1),ny,nx,1)) 213 | z500_pred_verif = z500_ano[:,iyr,:,:,None] 214 | z500_pred_fcst_train = np.delete(z500_fcst_ano,iyr,axis=2).reshape((3,ndts*(nyrs-1),11,ny,nx,1)) 215 | z500_pred_fcst_verif = z500_fcst_ano[:,:,iyr,:,:,:,None] 216 | tcw_pred_train = np.delete(tcw_ano,iyr,axis=1).reshape((ndts*(nyrs-1),ny,nx,1)) 217 | tcw_pred_verif = tcw_ano[:,iyr,:,:,None] 218 | tcw_pred_fcst_train = np.delete(tcw_fcst_ano,iyr,axis=2).reshape((3,ndts*(nyrs-1),11,ny,nx,1)) 219 | tcw_pred_fcst_verif = tcw_fcst_ano[:,:,iyr,:,:,:,None] 220 | # Calculate climatological log probabilities for each class 221 | apcp_lgp0_cl_train = np.repeat(np.log(1.-pop_doy[doy_dts,np.newaxis,:]),nyrs-1,axis=1).reshape((ndts*(nyrs-1),nxy,1)) 222 | apcp_lgp0_cl_verif = np.log(1.-pop_doy[doy_dts,:])[:,:,None] 223 | apcp_lgpop_cl_train = np.repeat(np.log(pop_doy[doy_dts,np.newaxis,:]),nyrs-1,axis=1).reshape((ndts*(nyrs-1),nxy,1)) 224 | apcp_lgpop_cl_verif = np.log(pop_doy[doy_dts,:])[:,:,None] 225 | apcp_lgp0_cl_fcst_train = np.zeros((3,ndts*(nyrs-1),nxy,1), dtype=np.float32) 226 | apcp_lgp0_cl_fcst_verif = np.zeros((3,ndts,nxy,1), dtype=np.float32) 227 | apcp_lgpop_cl_fcst_train = np.zeros((3,ndts*(nyrs-1),nxy,1), dtype=np.float32) 228 | apcp_lgpop_cl_fcst_verif = np.zeros((3,ndts,nxy,1), dtype=np.float32) 229 | for ilt in range(3): 230 | apcp_lgp0_cl_fcst_train[ilt,:,:,0] = 
np.repeat(np.log(1.-pop_doy[doy_fcst[ilt,:],np.newaxis,:]),nyrs-1,axis=1).reshape((ndts*(nyrs-1),nxy)) 231 | apcp_lgp0_cl_fcst_verif[ilt,:,:,0] = np.log(1.-pop_doy[doy_fcst[ilt,:],:]) 232 | apcp_lgpop_cl_fcst_train[ilt,:,:,0] = np.repeat(np.log(pop_doy[doy_fcst[ilt,:],np.newaxis,:]),nyrs-1,axis=1).reshape((ndts*(nyrs-1),nxy)) 233 | apcp_lgpop_cl_fcst_verif[ilt,:,:,0] = np.log(pop_doy[doy_fcst[ilt,:],:]) 234 | # Compose training data (large-scale predictors, auxiliary predictors, climatological probabilities, observed categories) 235 | train_pred_imgs = np.concatenate((z500_pred_train,tcw_pred_train),axis=3) 236 | #train_pred_imgs = tcw_pred_train 237 | train_basis = np.repeat(basis[np.newaxis,:,:],ndts*(nyrs-1),axis=0) 238 | train_logp_cl = np.concatenate((apcp_lgp0_cl_train,np.repeat(apcp_lgpop_cl_train,ncat-1,axis=2)-np.log(ncat-1)),axis=2) 239 | train_cat_targets = apcp_obs_cat[apcp_obs_ind_train.flatten(),:,:].astype(float) 240 | # Define and fit CNN model 241 | keras.backend.clear_session() 242 | model = build_cat_model(nxy, ncat, nbs, mod[imod], opt_reg_param[iyr]) 243 | model.compile(optimizer=Adam(0.01), loss=modified_categorical_crossentropy) 244 | model.fit([train_pred_imgs,train_basis,train_logp_cl], train_cat_targets, epochs=150, batch_size=ndts*(nyrs-1), verbose=1) 245 | # Calculate ERA-5 probability forecasts 246 | verif_pred_imgs = np.concatenate((z500_pred_verif,tcw_pred_verif),axis=3) 247 | #verif_pred_imgs = tcw_pred_verif 248 | verif_basis = np.repeat(basis[np.newaxis,:,:],ndts,axis=0) 249 | verif_logp_cl = np.concatenate((apcp_lgp0_cl_verif,np.repeat(apcp_lgpop_cl_verif,ncat-1,axis=2)-np.log(ncat-1)),axis=2) 250 | prob_fcst_cat_era5 = model.predict([verif_pred_imgs,verif_basis,verif_logp_cl]) 251 | # Calculate ensemble-based, mean probability forecasts 252 | logp_ano_ensmean_train = np.zeros((3,ndts*(nyrs-1),nxy,ncat), dtype=np.float32) 253 | logp_ano_ensmean_verif = np.zeros((3,ndts,nxy,ncat), dtype=np.float32) 254 | for ilt in range(3): 255 | train_logp_cl = np.concatenate((apcp_lgp0_cl_fcst_train[ilt,:,:,:],np.repeat(apcp_lgpop_cl_fcst_train[ilt,:,:,:],ncat-1,axis=2)-np.log(ncat-1)),axis=2) 256 | verif_logp_cl = np.concatenate((apcp_lgp0_cl_fcst_verif[ilt,:,:,:],np.repeat(apcp_lgpop_cl_fcst_verif[ilt,:,:,:],ncat-1,axis=2)-np.log(ncat-1)),axis=2) 257 | prob_fcst_cat_ens_train = np.zeros((11,ndts*(nyrs-1),nxy,ncat), dtype=np.float32) 258 | prob_fcst_cat_ens_verif = np.zeros((11,ndts,nxy,ncat), dtype=np.float32) 259 | for imem in range(11): 260 | train_pred_imgs = np.concatenate((z500_pred_fcst_train[ilt,:,imem,:,:,:],tcw_pred_fcst_train[ilt,:,imem,:,:,:]),axis=3) 261 | #train_pred_imgs = tcw_pred_fcst_train[ilt,:,imem,:,:,:] 262 | prob_fcst_cat_ens_train[imem,:,:,:] = model.predict([train_pred_imgs,train_basis,train_logp_cl]) 263 | verif_pred_imgs = np.concatenate((z500_pred_fcst_verif[ilt,:,imem,:,:,:],tcw_pred_fcst_verif[ilt,:,imem,:,:,:]),axis=3) 264 | #verif_pred_imgs = tcw_pred_fcst_verif[ilt,:,imem,:,:,:] 265 | prob_fcst_cat_ens_verif[imem,:,:,:] = model.predict([verif_pred_imgs,verif_basis,verif_logp_cl]) 266 | logp_ano_ensmean_train[ilt,:,:,:] = np.mean(np.log(prob_fcst_cat_ens_train),axis=0) - train_logp_cl # Reconstruct the log probability anomalies 267 | logp_ano_ensmean_verif[ilt,:,:,:] = np.mean(np.log(prob_fcst_cat_ens_verif),axis=0) - verif_logp_cl # for each ensemble member and calculate mean 268 | ### Save out to file 269 | outfilename = "/home/michael/Desktop/CalifAPCP/forecasts/CNN/probfcst_cnn-m"+str(imod)+"-drpt-f48_yr"+str(iyr) 270 | 
np.savez(outfilename, prob_fcst_cat_era5=prob_fcst_cat_era5, \ 271 | logp_ano_ensmean_train=logp_ano_ensmean_train, \ 272 | logp_ano_ensmean_verif=logp_ano_ensmean_verif, \ 273 | apcp_lgp0_cl_fcst_train=apcp_lgp0_cl_fcst_train, \ 274 | apcp_lgp0_cl_fcst_verif=apcp_lgp0_cl_fcst_verif, \ 275 | apcp_lgpop_cl_fcst_train=apcp_lgpop_cl_fcst_train, \ 276 | apcp_lgpop_cl_fcst_verif=apcp_lgpop_cl_fcst_verif) 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | -------------------------------------------------------------------------------- /CNN-GenerateProbabilityForecasts.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import scipy as sp 4 | import math 5 | import os, sys 6 | #import matplotlib.pyplot as plt 7 | import datetime 8 | import time 9 | 10 | from netCDF4 import Dataset 11 | from numpy import ma 12 | from scipy import stats 13 | 14 | from scipy.optimize import minimize_scalar 15 | 16 | 17 | #plt.ion() 18 | 19 | def adjustment_factor_target(par, y_mat, logp_ensmeanano, logp_cl): 20 | # average modified categorical crossentropy for relaxed perfect prog probabilities 21 | prob_fcst_cat_cmb = np.exp(par*logp_ensmeanano+logp_cl) 22 | prob_fcst = prob_fcst_cat_cmb / np.sum(prob_fcst_cat_cmb,axis=2)[:,:,None] 23 | prob_obs_cat = np.sum(y_mat*prob_fcst,axis=2) 24 | return -np.mean(np.log(prob_obs_cat)) 25 | 26 | 27 | f1 = np.load("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz") 28 | #list(f1) 29 | lat = f1['obs_lat'] 30 | lon = f1['obs_lon'] 31 | obs_dates_ord = f1['obs_dates_ord'] 32 | apcp_obs_cat = f1['apcp_obs_cat'] 33 | f1.close() 34 | 35 | ndts, nxy, ncat = apcp_obs_cat.shape 36 | 37 | 38 | f2 = np.load("/home/michael/Desktop/CalifAPCP/data/z500_predictor_cnn.npz") 39 | mod_dates_ord = f2['mod_dates_ord'][:,:,6:21:7] 40 | f2.close() 41 | 42 | ndts, nyrs, nlt = mod_dates_ord.shape 43 | 44 | apcp_obs_ind = np.zeros((ndts,nyrs,nlt),dtype=np.int32) 45 | for idt in range(ndts): 46 | for iyr in range(nyrs): 47 | for ilt in range(3): 48 | apcp_obs_ind[idt,iyr,ilt] = np.where(obs_dates_ord==mod_dates_ord[idt,iyr,ilt])[0][0] 49 | 50 | 51 | imod = 0 52 | 53 | for iyr in range(0,20): 54 | print(iyr) 55 | # Load smoothed ensemble forecast anomalies 56 | f3 = np.load("/home/michael/Desktop/CalifAPCP/forecasts/CNN/probfcst_cnn-m"+str(imod)+"-drpt-f48_yr"+str(iyr)+".npz") 57 | logp_ano_ensmean_train = f3['logp_ano_ensmean_train'] 58 | logp_ano_ensmean_verif = f3['logp_ano_ensmean_verif'] 59 | apcp_lgp0_cl_fcst_train = f3['apcp_lgp0_cl_fcst_train'] 60 | apcp_lgp0_cl_fcst_verif = f3['apcp_lgp0_cl_fcst_verif'] 61 | apcp_lgpop_cl_fcst_train = f3['apcp_lgpop_cl_fcst_train'] 62 | apcp_lgpop_cl_fcst_verif = f3['apcp_lgpop_cl_fcst_verif'] 63 | f3.close() 64 | for ilt in range(3): 65 | # Calculate index for training observations 66 | apcp_obs_ind_train = np.delete(apcp_obs_ind[:,:,ilt],iyr,axis=1) 67 | train_cat_targets = apcp_obs_cat[apcp_obs_ind_train.flatten(),:,:].astype(float) 68 | train_logp_cl = np.concatenate((apcp_lgp0_cl_fcst_train[ilt,:,:,:],np.repeat(apcp_lgpop_cl_fcst_train[ilt,:,:,:],ncat-1,axis=2)-np.log(ncat-1)),axis=2) 69 | verif_logp_cl = np.concatenate((apcp_lgp0_cl_fcst_verif[ilt,:,:,:],np.repeat(apcp_lgpop_cl_fcst_verif[ilt,:,:,:],ncat-1,axis=2)-np.log(ncat-1)),axis=2) 70 | train_logp_ensmeanano = logp_ano_ensmean_train[ilt,:,:,:] 71 | verif_logp_ensmeanano = logp_ano_ensmean_verif[ilt,:,:,:] 72 | a = minimize_scalar(adjustment_factor_target, 
args=(train_cat_targets,train_logp_ensmeanano,train_logp_cl), method='bounded', bounds=(0.,1.)).x 73 | print(a) 74 | prob_fcst_cat_cmb = np.exp(a*verif_logp_ensmeanano+verif_logp_cl) 75 | prob_fcst_cat = prob_fcst_cat_cmb / np.sum(prob_fcst_cat_cmb,axis=2)[:,:,None] 76 | ### Save out to file 77 | outfilename = "/home/michael/Desktop/CalifAPCP/forecasts/CNN/probfcst_cnn-m"+str(imod)+"-drpt-f48_week"+str(2+ilt)+"_yr"+str(iyr) 78 | np.savez(outfilename, prob_fcst_cat=prob_fcst_cat) 79 | 80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /CSGD-FitClimatologicalDistributions.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import scipy as sp 4 | import math 5 | import os, sys 6 | import matplotlib.pyplot as plt 7 | import matplotlib.path as path 8 | import datetime 9 | import time 10 | 11 | from netCDF4 import Dataset 12 | from numpy import ma 13 | from numpy import loadtxt 14 | from scipy import stats 15 | from scipy.stats import gamma 16 | from scipy.special import beta 17 | from scipy.optimize import minimize_scalar 18 | from scipy.optimize import minimize 19 | 20 | plt.ion() 21 | 22 | 23 | def crpsClimoCSGD(shape,obs,mean,pop): 24 | # average CRPS for climatological CSGD as a function of shape (pop and mean fixed) 25 | crps = np.zeros(len(obs),dtype='float64') 26 | Fck = 1.-pop 27 | cstd = gamma.ppf(Fck,shape) 28 | fkp1q0 = gamma.pdf(cstd,shape+1.,scale=1.) 29 | scale = (mean-0.254*pop) / (shape*(pop+fkp1q0)-pop*cstd) # assumes that precipitation amounts < 0.254 mm are considered zero 30 | shift = 0.254-cstd*scale 31 | penalty = max(0.005-shape*scale-shift,0.0) # penalize shifts that would move most of the PDF below zero 32 | betaf = beta(0.5,shape+0.5) 33 | FckP1 = gamma.cdf(cstd,shape+1,scale=1) 34 | F2c2k = gamma.cdf(2*cstd,2*shape,scale=1) 35 | indz = np.less(obs,0.254) 36 | indp = np.greater_equal(obs,0.254) 37 | ystd = (obs[indp]-shift)/scale 38 | Fyk = gamma.cdf(ystd,shape,scale=1) 39 | FykP1 = gamma.cdf(ystd,shape+1,scale=1) 40 | crps[indz] = cstd*(2.*Fck-1.) - cstd*np.square(Fck) \ 41 | + shape*(1.+2.*Fck*FckP1-np.square(Fck)-2*FckP1) \ 42 | - (shape/float(math.pi))*betaf*(1.-F2c2k) 43 | crps[indp] = ystd*(2.*Fyk-1.) - cstd*np.square(Fck) \ 44 | + shape*(1.+2.*Fck*FckP1-np.square(Fck)-2*FykP1) \ 45 | - (shape/float(math.pi))*betaf*(1.-F2c2k) 46 | return scale*ma.mean(crps) + penalty 47 | 48 | 49 | 50 | #============================================================================== 51 | # Load the PRISM gridded precipitation data and fit monthly CSGD distribution 52 | #============================================================================== 53 | 54 | f1 = np.load("/Users/mscheuerer/Desktop/CalifAPCP/data/precip_PRISM_cal_19810101_20171231.npz") 55 | #list(f1) 56 | obs_precip = f1['precip'] 57 | obs_lat = f1['lat'] 58 | obs_lon = f1['lon'] 59 | obs_dates_ord = f1['dates_ord'] 60 | obs_dates = f1['dates'] 61 | f1.close() 62 | 63 | ndts, nxy = obs_precip.shape 64 | 65 | obs_precip_week = np.zeros((ndts-6,nxy), dtype=np.float32) 66 | for iday in range(7): 67 | obs_precip_week += obs_precip[iday:(ndts-6+iday),:] 68 | 69 | ndts, nxy = obs_precip_week.shape 70 | 71 | obs_precip_week[obs_precip_week<0.254] = 0. 
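# ------------------------------------------------------------------------------
# Illustration (a minimal sketch with synthetic data; not part of the original
# script): crpsClimoCSGD defined above can be minimized over the shape parameter
# to fit a climatological CSGD at a single grid point, with PoP and mean held
# fixed at their empirical values. The sample `obs_ex` and the search bracket
# (0.02, 5.) are assumptions made purely for this example.
import numpy as np
from scipy.optimize import minimize_scalar

rng_ex = np.random.default_rng(1)
obs_ex = np.where(rng_ex.random(2000)<0.6, 0., rng_ex.gamma(1.2, 25., 2000))  # synthetic 7-day totals (mm)
obs_ex[obs_ex<0.254] = 0.                        # amounts below 0.254 mm count as zero

pop_ex = np.mean(obs_ex>0.)                      # empirical probability of precipitation
mean_ex = np.mean(obs_ex)                        # empirical mean weekly amount

fit_ex = minimize_scalar(crpsClimoCSGD, bounds=(0.02,5.), args=(obs_ex,mean_ex,pop_ex), method='bounded')
print(pop_ex, mean_ex, fit_ex.x)                 # fit_ex.x is the CRPS-optimal shape parameter
# ------------------------------------------------------------------------------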
72 | obs_dates_ord = obs_dates_ord[:ndts] 73 | obs_dates = obs_dates[:ndts] 74 | 75 | 76 | pop_month = np.zeros((12,nxy), dtype=np.float32) 77 | mean_month = np.zeros((12,nxy), dtype=np.float32) 78 | shape_month = np.zeros((12,nxy), dtype=np.float32) 79 | 80 | mid_mon = [14,45,73,104,134,165,195,226,257,287,318,348] 81 | 82 | for imonth in range(0,12): 83 | date2 = datetime.datetime(2001,1,1)+datetime.timedelta(mid_mon[imonth]) 84 | fnd_month = np.nonzero(obs_dates[:,1]==date2.month)[0] 85 | fnd_day = np.nonzero(obs_dates[fnd_month,2]==date2.day)[0] 86 | day_array = [] 87 | for windowval in range(-30,31): 88 | day_array.extend(fnd_month[fnd_day]+windowval) 89 | day_array = np.sort(np.array(day_array)) 90 | day_array = day_array[day_array>=0] 91 | day_array = day_array[day_array<ndts] -------------------------------------------------------------------------------- /CodeForGraphics.py: -------------------------------------------------------------------------------- 82 | imin = np.where(wnd_ind>=0)[0][0] # are considered to estimate climatological PoP 83 | imax = np.where(wnd_ind<ndts)[0][-1] 84 | 85 | for ixy in range(nxy): 86 | y = obs_precip_week[wnd_ind[imin:imax+1],ixy] 87 | pop[ixy] = np.mean(y>0.254) 88 | mean[ixy] = np.mean(y) 89 | thr[ixy,0] = 0.254 90 | qtlv = 1. + pop[ixy]*((np.arange(1,ncat-1)/float(ncat-1))-1.) 91 | thr[ixy,1:] = np.quantile(y,qtlv) 92 | pctl[ixy,:] = np.percentile(y,np.arange(1,100)) 93 | 94 | 95 | itnf = np.logical_and(obs_lon==-120.625,obs_lat==39.375) # coordinates of our example grid point in Tahoe National Forest 96 | ilat = (obs_lat==39.375) # latitude of our example transect 97 | 98 | 99 | plt.figure(figsize=(10,4)) 100 | 101 | plt.subplot(1, 2, 1, xlim=(-124.9,-113.8), ylim=(31.9,42.5), \ 102 | xticks=[-124,-122,-120,-118,-116,-114], xticklabels=['-124'+'\u00b0','-122'+'\u00b0','-120'+'\u00b0','-118'+'\u00b0','-116'+'\u00b0','-114'+'\u00b0'], \ 103 | yticks=[32,34,36,38,40,42], yticklabels=['32'+'\u00b0','34'+'\u00b0','36'+'\u00b0','38'+'\u00b0','40'+'\u00b0','42'+'\u00b0']) 104 | plt.scatter(obs_lon,obs_lat,c=mean,marker='s',cmap=clmcmp,s=28,lw=.1,vmin=0,vmax=105,edgecolors=[.2,.2,.2]) 105 | cbar = plt.colorbar() 106 | cbar.ax.set_yticklabels(['0 mm','20 mm','40 mm','60 mm','80 mm','100 mm']) 107 | plt.plot([-124.5,-119.],[39.375,39.375],c='black',linewidth=2) 108 | plt.scatter(obs_lon[itnf],obs_lat[itnf],c='red',marker='*',zorder=3) 109 | plt.title(' Average 7-day precipitation amounts in January\n',fontsize=12) 110 | 111 | plt.subplot(1, 2, 2, xlim=(-123.8,-120), \ 112 | xticks=[-123.5,-122.5,-121.5,-120.5], xticklabels=['-123.5'+'\u00b0','-122.5'+'\u00b0','-121.5'+'\u00b0','-120.5'+'\u00b0'], \ 113 | yticks=[0,100,200,300,400], yticklabels=['0 mm','100 mm','200 mm','300 mm','400 mm']) 114 | plt.scatter(np.repeat(obs_lon[ilat,np.newaxis],99,axis=1),pctl[ilat,:],c='DodgerBlue',s=30) 115 | plt.plot(np.repeat(obs_lon[ilat,np.newaxis],ncat-1,axis=1),thr[ilat,:],c='black',linewidth=1.5) 116 | plt.title('Category boundaries along meridional transect\n',fontsize=12) 117 | 118 | plt.tight_layout() 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | ################################################################################################### 127 | # # 128 | # Figure 2: ANN schematic and probability forecasts for case study at Tahoe National Forest # 129 | # # 130 | ################################################################################################### 131 | 132 | 133 | iyyyy = 2017 134 | imm = 1 135 | idd = 8 136 | 137 | itnf = np.logical_and(obs_lon==-120.625,obs_lat==39.375) # coordinates of our example grid point in Tahoe National Forest 138 | 139 | f1 = np.load("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz") 140 | #list(f1) 141 | obs_lat = f1['obs_lat'] 142 | obs_lon = f1['obs_lon'] 143 | obs_dates_ord = f1['obs_dates_ord'] 144 | pop_doy = f1['pop_doy']
145 | thr_doy = f1['thr_doy'] 146 | qtev_doy = f1['qtev_doy'] 147 | obs_precip_week = f1['apcp_obs'] 148 | f1.close() 149 | 150 | ntms, nxy = obs_precip_week.shape 151 | 152 | for ivdate in range(ntms): 153 | if datetime.date.fromordinal(int(obs_dates_ord[ivdate])) == datetime.date(iyyyy,imm,idd): 154 | break 155 | 156 | 157 | f2 = np.load("/home/michael/Desktop/CalifAPCP/data/mod_precip_cal.npz") 158 | mod_dates = f2['dates_ord'] 159 | f2.close() 160 | 161 | ndts, nyrs, nlts = mod_dates.shape 162 | 163 | iidate = np.zeros((3,2),dtype=np.int16) # date and year index for selected date 164 | 165 | for idt in range(ndts): 166 | for iyr in range(nyrs): 167 | for ilt in range(3): 168 | if datetime.date.fromordinal(int(mod_dates[idt,iyr,6+ilt*7])) == datetime.date(iyyyy,imm,idd): 169 | iidate[ilt,0] = idt 170 | iidate[ilt,1] = iyr 171 | 172 | 173 | f3 = np.load("/home/michael/Desktop/CalifAPCP/stats/ensemble_stats_week2_ANN_yr"+str(iidate[0,1])+".npz") 174 | doy_vdate = f3['doy_dts'][iidate[0,0]] 175 | apcp_ens_pit = f3['apcp_ens_pit_verif'][iidate[0,0],:,:] 176 | f3.close() 177 | 178 | 179 | prob_cat_tnf = np.zeros((4,20),dtype=np.float32) # Probability forecast for each category at TNF grid point 180 | 181 | for ilt in range(3): 182 | f5 = np.load("/home/michael/Desktop/CalifAPCP/forecasts/ANN-efi/probfcst_10-l1_week"+str(ilt+2)+"_yr"+str(iidate[ilt,1])+".npz") 183 | prob_cat_tnf[ilt,:] = f5['prob_fcst_cat'][iidate[ilt,0],itnf,:] 184 | f5.close() 185 | 186 | prob_cat_tnf[3,:] = np.append(1.-pop_doy[doy_vdate,itnf],np.repeat(pop_doy[doy_vdate,itnf]/(ncat-1),ncat-1)) # Clim. probabilities 187 | 188 | 189 | ## Set positions for ANN schematic 190 | 191 | npr = 3 192 | nhd = 5 193 | ncl = 4 194 | 195 | size = 450. 196 | 197 | pcl_x = np.full(ncl,2.5,dtype=np.float32) 198 | pcl_y = np.arange(5.5,5.5+ncl) 199 | pcl_c = np.full(ncl,0.2,dtype=np.float32) 200 | 201 | pred_x = np.full(npr,1,dtype=np.float32) 202 | pred_y = np.arange(1,1+npr) 203 | pred_c = np.full(npr,0.4,dtype=np.float32) 204 | 205 | hid1_x = np.full(nhd,2,dtype=np.float32) 206 | hid1_y = np.arange(0,nhd) 207 | hid1_c = np.full(nhd,0.6,dtype=np.float32) 208 | 209 | hid2_x = np.full(ncl,3,dtype=np.float32) 210 | hid2_y = np.arange(0.5,ncl+0.5) 211 | hid2_c = np.full(ncl,0.6,dtype=np.float32) 212 | 213 | out_x = np.full(ncl,4.5,dtype=np.float32) 214 | out_y = np.arange(3,3+ncl) 215 | out_c = np.full(ncl,0.8,dtype=np.float32) 216 | 217 | x = np.concatenate([pcl_x,pred_x,hid1_x,hid2_x,out_x-.5]) 218 | y = np.concatenate([pcl_y,pred_y,hid1_y,hid2_y,out_y]) 219 | colors = np.concatenate([pcl_c,pred_c,hid1_c,hid2_c+.1,out_c]) 220 | 221 | 222 | ## Now: actual plot 223 | 224 | width = 0.2 225 | 226 | plt.figure(figsize=(12,4)) 227 | 228 | plt.subplot(1, 2, 1, xlim=[0.8,4.55]) 229 | plt.scatter(x, y, c=colors, s=size, alpha=0.5) 230 | plt.axis('off') 231 | 232 | for i in range(ncl): 233 | plt.arrow(pcl_x[i]+.15,pcl_y[i],out_x[i]-0.8-pcl_x[i],out_y[i]-pcl_y[i]+.1, head_width=.05, length_includes_head=True, color='k') 234 | plt.arrow(hid2_x[i]+.15,hid2_y[i],out_x[i]-0.8-hid2_x[i],out_y[i]-hid2_y[i]-.1, head_width=.05, length_includes_head=True, color='k') 235 | 236 | for i in range(npr): 237 | for j in range(nhd): 238 | plt.arrow(pred_x[i]+.15,pred_y[i],hid1_x[j]-0.3-pred_x[i],.95*(hid1_y[j]-pred_y[i]), head_width=.05, length_includes_head=True, color='k') 239 | 240 | for i in range(nhd): 241 | for j in range(ncl): 242 | plt.arrow(hid1_x[i]+.15,hid1_y[i],hid2_x[j]-0.3-hid1_x[i],.95*(hid2_y[j]-hid1_y[i]), head_width=.05, length_includes_head=True,
color='k') 243 | 244 | for i in range(nhd): 245 | plt.text(hid1_x[i],hid1_y[i],'ELU',horizontalalignment='center',verticalalignment='center',fontsize=9) 246 | 247 | for i in range(ncl): 248 | plt.text(hid2_x[i],hid2_y[i],'ELU',horizontalalignment='center',verticalalignment='center',fontsize=9) 249 | 250 | for i in range(ncl): 251 | plt.text(out_x[i]-.5,out_y[i],'S',horizontalalignment='center',verticalalignment='center',fontsize=9) 252 | 253 | plt.text(1.35,7.5,'climatological\n log probabilities') 254 | plt.text(.6,-.4,'input layer\n (predictors)') 255 | plt.text(1.4,3.65,r'$W_1$') 256 | plt.text(1.8,-.9,'hidden layer') 257 | plt.text(2.5,3.9,r'$W_2$') 258 | plt.text(2.8,-.8,'preliminary\n output layer') 259 | plt.text(3.85,1.8,'output\n layer') 260 | plt.text(0.7,8.6,'a)',fontsize=18) 261 | 262 | ax = plt.subplot(1, 2, 2, xticks=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19], xticklabels=['1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','20']) 263 | ax.spines['top'].set_visible(False) 264 | ax.spines['right'].set_visible(False) 265 | ax.spines['bottom'].set_visible(False) 266 | ax.spines['left'].set_visible(False) 267 | ax.bar(np.arange(ncat)-3*width/2, prob_cat_tnf[0,:], width, label='week-2 probability forecast', color='orange') 268 | ax.bar(np.arange(ncat)-width/2, prob_cat_tnf[1,:], width, label='week-3 probability forecast', color='seagreen') 269 | ax.bar(np.arange(ncat)+width/2, prob_cat_tnf[2,:], width, label='week-4 probability forecast', color='r') 270 | ax.bar(np.arange(ncat)+3*width/2, prob_cat_tnf[3,:], width, label='climatological probability', color='b') 271 | ax.legend(loc=9,fontsize=11) 272 | plt.text(-0.6,0.21,'b)',fontsize=18) 273 | 274 | plt.tight_layout() 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | ################################################################################################### 283 | # # 284 | # Figure 3: Illustrate conversion of probability forecasts at TNF to predictive CDF # 285 | # # 286 | ################################################################################################### 287 | 288 | cdf_tnf = np.cumsum(prob_cat_tnf,axis=1)[:,:(ncat-1)] 289 | chf_tnf = -np.log(1.-cdf_tnf) 290 | 291 | xx = np.arange(315.) 292 | 293 | cdf_ip_tnf = np.zeros((4,len(xx)),dtype=np.float32) 294 | chf_ip_tnf = np.zeros((4,len(xx)),dtype=np.float32) 295 | 296 | for ithr in range(4): 297 | itp_fct = interp1d(thr_doy[doy_vdate,itnf,:].squeeze(), chf_tnf[ithr,:], kind='linear',fill_value='extrapolate') 298 | chf_ip_tnf[ithr,:] = itp_fct(xx) 299 | cdf_ip_tnf[ithr,:] = 1.-np.exp(-itp_fct(xx)) 300 | 301 | 302 | plt.figure(figsize=(15,4)) 303 | 304 | ax = plt.subplot(1, 3, 1, xlim=[-5,320], xticks=[0,50,100,150,200,250,300], xticklabels=['0 mm','50 mm','100 mm','150 mm','200 mm','250 mm','300 mm'], yticks=[0,.1,.2,.3,.4,.5,.6,.7,.8,.9,1.]) 305 | ax.scatter(thr_doy[doy_vdate,itnf,:], cdf_tnf[0,:], label='week-2 cum. probabilities', color='orange') 306 | ax.scatter(thr_doy[doy_vdate,itnf,:], cdf_tnf[1,:], label='week-3 cum. probabilities', color='seagreen') 307 | ax.scatter(thr_doy[doy_vdate,itnf,:], cdf_tnf[2,:], label='week-4 cum. probabilities', color='r') 308 | ax.scatter(thr_doy[doy_vdate,itnf,:], cdf_tnf[3,:], label='clim. cum. 
probabilities', color='b') 309 | #ax.set_title('Cumulative probabilities\n') 310 | ax.legend(loc=4,fontsize=10) 311 | plt.text(10,0.91,'a)',fontsize=18) 312 | 313 | ax = plt.subplot(1, 3, 2, xlim=[-5,320], xticks=[0,50,100,150,200,250,300], xticklabels=['0 mm','50 mm','100 mm','150 mm','200 mm','250 mm','300 mm'], yticks=[0,.5,1.,1.5,2.,2.5,3.,3.5]) 314 | ax.scatter(thr_doy[doy_vdate,itnf,:], chf_tnf[0,:], label='week-2 cum. hazard', color='orange') 315 | ax.plot(xx, chf_ip_tnf[0,:], color='orange') 316 | ax.scatter(thr_doy[doy_vdate,itnf,:], chf_tnf[1,:], label='week-3 cum. hazard', color='seagreen') 317 | ax.plot(xx, chf_ip_tnf[1,:], color='seagreen') 318 | ax.scatter(thr_doy[doy_vdate,itnf,:], chf_tnf[2,:], label='week-4 cum. hazard', color='r') 319 | ax.plot(xx, chf_ip_tnf[2,:], color='r') 320 | ax.scatter(thr_doy[doy_vdate,itnf,:], chf_tnf[3,:], label='clim. cum. hazard', color='b') 321 | ax.plot(xx, chf_ip_tnf[3,:], color='b') 322 | #ax.set_title('Cumulative hazard function\n') 323 | ax.legend(loc=(.15,.7),fontsize=10) 324 | plt.text(10,3.5,'b)',fontsize=18) 325 | 326 | ax = plt.subplot(1, 3, 3, xlim=[-5,320], xticks=[0,50,100,150,200,250,300], xticklabels=['0 mm','50 mm','100 mm','150 mm','200 mm','250 mm','300 mm'], yticks=[0,.1,.2,.3,.4,.5,.6,.7,.8,.9,1.]) 327 | ax.scatter(thr_doy[doy_vdate,itnf,:], cdf_tnf[0,:], label='week-2 predictive CDF', color='orange') 328 | ax.plot(xx, cdf_ip_tnf[0,:], color='orange') 329 | ax.scatter(thr_doy[doy_vdate,itnf,:], cdf_tnf[1,:], label='week-3 predictive CDF', color='seagreen') 330 | ax.plot(xx, cdf_ip_tnf[1,:], color='seagreen') 331 | ax.scatter(thr_doy[doy_vdate,itnf,:], cdf_tnf[2,:], label='week-4 predictive CDF', color='r') 332 | ax.plot(xx, cdf_ip_tnf[2,:], color='r') 333 | ax.scatter(thr_doy[doy_vdate,itnf,:], cdf_tnf[3,:], label='clim. predictive CDF', color='b') 334 | ax.plot(xx, cdf_ip_tnf[3,:], color='b') 335 | #ax.set_title('Interpolated CDF\n') 336 | ax.legend(loc=4,fontsize=10) 337 | plt.text(10,0.91,'c)',fontsize=18) 338 | 339 | plt.tight_layout() 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | ################################################################################################### 348 | # # 349 | # Figure 4: Illustrate construction of basis functions # 350 | # # 351 | ################################################################################################### 352 | 353 | 354 | f1 = np.load("/home/michael/Desktop/CalifAPCP/data/precip_PRISM_cal_19810101_20171231.npz") 355 | #list(f1) 356 | obs_precip = f1['precip'] 357 | obs_lat = f1['lat'] 358 | obs_lon = f1['lon'] 359 | obs_dates_ord = f1['dates_ord'] 360 | obs_dates = f1['dates'] 361 | f1.close() 362 | 363 | ndts, nxy = obs_precip.shape 364 | 365 | 366 | r_basis = 7. 
367 | lon_ctr = np.outer(np.ones(3),np.arange(-124,-115,3.5)).reshape(9)[[0,1,4,5,8]] 368 | lat_ctr = np.outer(np.arange(33,42,3.5)[::-1],np.ones(3)).reshape(9)[[0,1,4,5,8]] 369 | 370 | dst_lon = np.abs(np.subtract.outer(obs_lon,lon_ctr)) 371 | dst_lat = np.abs(np.subtract.outer(obs_lat,lat_ctr)) 372 | dst = np.sqrt(dst_lon**2+dst_lat**2) 373 | rbf = np.where(dst>r_basis,0.,(1.-(dst/r_basis)**3)**3) 374 | basis = rbf/np.sum(rbf,axis=1)[:,None] 375 | nbs = basis.shape[1] 376 | 377 | 378 | plt.figure(figsize=(18.5,7.5)) 379 | 380 | for ibs in range(5): 381 | plt.subplot(2, 5, ibs+1, xlim=(-124.9,-113.8), ylim=(31.9,42.5), \ 382 | xticks=[-124,-122,-120,-118,-116,-114], xticklabels=['-124'+'\u00b0','-122'+'\u00b0','-120'+'\u00b0','-118'+'\u00b0','-116'+'\u00b0','-114'+'\u00b0'], \ 383 | yticks=[32,34,36,38,40,42], yticklabels=['32'+'\u00b0','34'+'\u00b0','36'+'\u00b0','38'+'\u00b0','40'+'\u00b0','42'+'\u00b0']) 384 | plt.scatter(obs_lon,obs_lat,c=rbf[:,ibs],marker='s',cmap=pcpcmp,s=28,lw=.1,vmin=0.0,vmax=1.0,edgecolors=[.2,.2,.2]) 385 | plt.scatter(lon_ctr[ibs],lat_ctr[ibs],c='black',marker='*',zorder=3) 386 | #cbar = plt.colorbar() 387 | plt.title(' Preliminary basis function '+str(ibs+1)+'\n',fontsize=12) 388 | plt.subplot(2, 5, ibs+6, xlim=(-124.9,-113.8), ylim=(31.9,42.5), \ 389 | xticks=[-124,-122,-120,-118,-116,-114], xticklabels=['-124'+'\u00b0','-122'+'\u00b0','-120'+'\u00b0','-118'+'\u00b0','-116'+'\u00b0','-114'+'\u00b0'], \ 390 | yticks=[32,34,36,38,40,42], yticklabels=['32'+'\u00b0','34'+'\u00b0','36'+'\u00b0','38'+'\u00b0','40'+'\u00b0','42'+'\u00b0']) 391 | plt.scatter(obs_lon,obs_lat,c=basis[:,ibs],marker='s',cmap=pcpcmp,s=28,lw=.1,vmin=0.0,vmax=0.68,edgecolors=[.2,.2,.2]) 392 | #plt.scatter(lon_ctr[ibs],lat_ctr[ibs],c='black',marker='*',zorder=3) 393 | #cbar = plt.colorbar() 394 | plt.title(' Basis function '+str(ibs+1)+'\n',fontsize=12) 395 | 396 | plt.tight_layout() 397 | 398 | 399 | 400 | 401 | 402 | 403 | ################################################################################################### 404 | # # 405 | # Figure 5: Schematic to explain CNN-based modeling approach # 406 | # # 407 | ################################################################################################### 408 | 409 | 410 | plt.figure(figsize=(11,4.5)) 411 | 412 | ax = plt.subplot(2, 1, 1, xlim=[.95,5.15], ylim=[-.8,1.2]) 413 | 414 | plt.text(0.88,0.9,'a)',fontsize=16) 415 | rect = patches.Rectangle((1.15,-.43),3.3,1.6, edgecolor='r', facecolor="none") 416 | ax.add_patch(rect) 417 | plt.axis('off') 418 | plt.text(1.35,.85,'CNN',color='r',fontsize=18) 419 | 420 | plt.scatter(np.full(1,1.), np.zeros(1), marker='s', color='w', s=120., alpha=1., lw=1., edgecolors=[.01,.01,.01]) 421 | plt.scatter(np.full(1,1.)+0.02, np.zeros(1)-0.08, marker='s', color='w', s=120., alpha=1., lw=1., edgecolors=[.01,.01,.01]) 422 | plt.text(.95,.25,'ERA5',fontsize=8) 423 | plt.arrow(1.1,0.,.15,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 424 | 425 | plt.text(1.29,-.1,'Conv2D',fontsize=12) 426 | rect = patches.Rectangle((1.27,-.25),.3,.45, edgecolor='k', facecolor="none") 427 | ax.add_patch(rect) 428 | plt.arrow(1.6,0.,.08,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 429 | plt.text(1.78,0.,'max',fontsize=10) 430 | plt.text(1.75,-.18,'pooling',fontsize=10) 431 | rect = patches.Rectangle((1.72,-.25),.26,.45, edgecolor='k', facecolor="none") 432 | ax.add_patch(rect) 433 | plt.arrow(2.02,0.,.12,0., head_width=.08, head_length=0.02, 
length_includes_head=True, color='k') 434 | 435 | plt.text(2.19,-.1,'Conv2D',fontsize=12) 436 | rect = patches.Rectangle((2.17,-.25),.3,.45, edgecolor='k', facecolor="none") 437 | ax.add_patch(rect) 438 | plt.arrow(2.5,0.,.08,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 439 | plt.text(2.68,0.,'max',fontsize=10) 440 | plt.text(2.65,-.18,'pooling',fontsize=10) 441 | rect = patches.Rectangle((2.62,-.25),.26,.45, edgecolor='k', facecolor="none") 442 | ax.add_patch(rect) 443 | plt.arrow(2.92,0.,.12,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 444 | 445 | plt.text(3.09,0.,'Hidden',fontsize=10) 446 | plt.text(3.1,-0.18,'Layer',fontsize=10) 447 | rect = patches.Rectangle((3.07,-.25),.24,.45, edgecolor='k', facecolor="none") 448 | ax.add_patch(rect) 449 | plt.arrow(3.34,0.,.12,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 450 | 451 | plt.text(3.59,0.,'Basis',fontsize=10) 452 | plt.text(3.5,-0.18,'Coefficients',fontsize=10) 453 | rect = patches.Rectangle((3.48,-.25),.36,.45, edgecolor='k', facecolor="none") 454 | ax.add_patch(rect) 455 | plt.arrow(3.87,0.,.12,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 456 | 457 | plt.text(3.97,.9,'Basis',fontsize=10) 458 | plt.text(3.92,0.72,'Functions',fontsize=10) 459 | rect = patches.Rectangle((3.9,.66),.3,.42, edgecolor='k', facecolor="none") 460 | ax.add_patch(rect) 461 | plt.arrow(4.05,.56,0.,-.35, head_width=.02, head_length=0.08, length_includes_head=True, color='k') 462 | plt.scatter(4.05,0.0, color='w', s=180, alpha=1., lw=1., edgecolors=[.01,.01,.01]) 463 | plt.scatter(4.05,0.0, color='k', s=6) 464 | plt.arrow(4.11,0.,.12,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 465 | 466 | plt.text(4.27,0.,'Preliminary',fontsize=10) 467 | plt.text(4.31,-0.18,'Output',fontsize=10) 468 | rect = patches.Rectangle((4.25,-.25),.35,.45, edgecolor='k', facecolor="none") 469 | ax.add_patch(rect) 470 | 471 | plt.text(4.6,.9,'Log. 
Clim.',fontsize=10) 472 | plt.text(4.55,0.72,'Probabilities',fontsize=10) 473 | rect = patches.Rectangle((4.53,.66),.38,.42, edgecolor='k', facecolor="none") 474 | ax.add_patch(rect) 475 | plt.arrow(4.64,0.,.18,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 476 | plt.arrow(4.73,.56,0.1,-.5, head_width=.02, head_length=0.08, length_includes_head=True, color='k') 477 | 478 | plt.text(4.89,-.1,'Output',fontsize=12) 479 | rect = patches.Rectangle((4.87,-.25),.27,.45, edgecolor='k', facecolor="none") 480 | ax.add_patch(rect) 481 | 482 | 483 | plt.subplot(2, 1, 2, xlim=[0.8,3.55]) 484 | plt.text(0.76,3.,'b)',fontsize=16) 485 | 486 | plt.scatter(np.full(2,1.), np.arange(0,4,3), marker='s', color='w', s=120., alpha=1., lw=1., edgecolors=[.01,.01,.01]) 487 | plt.scatter(np.full(2,1.)+0.02, np.arange(0,4,3)-0.08, marker='s', color='w', s=120., alpha=1., lw=1., edgecolors=[.01,.01,.01]) 488 | plt.scatter(np.full(3,1.), np.arange(1.2,2.6,0.5), color='k', s=10) 489 | plt.axis('off') 490 | plt.text(.92,3.35,'IFS m1',fontsize=8) 491 | plt.text(.92,0.35,'IFS m11',fontsize=8) 492 | 493 | plt.arrow(1.1,0.,.2,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 494 | plt.text(1.15,0.15,'CNN',fontsize=8,color='r') 495 | plt.arrow(1.1,3.,.2,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 496 | plt.text(1.15,3.15,'CNN',fontsize=8,color='r') 497 | plt.arrow(1.1,1.5,.2,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 498 | plt.text(1.15,1.65,'CNN',fontsize=8,color='r') 499 | 500 | plt.text(1.35,3.,r'$x_{s,i}^1$') 501 | plt.text(1.35,0.,r'$x_{s,i}^{11}$') 502 | plt.scatter(np.full(3,1.4), np.arange(1.2,2.6,0.5), color='k', s=10) 503 | 504 | plt.arrow(1.47,0.,.2,1.2, head_width=.025, head_length=0.08, length_includes_head=True, color='k') 505 | plt.arrow(1.47,1.5,.2,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 506 | plt.arrow(1.47,3.,.2,-1.2, head_width=.025, head_length=0.08, length_includes_head=True, color='k') 507 | 508 | plt.text(1.71,1.4,r'${\widebar x}_{s,i}$') 509 | plt.arrow(1.85,1.5,.2,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 510 | plt.text(1.86,1.65,'relaxation',fontsize=8) 511 | 512 | plt.text(2.1,1.4,r'$\eta\/{\widebar x}_{s,i}$') 513 | plt.arrow(2.28,1.5,.2,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 514 | plt.text(2.12,3.,r'$log(p_{cl,s,i})$',fontsize=10) 515 | plt.arrow(2.28,2.8,.2,-1.1, head_width=.025, head_length=0.08, length_includes_head=True, color='k') 516 | 517 | plt.text(2.5,1.4,r'$z_{s,i}(\eta)$') 518 | plt.arrow(2.7,1.5,.2,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 519 | plt.text(2.95,1.4,r'$p_{s,i}(\eta)$') 520 | 521 | plt.tight_layout() 522 | 523 | 524 | 525 | 526 | 527 | ################################################################################################### 528 | # # 529 | # Figure 6: Plots of scores for the discussion of tuning parameters # 530 | # # 531 | ################################################################################################### 532 | 533 | 534 | x_wk2 = np.repeat(np.arange(20)[:,np.newaxis],5,axis=1) 535 | x_wk3 = np.repeat(np.arange(22,42)[:,np.newaxis],5,axis=1) 536 | x_wk4 = np.repeat(np.arange(44,64)[:,np.newaxis],5,axis=1) 537 | 538 | crps_10cl_m0 = np.zeros((20,5,3),dtype=np.float32) 539 | crps_20cl_m0 = np.zeros((20,5,3),dtype=np.float32) 540 | crps_30cl_m0 = np.zeros((20,5,3),dtype=np.float32) 541 | 
ccces_20cl_m0 = np.zeros((20,5,3),dtype=np.float32) 542 | ccces_20cl_m1 = np.zeros((20,5,3),dtype=np.float32) 543 | ccces_20cl_m2 = np.zeros((20,5,3),dtype=np.float32) 544 | 545 | 546 | for ilead in range(3): 547 | clead = ['week2','week3','week4'][ilead] 548 | f1 = np.load("/home/michael/Desktop/CalifAPCP/tuning/efi-10cl-m0-l1_"+clead+".npz") 549 | crps_10cl_m0[:,:,ilead] = f1['opt_valid_crps'] 550 | f1.close() 551 | f2 = np.load("/home/michael/Desktop/CalifAPCP/tuning/efi-20cl-m0-l1_"+clead+".npz") 552 | crps_20cl_m0[:,:,ilead] = f2['opt_valid_crps'] 553 | f2.close() 554 | f3 = np.load("/home/michael/Desktop/CalifAPCP/tuning/efi-30cl-m0-l1_"+clead+".npz") 555 | crps_30cl_m0[:,:,ilead] = f3['opt_valid_crps'] 556 | f3.close() 557 | f4 = np.load("/home/michael/Desktop/CalifAPCP/tuning/efi-20cl-m0-l1_"+clead+".npz") 558 | ccces_20cl_m0[:,:,ilead] = f4['opt_valid_scores'] 559 | f4.close() 560 | f5 = np.load("/home/michael/Desktop/CalifAPCP/tuning/efi-20cl-m1-l1_"+clead+".npz") 561 | ccces_20cl_m1[:,:,ilead] = f5['opt_valid_scores'] 562 | f5.close() 563 | f6 = np.load("/home/michael/Desktop/CalifAPCP/tuning/efi-20cl-m2-l1_"+clead+".npz") 564 | ccces_20cl_m2[:,:,ilead] = f6['opt_valid_scores'] 565 | f6.close() 566 | 567 | 568 | y1c_wk2 = 1.-np.sort(crps_10cl_m0[:,:,0]/crps_20cl_m0[:,:,0]) 569 | y2c_wk2 = 1.-np.sort(crps_30cl_m0[:,:,0]/crps_20cl_m0[:,:,0]) 570 | 571 | y1c_wk3 = 1.-np.sort(crps_10cl_m0[:,:,1]/crps_20cl_m0[:,:,1]) 572 | y2c_wk3 = 1.-np.sort(crps_30cl_m0[:,:,1]/crps_20cl_m0[:,:,1]) 573 | 574 | y1c_wk4 = 1.-np.sort(crps_10cl_m0[:,:,2]/crps_20cl_m0[:,:,2]) 575 | y2c_wk4 = 1.-np.sort(crps_30cl_m0[:,:,2]/crps_20cl_m0[:,:,2]) 576 | 577 | y1m_wk2 = 1.-np.sort(ccces_20cl_m1[:,:,0]/ccces_20cl_m0[:,:,0]) 578 | y2m_wk2 = 1.-np.sort(ccces_20cl_m2[:,:,0]/ccces_20cl_m0[:,:,0]) 579 | 580 | y1m_wk3 = 1.-np.sort(ccces_20cl_m1[:,:,1]/ccces_20cl_m0[:,:,1]) 581 | y2m_wk3 = 1.-np.sort(ccces_20cl_m2[:,:,1]/ccces_20cl_m0[:,:,1]) 582 | 583 | y1m_wk4 = 1.-np.sort(ccces_20cl_m1[:,:,2]/ccces_20cl_m0[:,:,2]) 584 | y2m_wk4 = 1.-np.sort(ccces_20cl_m2[:,:,2]/ccces_20cl_m0[:,:,2]) 585 | 586 | 587 | 588 | f1 = np.load("/home/michael/Desktop/CalifAPCP/tuning/cnn-m0-drpt-f48.npz") 589 | ccces_m0f48 = f1['opt_valid_scores'] 590 | f1.close() 591 | 592 | f2 = np.load("/home/michael/Desktop/CalifAPCP/tuning/cnn-m1-drpt-f48.npz") 593 | ccces_m1f48 = f2['opt_valid_scores'] 594 | f2.close() 595 | 596 | f3 = np.load("/home/michael/Desktop/CalifAPCP/tuning/cnn-m2-drpt-f48.npz") 597 | ccces_m2f48 = f3['opt_valid_scores'] 598 | f3.close() 599 | 600 | f4 = np.load("/home/michael/Desktop/CalifAPCP/tuning/cnn-m0-drpt-f44.npz") 601 | ccces_m0f44 = f4['opt_valid_scores'] 602 | f4.close() 603 | 604 | f5 = np.load("/home/michael/Desktop/CalifAPCP/tuning/cnn-m0-drpt-f88.npz") 605 | ccces_m0f88 = f5['opt_valid_scores'] 606 | f5.close() 607 | 608 | f6 = np.load("/home/michael/Desktop/CalifAPCP/tuning/cnn-m0-drpt-f816.npz") 609 | ccces_m0f816 = f6['opt_valid_scores'] 610 | f6.close() 611 | 612 | f7 = np.load("/home/michael/Desktop/CalifAPCP/tuning/cnn-m0-l1-f48.npz") 613 | ccces_m0f48_l1 = f7['opt_valid_scores'] 614 | f7.close() 615 | 616 | 617 | y1m = 1.-np.sort(ccces_m1f48/ccces_m0f48) 618 | y2m = 1.-np.sort(ccces_m2f48/ccces_m0f48) 619 | y3r = 1.-np.sort(ccces_m0f48_l1/ccces_m0f48) 620 | 621 | y1f = 1.-np.sort(ccces_m0f44/ccces_m0f48) 622 | y2f = 1.-np.sort(ccces_m0f88/ccces_m0f48) 623 | y3f = 1.-np.sort(ccces_m0f816/ccces_m0f48) 624 | 625 | 626 | 627 | plt.figure(figsize=(16,12)) 628 | 629 | plt.subplot(3,2,1, ylim=[-0.0077,0.0077]) 
630 | plt.scatter(x_wk2,y1c_wk2,c='orange',label='week-2') 631 | plt.scatter(x_wk3,y1c_wk3,c='seagreen',label='week-3') 632 | plt.scatter(x_wk4,y1c_wk4,c='r',label='week-4') 633 | plt.tick_params(axis='x',which='both',bottom=False,top=False,labelbottom=False) 634 | plt.legend(loc=(0.52,0.68),fontsize=12) 635 | plt.title('CRPSS: 10 vs 20 categories, 1 x 10 nodes',fontsize=14) 636 | plt.text(-1,0.0063,'a)',fontsize=16) 637 | plt.axhline(y=0) 638 | for i in range(20): 639 | plt.plot(x_wk2[i,::4],y1c_wk2[i,::4],c='orange') 640 | plt.plot(x_wk3[i,::4],y1c_wk3[i,::4],c='seagreen') 641 | plt.plot(x_wk4[i,::4],y1c_wk4[i,::4],c='r') 642 | 643 | plt.subplot(3,2,2, ylim=[-0.0077,0.0077]) 644 | plt.scatter(x_wk2,y2c_wk2,c='orange',label='week-2') 645 | plt.scatter(x_wk3,y2c_wk3,c='seagreen',label='week-3') 646 | plt.scatter(x_wk4,y2c_wk4,c='r',label='week-4') 647 | plt.tick_params(axis='x',which='both',bottom=False,top=False,labelbottom=False) 648 | plt.legend(loc=(0.52,0.68),fontsize=12) 649 | plt.title('CRPSS: 30 vs 20 categories, 1 x 10 nodes',fontsize=14) 650 | plt.text(-1,0.0063,'b)',fontsize=16) 651 | plt.axhline(y=0) 652 | for i in range(20): 653 | plt.plot(x_wk2[i,::4],y2c_wk2[i,::4],c='orange') 654 | plt.plot(x_wk3[i,::4],y2c_wk3[i,::4],c='seagreen') 655 | plt.plot(x_wk4[i,::4],y2c_wk4[i,::4],c='r') 656 | 657 | plt.subplot(3,2,3, ylim=[-0.0042,0.0042]) 658 | plt.scatter(x_wk2,y1m_wk2,c='orange',label='week-2') 659 | plt.scatter(x_wk3,y1m_wk3,c='seagreen',label='week-3') 660 | plt.scatter(x_wk4,y1m_wk4,c='r',label='week-4') 661 | plt.tick_params(axis='x',which='both',bottom=False,top=False,labelbottom=False) 662 | plt.legend(loc=(0.52,0.68),fontsize=12) 663 | plt.title('MCCESS: 1 x 10 vs 1 x 20 nodes, 20 categories',fontsize=14) 664 | plt.text(-1,0.0034,'c)',fontsize=16) 665 | plt.axhline(y=0) 666 | for i in range(20): 667 | plt.plot(x_wk2[i,::4],y1m_wk2[i,::4],c='orange') 668 | plt.plot(x_wk3[i,::4],y1m_wk3[i,::4],c='seagreen') 669 | plt.plot(x_wk4[i,::4],y1m_wk4[i,::4],c='r') 670 | 671 | plt.subplot(3,2,4, ylim=[-0.0042,0.0042]) 672 | plt.scatter(x_wk2,y2m_wk2,c='orange',label='week-2') 673 | plt.scatter(x_wk3,y2m_wk3,c='seagreen',label='week-3') 674 | plt.scatter(x_wk4,y2m_wk4,c='r',label='week-4') 675 | plt.tick_params(axis='x',which='both',bottom=False,top=False,labelbottom=False) 676 | plt.legend(loc=(0.52,0.68),fontsize=12) 677 | plt.title('MCCESS: 1 x 10 vs 2 x 10 nodes, 20 categories',fontsize=14) 678 | plt.text(-1,0.0034,'d)',fontsize=16) 679 | plt.axhline(y=0) 680 | for i in range(20): 681 | plt.plot(x_wk2[i,::4],y2m_wk2[i,::4],c='orange') 682 | plt.plot(x_wk3[i,::4],y2m_wk3[i,::4],c='seagreen') 683 | plt.plot(x_wk4[i,::4],y2m_wk4[i,::4],c='r') 684 | 685 | plt.subplot(3,2,5, ylim=[-0.042,0.042]) 686 | plt.scatter(x_wk2,y1m,c='royalblue',label='1 x 10 vs. 1 x 20 nodes, dropout') 687 | plt.scatter(x_wk3,y2m,c='navy',label='1 x 10 vs. 2 x 10 nodes, dropout') 688 | plt.scatter(x_wk4,y3r,c='darkturquoise',label='1 x 10 nodes, dropout vs. l1') 689 | plt.tick_params(axis='x',which='both',bottom=False,top=False,labelbottom=False) 690 | plt.legend(loc=(0.58,0.67),fontsize=12) 691 | plt.title('MCCESS: CNN with 4/8 filters',fontsize=14) 692 | plt.text(-1,0.034,'e)',fontsize=16) 693 | plt.axhline(y=0) 694 | for i in range(20): 695 | plt.plot(x_wk2[i,::4],y1m[i,::4],c='royalblue') 696 | plt.plot(x_wk3[i,::4],y2m[i,::4],c='navy') 697 | plt.plot(x_wk4[i,::4],y3r[i,::4],c='darkturquoise') 698 | 699 | plt.subplot(3,2,6, ylim=[-0.023,0.023]) 700 | plt.scatter(x_wk2,y1f,c='blueviolet',label='4/8 vs. 
4/4 filters') 701 | plt.scatter(x_wk3,y2f,c='lightskyblue',label='4/8 vs. 8/8 filters') 702 | plt.scatter(x_wk4,y3f,c='midnightblue',label='4/8 vs. 8/16 filters') 703 | plt.tick_params(axis='x',which='both',bottom=False,top=False,labelbottom=False) 704 | plt.legend(loc=(0.51,0.02),fontsize=12) 705 | plt.title('MCCESS: CNN for 1 x 10 hidden nodes, dropout',fontsize=14) 706 | plt.text(-1,0.018,'f)',fontsize=16) 707 | plt.axhline(y=0) 708 | for i in range(20): 709 | plt.plot(x_wk2[i,::4],y1f[i,::4],c='blueviolet') 710 | plt.plot(x_wk3[i,::4],y2f[i,::4],c='lightskyblue') 711 | plt.plot(x_wk4[i,::4],y3f[i,::4],c='midnightblue') 712 | 713 | plt.tight_layout() 714 | 715 | 716 | 717 | 718 | 719 | 720 | 721 | ################################################################################################### 722 | # # 723 | # Figure 7: Maps of RPSS (highlighting statistically significant grid points) # 724 | # # 725 | ################################################################################################### 726 | 727 | 728 | f1 = np.load("/home/michael/Desktop/CalifAPCP/data/precip_PRISM_cal_19810101_20171231.npz") 729 | obs_lat = f1['lat'] 730 | obs_lon = f1['lon'] 731 | f1.close() 732 | 733 | nxy = len(obs_lon) 734 | 735 | ndts = 61 736 | nyrs = 20 737 | 738 | 739 | acfANN = np.zeros((3,15),dtype=np.float32) 740 | acfCNN = np.zeros((3,15),dtype=np.float32) 741 | pvalANN = np.zeros((3,nxy),dtype=np.float32) 742 | pvalCNN = np.zeros((3,nxy),dtype=np.float32) 743 | alphaFDR_ANN = np.zeros(3,dtype=np.float32) 744 | alphaFDR_CNN = np.zeros(3,dtype=np.float32) 745 | 746 | rpssMapANN = ma.array(np.zeros((3,nxy),dtype=np.float32),mask=True) 747 | rpssMapCSGD = ma.array(np.zeros((3,nxy),dtype=np.float32),mask=True) 748 | rpssMapCNN = ma.array(np.zeros((3,nxy),dtype=np.float32),mask=True) 749 | 750 | rpssAvgANN = ma.array(np.zeros(3,dtype=np.float32),mask=True) 751 | rpssAvgCSGD = ma.array(np.zeros(3,dtype=np.float32),mask=True) 752 | rpssAvgCNN = ma.array(np.zeros(3,dtype=np.float32),mask=True) 753 | 754 | for ilead in range(3): 755 | f1 = np.load("/home/michael/Desktop/CalifAPCP/results/scores-ann_week"+str(ilead+2)+".npz") 756 | Bs33Clm = f1['Bs33pClm'] 757 | Bs33ANN = f1['Bs33pANN'] 758 | Bs33CSGD = f1['Bs33pCSGD'] 759 | Bs67Clm = f1['Bs67pClm'] 760 | Bs67ANN = f1['Bs67pANN'] 761 | Bs67CSGD = f1['Bs67pCSGD'] 762 | Bs85Clm = f1['Bs85pClm'] 763 | Bs85ANN = f1['Bs85pANN'] 764 | Bs85CSGD = f1['Bs85pCSGD'] 765 | f1.close() 766 | f2 = np.load("/home/michael/Desktop/CalifAPCP/results/scores-cnn_week"+str(ilead+2)+".npz") 767 | Bs33CNN = f2['Bs33pCNN'] 768 | Bs67CNN = f2['Bs67pCNN'] 769 | Bs85CNN = f2['Bs85pCNN'] 770 | f2.close() 771 | rpsClm = Bs33Clm + Bs67Clm + Bs85Clm # calculate ranked probability score 772 | rpsANN = Bs33ANN + Bs67ANN + Bs85ANN 773 | rpsCSGD = Bs33CSGD + Bs67CSGD + Bs85CSGD 774 | rpsCNN = Bs33CNN + Bs67CNN + Bs85CNN 775 | rpssMapANN[ilead,:] = 1.-np.sum(rpsANN,axis=(0,1))/np.sum(rpsClm,axis=(0,1)) 776 | rpssMapCSGD[ilead,:] = 1.-np.sum(rpsCSGD,axis=(0,1))/np.sum(rpsClm,axis=(0,1)) 777 | rpssMapCNN[ilead,:] = 1.-np.sum(rpsCNN,axis=(0,1))/np.sum(rpsClm,axis=(0,1)) 778 | rpssAvgANN[ilead] = 1.-np.sum(rpsANN)/np.sum(rpsClm) 779 | rpssAvgCSGD[ilead] = 1.-np.sum(rpsCSGD)/np.sum(rpsClm) 780 | rpssAvgCNN[ilead] = 1.-np.sum(rpsCNN)/np.sum(rpsClm) 781 | rpsDiffANN = rpsCSGD-rpsANN 782 | rpsDiffCNN = rpsCSGD-rpsCNN 783 | rpsDiffStdzANN = (rpsDiffANN-np.mean(rpsDiffANN,axis=(0,1))[None,None,:])/np.std(rpsDiffANN,axis=(0,1))[None,None,:] 784 | rpsDiffStdzCNN = 
(rpsDiffCNN-np.mean(rpsDiffCNN,axis=(0,1))[None,None,:])/np.std(rpsDiffCNN,axis=(0,1))[None,None,:] 785 | for lg in range(15): 786 | acfANN[ilead,lg] = np.mean(rpsDiffStdzANN[lg:,:,:]*rpsDiffStdzANN[:(ndts-lg),:,:]) # Estimate temporal autocorrelation 787 | acfCNN[ilead,lg] = np.mean(rpsDiffStdzCNN[lg:,:,:]*rpsDiffStdzCNN[:(ndts-lg),:,:]) 788 | rhoANN = acfANN[ilead,1]/acfANN[ilead,0] 789 | rhoCNN = acfCNN[ilead,1]/acfCNN[ilead,0] 790 | print(rhoANN,rhoCNN) 791 | nANN = round(ndts*nyrs*(1-rhoANN)/(1+rhoANN)) # effective sample size under AR(1) dependence 792 | nCNN = round(ndts*nyrs*(1-rhoCNN)/(1+rhoCNN)) 793 | #print(nANN,nCNN) 794 | for ixy in range(nxy): 795 | smplANN = rpsCSGD[:,:,ixy].flatten()-rpsANN[:,:,ixy].flatten() 796 | smplCNN = rpsCSGD[:,:,ixy].flatten()-rpsCNN[:,:,ixy].flatten() 797 | tstatANN = np.mean(smplANN)/np.sqrt(np.var(smplANN)/nANN) # test statistic for paired t-test 798 | tstatCNN = np.mean(smplCNN)/np.sqrt(np.var(smplCNN)/nCNN) 799 | pvalANN[ilead,ixy] = 1.-sp.stats.t.cdf(tstatANN,df=nANN-1) # p-value for one-sided test 800 | pvalCNN[ilead,ixy] = 1.-sp.stats.t.cdf(tstatCNN,df=nCNN-1) 801 | #pval[ilead,ixy] = 2*min(1.-sp.stats.t.cdf(tstat,df=n-1),sp.stats.t.cdf(tstat,df=n-1)) 802 | pvalANN_srt = np.sort(pvalANN[ilead,:]) 803 | iANN = np.where(pvalANN_srt<=0.1*np.arange(1,nxy+1)/nxy)[0] # Benjamini-Hochberg procedure at FDR level 0.1 804 | if len(iANN)>0: 805 | alphaFDR_ANN[ilead] = pvalANN_srt[iANN[-1]] 806 | pvalCNN_srt = np.sort(pvalCNN[ilead,:]) 807 | iCNN = np.where(pvalCNN_srt<=0.1*np.arange(1,nxy+1)/nxy)[0] 808 | if len(iCNN)>0: 809 | alphaFDR_CNN[ilead] = pvalCNN_srt[iCNN[-1]] 810 | plt.figure(); plt.scatter(np.arange(663),0.1*np.arange(1,664)/663); plt.scatter(np.arange(663),pvalANN_srt); plt.scatter(np.arange(663),pvalCNN_srt) # diagnostic plot: sorted p-values against the BH threshold line 811 | 812 | 813 | 814 | ## First figure depicts distribution of RPS differences and autocorrelation function 815 | 816 | fig = plt.figure(figsize=(15,9)) 817 | 818 | for ilead in range(3): 819 | ax1 = fig.add_subplot(2,3,ilead+1) 820 | sp.stats.probplot(rpsDiffStdzANN.flatten(),plot=plt) 821 | plt.title("Q-Q Plot of RPS differences (week "+str(ilead+2)+")",fontsize=14) 822 | ax2 = fig.add_subplot(2,3,ilead+4) 823 | plt.scatter(np.arange(15),acfANN[ilead,:]) 824 | plt.axhline(y=0) 825 | plt.axhline(y=0.05,ls='--') 826 | plt.axhline(y=-0.05,ls='--') 827 | plt.plot(np.arange(15),acfANN[ilead,1]**np.arange(15),c='red') 828 | plt.title("ACF of RPS differences (week "+str(ilead+2)+")",fontsize=14) 829 | 830 | plt.tight_layout() 831 | 832 | 833 | fig = plt.figure(figsize=(11.3,9.)) 834 | 835 | for ilead in range(3): 836 | ylim = np.array([0.26,0.052,0.026])[ilead] 837 | #ylim = np.amax(abs(rpssMapCSGD[ilead,:])) 838 | indSgnfANN = (pvalANN[ilead,:]<alphaFDR_ANN[ilead]) 955 | ################################################################################################### 956 | # # 957 | # Figure 8: ... P(>85th pctl) at Eureka/San Diego # 958 | # # 959 | ################################################################################################### 960 | 961 | 962 | divcmp = diverging_hcl("Green-Brown",rev=True).cmap(name = "Diverging Color Map") 963 | 964 | states_us = np.load('/home/michael/Desktop/CalifAPCP/data/states_us.npz',allow_pickle=True)['polygons'].tolist() 965 | states_mexico = np.load('/home/michael/Desktop/CalifAPCP/data/states_mexico.npz',allow_pickle=True)['polygons'].tolist() 966 | 967 | f1 = np.load("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz") 968 | lat = f1['obs_lat'] 969 | lon = f1['obs_lon'] 970 | f1.close() 971 | 972 | 973 | inc = np.logical_and(lon==-124.125,lat==40.875) # Eureka 974 | isc = np.logical_and(lon==-117.125,lat==32.875) # San Diego 975 | lcns = [np.argmax(inc),np.argmax(isc)] 976 | 977 | iyr = 
np.array([[19,7],[12,3]],dtype=np.int32) # date and year index for lowest/highest P(>85th pctl) 978 | idt = np.array([[3,4],[16,25]],dtype=np.int32) # at Eureka and San Diego, set manually here 979 | 980 | 981 | ## Load ERA5 z500 and tcw fields, subset to 22 x 18 image 982 | 983 | ixl = 10 984 | ixu = -6 985 | jyl = 6 986 | jyu = -6 987 | 988 | f2 = np.load("/home/michael/Desktop/CalifAPCP/data/z500_tcw_predictors_era5.npz") 989 | era5_dates_ord = f2['dates_ord'] 990 | era5_lon = f2['longitude'][ixl:ixu] 991 | era5_lat = f2['latitude'][jyl:jyu] 992 | z500 = f2['z500_1wk'][:,:,jyl:jyu,ixl:ixu] 993 | tcw = f2['tcw_1wk'][:,:,jyl:jyu,ixl:ixu] 994 | f2.close() 995 | 996 | ndts, nyrs, ny, nx = z500.shape 997 | 998 | 999 | ## Normalize tcw to 10th/90th climatological percentiles at each grid point 1000 | 1001 | tcw_q10 = np.percentile(tcw,10,axis=1) 1002 | tcw_q90 = np.percentile(tcw,90,axis=1) 1003 | tcw_q10_sm = np.zeros(tcw_q10.shape, dtype=np.float32) 1004 | tcw_q90_sm = np.zeros(tcw_q90.shape, dtype=np.float32) 1005 | 1006 | X = np.ones((ndts,3), dtype=np.float32) # Fit harmonic function to annual cycle of tcw climatology 1007 | X[:,1] = np.sin(2.*np.pi*era5_dates_ord[:,0]/365.25) 1008 | X[:,2] = np.cos(2.*np.pi*era5_dates_ord[:,0]/365.25) 1009 | 1010 | for ix in range(nx): 1011 | for jy in range(ny): 1012 | coef_q10 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_q10[:,jy,ix])) 1013 | tcw_q10_sm[:,jy,ix] = np.matmul(X,coef_q10) 1014 | coef_q90 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_q90[:,jy,ix])) 1015 | tcw_q90_sm[:,jy,ix] = np.matmul(X,coef_q90) 1016 | 1017 | tcw_ano = -1.+2.*(tcw-tcw_q10_sm[:,None,:,:])/(tcw_q90_sm-tcw_q10_sm)[:,None,:,:] 1018 | 1019 | 1020 | ## Normalize z500 to 1st/99th climatological percentiles across all grid points 1021 | 1022 | z500_q01 = np.percentile(z500,1,axis=(1,2,3)) 1023 | z500_q99 = np.percentile(z500,99,axis=(1,2,3)) 1024 | 1025 | coef_q01 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_q01)) 1026 | z500_q01_sm = np.matmul(X,coef_q01) 1027 | coef_q99 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_q99)) 1028 | z500_q99_sm = np.matmul(X,coef_q99) 1029 | 1030 | z500_ano = -1.+2.*(z500-z500_q01_sm[:,None,None,None])/(z500_q99_sm-z500_q01_sm)[:,None,None,None] 1031 | 1032 | 1033 | ## Make plots 1034 | 1035 | contour_levels_tcw = np.arange(-2.,2.25,0.25) 1036 | x, y = np.meshgrid(era5_lon,era5_lat) 1037 | 1038 | title_str = ['Lowest P(>85th percentile) at Eureka','Highest P(>85th percentile) at Eureka','Lowest P(>85th percentile) at San Diego','Highest P(>85th percentile) at San Diego'] 1039 | 1040 | fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10.,6.5)) 1041 | fig.subplots_adjust(left=0.02, bottom=0.02, right=0.98, top=0.95, hspace=0.15, wspace=0.05) 1042 | for ilc in range(2): 1043 | for iwd in range(2): 1044 | ax = axes.flat[2*ilc+(1-iwd)] 1045 | ax.set_xticks([]) 1046 | ax.set_yticks([]) 1047 | ax.set_title(title_str[2*ilc+(1-iwd)]) 1048 | C1 = ax.contour(x,y,z500_ano[idt[iwd,ilc],iyr[iwd,ilc],:,:],linewidths=0.35,colors='k',zorder=2) 1049 | C2 = ax.contourf(x,y,tcw_ano[idt[iwd,ilc],iyr[iwd,ilc],:,:],levels=contour_levels_tcw,alpha=1,cmap=divcmp,extend='both',zorder=1,corner_mask=True) 1050 | plt.clabel(C1,fontsize=10,inline=1,fmt='%.2f') 1051 | ax.scatter(lon[lcns[ilc]],lat[lcns[ilc]],c='red',marker='*',zorder=3) 1052 | for k in range(len(states_us)): 1053 | pathPolygon = path.Path(states_us[str(k)]) 1054 | ax.add_patch(patches.PathPatch(pathPolygon, facecolor='none', lw=1.)) 1055 | for k in range(len(states_mexico)): 1056 | pathPolygon = 
path.Path(np.squeeze(states_mexico[k])) 1057 | ax.add_patch(patches.PathPatch(pathPolygon, facecolor='none', lw=1.)) 1058 | 1059 | cbar = fig.colorbar(C2,ax=axes.ravel().tolist()) 1060 | cbar.set_label('\n normalized TCW anomalies', fontsize=12) 1061 | 1062 | 1063 | 1064 | 1065 | 1066 | 1067 | 1068 | 1069 | 1070 | ################################################################################################### 1071 | # # 1072 | # Figure for presentations: Examples of resulting exceedance probabilities # 1073 | # # 1074 | ################################################################################################### 1075 | 1076 | 1077 | iyyyy = 2017 1078 | imm = 1 1079 | idd = 8 1080 | 1081 | itnf = np.logical_and(obs_lon==-120.625,obs_lat==39.375) # coordinates of our example grid point in Tahoe National Forest 1082 | 1083 | f1 = np.load("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz") 1084 | #list(f1) 1085 | obs_lat = f1['obs_lat'] 1086 | obs_lon = f1['obs_lon'] 1087 | obs_dates_ord = f1['obs_dates_ord'] 1088 | pop_doy = f1['pop_doy'] 1089 | thr_doy = f1['thr_doy'] 1090 | qtev_doy = f1['qtev_doy'] 1091 | obs_precip_week = f1['apcp_obs'] 1092 | f1.close() 1093 | 1094 | ntms, nxy = obs_precip_week.shape 1095 | 1096 | for ivdate in range(ntms): 1097 | if datetime.date.fromordinal(int(obs_dates_ord[ivdate])) == datetime.date(iyyyy,imm,idd): 1098 | break 1099 | 1100 | 1101 | f2 = np.load("/home/michael/Desktop/CalifAPCP/data/mod_precip_cal.npz") 1102 | mod_dates = f2['dates_ord'] 1103 | f2.close() 1104 | 1105 | ndts, nyrs, nlts = mod_dates.shape 1106 | 1107 | iidate = np.zeros((3,2),dtype=np.int16) # date and year index for selected date 1108 | 1109 | for idt in range(ndts): 1110 | for iyr in range(nyrs): 1111 | for ilt in range(3): 1112 | if datetime.date.fromordinal(int(mod_dates[idt,iyr,6+ilt*7])) == datetime.date(iyyyy,imm,idd): 1113 | iidate[ilt,0] = idt 1114 | iidate[ilt,1] = iyr 1115 | 1116 | 1117 | f3 = np.load("/home/michael/Desktop/CalifAPCP/stats/ensemble_stats_week2_ANN_yr"+str(iidate[0,1])+".npz") 1118 | doy_vdate = f3['doy_dts'][iidate[0,0]] 1119 | apcp_ens_pit = f3['apcp_ens_pit_verif'][iidate[0,0],:,:] 1120 | f3.close() 1121 | 1122 | 1123 | ilt = 0 1124 | 1125 | f5 = np.load("/home/michael/Desktop/CalifAPCP/forecasts/ANN-efi/probfcst_10-l1_week"+str(ilt+2)+"_yr"+str(iidate[ilt,1])+".npz") 1126 | prob_fcst_cat = f5['prob_fcst_cat'][iidate[ilt,0],:,:] 1127 | f5.close() 1128 | 1129 | prob_fcst_chf = -np.log(1.-np.cumsum(prob_fcst_cat,axis=1)[:,:(ncat-1)]) 1130 | 1131 | prob_clm_cat = np.concatenate((1.-pop_doy[doy_vdate,:,np.newaxis],np.repeat(pop_doy[doy_vdate,:,np.newaxis]/(ncat-1),ncat-1,axis=1)),axis=1) 1132 | prob_clm_chf = -np.log(1.-np.cumsum(prob_clm_cat,axis=1)[:,:(ncat-1)]) 1133 | 1134 | pot6in = np.zeros(nxy,dtype=np.float32) 1135 | pot85p = np.zeros(nxy,dtype=np.float32) 1136 | pot6in_cl = np.zeros(nxy,dtype=np.float32) 1137 | pot85p_cl = np.zeros(nxy,dtype=np.float32) 1138 | 1139 | for ixy in range(nxy): 1140 | itp_fct = interp1d(thr_doy[doy_vdate,ixy,:], prob_fcst_chf[ixy,:], kind='linear',fill_value='extrapolate') 1141 | pot6in[ixy] = np.exp(-itp_fct(152.4)) 1142 | pot85p[ixy] = np.exp(-itp_fct(qtev_doy[doy_vdate,ixy,2])) 1143 | itp_fct = interp1d(thr_doy[doy_vdate,ixy,:], prob_clm_chf[ixy,:], kind='linear',fill_value='extrapolate') 1144 | pot6in_cl[ixy] = np.exp(-itp_fct(152.4)) 1145 | pot85p_cl[ixy] = np.exp(-itp_fct(qtev_doy[doy_vdate,ixy,2])) 1146 | 1147 | 1148 | plt.figure(figsize=(10,4)) 1149 | 1150 | plt.subplot(1, 2, 2, 
xlim=(-124.9,-113.8), ylim=(31.9,42.5), \ 1151 | xticks=[-124,-122,-120,-118,-116,-114], xticklabels=['-124'+'\u00b0','-122'+'\u00b0','-120'+'\u00b0','-118'+'\u00b0','-116'+'\u00b0','-114'+'\u00b0'], \ 1152 | yticks=[32,34,36,38,40,42], yticklabels=['32'+'\u00b0','34'+'\u00b0','36'+'\u00b0','38'+'\u00b0','40'+'\u00b0','42'+'\u00b0']) 1153 | plt.scatter(obs_lon,obs_lat,c=pot6in,marker='s',cmap=pcpcmp,s=28,lw=.1,vmin=0.0,vmax=0.64,edgecolors=[.2,.2,.2]) 1154 | #plt.scatter(obs_lon,obs_lat,c=pot6in,marker='s',cmap=pcpcmp,s=28,lw=.1,vmin=0.0,vmax=0.32,edgecolors=[.2,.2,.2]) 1155 | cbar = plt.colorbar() 1156 | plt.title(' Probability for exceeding 6 inches of precipitation\n',fontsize=12) 1157 | 1158 | plt.subplot(1, 2, 1, xlim=(-124.9,-113.8), ylim=(31.9,42.5), \ 1159 | xticks=[-124,-122,-120,-118,-116,-114], xticklabels=['-124'+'\u00b0','-122'+'\u00b0','-120'+'\u00b0','-118'+'\u00b0','-116'+'\u00b0','-114'+'\u00b0'], \ 1160 | yticks=[32,34,36,38,40,42], yticklabels=['32'+'\u00b0','34'+'\u00b0','36'+'\u00b0','38'+'\u00b0','40'+'\u00b0','42'+'\u00b0']) 1161 | plt.scatter(obs_lon,obs_lat,c=pot85p,marker='s',cmap=pcpcmp,s=28,lw=.1,vmin=0.,vmax=1.,edgecolors=[.2,.2,.2]) 1162 | #plt.scatter(obs_lon,obs_lat,c=pot85p,marker='s',cmap=pcpcmp,s=28,lw=.1,vmin=0.13,vmax=0.23,edgecolors=[.2,.2,.2]) 1163 | cbar = plt.colorbar() 1164 | plt.title(' Probability for exceeding 85th climat. percentile\n',fontsize=12) 1165 | 1166 | plt.tight_layout() 1167 | 1168 | 1169 | plt.figure(figsize=(10,4)) 1170 | 1171 | plt.subplot(1, 2, 1, xlim=(-124.9,-113.8), ylim=(31.9,42.5), \ 1172 | xticks=[-124,-122,-120,-118,-116,-114], xticklabels=['-124'+'\u00b0','-122'+'\u00b0','-120'+'\u00b0','-118'+'\u00b0','-116'+'\u00b0','-114'+'\u00b0'], \ 1173 | yticks=[32,34,36,38,40,42], yticklabels=['32'+'\u00b0','34'+'\u00b0','36'+'\u00b0','38'+'\u00b0','40'+'\u00b0','42'+'\u00b0']) 1174 | plt.scatter(obs_lon,obs_lat,c=pot6in,marker='s',cmap=pcpcmp,s=28,lw=.1,vmin=0.0,vmax=0.65,edgecolors=[.2,.2,.2]) 1175 | cbar = plt.colorbar() 1176 | plt.title(' Probability for exceeding 6 inches of precipitation\n',fontsize=12) 1177 | 1178 | plt.subplot(1, 2, 2, xlim=(-124.9,-113.8), ylim=(31.9,42.5), \ 1179 | xticks=[-124,-122,-120,-118,-116,-114], xticklabels=['-124'+'\u00b0','-122'+'\u00b0','-120'+'\u00b0','-118'+'\u00b0','-116'+'\u00b0','-114'+'\u00b0'], \ 1180 | yticks=[32,34,36,38,40,42], yticklabels=['32'+'\u00b0','34'+'\u00b0','36'+'\u00b0','38'+'\u00b0','40'+'\u00b0','42'+'\u00b0']) 1181 | plt.scatter(obs_lon,obs_lat,c=np.log10(pot6in/pot6in_cl),marker='s',cmap=divcmp,s=28,lw=.1,vmin=-2.2,vmax=2.2,edgecolors=[.2,.2,.2]) 1182 | cbar = plt.colorbar(ticks=[-2,-1,0,1,2]) 1183 | cbar.ax.set_yticklabels(['0.01','0.1','1','10','100']) 1184 | plt.title(' Ratio of forecast probability to climat. probability\n',fontsize=12) 1185 | 1186 | plt.tight_layout() 1187 | 1188 | 1189 | plt.figure(figsize=(10,4)) 1190 | 1191 | plt.subplot(1, 2, 1, xlim=(-124.9,-113.8), ylim=(31.9,42.5), \ 1192 | xticks=[-124,-122,-120,-118,-116,-114], xticklabels=['-124'+'\u00b0','-122'+'\u00b0','-120'+'\u00b0','-118'+'\u00b0','-116'+'\u00b0','-114'+'\u00b0'], \ 1193 | yticks=[32,34,36,38,40,42], yticklabels=['32'+'\u00b0','34'+'\u00b0','36'+'\u00b0','38'+'\u00b0','40'+'\u00b0','42'+'\u00b0']) 1194 | plt.scatter(obs_lon,obs_lat,c=pot85p,marker='s',cmap=pcpcmp,s=28,lw=.1,vmin=0.0,vmax=1.0,edgecolors=[.2,.2,.2]) 1195 | cbar = plt.colorbar() 1196 | plt.title(' Probability for exceeding 85th climat. 
percentile\n',fontsize=12) 1197 | 1198 | plt.subplot(1, 2, 2, xlim=(-124.9,-113.8), ylim=(31.9,42.5), \ 1199 | xticks=[-124,-122,-120,-118,-116,-114], xticklabels=['-124'+'\u00b0','-122'+'\u00b0','-120'+'\u00b0','-118'+'\u00b0','-116'+'\u00b0','-114'+'\u00b0'], \ 1200 | yticks=[32,34,36,38,40,42], yticklabels=['32'+'\u00b0','34'+'\u00b0','36'+'\u00b0','38'+'\u00b0','40'+'\u00b0','42'+'\u00b0']) 1201 | plt.scatter(obs_lon,obs_lat,c=np.log10(pot85p/pot85p_cl),marker='s',cmap=divcmp,s=28,lw=.1,vmin=-0.7,vmax=0.7,edgecolors=[.2,.2,.2]) 1202 | cbar = plt.colorbar(ticks=[np.log10(0.25),np.log10(0.5),0.,np.log10(2.),np.log10(4.)]) 1203 | cbar.ax.set_yticklabels(['0.25','0.5','1','2','4']) 1204 | plt.title(' Ratio of forecast probability to climat. probability\n',fontsize=12) 1205 | 1206 | plt.tight_layout() 1207 | 1208 | 1209 | 1210 | 1211 | ################################################################################################### 1212 | # # 1213 | # Figure for presentations: Reliability diagrams # 1214 | # # 1215 | ################################################################################################### 1216 | 1217 | 1218 | #fct = 8 1219 | #p = 0.8 1220 | nmin = 50 1221 | 1222 | #cat33u = np.arange(np.round(-fct*0.67**p),np.round(fct*0.33**p)) 1223 | #cat67u = np.arange(np.round(-fct*0.33**p),np.round(fct*0.67**p)) 1224 | #cat85u = np.arange(np.round(-fct*0.15**p),np.round(fct*0.85**p)) 1225 | 1226 | cat33u = np.arange(11) 1227 | cat67u = np.arange(11) 1228 | cat85u = np.arange(11) 1229 | 1230 | x33 = ma.array(np.zeros((3,3,len(cat33u)),dtype=np.float32),mask=True) 1231 | x67 = ma.array(np.zeros((3,3,len(cat67u)),dtype=np.float32),mask=True) 1232 | x85 = ma.array(np.zeros((3,3,len(cat85u)),dtype=np.float32),mask=True) 1233 | y33 = ma.array(np.zeros((3,3,len(cat33u)),dtype=np.float32),mask=True) 1234 | y67 = ma.array(np.zeros((3,3,len(cat67u)),dtype=np.float32),mask=True) 1235 | y85 = ma.array(np.zeros((3,3,len(cat85u)),dtype=np.float32),mask=True) 1236 | freq33 = ma.array(np.zeros((3,3,len(cat33u)),dtype=np.float32),mask=True) 1237 | freq67 = ma.array(np.zeros((3,3,len(cat67u)),dtype=np.float32),mask=True) 1238 | freq85 = ma.array(np.zeros((3,3,len(cat85u)),dtype=np.float32),mask=True) 1239 | 1240 | for ilead in range(3): 1241 | f1 = np.load("/home/michael/Desktop/CalifAPCP/results/scores-ann_week"+str(ilead+2)+".npz") 1242 | exc33p = f1['exc33p'] 1243 | exc67p = f1['exc67p'] 1244 | exc85p = f1['exc85p'] 1245 | pot33pCSGD = f1['pot33pCSGD'] 1246 | pot67pCSGD = f1['pot67pCSGD'] 1247 | pot85pCSGD = f1['pot85pCSGD'] 1248 | pot33pANN = f1['pot33pANN'] 1249 | pot67pANN = f1['pot67pANN'] 1250 | pot85pANN = f1['pot85pANN'] 1251 | f1.close() 1252 | cat33csgd = np.round(pot33pCSGD*10).flatten() 1253 | cat67csgd = np.round(pot67pCSGD*10).flatten() 1254 | cat85csgd = np.round(pot85pCSGD*10).flatten() 1255 | cat33ann = np.round(pot33pANN*10).flatten() 1256 | cat67ann = np.round(pot67pANN*10).flatten() 1257 | cat85ann = np.round(pot85pANN*10).flatten() 1258 | #cat33 = np.round(fct*np.sign(pot33pANN-0.67)*abs(pot33pANN-0.67)**p).flatten() 1259 | #cat67 = np.round(fct*np.sign(pot67pANN-0.33)*abs(pot67pANN-0.33)**p).flatten() 1260 | #cat85 = np.round(fct*np.sign(pot85pANN-0.15)*abs(pot85pANN-0.15)**p).flatten() 1261 | f2 = np.load("/home/michael/Desktop/CalifAPCP/results/scores-cnn_week"+str(ilead+2)+".npz") 1262 | pot33pCNN = f2['pot33pCNN'] 1263 | pot67pCNN = f2['pot67pCNN'] 1264 | pot85pCNN = f2['pot85pCNN'] 1265 | f2.close() 1266 | cat33cnn = np.round(pot33pCNN*10).flatten() 1267 | 
cat67cnn = np.round(pot67pCNN*10).flatten() 1268 | cat85cnn = np.round(pot85pCNN*10).flatten() 1269 | for i in range(len(cat33u)): 1270 | freq33[0,ilead,i] = np.sum(cat33csgd==cat33u[i]) 1271 | if freq33[0,ilead,i]>nmin: 1272 | x33[0,ilead,i] = np.mean(pot33pCSGD.flatten()[cat33csgd==cat33u[i]]) 1273 | y33[0,ilead,i] = np.mean(exc33p.flatten()[cat33csgd==cat33u[i]]) 1274 | freq33[1,ilead,i] = np.sum(cat33ann==cat33u[i]) 1275 | if freq33[1,ilead,i]>nmin: 1276 | x33[1,ilead,i] = np.mean(pot33pANN.flatten()[cat33ann==cat33u[i]]) 1277 | y33[1,ilead,i] = np.mean(exc33p.flatten()[cat33ann==cat33u[i]]) 1278 | freq33[2,ilead,i] = np.sum(cat33cnn==cat33u[i]) 1279 | if freq33[2,ilead,i]>nmin: 1280 | x33[2,ilead,i] = np.mean(pot33pCNN.flatten()[cat33cnn==cat33u[i]]) 1281 | y33[2,ilead,i] = np.mean(exc33p.flatten()[cat33cnn==cat33u[i]]) 1282 | freq67[0,ilead,i] = np.sum(cat67csgd==cat67u[i]) 1283 | if freq67[0,ilead,i]>nmin: 1284 | x67[0,ilead,i] = np.mean(pot67pCSGD.flatten()[cat67csgd==cat67u[i]]) 1285 | y67[0,ilead,i] = np.mean(exc67p.flatten()[cat67csgd==cat67u[i]]) 1286 | freq67[1,ilead,i] = np.sum(cat67ann==cat67u[i]) 1287 | if freq67[1,ilead,i]>nmin: 1288 | x67[1,ilead,i] = np.mean(pot67pANN.flatten()[cat67ann==cat67u[i]]) 1289 | y67[1,ilead,i] = np.mean(exc67p.flatten()[cat67ann==cat67u[i]]) 1290 | freq67[2,ilead,i] = np.sum(cat67cnn==cat67u[i]) 1291 | if freq67[2,ilead,i]>nmin: 1292 | x67[2,ilead,i] = np.mean(pot67pCNN.flatten()[cat67cnn==cat67u[i]]) 1293 | y67[2,ilead,i] = np.mean(exc67p.flatten()[cat67cnn==cat67u[i]]) 1294 | freq85[0,ilead,i] = np.sum(cat85csgd==cat85u[i]) 1295 | if freq85[0,ilead,i]>nmin: 1296 | x85[0,ilead,i] = np.mean(pot85pCSGD.flatten()[cat85csgd==cat85u[i]]) 1297 | y85[0,ilead,i] = np.mean(exc85p.flatten()[cat85csgd==cat85u[i]]) 1298 | freq85[1,ilead,i] = np.sum(cat85ann==cat85u[i]) 1299 | if freq85[1,ilead,i]>nmin: 1300 | x85[1,ilead,i] = np.mean(pot85pANN.flatten()[cat85ann==cat85u[i]]) 1301 | y85[1,ilead,i] = np.mean(exc85p.flatten()[cat85ann==cat85u[i]]) 1302 | freq85[2,ilead,i] = np.sum(cat85cnn==cat85u[i]) 1303 | if freq85[2,ilead,i]>nmin: 1304 | x85[2,ilead,i] = np.mean(pot85pCNN.flatten()[cat85cnn==cat85u[i]]) 1305 | y85[2,ilead,i] = np.mean(exc85p.flatten()[cat85cnn==cat85u[i]]) 1306 | 1307 | 1308 | fig = plt.figure(figsize=(14,9)) 1309 | 1310 | for ilt in range(3): 1311 | ax1 = fig.add_subplot(2,3,1+ilt) 1312 | relCSGD = plt.plot(x33[0,ilt,:],y33[0,ilt,:],'-o',c='blueviolet') 1313 | relANN = plt.plot(x33[1,ilt,:],y33[1,ilt,:],'-o',c='royalblue') 1314 | relCNN = plt.plot(x33[2,ilt,:],y33[2,ilt,:],'-o',c='indigo') 1315 | plt.plot([0,1],[0,1],c='k') 1316 | plt.axvline(0.67,c='k',ls=':',lw=1,ymin=0.05,ymax=0.95) 1317 | plt.title("Reliability for P(> 33rd pctl), week-"+str(ilt+2)+"\n",fontsize=14) 1318 | plt.legend((relCSGD[0],relANN[0],relCNN[0]),('CSGD','ANN','CNN'),loc=4,fontsize=12) 1319 | ins1 = ax1.inset_axes([0.03,0.68,0.4,0.3]) 1320 | ins1.tick_params(axis='both',which='both',bottom=False,top=False,labelbottom=False,right=False,left=False,labelleft=False) 1321 | ins1.set_xlabel('Frequency of usage',fontsize=11) 1322 | ins1.bar(cat33u-0.25,freq33[0,ilt,:],0.23,color='blueviolet') 1323 | ins1.bar(cat33u-0.0,freq33[1,ilt,:],0.23,color='royalblue') 1324 | ins1.bar(cat33u+0.25,freq33[2,ilt,:],0.23,color='indigo') 1325 | #ins1.axvline(0.0,c='k',ls=':',lw=1) 1326 | ins1.axvline(6.67,c='k',ls=':',lw=1) 1327 | ax2 = fig.add_subplot(2,3,4+ilt) 1328 | relCSGD = plt.plot(x67[0,ilt,:],y67[0,ilt,:],'-o',c='blueviolet') 1329 | relANN = 
plt.plot(x67[1,ilt,:],y67[1,ilt,:],'-o',c='royalblue') 1330 | relCNN = plt.plot(x67[2,ilt,:],y67[2,ilt,:],'-o',c='indigo') 1331 | plt.plot([0,1],[0,1],c='k') 1332 | plt.axvline(0.33,c='k',ls=':',lw=1,ymin=0.05,ymax=0.5) 1333 | plt.title("Reliability for P(> 67th pctl), week-"+str(ilt+2)+"\n",fontsize=14) 1334 | plt.legend((relCSGD[0],relANN[0],relCNN[0]),('CSGD','ANN','CNN'),loc=4,fontsize=12) 1335 | ins2 = ax2.inset_axes([0.03,0.68,0.4,0.3]) 1336 | ins2.tick_params(axis='both',which='both',bottom=False,top=False,labelbottom=False,right=False,left=False,labelleft=False) 1337 | ins2.set_xlabel('Frequency of usage',fontsize=11) 1338 | ins2.bar(cat67u-0.25,freq67[0,ilt,:],0.23,color='blueviolet') 1339 | ins2.bar(cat67u-0.0,freq67[1,ilt,:],0.23,color='royalblue') 1340 | ins2.bar(cat67u+0.25,freq67[2,ilt,:],0.23,color='indigo') 1341 | #ins2.axvline(0.0,c='k',ls=':',lw=1) 1342 | ins2.axvline(3.33,c='k',ls=':',lw=1) 1343 | 1344 | plt.tight_layout() 1345 | 1346 | 1347 | 1348 | 1349 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NeuralNetworkS2S 2 | This repository contains files with Python code for the algorithms discussed in the paper 'Using Artificial Neural Networks for Generating Probabilistic Subseasonal Precipitation Forecasts over California', submitted to Monthly Weather Review. The following gives a brief description of the individual files: 3 | 4 | - ANN-CalculateEnsembleStatistics.py: Reads the ensemble forecasts and calculates, separately for each member, the probability integral transform relative to the model climatology 5 | 6 | - ANN-CalculateObsCategories.py: Reads the analysis data, calculates the climatology-dependent category boundaries, and uses them to categorize the analyzed precipitation amounts 7 | 8 | - ANN-CalculateVerificationMetrics.py: Calculates various verification metrics for the ANN, CSGD, raw ensemble, and climatological probabilistic forecasts 9 | 10 | - ANN-FindTuningParameters.py: Calculates cross-validated scores for different ANN architectures and selects the optimal regularization parameters 11 | 12 | - ANN-GenerateProbabilityForecasts.py: Calculates probability forecasts based on the selected ANN model with optimal regularization parameters 13 | 14 | - CNN-CalculateLargeScalePredictors.py: Reads ensemble forecast and analyzed Z500 and TCW data, upscales them to 1 degree and 7-day averages, and saves as .npz file 15 | 16 | - CNN-CalculateVerificationMetrics.py: Calculates various verification metrics for the CNN probabilistic forecasts, based on either analyzed or forecast Z500/TCW fields 17 | 18 | - CNN-FindTuningParameters.py: Calculates cross-validated scores for different CNN architectures and selects the optimal dropout rate for each of them 19 | 20 | - CNN-FitConvolutionalNetworkModel.py: Fits a CNN model based on the selected CNN architecture with optimal dropout rate 21 | 22 | - CNN-GenerateProbabilityForecasts.py: Estimates the adjustment factor and calculates adjusted probability forecasts based on the forecast Z500/TCW fields 23 | 24 | - CodeForGraphics.py: Python code used to generate the figures in the MWR paper and a few additional figures used for presentations 25 | 26 | - CSGD-FitClimatologicalDistributions.py: Fits climatological censored, shifted gamma distributions to the analyzed precipitation amounts 27 | 28 | - CSGD-GenerateForecastDistributions.py: Fits a simplified CSGD model that links forecast and 
analyzed precipitation data, and generates probabilistic forecasts 29 | 30 | - S-ANN-GenerateProbabilityForecasts.py: Calculates probability forecasts for the ANN model discussed in 'SupplementB.pdf' 31 | 32 | - S-CalculateVerificationMetrics.py: Calculates various verification metrics for the additional experiments with ANN, CNN, and CSGD in the supplements 33 | 34 | - S-CNN-FindTuningParameters.py: Calculates cross-validated scores for different CNN architectures discussed in 'SupplementC.pdf' 35 | 36 | - S-CNN-FitConvolutionalNetworkModel.py: Fits a CNN model with optimal dropout rate for the additional experiments in 'SupplementC.pdf' 37 | 38 | - S-CodeForGraphics.py: Python code used to generate the figures in 'SupplementA.pdf', 'SupplementB.pdf', and 'SupplementC.pdf' 39 | 40 | - S-CSGD-GenerateForecastDistributions.py: Fits other variants of the CSGD model discussed in 'SupplementA.pdf' and generates probabilistic forecasts 41 | 42 | 43 | -------------------------------------------------------------------------------- /S-ANN-GenerateProbabilityForecasts.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import scipy as sp 4 | import math 5 | import os, sys 6 | #import matplotlib.pyplot as plt 7 | import datetime 8 | import time 9 | import keras 10 | import keras.backend as K 11 | 12 | from netCDF4 import Dataset 13 | from numpy import ma 14 | 15 | from scipy import stats 16 | 17 | from keras import models 18 | from keras import layers 19 | from keras import regularizers 20 | 21 | from keras.layers import Input, Dense, Add, Activation, Dropout 22 | from keras.layers.merge import Concatenate 23 | from keras.models import Model 24 | from keras.optimizers import Adam 25 | 26 | #plt.ion() 27 | 28 | 29 | def build_cat_model(n_features, hidden_nodes, n_bins, par_reg): 30 | inp = Input(shape=(n_features,)) 31 | x = Dense(hidden_nodes[0], activation='elu', kernel_regularizer=regularizers.l1(par_reg))(inp) 32 | if len(hidden_nodes) > 1: 33 | for h in hidden_nodes[1:]: 34 | x = Dense(h, activation='elu', kernel_regularizer=regularizers.l1(par_reg))(x) 35 | x = Dense(n_bins, activation='elu', kernel_regularizer=regularizers.l1(par_reg))(x) 36 | out = Activation('softmax')(x) 37 | return Model(inputs=inp, outputs=out) 38 | 39 | 40 | def modified_categorical_crossentropy(y_mat, prob_fcst): 41 | prob_obs_cat = K.sum(y_mat*prob_fcst,axis=1) 42 | return -K.mean(K.log(prob_obs_cat)) 43 | 44 | 45 | f1 = np.load("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz") 46 | #list(f1) 47 | lat = f1['obs_lat'] 48 | lon = f1['obs_lon'] 49 | obs_dates_ord = f1['obs_dates_ord'] 50 | pop_doy = f1['pop_doy'] 51 | thr_doy = f1['thr_doy'] 52 | qtev_doy = f1['qtev_doy'] 53 | apcp_obs_cat = f1['apcp_obs_cat'] 54 | f1.close() 55 | 56 | ncat = apcp_obs_cat.shape[2] 57 | 58 | 59 | 60 | clead = 'week4' 61 | 62 | f3 = np.load("/home/michael/Desktop/CalifAPCP/tuning/efi-20cl-m0-l1_"+clead+".npz") 63 | opt_reg_param = f3['opt_reg_param'] 64 | f3.close() 65 | 66 | 67 | for iyr in range(20): 68 | print(iyr) 69 | # Load smoothed ensemble forecast PIT values 70 | f2 = np.load("/home/michael/Desktop/CalifAPCP/stats/ensemble_stats_"+clead+"_ANN_yr"+str(iyr)+".npz") 71 | doy_dts = f2['doy_dts'] 72 | apcp_obs_ind_train = f2['apcp_obs_ind_train'] 73 | apcp_obs_ind_verif = f2['apcp_obs_ind_verif'] 74 | apcp_ens_pit_train = f2['apcp_ens_pit_train'] 75 | apcp_ens_pit_verif = f2['apcp_ens_pit_verif'] 76 | f2.close() 77 | ndts, nyrs_tr, nxy, nmem = 
apcp_ens_pit_train.shape 78 | # Calculate normalized coordinates and climatological probability of precipitation 79 | lon_train = np.repeat(-1.+2.*(lon[np.newaxis,:]-lon[0])/(lon[-1]-lon[0]),ndts*nyrs_tr,axis=0).reshape((ndts,nyrs_tr,nxy,1)) 80 | lon_verif = np.repeat(-1.+2.*(lon[np.newaxis,:]-lon[0])/(lon[-1]-lon[0]),ndts,axis=0).reshape((ndts,nxy,1)) 81 | lat_train = np.repeat(-1.+2.*(lat[np.newaxis,:]-lat[-1])/(lat[0]-lat[-1]),ndts*nyrs_tr,axis=0).reshape((ndts,nyrs_tr,nxy,1)) 82 | lat_verif = np.repeat(-1.+2.*(lat[np.newaxis,:]-lat[-1])/(lat[0]-lat[-1]),ndts,axis=0).reshape((ndts,nxy,1)) 83 | # Calculate predictors and classification targets 84 | apcp_efi_train = -1.+(2./np.pi)*np.mean(np.arccos(1.-2.*apcp_ens_pit_train),axis=3)[:,:,:,None] 85 | apcp_efi_verif = -1.+(2./np.pi)*np.mean(np.arccos(1.-2.*apcp_ens_pit_verif),axis=2)[:,:,None] 86 | train_predictors = np.concatenate((lon_train,lat_train,apcp_efi_train),axis=3).reshape((-1,3)) 87 | train_cat_targets = apcp_obs_cat[apcp_obs_ind_train.flatten(),:,:].reshape((-1,ncat)).astype(float) 88 | # Define and fit ANN model 89 | keras.backend.clear_session() 90 | model = build_cat_model(train_predictors.shape[-1], [10], ncat, opt_reg_param[iyr]) 91 | model.compile(optimizer=Adam(0.05), loss=modified_categorical_crossentropy) 92 | model.fit(train_predictors, train_cat_targets, epochs=100, batch_size=ndts*nyrs_tr*nxy, verbose=0) 93 | # Calculate probability forecasts 94 | verif_predictors = np.concatenate((lon_verif,lat_verif,apcp_efi_verif),axis=2).reshape((-1,3)) 95 | prob_fcst_cat = model.predict(verif_predictors).reshape((ndts,nxy,ncat)) 96 | ### Save out to file 97 | outfilename = "/home/michael/Desktop/CalifAPCP/forecasts/ANN-rv/probfcst_10-l1_"+clead+"_yr"+str(iyr) 98 | np.savez(outfilename, prob_fcst_cat=prob_fcst_cat) 99 | 100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /S-CNN-FindTuningParameters.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import scipy as sp 4 | import math 5 | import os, sys 6 | #import matplotlib.pyplot as plt 7 | import datetime 8 | import time 9 | import keras 10 | import keras.backend as K 11 | 12 | from netCDF4 import Dataset 13 | from numpy import ma 14 | from numpy.linalg import solve 15 | from scipy import stats 16 | from scipy.interpolate import interp1d 17 | 18 | from keras import models 19 | from keras import layers 20 | from keras import regularizers 21 | 22 | from keras.layers import Input, Dense, Dot, Add, Activation, Conv2D, MaxPooling2D, Flatten, Reshape, Dropout 23 | from keras.models import Model 24 | from keras.optimizers import Adam 25 | 26 | #plt.ion() 27 | 28 | 29 | ## Load categorical analysis data 30 | 31 | f1 = np.load("/Users/mscheuerer/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz") 32 | lat = f1['obs_lat'] 33 | lon = f1['obs_lon'] 34 | obs_dates_ord = f1['obs_dates_ord'] 35 | pop_doy = f1['pop_doy'] 36 | thr_doy = f1['thr_doy'] 37 | qtev_doy = f1['qtev_doy'] 38 | apcp_obs_cat = f1['apcp_obs_cat'] 39 | apcp_obs = f1['apcp_obs'] 40 | f1.close() 41 | 42 | ndts, nxy, ncat = apcp_obs_cat.shape 43 | 44 | 45 | 46 | ## Load ERA5 z500 and tcw fields, subset to 22 x 18 image, same for the ensemble forecast fields 47 | 48 | ixl = 10 49 | ixu = -6 50 | jyl = 6 51 | jyu = -6 52 | 53 | f2 = np.load("/Users/mscheuerer/Desktop/CalifAPCP/data/z500_tcw_predictors_era5.npz") 54 | era5_dates_ord = f2['dates_ord'] 55 | era5_lon = f2['longitude'][ixl:ixu] 56 | era5_lat = 
f2['latitude'][jyl:jyu] 57 | z500 = f2['z500_1wk'][:,:,jyl:jyu,ixl:ixu] 58 | tcw = f2['tcw_1wk'][:,:,jyl:jyu,ixl:ixu] 59 | f2.close() 60 | 61 | ndts, nyrs, ny, nx = z500.shape 62 | 63 | 64 | 65 | ########################################################################################################################################################################## 66 | # 67 | # Upscale to 2 degrees 68 | # 69 | z500_1deg = z500 70 | tcw_1deg = tcw 71 | z500 = (z500_1deg[:,:,0:ny:2,0:nx:2]+z500_1deg[:,:,1:ny:2,0:nx:2]+z500_1deg[:,:,0:ny:2,1:nx:2]+z500_1deg[:,:,1:ny:2,1:nx:2])/4. 72 | tcw = (tcw_1deg[:,:,0:ny:2,0:nx:2]+tcw_1deg[:,:,1:ny:2,0:nx:2]+tcw_1deg[:,:,0:ny:2,1:nx:2]+tcw_1deg[:,:,1:ny:2,1:nx:2])/4. 73 | ny = ny//2 74 | nx = nx//2 75 | # 76 | ########################################################################################################################################################################## 77 | 78 | 79 | ## Calculate doy for each analysis date 80 | 81 | doy_dts = np.zeros(ndts,dtype=np.int32) 82 | apcp_obs_ind = np.zeros((ndts,nyrs),dtype=np.int32) 83 | for idt in range(ndts): 84 | for iyr in range(nyrs): 85 | apcp_obs_ind[idt,iyr] = np.where(obs_dates_ord==era5_dates_ord[idt,iyr])[0][0] 86 | date_ord = int(era5_dates_ord[idt,0]-0.5) 87 | doy_dts[idt] = min(364,(datetime.date.fromordinal(date_ord)-datetime.date(datetime.date.fromordinal(date_ord).year,1,1)).days) 88 | 89 | 90 | 91 | ## Normalize tcw to 10th/90th climatological percentiles at each grid point 92 | 93 | tcw_q10 = np.percentile(tcw,10,axis=1) 94 | tcw_q90 = np.percentile(tcw,90,axis=1) 95 | tcw_q10_sm = np.zeros(tcw_q10.shape, dtype=np.float32) 96 | tcw_q90_sm = np.zeros(tcw_q90.shape, dtype=np.float32) 97 | 98 | X = np.ones((ndts,3), dtype=np.float32) # Fit harmonic function to annual cycle of tcw climatology 99 | X[:,1] = np.sin(2.*np.pi*era5_dates_ord[:,0]/365.25) 100 | X[:,2] = np.cos(2.*np.pi*era5_dates_ord[:,0]/365.25) 101 | 102 | for ix in range(nx): 103 | for jy in range(ny): 104 | coef_q10 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_q10[:,jy,ix])) 105 | tcw_q10_sm[:,jy,ix] = np.matmul(X,coef_q10) 106 | coef_q90 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_q90[:,jy,ix])) 107 | tcw_q90_sm[:,jy,ix] = np.matmul(X,coef_q90) 108 | 109 | tcw_ano = -1.+2.*(tcw-tcw_q10_sm[:,None,:,:])/(tcw_q90_sm-tcw_q10_sm)[:,None,:,:] 110 | 111 | 112 | 113 | ## Normalize z500 to 1st/99th climatological percentiles across all grid points 114 | 115 | z500_q01 = np.percentile(z500,1,axis=(1,2,3)) 116 | z500_q99 = np.percentile(z500,99,axis=(1,2,3)) 117 | 118 | coef_q01 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_q01)) 119 | z500_q01_sm = np.matmul(X,coef_q01) 120 | coef_q99 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_q99)) 121 | z500_q99_sm = np.matmul(X,coef_q99) 122 | 123 | z500_ano = -1.+2.*(z500-z500_q01_sm[:,None,None,None])/(z500_q99_sm-z500_q01_sm)[:,None,None,None] 124 | 125 | 126 | 127 | # Define basis functions 128 | 129 | r_basis = 7. 
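# (Tricube weights, (1-(d/r_basis)^3)^3 for center distances d within r_basis degrees and zero
#  beyond; after the normalization below, the basis function values sum to one at each grid point.)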
130 | lon_ctr = np.outer(np.arange(-124,-115,3.5),np.ones(3)).reshape(9)[[2,4,5,6,7]] 131 | lat_ctr = np.outer(np.ones(3),np.arange(33,42,3.5)).reshape(9)[[2,4,5,6,7]] 132 | 133 | dst_lon = np.abs(np.subtract.outer(lon,lon_ctr)) 134 | dst_lat = np.abs(np.subtract.outer(lat,lat_ctr)) 135 | dst = np.sqrt(dst_lon**2+dst_lat**2) 136 | basis = np.where(dst>r_basis,0.,(1.-(dst/r_basis)**3)**3) 137 | basis = basis/np.sum(basis,axis=1)[:,None] 138 | nbs = basis.shape[1] 139 | 140 | 141 | 142 | ## Define functions for building a CNN 143 | 144 | def build_cat_model(n_xy, n_bins, n_basis, hidden_nodes, dropout_rate): 145 | #inp_imgs = Input(shape=(18,22,2,)) 146 | inp_imgs = Input(shape=(9,11,2,)) 147 | inp_basis = Input(shape=(n_xy,n_basis,)) 148 | inp_cl = Input(shape=(n_xy,n_bins,)) 149 | c = Conv2D(4, (3,3), activation='elu')(inp_imgs) 150 | #c = MaxPooling2D((2,2))(c) 151 | c = Conv2D(8, (3,3), activation='elu')(c) 152 | #c = MaxPooling2D((2,2))(c) 153 | x = Flatten()(c) 154 | for h in hidden_nodes: 155 | x = Dropout(dropout_rate)(x) 156 | x = Dense(h, activation='elu')(x) 157 | x = Dense(n_bins*n_basis, activation='elu')(x) 158 | x = Reshape((n_bins,n_basis))(x) 159 | z = Dot(axes=2)([inp_basis, x]) 160 | z = Add()([z, inp_cl]) 161 | out = Activation('softmax')(z) 162 | return Model(inputs=[inp_imgs, inp_basis, inp_cl], outputs=out) 163 | 164 | 165 | def modified_categorical_crossentropy(y_mat, prob_fcst): 166 | prob_obs_cat = K.sum(y_mat*prob_fcst,axis=2) 167 | return -K.mean(K.log(prob_obs_cat)) 168 | 169 | 170 | nyrs = 20 171 | 172 | #reg = 10.**np.arange(-6,-2) 173 | reg = np.arange(0.1,0.6,0.1) 174 | nreg = len(reg) 175 | 176 | imod = 0 177 | 178 | mod = [[10],[20],[10,10]] 179 | 180 | 181 | x = (np.arange(0,101)/5)**2 # evaluation points for numerical calculation of the CRPS 182 | dx = np.diff(x) 183 | 184 | opt_reg_param = np.zeros(nyrs, dtype=np.float32) 185 | opt_valid_scores = np.zeros((nyrs,5), dtype=np.float32) 186 | opt_valid_crps = np.zeros((nyrs,5), dtype=np.float32) 187 | 188 | 189 | for iyr in range(nyrs): 190 | print('year: ',iyr) 191 | # Calculate image predictors and basis functions 192 | apcp_obs_ind_cv = np.delete(apcp_obs_ind,iyr,axis=1) 193 | z500_pred_cv = np.delete(z500_ano,iyr,axis=1).reshape((ndts*(nyrs-1),ny,nx,1)) 194 | tcw_pred_cv = np.delete(tcw_ano,iyr,axis=1).reshape((ndts*(nyrs-1),ny,nx,1)) 195 | pred_imgs_cv = np.concatenate((z500_pred_cv,tcw_pred_cv),axis=3) 196 | basis_cv = np.repeat(basis[np.newaxis,:,:],ndts*(nyrs-1),axis=0) 197 | # Calculate climatological log probabilities for each class 198 | apcp_pop_cl = np.repeat(pop_doy[doy_dts,np.newaxis,:],nyrs-1,axis=1).reshape((ndts*(nyrs-1),nxy,1)) 199 | logp_cl_cv = np.concatenate((np.log(1.-apcp_pop_cl),np.repeat(np.log(apcp_pop_cl),ncat-1,axis=2)-np.log(ncat-1)),axis=2) 200 | # perform 5-fold cross validation to find optimal regularization 201 | date_order = np.arange(ndts*(nyrs-1)).reshape(ndts,nyrs-1).T.flatten() 202 | cv_ind = date_order[np.arange(ndts*(nyrs-1))%232<231] # remove the date between the 5 cross-validated blocks 203 | valid_score = np.zeros((nreg,5), dtype=np.float32) 204 | valid_crps = np.zeros((nreg,5), dtype=np.float32) 205 | for cvi in range(5): 206 | train_ind = cv_ind[np.arange(len(cv_ind))//(len(cv_ind)//5)!=cvi] 207 | valid_ind = cv_ind[np.arange(len(cv_ind))//(len(cv_ind)//5)==cvi] 208 | pred_imgs_train = pred_imgs_cv[train_ind,:,:,:] 209 | basis_train = basis_cv[train_ind,:,:] 210 | logp_cl_train = logp_cl_cv[train_ind,:,:] 211 | cat_targets_train = 
apcp_obs_cat[apcp_obs_ind_cv.flatten()[train_ind],:,:].astype(float) 212 | pred_imgs_valid = pred_imgs_cv[valid_ind,:,:] 213 | basis_valid = basis_cv[valid_ind,:,:] 214 | logp_cl_valid = logp_cl_cv[valid_ind,:,:] 215 | cat_targets_valid = apcp_obs_cat[apcp_obs_ind_cv.flatten()[valid_ind],:,:].astype(float) 216 | doy_valid = np.repeat(doy_dts[:,np.newaxis],nyrs-1,axis=1).flatten()[valid_ind] 217 | for ireg in range(nreg): 218 | # Define and fit CNN model (using batch gradient descent) 219 | keras.backend.clear_session() 220 | model = build_cat_model(nxy, ncat, nbs, mod[imod], reg[ireg]) 221 | model.compile(optimizer=Adam(0.01), loss=modified_categorical_crossentropy) 222 | model.fit([pred_imgs_train,basis_train,logp_cl_train], cat_targets_train, epochs=150, batch_size=len(train_ind), verbose=0) 223 | valid_score[ireg,cvi] = model.evaluate([pred_imgs_valid,basis_valid,logp_cl_valid], cat_targets_valid, batch_size=len(valid_ind), verbose=0) 224 | # Calculate CRPS for each cross-validation fold 225 | prob_fcst_cat = model.predict([pred_imgs_valid,basis_valid,logp_cl_valid]) 226 | prob_fcst_chf = -np.log(np.maximum(1.-np.cumsum(prob_fcst_cat,axis=2)[:,:,:(ncat-1)],1.e-10)) 227 | crps_fold = np.zeros((len(valid_ind),nxy),dtype=np.float32) 228 | for ivdt in range(len(valid_ind)): 229 | for ixy in range(nxy): 230 | itp_fct = interp1d(thr_doy[doy_valid[ivdt],ixy,:], prob_fcst_chf[ivdt,ixy,:], kind='linear',fill_value='extrapolate') 231 | bs = (1.-np.exp(-itp_fct(x))-1.*(apcp_obs[apcp_obs_ind_cv.flatten()[valid_ind[ivdt]],ixy]<=x))**2 232 | crps_fold[ivdt,ixy] = 0.5*np.sum((bs[1:]+bs[:len(dx)])*dx) 233 | valid_crps[ireg,cvi] = np.mean(crps_fold) 234 | opt_reg_ind = np.argmin(np.mean(valid_score,axis=1)) 235 | opt_reg_param[iyr] = reg[opt_reg_ind] 236 | opt_valid_scores[iyr,:] = valid_score[opt_reg_ind,:] 237 | opt_valid_crps[iyr,:] = valid_crps[opt_reg_ind,:] 238 | print(np.mean(valid_score,axis=1).round(3)) 239 | print(np.mean(valid_crps,axis=1).round(2)) 240 | print(opt_reg_param[iyr]) 241 | 242 | ### Save out to file 243 | outfilename = "/Users/mscheuerer/Desktop/CalifAPCP/tuning/cnn-2deg-m"+str(imod)+"-drpt-f48" 244 | np.savez(outfilename, opt_reg_param=opt_reg_param, opt_valid_scores=opt_valid_scores, opt_valid_crps=opt_valid_crps) 245 | 246 | 247 | 248 | 249 | 250 | -------------------------------------------------------------------------------- /S-CNN-FitConvolutionalNetworkModel.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import scipy as sp 4 | import math 5 | import os, sys 6 | #import matplotlib.pyplot as plt 7 | import datetime 8 | import time 9 | import keras 10 | import keras.backend as K 11 | 12 | from netCDF4 import Dataset 13 | from numpy import ma 14 | from numpy.linalg import solve 15 | from scipy import stats 16 | 17 | from keras import models 18 | from keras import layers 19 | from keras import regularizers 20 | 21 | from keras.layers import Input, Dense, Dot, Add, Activation, Conv2D, MaxPooling2D, Flatten, Reshape, Dropout 22 | from keras.models import Model 23 | from keras.optimizers import Adam 24 | 25 | #plt.ion() 26 | 27 | 28 | ## Load categorical analysis data 29 | 30 | f1 = np.load("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz") 31 | #list(f1) 32 | lat = f1['obs_lat'] 33 | lon = f1['obs_lon'] 34 | obs_dates_ord = f1['obs_dates_ord'] 35 | pop_doy = f1['pop_doy'] 36 | apcp_obs_cat = f1['apcp_obs_cat'] 37 | f1.close() 38 | 39 | ndts, nxy, ncat = apcp_obs_cat.shape 40 | 41 | 
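## Illustrative aside (not part of the original pipeline): the climatological reference
## distribution constructed further below ('train_logp_cl') assigns probability 1-PoP to the
## dry category and splits PoP evenly across the remaining ncat-1 wet categories. A minimal
## sketch with a hypothetical PoP value, just to make that construction explicit:

pop_example = 0.4                                                    # hypothetical PoP, for illustration only
p_cl_example = np.append(1.-pop_example, np.full(ncat-1, pop_example/(ncat-1)))
assert np.isclose(np.sum(p_cl_example), 1.)                          # class probabilities sum to one
logp_cl_example = np.log(p_cl_example)                               # log probabilities, analogous to the 'inp_cl' input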
42 | 43 | ## Load ERA5 z500 and tcw fields, subset to 22 x 18 image, same for the ensemble forecast fields 44 | 45 | ixl = 10 46 | ixu = -6 47 | jyl = 6 48 | jyu = -6 49 | 50 | f2 = np.load("/home/michael/Desktop/CalifAPCP/data/z500_tcw_predictors_era5.npz") 51 | era5_dates_ord = f2['dates_ord'] 52 | era5_lon = f2['longitude'][ixl:ixu] 53 | era5_lat = f2['latitude'][jyl:jyu] 54 | z500 = f2['z500_1wk'][:,:,jyl:jyu,ixl:ixu] 55 | tcw = f2['tcw_1wk'][:,:,jyl:jyu,ixl:ixu] 56 | f2.close() 57 | 58 | ndts, nyrs, ny, nx = z500.shape 59 | 60 | 61 | z500_fcst = np.zeros((3,ndts,nyrs,11,ny,nx),dtype=np.float32) 62 | tcw_fcst = np.zeros((3,ndts,nyrs,11,ny,nx),dtype=np.float32) 63 | 64 | f3 = np.load("/home/michael/Desktop/CalifAPCP/data/z500_predictor_cnn.npz") 65 | mod_dates_ord = f3['mod_dates_ord'][:,:,6:21:7] 66 | 67 | f4 = np.load("/home/michael/Desktop/CalifAPCP/data/tcw_predictor_cnn.npz") 68 | 69 | for ilt in range(3): 70 | clead = 'week'+str(ilt+2) 71 | z500_fcst[ilt,:,:,:,:,:] = f3['z500_'+clead][:,:,:,jyl:jyu,ixl:ixu] # subset to 22 x 18 image 72 | tcw_fcst[ilt,:,:,:,:,:] = f4['tcw_'+clead][:,:,:,jyl:jyu,ixl:ixu] 73 | 74 | f3.close() 75 | f4.close() 76 | 77 | 78 | ## Upscale to 2 degrees 79 | 80 | z500_1deg = z500 81 | tcw_1deg = tcw 82 | z500 = (z500_1deg[:,:,0:ny:2,0:nx:2]+z500_1deg[:,:,1:ny:2,0:nx:2]+z500_1deg[:,:,0:ny:2,1:nx:2]+z500_1deg[:,:,1:ny:2,1:nx:2])/4. 83 | tcw = (tcw_1deg[:,:,0:ny:2,0:nx:2]+tcw_1deg[:,:,1:ny:2,0:nx:2]+tcw_1deg[:,:,0:ny:2,1:nx:2]+tcw_1deg[:,:,1:ny:2,1:nx:2])/4. 84 | z500_fcst_1deg = z500_fcst 85 | tcw_fcst_1deg = tcw_fcst 86 | z500_fcst = (z500_fcst_1deg[:,:,:,:,0:ny:2,0:nx:2]+z500_fcst_1deg[:,:,:,:,1:ny:2,0:nx:2]+z500_fcst_1deg[:,:,:,:,0:ny:2,1:nx:2]+z500_fcst_1deg[:,:,:,:,1:ny:2,1:nx:2])/4. 87 | tcw_fcst = (tcw_fcst_1deg[:,:,:,:,0:ny:2,0:nx:2]+tcw_fcst_1deg[:,:,:,:,1:ny:2,0:nx:2]+tcw_fcst_1deg[:,:,:,:,0:ny:2,1:nx:2]+tcw_fcst_1deg[:,:,:,:,1:ny:2,1:nx:2])/4. 
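## (The four terms above are the 2x2 block means of the 1-degree fields, i.e. a simple
##  2-degree upscaling; the grid dimensions ny and nx are halved accordingly below.)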
88 | ny = ny//2 89 | nx = nx//2 90 | 91 | 92 | 93 | ## Calculate doy for each analysis date and for each forecast valid date 94 | 95 | doy_dts = np.zeros(ndts,dtype=np.int32) 96 | apcp_obs_ind = np.zeros((ndts,nyrs),dtype=np.int32) 97 | for idt in range(ndts): 98 | for iyr in range(nyrs): 99 | apcp_obs_ind[idt,iyr] = np.where(obs_dates_ord==era5_dates_ord[idt,iyr])[0][0] 100 | date_ord = int(era5_dates_ord[idt,0]-0.5) 101 | doy_dts[idt] = min(364,(datetime.date.fromordinal(date_ord)-datetime.date(datetime.date.fromordinal(date_ord).year,1,1)).days) 102 | 103 | doy_fcst = np.zeros((3,ndts),dtype=np.int32) 104 | for idt in range(ndts): 105 | for ilt in range(3): 106 | date_ord = int(int(mod_dates_ord[idt,0,ilt])-0.5) 107 | doy_fcst[ilt,idt] = min(364,(datetime.date.fromordinal(date_ord)-datetime.date(datetime.date.fromordinal(date_ord).year,1,1)).days) 108 | 109 | 110 | 111 | ## Normalize tcw to 10th/90th climatological percentiles at each grid point 112 | 113 | tcw_q10 = np.percentile(tcw,10,axis=1) 114 | tcw_q90 = np.percentile(tcw,90,axis=1) 115 | tcw_q10_sm = np.zeros(tcw_q10.shape, dtype=np.float32) 116 | tcw_q90_sm = np.zeros(tcw_q90.shape, dtype=np.float32) 117 | 118 | tcw_fcst_q10 = np.percentile(tcw_fcst,10,axis=(2,3)) 119 | tcw_fcst_q90 = np.percentile(tcw_fcst,90,axis=(2,3)) 120 | tcw_fcst_q10_sm = np.zeros(tcw_fcst_q10.shape, dtype=np.float32) 121 | tcw_fcst_q90_sm = np.zeros(tcw_fcst_q90.shape, dtype=np.float32) 122 | 123 | X = np.ones((ndts,3), dtype=np.float32) # Fit harmonic function to annual cycle of tcw climatology 124 | X[:,1] = np.sin(2.*np.pi*era5_dates_ord[:,0]/365.25) 125 | X[:,2] = np.cos(2.*np.pi*era5_dates_ord[:,0]/365.25) 126 | 127 | for ix in range(nx): 128 | for jy in range(ny): 129 | coef_q10 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_q10[:,jy,ix])) 130 | tcw_q10_sm[:,jy,ix] = np.matmul(X,coef_q10) 131 | coef_q90 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_q90[:,jy,ix])) 132 | tcw_q90_sm[:,jy,ix] = np.matmul(X,coef_q90) 133 | for ilt in range(3): 134 | coef_q10 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_fcst_q10[ilt,:,jy,ix])) 135 | tcw_fcst_q10_sm[ilt,:,jy,ix] = np.matmul(X,coef_q10) 136 | coef_q90 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_fcst_q90[ilt,:,jy,ix])) 137 | tcw_fcst_q90_sm[ilt,:,jy,ix] = np.matmul(X,coef_q90) 138 | 139 | tcw_ano = -1.+2.*(tcw-tcw_q10_sm[:,None,:,:])/(tcw_q90_sm-tcw_q10_sm)[:,None,:,:] 140 | tcw_fcst_ano = -1.+2.*(tcw_fcst-tcw_fcst_q10_sm[:,:,None,None,:,:])/(tcw_fcst_q90_sm-tcw_fcst_q10_sm)[:,:,None,None,:,:] 141 | 142 | 143 | 144 | ## Normalize z500 to 1st/99th climatological percentiles across all grid points 145 | 146 | z500_q01 = np.percentile(z500,1,axis=(1,2,3)) 147 | z500_q99 = np.percentile(z500,99,axis=(1,2,3)) 148 | z500_fcst_q01 = np.percentile(z500_fcst,1,axis=(2,3,4,5)) 149 | z500_fcst_q99 = np.percentile(z500_fcst,99,axis=(2,3,4,5)) 150 | 151 | coef_q01 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_q01)) 152 | z500_q01_sm = np.matmul(X,coef_q01) 153 | coef_q99 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_q99)) 154 | z500_q99_sm = np.matmul(X,coef_q99) 155 | 156 | z500_fcst_q01_sm = np.zeros(z500_fcst_q01.shape, dtype=np.float32) 157 | z500_fcst_q99_sm = np.zeros(z500_fcst_q99.shape, dtype=np.float32) 158 | 159 | for ilt in range(3): 160 | coef_q01 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_fcst_q01[ilt,:])) 161 | z500_fcst_q01_sm[ilt,:] = np.matmul(X,coef_q01) 162 | coef_q99 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_fcst_q99[ilt,:])) 163 | z500_fcst_q99_sm[ilt,:] = np.matmul(X,coef_q99) 164 | 165 | z500_ano = 
-1.+2.*(z500-z500_q01_sm[:,None,None,None])/(z500_q99_sm-z500_q01_sm)[:,None,None,None] 166 | z500_fcst_ano = -1.+2.*(z500_fcst-z500_fcst_q01_sm[:,:,None,None,None,None])/(z500_fcst_q99_sm-z500_fcst_q01_sm)[:,:,None,None,None,None] 167 | 168 | 169 | # Define basis functions 170 | 171 | r_basis = 7. 172 | lon_ctr = np.outer(np.arange(-124,-115,3.5),np.ones(3)).reshape(9)[[2,4,5,6,7]] 173 | lat_ctr = np.outer(np.ones(3),np.arange(33,42,3.5)).reshape(9)[[2,4,5,6,7]] 174 | 175 | dst_lon = np.abs(np.subtract.outer(lon,lon_ctr)) 176 | dst_lat = np.abs(np.subtract.outer(lat,lat_ctr)) 177 | dst = np.sqrt(dst_lon**2+dst_lat**2) 178 | basis = np.where(dst>r_basis,0.,(1.-(dst/r_basis)**3)**3) 179 | basis = basis/np.sum(basis,axis=1)[:,None] 180 | nbs = basis.shape[1] 181 | 182 | 183 | ## Define functions for building a CNN 184 | 185 | def build_cat_model(n_xy, n_bins, n_basis, hidden_nodes, dropout_rate): 186 | inp_imgs = Input(shape=(9,11,2,)) 187 | inp_basis = Input(shape=(n_xy,n_basis,)) 188 | inp_cl = Input(shape=(n_xy,n_bins,)) 189 | c = Conv2D(4, (3,3), activation='elu')(inp_imgs) 190 | c = Conv2D(8, (3,3), activation='elu')(c) 191 | x = Flatten()(c) 192 | for h in hidden_nodes: 193 | x = Dropout(dropout_rate)(x) 194 | x = Dense(h, activation='elu')(x) 195 | x = Dense(n_bins*n_basis, activation='elu')(x) 196 | x = Reshape((n_bins,n_basis))(x) 197 | z = Dot(axes=2)([inp_basis, x]) # Tensor product with basis functions 198 | z = Add()([z, inp_cl]) # Add (log) probability anomalies to log climatological probabilities 199 | out = Activation('softmax')(z) 200 | return Model(inputs=[inp_imgs, inp_basis, inp_cl], outputs=out) 201 | 202 | 203 | def modified_categorical_crossentropy(y_mat, prob_fcst): 204 | prob_obs_cat = K.sum(y_mat*prob_fcst,axis=2) 205 | return -K.mean(K.log(prob_obs_cat)) 206 | 207 | 208 | 209 | imod = 0 210 | 211 | mod = [[10],[20],[10,10]] 212 | 213 | f5 = np.load("/home/michael/Desktop/CalifAPCP/tuning/cnn-2deg-m"+str(imod)+"-drpt-f48.npz") 214 | opt_reg_param = f5['opt_reg_param'] 215 | f5.close() 216 | 217 | 218 | for iyr in range(0,20): 219 | print(iyr) 220 | # Split data into training and verification data set 221 | apcp_obs_ind_train = np.delete(apcp_obs_ind,iyr,axis=1) 222 | apcp_obs_ind_verif = apcp_obs_ind[:,iyr] 223 | z500_pred_train = np.delete(z500_ano,iyr,axis=1).reshape((ndts*(nyrs-1),ny,nx,1)) 224 | z500_pred_verif = z500_ano[:,iyr,:,:,None] 225 | z500_pred_fcst_train = np.delete(z500_fcst_ano,iyr,axis=2).reshape((3,ndts*(nyrs-1),11,ny,nx,1)) 226 | z500_pred_fcst_verif = z500_fcst_ano[:,:,iyr,:,:,:,None] 227 | tcw_pred_train = np.delete(tcw_ano,iyr,axis=1).reshape((ndts*(nyrs-1),ny,nx,1)) 228 | tcw_pred_verif = tcw_ano[:,iyr,:,:,None] 229 | tcw_pred_fcst_train = np.delete(tcw_fcst_ano,iyr,axis=2).reshape((3,ndts*(nyrs-1),11,ny,nx,1)) 230 | tcw_pred_fcst_verif = tcw_fcst_ano[:,:,iyr,:,:,:,None] 231 | # Calculate climatological log probabilities for each class 232 | apcp_lgp0_cl_train = np.repeat(np.log(1.-pop_doy[doy_dts,np.newaxis,:]),nyrs-1,axis=1).reshape((ndts*(nyrs-1),nxy,1)) 233 | apcp_lgp0_cl_verif = np.log(1.-pop_doy[doy_dts,:])[:,:,None] 234 | apcp_lgpop_cl_train = np.repeat(np.log(pop_doy[doy_dts,np.newaxis,:]),nyrs-1,axis=1).reshape((ndts*(nyrs-1),nxy,1)) 235 | apcp_lgpop_cl_verif = np.log(pop_doy[doy_dts,:])[:,:,None] 236 | apcp_lgp0_cl_fcst_train = np.zeros((3,ndts*(nyrs-1),nxy,1), dtype=np.float32) 237 | apcp_lgp0_cl_fcst_verif = np.zeros((3,ndts,nxy,1), dtype=np.float32) 238 | apcp_lgpop_cl_fcst_train = np.zeros((3,ndts*(nyrs-1),nxy,1), 
dtype=np.float32) 239 | apcp_lgpop_cl_fcst_verif = np.zeros((3,ndts,nxy,1), dtype=np.float32) 240 | for ilt in range(3): 241 | apcp_lgp0_cl_fcst_train[ilt,:,:,0] = np.repeat(np.log(1.-pop_doy[doy_fcst[ilt,:],np.newaxis,:]),nyrs-1,axis=1).reshape((ndts*(nyrs-1),nxy)) 242 | apcp_lgp0_cl_fcst_verif[ilt,:,:,0] = np.log(1.-pop_doy[doy_fcst[ilt,:],:]) 243 | apcp_lgpop_cl_fcst_train[ilt,:,:,0] = np.repeat(np.log(pop_doy[doy_fcst[ilt,:],np.newaxis,:]),nyrs-1,axis=1).reshape((ndts*(nyrs-1),nxy)) 244 | apcp_lgpop_cl_fcst_verif[ilt,:,:,0] = np.log(pop_doy[doy_fcst[ilt,:],:]) 245 | # Compose training data (large-scale predictors, auxiliary predictors, climatological probabilities, observed categories) 246 | train_pred_imgs = np.concatenate((z500_pred_train,tcw_pred_train),axis=3) 247 | train_basis = np.repeat(basis[np.newaxis,:,:],ndts*(nyrs-1),axis=0) 248 | train_logp_cl = np.concatenate((apcp_lgp0_cl_train,np.repeat(apcp_lgpop_cl_train,ncat-1,axis=2)-np.log(ncat-1)),axis=2) 249 | train_cat_targets = apcp_obs_cat[apcp_obs_ind_train.flatten(),:,:].astype(float) 250 | # Define and fit CNN model 251 | keras.backend.clear_session() 252 | model = build_cat_model(nxy, ncat, nbs, mod[imod], opt_reg_param[iyr]) 253 | model.compile(optimizer=Adam(0.01), loss=modified_categorical_crossentropy) 254 | model.fit([train_pred_imgs,train_basis,train_logp_cl], train_cat_targets, epochs=150, batch_size=ndts*(nyrs-1), verbose=1) 255 | # Calculate ERA-5 probability forecasts 256 | verif_pred_imgs = np.concatenate((z500_pred_verif,tcw_pred_verif),axis=3) 257 | verif_basis = np.repeat(basis[np.newaxis,:,:],ndts,axis=0) 258 | verif_logp_cl = np.concatenate((apcp_lgp0_cl_verif,np.repeat(apcp_lgpop_cl_verif,ncat-1,axis=2)-np.log(ncat-1)),axis=2) 259 | prob_fcst_cat_era5 = model.predict([verif_pred_imgs,verif_basis,verif_logp_cl]) 260 | # Calculate ensemble-based, mean probability forecasts 261 | logp_ano_ensmean_train = np.zeros((3,ndts*(nyrs-1),nxy,ncat), dtype=np.float32) 262 | logp_ano_ensmean_verif = np.zeros((3,ndts,nxy,ncat), dtype=np.float32) 263 | for ilt in range(3): 264 | train_logp_cl = np.concatenate((apcp_lgp0_cl_fcst_train[ilt,:,:,:],np.repeat(apcp_lgpop_cl_fcst_train[ilt,:,:,:],ncat-1,axis=2)-np.log(ncat-1)),axis=2) 265 | verif_logp_cl = np.concatenate((apcp_lgp0_cl_fcst_verif[ilt,:,:,:],np.repeat(apcp_lgpop_cl_fcst_verif[ilt,:,:,:],ncat-1,axis=2)-np.log(ncat-1)),axis=2) 266 | prob_fcst_cat_ens_train = np.zeros((11,ndts*(nyrs-1),nxy,ncat), dtype=np.float32) 267 | prob_fcst_cat_ens_verif = np.zeros((11,ndts,nxy,ncat), dtype=np.float32) 268 | for imem in range(11): 269 | train_pred_imgs = np.concatenate((z500_pred_fcst_train[ilt,:,imem,:,:,:],tcw_pred_fcst_train[ilt,:,imem,:,:,:]),axis=3) 270 | prob_fcst_cat_ens_train[imem,:,:,:] = model.predict([train_pred_imgs,train_basis,train_logp_cl]) 271 | verif_pred_imgs = np.concatenate((z500_pred_fcst_verif[ilt,:,imem,:,:,:],tcw_pred_fcst_verif[ilt,:,imem,:,:,:]),axis=3) 272 | prob_fcst_cat_ens_verif[imem,:,:,:] = model.predict([verif_pred_imgs,verif_basis,verif_logp_cl]) 273 | logp_ano_ensmean_train[ilt,:,:,:] = np.mean(np.log(prob_fcst_cat_ens_train),axis=0) - train_logp_cl # Reconstruct the log probability anomalies 274 | logp_ano_ensmean_verif[ilt,:,:,:] = np.mean(np.log(prob_fcst_cat_ens_verif),axis=0) - verif_logp_cl # for each ensemble member and calculate mean 275 | ### Save out to file 276 | outfilename = "/home/michael/Desktop/CalifAPCP/forecasts/CNN-rv/probfcst_cnn-m"+str(imod)+"-drpt-2deg_yr"+str(iyr) 277 | np.savez(outfilename, 
prob_fcst_cat_era5=prob_fcst_cat_era5, \ 278 | logp_ano_ensmean_train=logp_ano_ensmean_train, \ 279 | logp_ano_ensmean_verif=logp_ano_ensmean_verif, \ 280 | apcp_lgp0_cl_fcst_train=apcp_lgp0_cl_fcst_train, \ 281 | apcp_lgp0_cl_fcst_verif=apcp_lgp0_cl_fcst_verif, \ 282 | apcp_lgpop_cl_fcst_train=apcp_lgpop_cl_fcst_train, \ 283 | apcp_lgpop_cl_fcst_verif=apcp_lgpop_cl_fcst_verif) 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | -------------------------------------------------------------------------------- /S-CSGD-GenerateForecastDistributions.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import scipy as sp 4 | import math 5 | import os, sys 6 | import datetime 7 | import time 8 | import matplotlib.pyplot as plt 9 | 10 | from netCDF4 import Dataset 11 | from numpy import ma 12 | from numpy.random import random_sample 13 | from numpy.linalg import solve 14 | from scipy import stats 15 | from scipy.stats import kendalltau 16 | from scipy.stats import gamma 17 | from scipy.special import beta 18 | from scipy.optimize import minimize 19 | from scipy.interpolate import * 20 | 21 | 22 | 23 | #plt.ion() 24 | 25 | rho = 3 # neighborhood radius (degrees) 26 | rho2 = rho**2 27 | 28 | #r = 300. # neighborhood radius (kilometers) 29 | #R = 6373. # earth radius (kilometers) 30 | 31 | 32 | leadDay = 20 # Start of the forecast period 33 | accumulation = 7 # Precipitation accumulation period 34 | 35 | 36 | f1 = np.load("/home/michael/Desktop/CalifAPCP/data/precip_PRISM_cal_19810101_20171231.npz") 37 | #list(f1) 38 | obs_precip = f1['precip'] 39 | obs_lat = f1['lat'] 40 | obs_lon = f1['lon'] 41 | obs_dates_ord = f1['dates_ord'] 42 | obs_dates = f1['dates'] 43 | f1.close() 44 | 45 | ndays, nxy = obs_precip.shape 46 | 47 | obs_precip_week = np.zeros((ndays-6,nxy), dtype=np.float32) 48 | for iday in range(7): 49 | obs_precip_week += obs_precip[iday:(ndays-6+iday),:] 50 | 51 | nwks, nxy = obs_precip_week.shape 52 | 53 | obs_precip_week[obs_precip_week<0.254] = 0. 
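# (0.254 mm = 0.01 in; weekly totals below this trace amount are set to zero above.)

## Aside, not part of the original script: the 7-day accumulation loop above can equivalently
## be computed with a cumulative sum along the time axis. A minimal sketch that reproduces the
## weekly totals prior to the trace thresholding:

csum = np.cumsum(obs_precip, axis=0, dtype=np.float64)
obs_precip_week_alt = (csum[6:,:]-np.concatenate((np.zeros((1,nxy)),csum[:-7,:]),axis=0)).astype(np.float32)
#np.allclose(obs_precip_week_alt[obs_precip_week>0.], obs_precip_week[obs_precip_week>0.])  # -> True up to rounding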
54 | obs_dates_ord = obs_dates_ord[:nwks] 55 | obs_dates = obs_dates[:nwks] 56 | 57 | 58 | f2 = np.load("/home/michael/Desktop/CalifAPCP/data/precip_climatological_csgd.npz") 59 | pop_cl = f2['pop_cl_doy'] 60 | mean_cl = f2['mean_cl_doy'] 61 | shape_cl = f2['shape_cl_doy'] 62 | scale_cl = f2['scale_cl_doy'] 63 | shift_cl = f2['shift_cl_doy'] 64 | f2.close() 65 | 66 | 67 | 68 | f3 = np.load("/home/michael/Desktop/CalifAPCP/data/mod_precip_calplus.npz") 69 | ### Modeled precip is (reforecast time, member, year, lead time, lat, lon) 70 | mod_precip = f3['precip'] 71 | #mod_dates_ord = f3['datesOrd'] 72 | mod_lon = f3['lon'] 73 | mod_lat = f3['lat'] 74 | f3.close() 75 | 76 | f3 = np.load("/home/michael/Desktop/CalifAPCP/data/mod_precip_cal.npz") 77 | mod_dates_ord = f3['dates_ord'] # Need to load dates from other file since dates in 'mod_precip_calplus.npz' are incorrect 78 | f3.close() 79 | 80 | ndts, nmem, nyrs, nlts, nlat, nlon = mod_precip.shape 81 | 82 | ### Modeled precip accumulated over forecast period (reforecast time, year, ensembles, space) 83 | mod_precip_fcstperiod = np.sum(mod_precip[:,:,:,leadDay:leadDay+accumulation,:,:],axis=3).reshape((ndts,nmem,nyrs,nlon*nlat)) 84 | mod_dates_fcstperiod = mod_dates_ord[:,:,leadDay] 85 | 86 | 87 | ### Calculate day of the year ('doy') for each reforecast date 88 | doy = np.zeros(ndts,dtype=np.int32) 89 | for idt in range(ndts): 90 | yyyy = datetime.date.fromordinal(int(mod_dates_fcstperiod[idt,0])).year 91 | doy[idt] = (datetime.date.fromordinal(int(mod_dates_fcstperiod[idt,0]))-datetime.date(yyyy,1,1)).days 92 | 93 | 94 | ## Define function for calculating weighted mean absolute difference of a sample 95 | def wgt_meandiff(ensfcst, weights): 96 | n, m, k = ensfcst.shape 97 | d = m*k 98 | res = np.zeros(n, dtype=np.float32) 99 | inz = np.where(np.greater(np.sum(ensfcst>0.0,axis=(1,2)),0))[0] 100 | x = ensfcst[inz,:,:].reshape(len(inz),d) 101 | w = weights.reshape(d) 102 | x_ord = np.argsort(x,axis=1) 103 | for i in range(len(inz)): 104 | x_sort = x[i,x_ord[i,]] 105 | W = np.cumsum(w[x_ord[i,]]) 106 | res[inz[i]] = 2*sum(W[0:(d-1)]*(1.0-W[0:(d-1)])*np.diff(x_sort)) 107 | return res 108 | 109 | 110 | def crpsCondCSGD(par,obs,ensmeanano,ensmeandiffano,muc,sigmac,shiftc): 111 | # average CRPS for CSGD conditional on the ensemble statistics 112 | logarg = par[1] + par[2]*ensmeanano 113 | mu = muc * np.log1p(np.expm1(par[0])*logarg) / par[0] 114 | # sigma = sigmac * (par[3]*np.sqrt(mu/muc)) 115 | sigma = sigmac * (par[3]*np.sqrt(mu/muc)+par[4]*ensmeandiffano) 116 | shape = np.square(mu/sigma) 117 | scale = np.square(sigma)/mu 118 | shift = shiftc 119 | betaf = beta(0.5,shape+0.5) 120 | cstd = (0.254-shift)/scale 121 | ystd = np.maximum(obs-shift,0.0)/scale 122 | Fyk = sp.stats.gamma.cdf(ystd,shape,scale=1) 123 | Fck = sp.stats.gamma.cdf(cstd,shape,scale=1) 124 | FykP1 = sp.stats.gamma.cdf(ystd,shape+1,scale=1) 125 | FckP1 = sp.stats.gamma.cdf(cstd,shape+1,scale=1) 126 | F2c2k = sp.stats.gamma.cdf(2*cstd,2*shape,scale=1) 127 | crps = ystd*(2.*Fyk-1.) 
- cstd*np.square(Fck) + shape*(1.+2.*Fck*FckP1-np.square(Fck)-2*FykP1) - (shape/float(math.pi))*betaf*(1.-F2c2k) 128 | return ma.mean(scale*crps) 129 | 130 | 131 | param_initial = [0.05,0.5,0.5,0.7,0.5] 132 | param_ranges = ((0.001,1.0), (0.01,1.0), (0.0,3.0), (0.1,1.0), (0.0,3.0)) 133 | 134 | par_reg = np.zeros((nyrs,nxy,5), dtype=np.float32) 135 | csgd_pars_fcst = np.zeros((ndts,nyrs,nxy,3), dtype=np.float32) 136 | 137 | for iyr in range(nyrs): 138 | print(iyr) 139 | ### Split data into training and verification data, save day index of observational data 140 | doy_train = np.outer(doy,np.ones(19,dtype=np.int32)).flatten() 141 | apcp_obs_ind_train = np.zeros((ndts,nyrs),dtype=np.int32) 142 | apcp_obs_ind_verif = np.zeros(ndts,dtype=np.int32) 143 | for idt in range(ndts): 144 | apcp_obs_ind_verif[idt] = np.nonzero(mod_dates_fcstperiod[idt,iyr]==obs_dates_ord)[0][0] 145 | for jyr in range(0,nyrs): 146 | apcp_obs_ind_train[idt,jyr] = np.nonzero(mod_dates_fcstperiod[idt,jyr]==obs_dates_ord)[0][0] 147 | apcp_obs_ind_train = np.delete(apcp_obs_ind_train,iyr,axis=1) 148 | ensfcst_train = np.delete(mod_precip_fcstperiod,iyr,axis=2) 149 | ensfcst_clavg = np.mean(ensfcst_train,axis=(1,2)) 150 | ensfcst_clavg_sm = np.zeros((ndts,nlon*nlat), dtype=np.float32) 151 | for idt in range(ndts): 152 | wnd_ind = np.minimum(np.minimum(abs(doy[idt]-doy),abs(doy[idt]-365-doy)),abs(doy[idt]+365-doy))<31 153 | ensfcst_clavg_sm[idt,:] = np.mean(ensfcst_clavg[wnd_ind,:],axis=0) 154 | ensfcst_ano_train = ensfcst_train / ensfcst_clavg_sm[:,None,None,:] 155 | ensfcst_ano_verif = mod_precip_fcstperiod[:,:,iyr,:] / ensfcst_clavg_sm[:,None,:] 156 | for ixy in range(nxy): 157 | dx2 = np.square(obs_lon[ixy]-mod_lon) 158 | dy2 = np.square(obs_lat[ixy]-mod_lat) 159 | dst2 = np.add.outer(dy2,dx2).reshape(nlon*nlat) 160 | use = (dst2p33) 111 | pot33pCSGD[idt,iyr,:] = 1.-gamma.cdf((p33-shift)/scale,shape) 112 | brier33pCSGD[idt,iyr,:] = (exc33p[idt,iyr,:]-pot33pCSGD[idt,iyr,:])**2 113 | p67 = qtev_doy[doy_dts[idt],:,1] 114 | exc67p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p67) 115 | pot67pCSGD[idt,iyr,:] = 1.-gamma.cdf((p67-shift)/scale,shape) 116 | brier67pCSGD[idt,iyr,:] = (exc67p[idt,iyr,:]-pot67pCSGD[idt,iyr,:])**2 117 | p85 = qtev_doy[doy_dts[idt],:,2] 118 | exc85p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p85) 119 | pot85pCSGD[idt,iyr,:] = 1.-gamma.cdf((p85-shift)/scale,shape) 120 | brier85pCSGD[idt,iyr,:] = (exc85p[idt,iyr,:]-pot85pCSGD[idt,iyr,:])**2 121 | 122 | 123 | outfilename = "/home/michael/Desktop/CalifAPCP/results/scores-rv2_"+clead 124 | np.savez(outfilename, crpsCSGD=crpsCSGD, \ 125 | exc33p=exc33p, pot33pCSGD=pot33pCSGD, Bs33pCSGD=brier33pCSGD, \ 126 | exc67p=exc67p, pot67pCSGD=pot67pCSGD, Bs67pCSGD=brier67pCSGD, \ 127 | exc85p=exc85p, pot85pCSGD=pot85pCSGD, Bs85pCSGD=brier85pCSGD) 128 | 129 | 130 | 131 | 132 | 133 | 134 | ### Calculate skill scores for ANN predictions w/o climatology probabilities 135 | 136 | exc33p = np.zeros(obs_precip_vdate.shape) 137 | pot33pANN = np.zeros(obs_precip_vdate.shape) 138 | brier33pANN = np.zeros(obs_precip_vdate.shape) 139 | 140 | exc67p = np.zeros(obs_precip_vdate.shape) 141 | pot67pANN = np.zeros(obs_precip_vdate.shape) 142 | brier67pANN = np.zeros(obs_precip_vdate.shape) 143 | 144 | exc85p = np.zeros(obs_precip_vdate.shape) 145 | pot85pANN = np.zeros(obs_precip_vdate.shape) 146 | brier85pANN = np.zeros(obs_precip_vdate.shape) 147 | 148 | rpsANN = np.zeros(obs_precip_vdate.shape) 149 | crpsANN = np.zeros(obs_precip_vdate.shape) 150 | 151 | 152 | x = 
(np.arange(0,101)/5)**2 # evaluation points for numerical approximation of the CRPS 153 | dx = np.diff(x) 154 | 155 | for iyr in range(nyrs): 156 | print(iyr) 157 | f4 = np.load("/home/michael/Desktop/CalifAPCP/stats/ensemble_stats_"+clead+"_ANN_yr"+str(iyr)+".npz") 158 | doy_dts = f4['doy_dts'] 159 | f4.close() 160 | f5 = np.load("/home/michael/Desktop/CalifAPCP/forecasts/ANN-rv/probfcst_10-l1_"+clead+"_yr"+str(iyr)+".npz") 161 | prob_fcst_cat = f5['prob_fcst_cat'] 162 | f5.close() 163 | prob_fcst_chf = -np.log(1.-np.cumsum(prob_fcst_cat,axis=2)[:,:,:(ncat-1)]) 164 | prob_over_thr = np.zeros((ndts,nxy,qtev_doy.shape[2]),dtype=np.float32) 165 | for idt in range(ndts): 166 | ### Calculate exceedance ANN probabilities from interpolated cumulative hazard function 167 | for ixy in range(nxy): 168 | itp_fct = interp1d(thr_doy[doy_dts[idt],ixy,:], prob_fcst_chf[idt,ixy,:], kind='linear',fill_value='extrapolate') 169 | prob_over_thr = np.exp(-itp_fct(qtev_doy[doy_dts[idt],ixy,:])) 170 | pot33pANN[idt,iyr,ixy] = prob_over_thr[0] 171 | pot67pANN[idt,iyr,ixy] = prob_over_thr[1] 172 | pot85pANN[idt,iyr,ixy] = prob_over_thr[2] 173 | ## Calculate CRPS for ANN 174 | bs = (1.-np.exp(-itp_fct(x))-1.*(obs_precip_vdate[idt,iyr,ixy]<=x))**2 175 | crpsANN[idt,iyr,ixy] = 0.5*np.sum((bs[1:]+bs[:len(dx)])*dx) 176 | ### Calculate threshold exceedances for the Brier scores used to approximate the CRPS 177 | crps_exc = 1.*np.less_equal.outer(obs_precip_vdate[idt,iyr,:],x) 178 | ## Calculate Brier scores for different thresholds 179 | p33 = qtev_doy[doy_dts[idt],:,0] 180 | exc33p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p33) 181 | brier33pANN[idt,iyr,:] = (exc33p[idt,iyr,:]-pot33pANN[idt,iyr,:])**2 182 | p67 = qtev_doy[doy_dts[idt],:,1] 183 | exc67p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p67) 184 | brier67pANN[idt,iyr,:] = (exc67p[idt,iyr,:]-pot67pANN[idt,iyr,:])**2 185 | p85 = qtev_doy[doy_dts[idt],:,2] 186 | exc85p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p85) 187 | brier85pANN[idt,iyr,:] = (exc85p[idt,iyr,:]-pot85pANN[idt,iyr,:])**2 188 | 189 | 190 | outfilename = "/home/michael/Desktop/CalifAPCP/results/scores-rv3_"+clead 191 | np.savez(outfilename, crpsANN=crpsANN, \ 192 | exc33p=exc33p, pot33pANN=pot33pANN, Bs33pANN=brier33pANN, \ 193 | exc67p=exc67p, pot67pANN=pot67pANN, Bs67pANN=brier67pANN, \ 194 | exc85p=exc85p, pot85pANN=pot85pANN, Bs85pANN=brier85pANN) 195 | 196 | 197 | 198 | 199 | ### Calculate skill scores for CNN predictions with different architectures for the convolutional layers 200 | 201 | f2 = np.load("/home/michael/Desktop/CalifAPCP/data/z500_tcw_predictors_era5.npz") 202 | mod_dates = f2['dates_ord'] 203 | f2.close() 204 | 205 | ndts, nyrs = mod_dates.shape 206 | 207 | 208 | doy_dts = np.zeros(ndts,dtype=np.int32) 209 | obs_precip_vdate = np.zeros((ndts,nyrs,nxy),dtype=np.float32) 210 | for idt in range(ndts): 211 | for iyr in range(nyrs): 212 | fnd = np.nonzero(obs_dates_ord==mod_dates[idt,iyr])[0][0] 213 | obs_precip_vdate[idt,iyr,:] = obs_precip_week[fnd,:] 214 | date_ord = int(mod_dates[idt,-1]-0.5) 215 | doy_dts[idt] = min(364,(datetime.date.fromordinal(date_ord)-datetime.date(datetime.date.fromordinal(date_ord).year,1,1)).days) 216 | 217 | 218 | ### Calculate skill scores 219 | 220 | exc33p = np.zeros(obs_precip_vdate.shape) 221 | brier33pClm = np.zeros(obs_precip_vdate.shape) 222 | pot33pCNN = np.zeros(obs_precip_vdate.shape) 223 | brier33pCNN = np.zeros(obs_precip_vdate.shape) 224 | 225 | exc67p = np.zeros(obs_precip_vdate.shape) 226 | brier67pClm = np.zeros(obs_precip_vdate.shape) 
227 | pot67pCNN = np.zeros(obs_precip_vdate.shape) 228 | brier67pCNN = np.zeros(obs_precip_vdate.shape) 229 | 230 | exc85p = np.zeros(obs_precip_vdate.shape) 231 | brier85pClm = np.zeros(obs_precip_vdate.shape) 232 | pot85pCNN = np.zeros(obs_precip_vdate.shape) 233 | brier85pCNN = np.zeros(obs_precip_vdate.shape) 234 | 235 | rpsClm = np.zeros(obs_precip_vdate.shape) 236 | rpsCNN = np.zeros(obs_precip_vdate.shape) 237 | 238 | crpsClm = np.zeros(obs_precip_vdate.shape) 239 | crpsCNN = np.zeros(obs_precip_vdate.shape) 240 | 241 | 242 | wwCl = 15 243 | 244 | x = (np.arange(0,101)/5)**2 # evaluation points for numerical approximation of the CRPS 245 | dx = np.diff(x) 246 | 247 | 248 | imod = 0 249 | 250 | for iyr in range(nyrs): 251 | print(iyr) 252 | f5 = np.load("/home/michael/Desktop/CalifAPCP/forecasts/CNN-rv/probfcst_cnn-m"+str(imod)+"-drpt-2deg_yr"+str(iyr)+".npz") 253 | prob_fcst_cat = f5['prob_fcst_cat_era5'] 254 | f5.close() 255 | #f5 = np.load("/home/michael/Desktop/CalifAPCP/forecasts/CNN/probfcst_cnn-m"+str(imod)+"-drpt-f48_"+clead+"_yr"+str(iyr)+".npz") 256 | #prob_fcst_cat = f5['prob_fcst_cat'] 257 | #f5.close() 258 | prob_fcst_chf = -np.log(1.-np.cumsum(prob_fcst_cat,axis=2)[:,:,:(ncat-1)]) 259 | prob_over_thr = np.zeros((ndts,nxy,qtev_doy.shape[2]),dtype=np.float32) 260 | for idt in range(ndts): 261 | windowClm = np.argsort(np.abs(idt-np.arange(ndts)))[:wwCl] 262 | ### Calculate exceedance ANN probabilities from interpolated cumulative hazard function 263 | for ixy in range(nxy): 264 | itp_fct = interp1d(thr_doy[doy_dts[idt],ixy,:], prob_fcst_chf[idt,ixy,:], kind='linear',fill_value='extrapolate') 265 | prob_over_thr = np.exp(-itp_fct(qtev_doy[doy_dts[idt],ixy,:])) 266 | pot33pCNN[idt,iyr,ixy] = prob_over_thr[0] 267 | pot67pCNN[idt,iyr,ixy] = prob_over_thr[1] 268 | pot85pCNN[idt,iyr,ixy] = prob_over_thr[2] 269 | ## Calculate CRPS for CNN 270 | bs = (1.-np.exp(-itp_fct(x))-1.*(obs_precip_vdate[idt,iyr,ixy]<=x))**2 271 | crpsCNN[idt,iyr,ixy] = 0.5*np.sum((bs[1:]+bs[:len(dx)])*dx) 272 | ### Get current year and julian day to use to select climatological percentiles 273 | currentYear = datetime.date.fromordinal(int(mod_dates[idt,iyr])).year 274 | currentDay = (datetime.date.fromordinal(int(mod_dates[idt,iyr]))-datetime.date(currentYear,1,1)).days 275 | obsClm = obs_precip_vdate[windowClm,:,:].reshape((wwCl*nyrs,nxy)) 276 | crps_exc = 1.*np.less_equal.outer(obs_precip_vdate[idt,iyr,:],x) 277 | ## Calculate CRPS for Clm 278 | clm_cdf = np.mean(obsClm[:,:,None]<=x[None,None,:],axis=0) 279 | bs = (clm_cdf-crps_exc)**2 280 | crpsClm[idt,iyr,:] = 0.5*np.sum((bs[:,1:]+bs[:,:len(dx)])*dx[None,:],axis=1) 281 | ## Calculate Brier scores for different thresholds 282 | p33 = qtev_doy[doy_dts[idt],:,0] 283 | exc33p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p33) 284 | brier33pClm[idt,iyr,:] = (exc33p[idt,iyr,:]-np.mean(obsClm>p33[None,:],axis=0))**2 285 | brier33pCNN[idt,iyr,:] = (exc33p[idt,iyr,:]-pot33pCNN[idt,iyr,:])**2 286 | p67 = qtev_doy[doy_dts[idt],:,1] 287 | exc67p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p67) 288 | brier67pClm[idt,iyr,:] = (exc67p[idt,iyr,:]-np.mean(obsClm>p67[None,:],axis=0))**2 289 | brier67pCNN[idt,iyr,:] = (exc67p[idt,iyr,:]-pot67pCNN[idt,iyr,:])**2 290 | p85 = qtev_doy[doy_dts[idt],:,2] 291 | exc85p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p85) 292 | brier85pClm[idt,iyr,:] = (exc85p[idt,iyr,:]-np.mean(obsClm>p85[None,:],axis=0))**2 293 | brier85pCNN[idt,iyr,:] = (exc85p[idt,iyr,:]-pot85pCNN[idt,iyr,:])**2 294 | 295 | 296 | outfilename = 
"/home/michael/Desktop/CalifAPCP/results/scores-rv5" 297 | np.savez(outfilename, crpsClm=crpsClm, crpsCNN=crpsCNN, \ 298 | exc33p=exc33p, pot33pCNN=pot33pCNN, Bs33pClm=brier33pClm, Bs33pCNN=brier33pCNN, \ 299 | exc67p=exc67p, pot67pCNN=pot67pCNN, Bs67pClm=brier67pClm, Bs67pCNN=brier67pCNN, \ 300 | exc85p=exc85p, pot85pCNN=pot85pCNN, Bs85pClm=brier85pClm, Bs85pCNN=brier85pCNN) 301 | 302 | 303 | 304 | 305 | # calculate ranked probability score 306 | rpsClm = brier33pClm + brier67pClm + brier85pClm 307 | rpsCNN = brier33pCNN + brier67pCNN + brier85pCNN 308 | 309 | # rpssAvgCNN 310 | round(1.-np.sum(rpsCNN)/np.sum(rpsClm),4) 311 | 312 | 313 | 0.4183 # 1deg, max pooling 314 | 0.4081 # 1deg, no max pooling 315 | 0.4172 # 2deg, no max pooling 316 | 317 | 318 | 319 | 320 | 321 | 322 | ## Now based on IFS ensemble forecasts 323 | 324 | leadDay = 20 # d works out to being a d+0.5 day forecast 325 | accumulation = 7 # Precipitation accumulation period 326 | 327 | clead = 'week'+str((leadDay+8)//7) 328 | 329 | 330 | f2 = np.load("/home/michael/Desktop/CalifAPCP/data/mod_precip_cal.npz") 331 | mod_dates = f2['dates_ord'][:,:,leadDay] 332 | f2.close() 333 | 334 | ndts, nyrs = mod_dates.shape 335 | 336 | 337 | doy_dts = np.zeros(ndts,dtype=np.int32) 338 | obs_precip_vdate = np.zeros((ndts,nyrs,nxy),dtype=np.float32) 339 | for idt in range(ndts): 340 | for iyr in range(nyrs): 341 | fnd = np.nonzero(obs_dates_ord==mod_dates[idt,iyr])[0][0] 342 | obs_precip_vdate[idt,iyr,:] = obs_precip_week[fnd,:] 343 | date_ord = int(mod_dates[idt,-1]-0.5) 344 | doy_dts[idt] = min(364,(datetime.date.fromordinal(date_ord)-datetime.date(datetime.date.fromordinal(date_ord).year,1,1)).days) 345 | 346 | 347 | ### Calculate skill scores 348 | 349 | exc33p = np.zeros(obs_precip_vdate.shape) 350 | brier33pClm = np.zeros(obs_precip_vdate.shape) 351 | pot33pCNN = np.zeros(obs_precip_vdate.shape) 352 | brier33pCNN = np.zeros(obs_precip_vdate.shape) 353 | 354 | exc67p = np.zeros(obs_precip_vdate.shape) 355 | brier67pClm = np.zeros(obs_precip_vdate.shape) 356 | pot67pCNN = np.zeros(obs_precip_vdate.shape) 357 | brier67pCNN = np.zeros(obs_precip_vdate.shape) 358 | 359 | exc85p = np.zeros(obs_precip_vdate.shape) 360 | brier85pClm = np.zeros(obs_precip_vdate.shape) 361 | pot85pCNN = np.zeros(obs_precip_vdate.shape) 362 | brier85pCNN = np.zeros(obs_precip_vdate.shape) 363 | 364 | rpsClm = np.zeros(obs_precip_vdate.shape) 365 | rpsCNN = np.zeros(obs_precip_vdate.shape) 366 | 367 | crpsClm = np.zeros(obs_precip_vdate.shape) 368 | crpsCNN = np.zeros(obs_precip_vdate.shape) 369 | 370 | 371 | wwCl = 15 372 | 373 | x = (np.arange(0,101)/5)**2 # evaluation points for numerical approximation of the CRPS 374 | dx = np.diff(x) 375 | 376 | 377 | imod = 0 378 | 379 | for iyr in range(nyrs): 380 | print(iyr) 381 | f5 = np.load("/home/michael/Desktop/CalifAPCP/forecasts/CNN-rv/probfcst_cnn-m"+str(imod)+"-drpt-2deg_"+clead+"_yr"+str(iyr)+".npz") 382 | prob_fcst_cat = f5['prob_fcst_cat'] 383 | f5.close() 384 | prob_fcst_chf = -np.log(1.-np.cumsum(prob_fcst_cat,axis=2)[:,:,:(ncat-1)]) 385 | prob_over_thr = np.zeros((ndts,nxy,qtev_doy.shape[2]),dtype=np.float32) 386 | for idt in range(ndts): 387 | windowClm = np.argsort(np.abs(idt-np.arange(ndts)))[:wwCl] 388 | ### Calculate exceedance ANN probabilities from interpolated cumulative hazard function 389 | for ixy in range(nxy): 390 | itp_fct = interp1d(thr_doy[doy_dts[idt],ixy,:], prob_fcst_chf[idt,ixy,:], kind='linear',fill_value='extrapolate') 391 | prob_over_thr = 
np.exp(-itp_fct(qtev_doy[doy_dts[idt],ixy,:])) 392 | pot33pCNN[idt,iyr,ixy] = prob_over_thr[0] 393 | pot67pCNN[idt,iyr,ixy] = prob_over_thr[1] 394 | pot85pCNN[idt,iyr,ixy] = prob_over_thr[2] 395 | ## Calculate CRPS for CNN 396 | bs = (1.-np.exp(-itp_fct(x))-1.*(obs_precip_vdate[idt,iyr,ixy]<=x))**2 397 | crpsCNN[idt,iyr,ixy] = 0.5*np.sum((bs[1:]+bs[:len(dx)])*dx) 398 | ### Get current year and julian day to use to select climatological percentiles 399 | currentYear = datetime.date.fromordinal(int(mod_dates[idt,iyr])).year 400 | currentDay = (datetime.date.fromordinal(int(mod_dates[idt,iyr]))-datetime.date(currentYear,1,1)).days 401 | obsClm = obs_precip_vdate[windowClm,:,:].reshape((wwCl*nyrs,nxy)) 402 | crps_exc = 1.*np.less_equal.outer(obs_precip_vdate[idt,iyr,:],x) 403 | ## Calculate CRPS for Clm 404 | clm_cdf = np.mean(obsClm[:,:,None]<=x[None,None,:],axis=0) 405 | bs = (clm_cdf-crps_exc)**2 406 | crpsClm[idt,iyr,:] = 0.5*np.sum((bs[:,1:]+bs[:,:len(dx)])*dx[None,:],axis=1) 407 | ## Calculate Brier scores for different thresholds 408 | p33 = qtev_doy[doy_dts[idt],:,0] 409 | exc33p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p33) 410 | brier33pClm[idt,iyr,:] = (exc33p[idt,iyr,:]-np.mean(obsClm>p33[None,:],axis=0))**2 411 | brier33pCNN[idt,iyr,:] = (exc33p[idt,iyr,:]-pot33pCNN[idt,iyr,:])**2 412 | p67 = qtev_doy[doy_dts[idt],:,1] 413 | exc67p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p67) 414 | brier67pClm[idt,iyr,:] = (exc67p[idt,iyr,:]-np.mean(obsClm>p67[None,:],axis=0))**2 415 | brier67pCNN[idt,iyr,:] = (exc67p[idt,iyr,:]-pot67pCNN[idt,iyr,:])**2 416 | p85 = qtev_doy[doy_dts[idt],:,2] 417 | exc85p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p85) 418 | brier85pClm[idt,iyr,:] = (exc85p[idt,iyr,:]-np.mean(obsClm>p85[None,:],axis=0))**2 419 | brier85pCNN[idt,iyr,:] = (exc85p[idt,iyr,:]-pot85pCNN[idt,iyr,:])**2 420 | 421 | 422 | outfilename = "/home/michael/Desktop/CalifAPCP/results/scores-rv5_"+clead 423 | np.savez(outfilename, crpsClm=crpsClm, crpsCNN=crpsCNN, \ 424 | exc33p=exc33p, pot33pCNN=pot33pCNN, Bs33pClm=brier33pClm, Bs33pCNN=brier33pCNN, \ 425 | exc67p=exc67p, pot67pCNN=pot67pCNN, Bs67pClm=brier67pClm, Bs67pCNN=brier67pCNN, \ 426 | exc85p=exc85p, pot85pCNN=pot85pCNN, Bs85pClm=brier85pClm, Bs85pCNN=brier85pCNN) 427 | 428 | 429 | 430 | # calculate ranked probability score 431 | rpsClm = brier33pClm + brier67pClm + brier85pClm 432 | rpsCNN = brier33pCNN + brier67pCNN + brier85pCNN 433 | 434 | 435 | # rpssAvgCNN 436 | round(1.-np.sum(rpsCNN)/np.sum(rpsClm),4) 437 | 438 | 439 | 440 | -------------------------------------------------------------------------------- /S-CodeForGraphics.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import numpy.ma as ma 4 | import scipy as sp 5 | import math 6 | import os, sys 7 | import matplotlib.pyplot as plt 8 | import matplotlib.path as path 9 | import matplotlib.patches as patches 10 | 11 | import datetime 12 | import time 13 | import pandas as pd 14 | import statsmodels.api as sm 15 | import statsmodels.formula.api as smf 16 | 17 | from netCDF4 import Dataset 18 | from numpy import ma, loadtxt 19 | from numpy.linalg import solve 20 | from scipy import stats 21 | from scipy.interpolate import interp1d 22 | from scipy.stats import kendalltau 23 | from colorspace import diverging_hcl, sequential_hcl 24 | 25 | plt.ion() 26 | 27 | divcmp = diverging_hcl("Tropic",rev=True).cmap(name = "Diverging Color Map") 28 | 29 | 30 | f1 = 
np.load("/home/michael/Desktop/CalifAPCP/data/precip_PRISM_cal_19810101_20171231.npz") 31 | obs_lat = f1['lat'] 32 | obs_lon = f1['lon'] 33 | f1.close() 34 | 35 | nxy = len(obs_lon) 36 | 37 | ndts = 61 38 | nyrs = 20 39 | 40 | 41 | ################################################################################################### 42 | # # 43 | # Figure S1: Maps of RPSS comparing different CSGD implementations # 44 | # # 45 | ################################################################################################### 46 | 47 | 48 | acfRv1 = np.zeros((3,15),dtype=np.float32) 49 | acfRv2 = np.zeros((3,15),dtype=np.float32) 50 | pvalRv1 = np.zeros((3,nxy),dtype=np.float32) 51 | pvalRv2 = np.zeros((3,nxy),dtype=np.float32) 52 | alphaFDRrv1 = np.zeros(3,dtype=np.float32) 53 | alphaFDRrv2 = np.zeros(3,dtype=np.float32) 54 | 55 | rpssMapCSGD = ma.array(np.zeros((3,nxy),dtype=np.float32),mask=True) 56 | rpssMapCSGDrv1 = ma.array(np.zeros((3,nxy),dtype=np.float32),mask=True) 57 | rpssMapCSGDrv2 = ma.array(np.zeros((3,nxy),dtype=np.float32),mask=True) 58 | 59 | rpssAvgCSGD = ma.array(np.zeros(3,dtype=np.float32),mask=True) 60 | rpssAvgCSGDrv1 = ma.array(np.zeros(3,dtype=np.float32),mask=True) 61 | rpssAvgCSGDrv2 = ma.array(np.zeros(3,dtype=np.float32),mask=True) 62 | 63 | for ilead in range(3): 64 | f1 = np.load("/home/michael/Desktop/CalifAPCP/results/scores-ann_week"+str(ilead+2)+".npz") 65 | Bs33Clm = f1['Bs33pClm'] 66 | Bs33CSGD = f1['Bs33pCSGD'] 67 | Bs67Clm = f1['Bs67pClm'] 68 | Bs67CSGD = f1['Bs67pCSGD'] 69 | Bs85Clm = f1['Bs85pClm'] 70 | Bs85CSGD = f1['Bs85pCSGD'] 71 | f1.close() 72 | f2 = np.load("/home/michael/Desktop/CalifAPCP/results/scores-rv1_week"+str(ilead+2)+".npz") 73 | Bs33CSGDrv1 = f2['Bs33pCSGD'] 74 | Bs67CSGDrv1 = f2['Bs67pCSGD'] 75 | Bs85CSGDrv1 = f2['Bs85pCSGD'] 76 | f2.close() 77 | f3 = np.load("/home/michael/Desktop/CalifAPCP/results/scores-rv2_week"+str(ilead+2)+".npz") 78 | Bs33CSGDrv2 = f3['Bs33pCSGD'] 79 | Bs67CSGDrv2 = f3['Bs67pCSGD'] 80 | Bs85CSGDrv2 = f3['Bs85pCSGD'] 81 | f3.close() 82 | rpsClm = Bs33Clm + Bs67Clm + Bs85Clm # calculate ranked probability score 83 | rpsCSGD = Bs33CSGD + Bs67CSGD + Bs85CSGD 84 | rpsCSGDrv1 = Bs33CSGDrv1 + Bs67CSGDrv1 + Bs85CSGDrv1 85 | rpsCSGDrv2 = Bs33CSGDrv2 + Bs67CSGDrv2 + Bs85CSGDrv2 86 | rpssMapCSGD[ilead,:] = 1.-np.sum(rpsCSGD,axis=(0,1))/np.sum(rpsClm,axis=(0,1)) 87 | rpssMapCSGDrv1[ilead,:] = 1.-np.sum(rpsCSGDrv1,axis=(0,1))/np.sum(rpsClm,axis=(0,1)) 88 | rpssMapCSGDrv2[ilead,:] = 1.-np.sum(rpsCSGDrv2,axis=(0,1))/np.sum(rpsClm,axis=(0,1)) 89 | rpssAvgCSGD[ilead] = 1.-np.sum(rpsCSGD)/np.sum(rpsClm) 90 | rpssAvgCSGDrv1[ilead] = 1.-np.sum(rpsCSGDrv1)/np.sum(rpsClm) 91 | rpssAvgCSGDrv2[ilead] = 1.-np.sum(rpsCSGDrv2)/np.sum(rpsClm) 92 | rpsDiffCSGDrv1 = rpsCSGD-rpsCSGDrv1 93 | rpsDiffCSGDrv2 = rpsCSGD-rpsCSGDrv2 94 | rpsDiffStdzCSGDrv1 = (rpsDiffCSGDrv1-np.mean(rpsDiffCSGDrv1,axis=(0,1))[None,None,:])/np.std(rpsDiffCSGDrv1,axis=(0,1))[None,None,:] 95 | rpsDiffStdzCSGDrv2 = (rpsDiffCSGDrv2-np.mean(rpsDiffCSGDrv2,axis=(0,1))[None,None,:])/np.std(rpsDiffCSGDrv2,axis=(0,1))[None,None,:] 96 | for lg in range(15): 97 | acfRv1[ilead,lg] = np.mean(rpsDiffStdzCSGDrv1[lg:,:,:]*rpsDiffStdzCSGDrv1[:(ndts-lg),:,:]) # Estimate temporal autocorrelation 98 | acfRv2[ilead,lg] = np.mean(rpsDiffStdzCSGDrv2[lg:,:,:]*rpsDiffStdzCSGDrv2[:(ndts-lg),:,:]) 99 | rhoCSGDrv1 = acfRv1[ilead,1]/acfRv1[ilead,0] 100 | rhoCSGDrv2 = acfRv2[ilead,1]/acfRv2[ilead,0] 101 | print(rhoCSGDrv1,rhoCSGDrv2) 102 | nCSGDrv1 = 
round(ndts*nyrs*(1-rhoCSGDrv1)/(1+rhoCSGDrv1)) 103 | nCSGDrv2 = round(ndts*nyrs*(1-rhoCSGDrv2)/(1+rhoCSGDrv2)) 104 | for ixy in range(nxy): 105 | smplCSGDrv1 = rpsCSGD[:,:,ixy].flatten()-rpsCSGDrv1[:,:,ixy].flatten() 106 | smplCSGDrv2 = rpsCSGD[:,:,ixy].flatten()-rpsCSGDrv2[:,:,ixy].flatten() 107 | tstatCSGDrv1 = np.mean(smplCSGDrv1)/np.sqrt(np.var(smplCSGDrv1)/nCSGDrv1) # test statistic for paired t-test 108 | tstatCSGDrv2 = np.mean(smplCSGDrv2)/np.sqrt(np.var(smplCSGDrv2)/nCSGDrv2) 109 | pvalRv1[ilead,ixy] = 1.-sp.stats.t.cdf(tstatCSGDrv1,df=nCSGDrv1-1) # p-value for one-sided test 110 | pvalRv2[ilead,ixy] = 1.-sp.stats.t.cdf(tstatCSGDrv2,df=nCSGDrv2-1) 111 | #pval[ilead,ixy] = 2*min(1.-sp.stats.t.cdf(tstat,df=n-1),sp.stats.t.cdf(tstat,df=n-1)) 112 | pvalRv1_srt = np.sort(pvalRv1[ilead,:]) 113 | iCSGDrv1 = np.where(pvalRv1_srt<=0.1*np.arange(1,nxy+1)/nxy)[0] 114 | if len(iCSGDrv1)>0: 115 | alphaFDRrv1[ilead] = pvalRv1_srt[iCSGDrv1[-1]] 116 | pvalRv2_srt = np.sort(pvalRv2[ilead,:]) 117 | iCSGDrv2 = np.where(pvalRv2_srt<=0.1*np.arange(1,nxy+1)/nxy)[0] 118 | if len(iCSGDrv2)>0: 119 | alphaFDRrv2[ilead] = pvalRv2_srt[iCSGDrv2[-1]] 120 | plt.figure(); plt.scatter(np.arange(663),0.1*np.arange(1,664)/663); plt.scatter(np.arange(663),pvalRv1_srt); plt.scatter(np.arange(663),pvalRv2_srt) 121 | 122 | 123 | 124 | 125 | fig = plt.figure(figsize=(11.3,9.)) 126 | 127 | for ilead in range(3): 128 | ylim = np.array([0.26,0.052,0.026])[ilead] 129 | #ylim = np.amax(abs(rpssMapCSGD[ilead,:])) 130 | indSgnfRv1 = (pvalRv1[ilead,:]0: 214 | alphaFDRrv[ilead] = pvalRv_srt[iANNrv[-1]] 215 | plt.figure(); plt.scatter(np.arange(663),0.1*np.arange(1,664)/663); plt.scatter(np.arange(663),pvalRv_srt) 216 | 217 | 218 | 219 | 220 | fig = plt.figure(figsize=(11.8,6.)) 221 | 222 | for ilead in range(3): 223 | ylim = np.array([0.26,0.052,0.026])[ilead] 224 | #ylim = np.amax(abs(rpssMapCSGD[ilead,:])) 225 | indSgnfRv = (pvalRv[ilead,:]0: 295 | alphaFDR_CNN[ilead] = pvalCNN_srt[iCNN[-1]] 296 | plt.figure(); plt.scatter(np.arange(663),0.1*np.arange(1,664)/663); plt.scatter(np.arange(663),pvalCNN_srt) 297 | 298 | 299 | 300 | fig = plt.figure(figsize=(11.3,3.1)) 301 | 302 | for ilead in range(3): 303 | ylim = np.array([0.26,0.052,0.026])[ilead] 304 | indSgnfCNN = (pvalCNN[ilead,:]