├── ANN-CalculateEnsembleStatistics.py
├── ANN-CalculateObsCategories.py
├── ANN-CalculateVerificationMetrics.py
├── ANN-FindTuningParameters.py
├── ANN-GenerateProbabilityForecasts.py
├── CNN-CalculateLargeScalePredictors.py
├── CNN-CalculateVerificationMetrics.py
├── CNN-FindTuningParameters.py
├── CNN-FitConvolutionalNetworkModel.py
├── CNN-GenerateProbabilityForecasts.py
├── CSGD-FitClimatologicalDistributions.py
├── CSGD-GenerateForecastDistributions.py
├── CodeForGraphics.py
├── README.md
├── S-ANN-GenerateProbabilityForecasts.py
├── S-CNN-FindTuningParameters.py
├── S-CNN-FitConvolutionalNetworkModel.py
├── S-CSGD-GenerateForecastDistributions.py
├── S-CalculateVerificationMetrics.py
└── S-CodeForGraphics.py

/ANN-CalculateEnsembleStatistics.py:
--------------------------------------------------------------------------------

import numpy as np
import scipy as sp
import math
import os, sys
import datetime
import time
import matplotlib.path as path
import matplotlib.patches as patches
import matplotlib.pyplot as plt

from netCDF4 import Dataset
from numpy import ma
from numpy import loadtxt
from scipy.interpolate import interp1d


#plt.ion()

r = 300.    # neighborhood radius (kilometers)
R = 6373.   # earth radius (kilometers)


leadDay = 6        # leadDay=d works out to being a d+0.5 day forecast
accumulation = 7   # Precipitation accumulation period

clead = 'week'+str((leadDay+8)//7)


f1 = np.load("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz")
obs_lat = f1['obs_lat']
obs_lon = f1['obs_lon']
obs_1week_dates_ord = f1['obs_dates_ord']
obs_1week_dates = f1['obs_dates']
f1.close()

nxy = len(obs_lat)


f3 = np.load("/home/michael/Desktop/CalifAPCP/data/mod_precip_calplus.npz")
### Modeled precip is (reforecast time, member, year, lead time, lat, lon)
mod_precip = f3['precip']
#mod_dates_ord = f3['datesOrd']
mod_lon = f3['lon']
mod_lat = f3['lat']
f3.close()

f3 = np.load("/home/michael/Desktop/CalifAPCP/data/mod_precip_cal.npz")
mod_dates_ord = f3['dates_ord']
f3.close()

ndts, nmem, nyrs, nlts, nlat, nlon = mod_precip.shape

### Modeled precip 7-day accumulation is (reforecast time, year, ensembles, space)
mod_precip_week = np.sum(mod_precip[:,:,:,leadDay:leadDay+accumulation,:,:],axis=3).reshape((ndts,nmem,nyrs,nlon*nlat))
mod_dates_week = mod_dates_ord[:,:,leadDay]


### Calculate day of the year ('doy') for each reforecast date
doy = np.zeros(ndts,dtype=np.int32)
for idt in range(ndts):
    yyyy = datetime.date.fromordinal(int(mod_dates_week[idt,0])).year
    doy[idt] = min(364,(datetime.date.fromordinal(int(mod_dates_week[idt,0]))-datetime.date(yyyy,1,1)).days)


### Calculate spatially smoothed ensemble forecasts at analysis grid locations
mod_precip_week_sm = np.zeros((ndts,nmem,nyrs,nxy),dtype=np.float32)
for ixy in range(0,nxy):
    lat1 = np.deg2rad(obs_lat[ixy])
    lon1 = np.deg2rad(obs_lon[ixy])
    lat2 = np.deg2rad(mod_lat)
    lon2 = np.deg2rad(mod_lon)
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = (np.sin(dlat/2)**2)[:,None] + np.cos(lat1) * np.outer(np.cos(lat2),np.sin(dlon/2)**2)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1-a))
    gcdst = (R*c).reshape(nlat*nlon)   # great circle distances between forecast and analysis grid points
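    # The three lines above are the haversine formula: a = sin^2(dlat/2) + cos(lat1)*cos(lat2)*sin^2(dlon/2)
    # and c = 2*atan2(sqrt(a),sqrt(1-a)) give the central angle, so R*c is the great-circle distance
    # (in km) from analysis point ixy to every forecast grid point at once.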
    uselocs = np.nonzero(gcdst<r)[0]

# ...

            izmax = np.nonzero(chf>0.0)[0][0] - 1
            if izmax<0:
                itp_fct = interp1d(np.append(0.0,x), np.append(0.0,chf), kind='linear',fill_value='extrapolate')
            else:
                itp_fct = interp1d(x[izmax:], chf[izmax:], kind='linear',fill_value='extrapolate')
            apcp_ens_pit_train[idt,:,ixy,:] = np.transpose(1.-np.exp(-itp_fct(fcst_train[idt,:,:,ixy])))
            apcp_ens_pit_verif[idt,ixy,:] = 1.-np.exp(-itp_fct(fcst_verif[idt,:,ixy]))
            apcp_fcst_p0_cl[idt,ixy] = np.mean(fcst_train[wnd_ind,:,:,ixy]==0.0)
    ### Save out to file
    outfilename = "/home/michael/Desktop/CalifAPCP/stats/ensemble_stats_"+clead+"_ANN_yr"+str(iyr)
    np.savez(outfilename, doy_dts=doy, \
        apcp_obs_ind_train=apcp_obs_ind_train, \
        apcp_obs_ind_verif=apcp_obs_ind_verif, \
        apcp_ens_pit_train=apcp_ens_pit_train, \
        apcp_ens_pit_verif=apcp_ens_pit_verif, \
        apcp_fcst_p0_cl=apcp_fcst_p0_cl)


--------------------------------------------------------------------------------
/ANN-CalculateObsCategories.py:
--------------------------------------------------------------------------------

import numpy as np
import scipy as sp
import math
import os, sys
#import matplotlib.pyplot as plt
import matplotlib.path as path
import datetime
import time
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

from netCDF4 import Dataset
from numpy import ma
from numpy import loadtxt
from scipy import stats

#plt.ion()

ncat = 30
qtlv_eval = [.333,.667,.85,.95]


#==============================================================================
# Load PRISM data set, aggregate to 1-week accumulations and calculate doy
#==============================================================================

f1 = np.load("/home/michael/Desktop/CalifAPCP/data/precip_PRISM_cal_19810101_20171231.npz")
#list(f1)
obs_precip = f1['precip']
obs_lat = f1['lat']
obs_lon = f1['lon']
obs_dates_ord = f1['dates_ord']
obs_dates = f1['dates']
f1.close()

ndts, nxy = obs_precip.shape

obs_precip_week = np.zeros((ndts-6,nxy), dtype=np.float32)
for iday in range(7):
    obs_precip_week += obs_precip[iday:(ndts-6+iday),:]

ndts, nxy = obs_precip_week.shape

obs_dates_ord = obs_dates_ord[:ndts]
obs_dates = obs_dates[:ndts]


doy = np.zeros(ndts, dtype=np.int32)
for idt in range(ndts):
    doy[idt] = (datetime.date.fromordinal(int(obs_dates_ord[idt]))-datetime.date(obs_dates[idt,0],1,1)).days
    if datetime.date(obs_dates[idt,0],1,1).year%4==0 and doy[idt]>58:
        doy[idt] -= 1   # in leap year, do not count Feb 29


#==============================================================================
# Estimate climatological PoP and 'hybrid' quantiles using a moving window
#==============================================================================

pop_doy = np.zeros((365,nxy), dtype=np.float32)
thr_doy = np.zeros((365,nxy,ncat-1), dtype=np.float32)
qtev_doy = np.zeros((365,nxy,len(qtlv_eval)), dtype=np.float32)
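# The first class threshold is fixed at 0.254 mm (0.01 in, the trace threshold); the
# remaining ncat-2 thresholds are climatological quantiles chosen so that the wet part
# of the distribution is split into ncat-1 equally likely classes.  For example, with
# pop_doy[idd,ixy] = 0.6 and ncat = 30, qtlv below runs over the 28 levels
# 1.-0.6*(1.-k/29.), k=1,...,28, i.e. from about 0.42 up to about 0.98.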
for idd in range(365):
    print(idd)
    ind_doy = np.where(doy==idd)[0]
    ind_doy_ext = np.append(np.append(ind_doy[0]-366,ind_doy),ind_doy[-1]+365)
    wnd_ind = np.add.outer(ind_doy_ext,np.arange(-30,31)).flatten()
    imin = np.where(wnd_ind>=0)[0][0]
    imax = np.where(wnd_ind<ndts)[0][-1] + 1
    for ixy in range(nxy):
        y = obs_precip_week[wnd_ind[imin:imax],ixy]
        pop_doy[idd,ixy] = np.mean(y>0.254)
        thr_doy[idd,ixy,0] = 0.254
        qtlv = 1. + pop_doy[idd,ixy]*((np.arange(1,ncat-1)/float(ncat-1))-1.)
        thr_doy[idd,ixy,1:] = np.quantile(y,qtlv)
        qtev_doy[idd,ixy,:] = np.maximum(0.254,np.quantile(y,qtlv_eval))


#==============================================================================
# Assign observations to classes (multiple assignments allowed if ambiguous)
#==============================================================================

apcp_obs_cat = np.zeros((ndts,nxy,ncat),dtype=np.bool_)

for idt in range(ndts):
    for ixy in range(0,nxy):
        lower = np.append(-np.Inf,thr_doy[doy[idt],ixy,:])
        upper = np.append(thr_doy[doy[idt],ixy,:],np.Inf)
        apcp_obs_cat[idt,ixy,:] = np.logical_and(obs_precip_week[idt,ixy]>=lower,obs_precip_week[idt,ixy]<=upper)

np.savez("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_"+str(ncat)+"cl",
    obs_lat = obs_lat,
    obs_lon = obs_lon,
    obs_dates_ord = obs_dates_ord,
    obs_dates = obs_dates,
    apcp_obs_cat = apcp_obs_cat,
    apcp_obs = obs_precip_week,
    pop_doy = pop_doy,
    thr_doy = thr_doy,
    qtev_doy = qtev_doy)


--------------------------------------------------------------------------------
/ANN-CalculateVerificationMetrics.py:
--------------------------------------------------------------------------------

import numpy as np
import scipy.stats as stats
import math
import os, sys
import matplotlib.pyplot as plt
import datetime
import time
import pdb
import pickle

from scipy.stats import gamma
from scipy.interpolate import interp1d

plt.ion()


leadDay = 20       # d works out to being a d+0.5 day forecast
accumulation = 7   # Precipitation accumulation period

clead = 'week'+str((leadDay+8)//7)


## Load PRISM data

f1 = np.load("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz")
#list(f1)
obs_lat = f1['obs_lat']
obs_lon = f1['obs_lon']
obs_dates_ord = f1['obs_dates_ord']
pop_doy = f1['pop_doy']
thr_doy = f1['thr_doy']
qtev_doy = f1['qtev_doy']
apcp_obs_cat = f1['apcp_obs_cat']
obs_precip_week = f1['apcp_obs']
f1.close()

ndts, nxy, ncat = apcp_obs_cat.shape


## Load IFS ensemble forecasts

f2 = np.load("/home/michael/Desktop/CalifAPCP/data/mod_precip_cal.npz")
mod_dates = f2['dates_ord'][:,:,leadDay]
mod_lon = f2['lon']
mod_lat = f2['lat']
mod_precip = np.sum(f2['precip'][:,:,leadDay:leadDay+accumulation,:,:],axis=2)
f2.close()

ndts, nyrs = mod_dates.shape

ecmwf_ind = np.zeros(len(mod_lon),dtype=np.bool_)

for ixy in range(nxy):
    dst = abs(mod_lon-obs_lon[ixy]) + abs(mod_lat-obs_lat[ixy])   # Nearest neighbor interpolation
    ecmwf_ind[np.argmin(dst)] = True

ecmwf_precip = mod_precip[:,:,:,ecmwf_ind]


obs_precip_vdate = np.zeros((ndts,nyrs,nxy),dtype=np.float32)
for idt in range(ndts):
    for iyr in range(nyrs):
        fnd = np.nonzero(obs_dates_ord==mod_dates[idt,iyr])[0][0]
        obs_precip_vdate[idt,iyr,:] = obs_precip_week[fnd,:]   # PRISM data on the verification days
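# Scores computed below: for a threshold q, the Brier score of a forecast probability p
# against an observation y is (1{y>q}-p)^2; the RPS at the end of this script sums these
# Brier scores over the 33rd/67th/85th percentile thresholds, and the CRPS integrates
# them over the evaluation points x defined below.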

### Calculate skill scores

exc33p = np.zeros(obs_precip_vdate.shape)
brier33pClm = np.zeros(obs_precip_vdate.shape)
pot33pANN = np.zeros(obs_precip_vdate.shape)
brier33pANN = np.zeros(obs_precip_vdate.shape)
pot33pCSGD = np.zeros(obs_precip_vdate.shape)
brier33pCSGD = np.zeros(obs_precip_vdate.shape)
pot33pENS = np.zeros(obs_precip_vdate.shape)
brier33pENS = np.zeros(obs_precip_vdate.shape)

exc67p = np.zeros(obs_precip_vdate.shape)
brier67pClm = np.zeros(obs_precip_vdate.shape)
pot67pANN = np.zeros(obs_precip_vdate.shape)
brier67pANN = np.zeros(obs_precip_vdate.shape)
pot67pCSGD = np.zeros(obs_precip_vdate.shape)
brier67pCSGD = np.zeros(obs_precip_vdate.shape)
pot67pENS = np.zeros(obs_precip_vdate.shape)
brier67pENS = np.zeros(obs_precip_vdate.shape)

exc85p = np.zeros(obs_precip_vdate.shape)
brier85pClm = np.zeros(obs_precip_vdate.shape)
pot85pANN = np.zeros(obs_precip_vdate.shape)
brier85pANN = np.zeros(obs_precip_vdate.shape)
pot85pCSGD = np.zeros(obs_precip_vdate.shape)
brier85pCSGD = np.zeros(obs_precip_vdate.shape)
pot85pENS = np.zeros(obs_precip_vdate.shape)
brier85pENS = np.zeros(obs_precip_vdate.shape)

rpsClm = np.zeros(obs_precip_vdate.shape)
rpsANN = np.zeros(obs_precip_vdate.shape)
rpsCSGD = np.zeros(obs_precip_vdate.shape)
rpsENS = np.zeros(obs_precip_vdate.shape)

crpsClm = np.zeros(obs_precip_vdate.shape)
crpsANN = np.zeros(obs_precip_vdate.shape)
crpsCSGD = np.zeros(obs_precip_vdate.shape)


f3 = np.load("/home/michael/Desktop/CalifAPCP/forecasts/csgd_fcsts_params_"+clead+".npz")
csgd_pars_fcst = f3['csgd_pars_fcst']
f3.close()


wwCl = 15   # number of dates around the date of interest used to create a sample representing climatology


x = (np.arange(0,101)/5)**2   # evaluation points for numerical approximation of the CRPS
dx = np.diff(x)

for iyr in range(nyrs):
    print(iyr)
    f4 = np.load("/home/michael/Desktop/CalifAPCP/stats/ensemble_stats_"+clead+"_ANN_yr"+str(iyr)+".npz")
    doy_dts = f4['doy_dts']
    f4.close()
    f5 = np.load("/home/michael/Desktop/CalifAPCP/forecasts/ANN-efi/probfcst_10-l1_"+clead+"_yr"+str(iyr)+".npz")
    prob_fcst_cat = f5['prob_fcst_cat']
    f5.close()
    prob_fcst_chf = -np.log(1.-np.cumsum(prob_fcst_cat,axis=2)[:,:,:(ncat-1)])
    prob_over_thr = np.zeros((ndts,nxy,qtev_doy.shape[2]),dtype=np.float32)
    for idt in range(ndts):
        ### Calculate exceedance ANN probabilities from interpolated cumulative hazard function
        for ixy in range(nxy):
            itp_fct = interp1d(thr_doy[doy_dts[idt],ixy,:], prob_fcst_chf[idt,ixy,:], kind='linear',fill_value='extrapolate')
            prob_over_thr = np.exp(-itp_fct(qtev_doy[doy_dts[idt],ixy,:]))
            pot33pANN[idt,iyr,ixy] = prob_over_thr[0]
            pot67pANN[idt,iyr,ixy] = prob_over_thr[1]
            pot85pANN[idt,iyr,ixy] = prob_over_thr[2]
            ## Calculate CRPS for ANN
            bs = (1.-np.exp(-itp_fct(x))-1.*(obs_precip_vdate[idt,iyr,ixy]<=x))**2
            crpsANN[idt,iyr,ixy] = 0.5*np.sum((bs[1:]+bs[:len(dx)])*dx)
        ### Calculate climatological exceedances based on analyzed values within a time window around the forecast date
        windowClm = np.argsort(np.abs(idt-np.arange(ndts)))[:wwCl]
        obsClm = obs_precip_vdate[windowClm,:,:].reshape((wwCl*nyrs,nxy))
        ### Calculate threshold exceedances for the Brier scores used to approximate the CRPS
        crps_exc = 1.*np.less_equal.outer(obs_precip_vdate[idt,iyr,:],x)
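        # The CRPS values are computed as the integral over thresholds x of the Brier
        # scores (F(x)-1{y<=x})^2, approximated with the trapezoidal rule on the
        # stretched grid x=(i/5)^2, i=0,...,100, which is denser near zero where most
        # precipitation amounts fall.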
        ### Compose sample to represent ECMWF model climatology
        modClm = ecmwf_precip[windowClm,:,:,:].reshape((wwCl*nyrs*11,nxy))
        ## Calculate CRPS for Clm
        clm_cdf = np.mean(obsClm[:,:,None]<=x[None,None,:],axis=0)
        bs = (clm_cdf-crps_exc)**2
        crpsClm[idt,iyr,:] = 0.5*np.sum((bs[:,1:]+bs[:,:len(dx)])*dx[None,:],axis=1)
        ## Calculate CRPS for CSGD
        shape = np.square(csgd_pars_fcst[idt,iyr,:,0]/csgd_pars_fcst[idt,iyr,:,1])
        scale = np.square(csgd_pars_fcst[idt,iyr,:,1])/csgd_pars_fcst[idt,iyr,:,0]
        shift = csgd_pars_fcst[idt,iyr,:,2]
        csgd_cdf = gamma.cdf((x[None,:]-shift[:,None])/scale[:,None],shape[:,None])
        bs = (csgd_cdf-crps_exc)**2
        crpsCSGD[idt,iyr,:] = 0.5*np.sum((bs[:,1:]+bs[:,:len(dx)])*dx[None,:],axis=1)
        ## Calculate Brier scores for different thresholds
        p33 = qtev_doy[doy_dts[idt],:,0]
        exc33p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p33)
        brier33pClm[idt,iyr,:] = (exc33p[idt,iyr,:]-np.mean(obsClm>p33[None,:],axis=0))**2
        brier33pANN[idt,iyr,:] = (exc33p[idt,iyr,:]-pot33pANN[idt,iyr,:])**2
        pot33pCSGD[idt,iyr,:] = 1.-gamma.cdf((p33-shift)/scale,shape)
        brier33pCSGD[idt,iyr,:] = (exc33p[idt,iyr,:]-pot33pCSGD[idt,iyr,:])**2
        p33mod = np.maximum(0.254,np.quantile(modClm,0.333,axis=0))
        #pot33pENS[idt,iyr,:] = np.mean(ecmwf_precip[idt,iyr,:,:]>p33mod[None,:],axis=0)
        pot33pENS[idt,iyr,:] = np.mean(ecmwf_precip[idt,iyr,:,:]>p33[None,:],axis=0)
        brier33pENS[idt,iyr,:] = (exc33p[idt,iyr,:]-pot33pENS[idt,iyr,:])**2
        p67 = qtev_doy[doy_dts[idt],:,1]
        exc67p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p67)
        brier67pClm[idt,iyr,:] = (exc67p[idt,iyr,:]-np.mean(obsClm>p67[None,:],axis=0))**2
        brier67pANN[idt,iyr,:] = (exc67p[idt,iyr,:]-pot67pANN[idt,iyr,:])**2
        pot67pCSGD[idt,iyr,:] = 1.-gamma.cdf((p67-shift)/scale,shape)
        brier67pCSGD[idt,iyr,:] = (exc67p[idt,iyr,:]-pot67pCSGD[idt,iyr,:])**2
        p67mod = np.maximum(0.254,np.quantile(modClm,0.667,axis=0))
        #pot67pENS[idt,iyr,:] = np.mean(ecmwf_precip[idt,iyr,:,:]>p67mod[None,:],axis=0)
        pot67pENS[idt,iyr,:] = np.mean(ecmwf_precip[idt,iyr,:,:]>p67[None,:],axis=0)
        brier67pENS[idt,iyr,:] = (exc67p[idt,iyr,:]-pot67pENS[idt,iyr,:])**2
        p85 = qtev_doy[doy_dts[idt],:,2]
        exc85p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p85)
        brier85pClm[idt,iyr,:] = (exc85p[idt,iyr,:]-np.mean(obsClm>p85[None,:],axis=0))**2
        brier85pANN[idt,iyr,:] = (exc85p[idt,iyr,:]-pot85pANN[idt,iyr,:])**2
        pot85pCSGD[idt,iyr,:] = 1.-gamma.cdf((p85-shift)/scale,shape)
        brier85pCSGD[idt,iyr,:] = (exc85p[idt,iyr,:]-pot85pCSGD[idt,iyr,:])**2
        p85mod = np.maximum(0.254,np.quantile(modClm,0.85,axis=0))
        #pot85pENS[idt,iyr,:] = np.mean(ecmwf_precip[idt,iyr,:,:]>p85mod[None,:],axis=0)
        pot85pENS[idt,iyr,:] = np.mean(ecmwf_precip[idt,iyr,:,:]>p85[None,:],axis=0)
        brier85pENS[idt,iyr,:] = (exc85p[idt,iyr,:]-pot85pENS[idt,iyr,:])**2
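# Brier skill scores could be computed from the saved arrays in the same way as the
# ranked probability skill scores below, e.g. (a sketch, not used elsewhere in the repo):
#   bss33ANN = 1.-np.sum(brier33pANN)/np.sum(brier33pClm)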

outfilename = "/home/michael/Desktop/CalifAPCP/results/scores-ann_"+clead
np.savez(outfilename, crpsClm=crpsClm, crpsANN=crpsANN, crpsCSGD=crpsCSGD, \
    exc33p=exc33p, pot33pANN=pot33pANN, pot33pCSGD=pot33pCSGD, pot33pENS=pot33pENS, Bs33pClm=brier33pClm, Bs33pANN=brier33pANN, Bs33pCSGD=brier33pCSGD, Bs33pENS=brier33pENS, \
    exc67p=exc67p, pot67pANN=pot67pANN, pot67pCSGD=pot67pCSGD, pot67pENS=pot67pENS, Bs67pClm=brier67pClm, Bs67pANN=brier67pANN, Bs67pCSGD=brier67pCSGD, Bs67pENS=brier67pENS, \
    exc85p=exc85p, pot85pANN=pot85pANN, pot85pCSGD=pot85pCSGD, pot85pENS=pot85pENS, Bs85pClm=brier85pClm, Bs85pANN=brier85pANN, Bs85pCSGD=brier85pCSGD, Bs85pENS=brier85pENS)


# calculate ranked probability score
rpsClm = brier33pClm + brier67pClm + brier85pClm
rpsANN = brier33pANN + brier67pANN + brier85pANN
rpsCSGD = brier33pCSGD + brier67pCSGD + brier85pCSGD
rpsENS = brier33pENS + brier67pENS + brier85pENS

# rpssAvgANN
round(1.-np.sum(rpsANN)/np.sum(rpsClm),4)

# rpssAvgCSGD
round(1.-np.sum(rpsCSGD)/np.sum(rpsClm),4)

# rpssAvgENS
round(1.-np.sum(rpsENS)/np.sum(rpsClm),4)


--------------------------------------------------------------------------------
/ANN-FindTuningParameters.py:
--------------------------------------------------------------------------------

import numpy as np
import scipy as sp
import math
import os, sys
#import matplotlib.pyplot as plt
import datetime
import time
import keras
import keras.backend as K

from netCDF4 import Dataset
from numpy import ma

from scipy import stats
from scipy.interpolate import interp1d

from keras import models
from keras import layers
from keras import regularizers

from keras.layers import Input, Dense, Add, Activation
from keras.models import Model
from keras.optimizers import Adam

#plt.ion()


ncl = '20'
clead = 'week2'
imod = 0


def build_cat_model(n_features, hidden_nodes, n_bins, par_reg):
    inp1 = Input(shape=(n_features,))
    inp2 = Input(shape=(n_bins,))
    x = Dense(hidden_nodes[0], activation='elu', kernel_regularizer=regularizers.l1(par_reg))(inp1)
    if len(hidden_nodes) > 1:
        for h in hidden_nodes[1:]:
            x = Dense(h, activation='elu', kernel_regularizer=regularizers.l1(par_reg))(x)
    x = Dense(n_bins, activation='elu', kernel_regularizer=regularizers.l1(par_reg))(x)
    x = Add()([x, inp2])
    out = Activation('softmax')(x)
    return Model(inputs=[inp1, inp2], outputs=out)


def modified_categorical_crossentropy(y_mat, prob_fcst):
    prob_obs_cat = K.sum(y_mat*prob_fcst,axis=1)
    return -K.mean(K.log(prob_obs_cat))


f1 = np.load("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_"+ncl+"cl.npz")
#list(f1)
lat = f1['obs_lat']
lon = f1['obs_lon']
obs_dates_ord = f1['obs_dates_ord']
pop_doy = f1['pop_doy']
thr_doy = f1['thr_doy']
qtev_doy = f1['qtev_doy']
apcp_obs_cat = f1['apcp_obs_cat']
apcp_obs = f1['apcp_obs']
f1.close()

ndts, nxy, ncat = apcp_obs_cat.shape


nyrs = 20

reg = 10.**np.arange(-6,-2)
nreg = len(reg)

mod = [[10],[20],[10,10]]


x = (np.arange(0,101)/5)**2   # evaluation points for numerical approximation of the CRPS
dx = np.diff(x)

opt_reg_param = np.zeros(nyrs, dtype=np.float32)
opt_valid_scores = np.zeros((nyrs,5), dtype=np.float32)
opt_valid_crps = np.zeros((nyrs,5), dtype=np.float32)


for iyr in range(nyrs):
    print('year: ',iyr)
    # Load smoothed ensemble forecast PIT values
    f4 = np.load("/home/michael/Desktop/CalifAPCP/stats/ensemble_stats_"+clead+"_ANN_yr"+str(iyr)+".npz")
    doy_dts = f4['doy_dts']
    apcp_obs_ind = f4['apcp_obs_ind_train']
    apcp_ens_pit = f4['apcp_ens_pit_train']
    f4.close()
    ndts, nyrs_cv, nxy, nmem = apcp_ens_pit.shape
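    # apcp_efi below maps the ensemble PIT values to an EFI-like index in [-1,1]:
    # -1+(2/pi)*mean(arccos(1-2*pit)) is 0 for a climatological ensemble and approaches
    # -1/+1 when all members sit in the extreme dry/wet tail of the climatology.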
    # Calculate normalized coordinates and climatological probability of precipitation
    lon_nml = np.repeat(-1.+2.*(lon[np.newaxis,:]-min(lon))/(max(lon)-min(lon)),ndts*nyrs_cv,axis=0).reshape((ndts*nyrs_cv,nxy,1))
    lat_nml = np.repeat(-1.+2.*(lat[np.newaxis,:]-min(lat))/(max(lat)-min(lat)),ndts*nyrs_cv,axis=0).reshape((ndts*nyrs_cv,nxy,1))
    apcp_pop_cl = np.repeat(pop_doy[doy_dts,np.newaxis,:],nyrs_cv,axis=1).reshape((ndts*nyrs_cv,nxy,1))
    # Calculate predictors and classification targets
    apcp_efi = -1.+(2./np.pi)*np.mean(np.arccos(1.-2.*apcp_ens_pit),axis=3).reshape((ndts*nyrs_cv,nxy,1))
    predictors = np.concatenate((lon_nml,lat_nml,-1.+2.*apcp_pop_cl,apcp_efi),axis=2)
    logp_cl = np.concatenate((np.log(1.-apcp_pop_cl),np.repeat(np.log(apcp_pop_cl),ncat-1,axis=2)-np.log(ncat-1)),axis=2)
    # perform 5-fold cross validation to find optimal regularization
    date_order = np.arange(ndts*nyrs_cv).reshape(ndts,nyrs_cv).T.flatten()
    cv_ind = date_order[np.arange(ndts*nyrs_cv)%232<231]   # remove the date between the 5 cross-validated blocks
    valid_score = np.zeros((nreg,5), dtype=np.float32)
    valid_crps = np.zeros((nreg,5), dtype=np.float32)
    for cvi in range(5):
        train_ind = cv_ind[np.arange(len(cv_ind))//(len(cv_ind)//5)!=cvi]
        valid_ind = cv_ind[np.arange(len(cv_ind))//(len(cv_ind)//5)==cvi]
        predictors_train = predictors[train_ind,:,:].reshape((-1,predictors.shape[-1]))
        logp_cl_train = logp_cl[train_ind,:,:].reshape((-1,ncat))
        cat_targets_train = apcp_obs_cat[apcp_obs_ind.flatten()[train_ind],:,:].reshape((-1,ncat)).astype(float)
        predictors_valid = predictors[valid_ind,:,:].reshape((-1,predictors.shape[-1]))
        logp_cl_valid = logp_cl[valid_ind,:,:].reshape((-1,ncat))
        cat_targets_valid = apcp_obs_cat[apcp_obs_ind.flatten()[valid_ind],:,:].reshape((-1,ncat)).astype(float)
        doy_valid = np.repeat(doy_dts[:,np.newaxis],nyrs_cv,axis=1).flatten()[valid_ind]
        for ireg in range(nreg):
            # Define and fit ANN model (using batch gradient descent)
            keras.backend.clear_session()
            model = build_cat_model(predictors.shape[-1], mod[imod], ncat, reg[ireg])
            model.compile(optimizer=Adam(0.05), loss=modified_categorical_crossentropy)
            model.fit([predictors_train,logp_cl_train], cat_targets_train, epochs=100, batch_size=len(train_ind)*nxy, verbose=0)
            valid_score[ireg,cvi] = model.evaluate([predictors_valid,logp_cl_valid], cat_targets_valid, batch_size=len(train_ind)*nxy, verbose=0)
            # Calculate CRPS for each cross-validation fold
            prob_fcst_cat = model.predict([predictors_valid,logp_cl_valid]).reshape((len(valid_ind),nxy,ncat))
            prob_fcst_chf = -np.log(1.-np.cumsum(prob_fcst_cat,axis=2)[:,:,:(ncat-1)])
            crps_fold = np.zeros((len(valid_ind),nxy),dtype=np.float32)
            for ivdt in range(len(valid_ind)):
                for ixy in range(nxy):
                    itp_fct = interp1d(thr_doy[doy_valid[ivdt],ixy,:], prob_fcst_chf[ivdt,ixy,:], kind='linear',fill_value='extrapolate')
                    bs = (1.-np.exp(-itp_fct(x))-1.*(apcp_obs[apcp_obs_ind.flatten()[valid_ind[ivdt]],ixy]<=x))**2
                    crps_fold[ivdt,ixy] = 0.5*np.sum((bs[1:]+bs[:len(dx)])*dx)
            valid_crps[ireg,cvi] = np.mean(crps_fold)
    opt_reg_ind = np.argmin(np.mean(valid_score,axis=1))
    opt_reg_param[iyr] = reg[opt_reg_ind]
    opt_valid_scores[iyr,:] = valid_score[opt_reg_ind,:]
    opt_valid_crps[iyr,:] = valid_crps[opt_reg_ind,:]
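# opt_reg_param now holds, for each left-out year, the l1 weight with the smallest mean
# validation loss; ANN-GenerateProbabilityForecasts.py reads these values back when
# fitting the final models.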

### Save out to file
outfilename = "/home/michael/Desktop/CalifAPCP/tuning/efi-"+ncl+"cl-m"+str(imod)+"-l1_"+clead
np.savez(outfilename, opt_reg_param=opt_reg_param, opt_valid_scores=opt_valid_scores, opt_valid_crps=opt_valid_crps)


--------------------------------------------------------------------------------
/ANN-GenerateProbabilityForecasts.py:
--------------------------------------------------------------------------------

import numpy as np
import scipy as sp
import math
import os, sys
#import matplotlib.pyplot as plt
import datetime
import time
import keras
import keras.backend as K

from netCDF4 import Dataset
from numpy import ma

from scipy import stats

from keras import models
from keras import layers
from keras import regularizers

from keras.layers import Input, Dense, Add, Activation, Dropout
from keras.layers.merge import Concatenate
from keras.models import Model
from keras.optimizers import Adam

#plt.ion()


def build_cat_model(n_features, hidden_nodes, n_bins, par_reg):
    inp1 = Input(shape=(n_features,))
    inp2 = Input(shape=(n_bins,))
    x = Dense(hidden_nodes[0], activation='elu', kernel_regularizer=regularizers.l1(par_reg))(inp1)
    if len(hidden_nodes) > 1:
        for h in hidden_nodes[1:]:
            x = Dense(h, activation='elu', kernel_regularizer=regularizers.l1(par_reg))(x)
    x = Dense(n_bins, activation='elu', kernel_regularizer=regularizers.l1(par_reg))(x)
    x = Add()([x, inp2])
    out = Activation('softmax')(x)
    return Model(inputs=[inp1, inp2], outputs=out)


def modified_categorical_crossentropy(y_mat, prob_fcst):
    prob_obs_cat = K.sum(y_mat*prob_fcst,axis=1)
    return -K.mean(K.log(prob_obs_cat))
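# How the Add + softmax composition in build_cat_model behaves (a tiny numpy sketch, not
# part of the pipeline; the numbers are made up): softmax(f + log(p_cl)) is proportional
# to p_cl*exp(f), so the network output f acts as a multiplicative adjustment of the
# climatological class probabilities, and f = 0 returns climatology exactly.
p_cl_demo = np.array([0.4, 0.3, 0.2, 0.1])     # made-up climatological class probabilities
f_demo = np.zeros(4)                           # zero network output
z_demo = f_demo + np.log(p_cl_demo)
print(np.exp(z_demo)/np.sum(np.exp(z_demo)))   # -> [0.4 0.3 0.2 0.1]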

f1 = np.load("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz")
#list(f1)
lat = f1['obs_lat']
lon = f1['obs_lon']
obs_dates_ord = f1['obs_dates_ord']
pop_doy = f1['pop_doy']
thr_doy = f1['thr_doy']
qtev_doy = f1['qtev_doy']
apcp_obs_cat = f1['apcp_obs_cat']
f1.close()

ncat = apcp_obs_cat.shape[2]


clead = 'week2'

f3 = np.load("/home/michael/Desktop/CalifAPCP/tuning/efi-20cl-m0-l1_"+clead+".npz")
opt_reg_param = f3['opt_reg_param']
f3.close()


for iyr in range(20):
    print(iyr)
    # Load smoothed ensemble forecast PIT values
    f2 = np.load("/home/michael/Desktop/CalifAPCP/stats/ensemble_stats_"+clead+"_ANN_yr"+str(iyr)+".npz")
    doy_dts = f2['doy_dts']
    apcp_obs_ind_train = f2['apcp_obs_ind_train']
    apcp_obs_ind_verif = f2['apcp_obs_ind_verif']
    apcp_ens_pit_train = f2['apcp_ens_pit_train']
    apcp_ens_pit_verif = f2['apcp_ens_pit_verif']
    f2.close()
    ndts, nyrs_tr, nxy, nmem = apcp_ens_pit_train.shape
    # Calculate normalized coordinates and climatological probability of precipitation
    lon_train = np.repeat(-1.+2.*(lon[np.newaxis,:]-lon[0])/(lon[-1]-lon[0]),ndts*nyrs_tr,axis=0).reshape((ndts,nyrs_tr,nxy,1))
    lon_verif = np.repeat(-1.+2.*(lon[np.newaxis,:]-lon[0])/(lon[-1]-lon[0]),ndts,axis=0).reshape((ndts,nxy,1))
    lat_train = np.repeat(-1.+2.*(lat[np.newaxis,:]-lat[-1])/(lat[0]-lat[-1]),ndts*nyrs_tr,axis=0).reshape((ndts,nyrs_tr,nxy,1))
    lat_verif = np.repeat(-1.+2.*(lat[np.newaxis,:]-lat[-1])/(lat[0]-lat[-1]),ndts,axis=0).reshape((ndts,nxy,1))
    apcp_pop_cl_train = np.repeat(pop_doy[doy_dts,np.newaxis,:,None],nyrs_tr,axis=1)
    apcp_pop_cl_verif = pop_doy[doy_dts,:,None]
    # Calculate predictors and classification targets
    apcp_efi_train = -1.+(2./np.pi)*np.mean(np.arccos(1.-2.*apcp_ens_pit_train),axis=3)[:,:,:,None]
    apcp_efi_verif = -1.+(2./np.pi)*np.mean(np.arccos(1.-2.*apcp_ens_pit_verif),axis=2)[:,:,None]
    train_predictors = np.concatenate((lon_train,lat_train,apcp_efi_train),axis=3).reshape((-1,3))
    train_logp_cl = np.concatenate((np.log(1.-apcp_pop_cl_train),np.repeat(np.log(apcp_pop_cl_train),ncat-1,axis=3)-np.log(ncat-1)),axis=3).reshape((-1,ncat))
    train_cat_targets = apcp_obs_cat[apcp_obs_ind_train.flatten(),:,:].reshape((-1,ncat)).astype(float)
    # Define and fit ANN model
    keras.backend.clear_session()
    model = build_cat_model(train_predictors.shape[-1], [10], ncat, opt_reg_param[iyr])
    model.compile(optimizer=Adam(0.05), loss=modified_categorical_crossentropy)
    model.fit([train_predictors,train_logp_cl], train_cat_targets, epochs=100, batch_size=ndts*nyrs_tr*nxy, verbose=0)
    # Calculate probability forecasts
    verif_predictors = np.concatenate((lon_verif,lat_verif,apcp_efi_verif),axis=2).reshape((-1,3))
    verif_logp_cl = np.concatenate((np.log(1.-apcp_pop_cl_verif),np.repeat(np.log(apcp_pop_cl_verif),ncat-1,axis=2)-np.log(ncat-1)),axis=2).reshape((-1,ncat))
    prob_fcst_cat = model.predict([verif_predictors,verif_logp_cl]).reshape((ndts,nxy,ncat))
    ### Save out to file
    outfilename = "/home/michael/Desktop/CalifAPCP/forecasts/ANN-efi/probfcst_10-l1_"+clead+"_yr"+str(iyr)
    np.savez(outfilename, prob_fcst_cat=prob_fcst_cat)
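# A minimal self-contained sketch (not part of the pipeline; all numbers made up) of how
# the saved class probabilities are turned into exceedance probabilities downstream (see
# ANN-CalculateVerificationMetrics.py): interpolate the cumulative hazard function
# -log(1-CDF) between the class thresholds and transform back.
from scipy.interpolate import interp1d
thr_demo = np.array([0.254, 2., 5., 10.])          # hypothetical class thresholds (mm)
pcat_demo = np.array([0.5, 0.2, 0.15, 0.1, 0.05])  # hypothetical class probabilities
chf_demo = -np.log(1.-np.cumsum(pcat_demo)[:-1])   # cumulative hazard at the thresholds
itp_demo = interp1d(thr_demo, chf_demo, kind='linear', fill_value='extrapolate')
print(np.exp(-itp_demo(8.)))                       # P(precip > 8 mm) ~ 0.078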

--------------------------------------------------------------------------------
/CNN-CalculateLargeScalePredictors.py:
--------------------------------------------------------------------------------

import numpy as np
import scipy as sp
import math
import os, sys
import datetime
import time
#import matplotlib.path as path
#import matplotlib.patches as patches
import matplotlib.pyplot as plt

from netCDF4 import Dataset
from numpy import ma
from numpy.linalg import solve
from numpy.linalg import svd

#plt.ion()

data_path = '/Volumes/ExtMichael/Michael/ECMWF-subseasonal/'


### Load geopotential height forecast fields and aggregate to week-2, week-3, and week-4 averages

f1 = np.load("/Users/mscheuerer/Desktop/CalifAPCP/data/mod_precip_cal.npz")
mod_dates_ord = f1['dates_ord']
f1.close()

ndts, nyrs, nlts = mod_dates_ord.shape

ixl = 71    # -144
ixu = 147   # -107
jyl = 55    #   52
jyu = 115   #   23

nxf = len(range(ixl,ixu+1))
nyf = len(range(jyl,jyu+1))

nens = 11

z500_week2 = np.zeros((ndts,nyrs,nens,nyf,nxf), dtype=np.float32)
z500_week3 = np.zeros((ndts,nyrs,nens,nyf,nxf), dtype=np.float32)
z500_week4 = np.zeros((ndts,nyrs,nens,nyf,nxf), dtype=np.float32)

wgt12h = np.r_[0.5,np.ones(13,dtype=np.float32),0.5]

for idt in range(ndts):
    date_init = datetime.date.fromordinal(int(mod_dates_ord[idt,-1,0]-1.5))   # Initialization date of ECMWF reforecast
    cyear = format(date_init.year+1)
    cmonth = format(date_init.month,'02')
    cday = format(date_init.day,'02')
    infile = data_path+'ControlLargeDomain/geopotential/'+cyear+'-'+cmonth+'-'+cday+'cntrl_12hrpress_start0hr.nc'
    nc = Dataset(infile)
    z = nc.variables['z'][:,:,:,jyl:(jyu+1),ixl:(ixu+1)]
    nc.close()
    z500_week2[idt,:,0,:,:] = np.average(z[:,13:28,0,:,:],axis=1,weights=wgt12h)
    z500_week3[idt,:,0,:,:] = np.average(z[:,27:42,0,:,:],axis=1,weights=wgt12h)
    z500_week4[idt,:,0,:,:] = np.average(z[:,41:56,0,:,:],axis=1,weights=wgt12h)
    print(infile)
    infile = data_path+'EnsembleLargeDomain/geopotential/'+cyear+'-'+cmonth+'-'+cday+'ens_12hrpress_start0hr.z.nc'
    nc = Dataset(infile)
    z = nc.variables['z'][:,:,:,:,jyl:(jyu+1),ixl:(ixu+1)]
    nc.close()
    z500_week2[idt,:,1:,:,:] = np.average(z[:,13:28,:,0,:,:],axis=1,weights=wgt12h)
    z500_week3[idt,:,1:,:,:] = np.average(z[:,27:42,:,0,:,:],axis=1,weights=wgt12h)
    z500_week4[idt,:,1:,:,:] = np.average(z[:,41:56,:,0,:,:],axis=1,weights=wgt12h)
    print(infile)


# Upscale to 1-deg grid

nxfu = (nxf-1)//2
nyfu = (nyf-1)//2

z500_week2_1deg = np.zeros((ndts,nyrs,nens,nyfu,nxfu), dtype=np.float32)
z500_week3_1deg = np.zeros((ndts,nyrs,nens,nyfu,nxfu), dtype=np.float32)
z500_week4_1deg = np.zeros((ndts,nyrs,nens,nyfu,nxfu), dtype=np.float32)

for ixd in range(-1,2):
    wx = 0.5**(1+abs(ixd))
    for jyd in range(-1,2):
        wy = 0.5**(1+abs(jyd))
        w = wx*wy
        z500_week2_1deg += z500_week2[:,:,:,(1+jyd):(nyf-1+jyd):2,(1+ixd):(nxf-1+ixd):2]*w
        z500_week3_1deg += z500_week3[:,:,:,(1+jyd):(nyf-1+jyd):2,(1+ixd):(nxf-1+ixd):2]*w
        z500_week4_1deg += z500_week4[:,:,:,(1+jyd):(nyf-1+jyd):2,(1+ixd):(nxf-1+ixd):2]*w


### Save out to file
outfilename = "/Users/mscheuerer/Desktop/CalifAPCP/data/z500_predictor_cnn"
np.savez(outfilename, mod_dates_ord=mod_dates_ord,
    longitude=lon.data[(ixl+1):ixu:2],
    latitude=lat.data[(jyl+1):jyu:2],
    z500_week2=z500_week2_1deg,
    z500_week3=z500_week3_1deg,
    z500_week4=z500_week4_1deg)
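# The upscaling above averages each 1-deg target point over a 3x3 neighborhood of
# half-deg points with separable weights [0.25, 0.5, 0.25] per axis (wx*wy), i.e. a
# simple smoothing before subsampling every other grid point.  Note that lon and lat in
# the savez call are the full-resolution coordinate vectors of the forecast files (cf.
# the commented-out read in the tcw section below); they are not defined earlier in this
# script as it stands.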

### Load total column water forecast fields and aggregate to week-2, week-3, and week-4 averages

f1 = np.load("/Users/mscheuerer/Desktop/CalifAPCP/data/mod_precip_cal.npz")
mod_dates_ord = f1['dates_ord']
f1.close()

ndts, nyrs, nlts = mod_dates_ord.shape

ixl = 71    # -144
ixu = 147   # -107
jyl = 55    #   52
jyu = 115   #   23

nxf = len(range(ixl,ixu+1))
nyf = len(range(jyl,jyu+1))

nens = 11

tcw_week2 = np.zeros((ndts,nyrs,nens,nyf,nxf), dtype=np.float32)
tcw_week3 = np.zeros((ndts,nyrs,nens,nyf,nxf), dtype=np.float32)
tcw_week4 = np.zeros((ndts,nyrs,nens,nyf,nxf), dtype=np.float32)

wgt6h = np.r_[0.5,np.ones(27,dtype=np.float32),0.5]

for idt in range(ndts):
    date_init = datetime.date.fromordinal(int(mod_dates_ord[idt,-1,0]-1.5))   # Initialization date of ECMWF reforecast
    cyear = format(date_init.year+1)
    cmonth = format(date_init.month,'02')
    cday = format(date_init.day,'02')
    infile = data_path+'ControlLargeDomain/tcw/'+cyear+'-'+cmonth+'-'+cday+'cntrl_6hrsfc_start0hr.nc'
    nc = Dataset(infile)
    twc = nc.variables['tcw'][:,:,jyl:(jyu+1),ixl:(ixu+1)]
    nc.close()
    tcw_week2[idt,:,0,:,:] = np.average(twc[:,26:55,:,:],axis=1,weights=wgt6h)
    tcw_week3[idt,:,0,:,:] = np.average(twc[:,54:83,:,:],axis=1,weights=wgt6h)
    tcw_week4[idt,:,0,:,:] = np.average(twc[:,82:111,:,:],axis=1,weights=wgt6h)
    print(infile)
    infile = data_path+'EnsembleLargeDomain/tcw/'+cyear+'-'+cmonth+'-'+cday+'ens_6hrsfc_start0hr.tcw.nc'
    nc = Dataset(infile)
    twc = nc.variables['tcw'][:,:,:,jyl:(jyu+1),ixl:(ixu+1)]
    nc.close()
    tcw_week2[idt,:,1:,:,:] = np.average(twc[:,26:55,:,:],axis=1,weights=wgt6h)
    tcw_week3[idt,:,1:,:,:] = np.average(twc[:,54:83,:,:],axis=1,weights=wgt6h)
    tcw_week4[idt,:,1:,:,:] = np.average(twc[:,82:111,:,:],axis=1,weights=wgt6h)
    print(infile)

#nc = Dataset(infile)
#lons = nc.variables['longitude'][ixl:(ixu+1)]
#lats = nc.variables['latitude'][jyl:(jyu+1)]
#nc.close()


# Upscale to 1-deg grid

nxfu = (nxf-1)//2
nyfu = (nyf-1)//2

tcw_week2_1deg = np.zeros((ndts,nyrs,nens,nyfu,nxfu), dtype=np.float32)
tcw_week3_1deg = np.zeros((ndts,nyrs,nens,nyfu,nxfu), dtype=np.float32)
tcw_week4_1deg = np.zeros((ndts,nyrs,nens,nyfu,nxfu), dtype=np.float32)

for ixd in range(-1,2):
    wx = 0.5**(1+abs(ixd))
    for jyd in range(-1,2):
        wy = 0.5**(1+abs(jyd))
        w = wx*wy
        tcw_week2_1deg += tcw_week2[:,:,:,(1+jyd):(nyf-1+jyd):2,(1+ixd):(nxf-1+ixd):2]*w
        tcw_week3_1deg += tcw_week3[:,:,:,(1+jyd):(nyf-1+jyd):2,(1+ixd):(nxf-1+ixd):2]*w
        tcw_week4_1deg += tcw_week4[:,:,:,(1+jyd):(nyf-1+jyd):2,(1+ixd):(nxf-1+ixd):2]*w


### Save out to file
outfilename = "/Users/mscheuerer/Desktop/CalifAPCP/data/tcw_predictor_cnn"
np.savez(outfilename, mod_dates_ord=mod_dates_ord, tcw_week2=tcw_week2_1deg, tcw_week3=tcw_week3_1deg, tcw_week4=tcw_week4_1deg)


f1 = np.load("/Users/mscheuerer/Desktop/CalifAPCP/data/tcw_predictor.npz")
tcw_week2 = f1['tcw_week2']
tcw_week3 = f1['tcw_week3']
tcw_week4 = f1['tcw_week4']
f1.close()


# Load ERA-5 reanalyses for z500

data_path = '/Projects/ClimateAnalysis/OBS/ERA5/'

infile = data_path+'GEOPOT500.1981.4x.nc'
nc = Dataset(infile)
lon = nc.variables['lon'][:]
lat = nc.variables['lat'][:]
nc.close()

lon = np.where(lon>180,lon-360,lon)

idx_lon = np.logical_and(np.greater_equal(lon,-144.5),np.less_equal(lon,-106.5))
idx_lat = np.logical_and(np.greater_equal(lat,22.5),np.less_equal(lat,52.5))

nx = sum(idx_lon)
ny = sum(idx_lat)
ntimes = 4*(737059-723181+1)

dates_ord = np.zeros(ntimes,dtype=np.float32)
z500 = np.zeros((ntimes,ny,nx),dtype=np.float32)

idtb = 0

for iyr in range(38):
    infile = data_path+'GEOPOT500.'+str(1981+iyr)+'.4x.nc'
    print(infile)
    nc = Dataset(infile)
    ntyr = len(nc.dimensions['time'])
    idte = idtb + ntyr
    dates_ord[idtb:idte] = 657072 + nc.variables['time'][:]/24.
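    # 657072 is datetime.date(1800,1,1).toordinal(): these files store time as hours
    # since 1800-01-01, so dividing by 24 and adding this offset yields proleptic ordinals.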
    z500[idtb:idte,:,:] = nc.variables['GEOPOT'][:,0,idx_lat,idx_lon]/9.806
    nc.close()
    idtb = idte

# Upscale to 1-deg grid

nxu = (nx-1)//4
nyu = (ny-1)//4

z500_1deg = np.zeros((ntimes,nyu,nxu), dtype=np.float32)

for ixd in range(-2,3):
    wx = 0.125*min(3-abs(ixd),2)
    for jyd in range(-2,3):
        wy = 0.125*min(3-abs(jyd),2)
        w = wx*wy
        z500_1deg += z500[:,(2+jyd):(ny-2+jyd):4,(2+ixd):(nx-2+ixd):4]*w


# Accumulate to 7-day averages

f1 = np.load("/Users/mscheuerer/Desktop/CalifAPCP/data/mod_precip_cal.npz")
mod_dates_ord = f1['dates_ord']
f1.close()

ndts, nyrs, nlts = mod_dates_ord.shape

wgt6h = np.r_[0.5,np.ones(27,dtype=np.float32),0.5]

z500_acc1wk = np.zeros((ndts,nyrs,nyu,nxu), dtype=np.float32)

for idt in range(ndts):
    for iyr in range(nyrs):
        date_init_ord = mod_dates_ord[idt,iyr,0]-1.   # Initialization date of ECMWF reforecast
        era5_ind = np.where(dates_ord==date_init_ord)[0]
        if len(era5_ind)<1:
            print('Warning! No match found for idt='+str(idt)+', iyr='+str(iyr)+'.\n')
            continue
        idtl = era5_ind[0]
        idtu = era5_ind[0] + 29
        if idtu>ntimes:
            print('Warning! Aggregation period outside the data range for idt='+str(idt)+', iyr='+str(iyr)+'.\n')
            continue
        z500_acc1wk[idt,iyr,:,:] = np.average(z500_1deg[idtl:idtu,:,:],axis=0,weights=wgt6h)


# Load ERA-5 reanalyses for tcw

data_path = '/Projects/ClimateAnalysis/OBS/ERA5/'

infile = data_path+'TCW.1981.nc'
nc = Dataset(infile)
lon = nc.variables['lon'][:]
lat = nc.variables['lat'][:]
nc.close()

lon = np.where(lon>180,lon-360,lon)

idx_lon = np.logical_and(np.greater_equal(lon,-144.5),np.less_equal(lon,-106.5))
idx_lat = np.logical_and(np.greater_equal(lat,22.5),np.less_equal(lat,52.5))

nx = sum(idx_lon)
ny = sum(idx_lat)
ntimes = 4*(737059-723181+1)

dates_ord = np.zeros(ntimes,dtype=np.float32)
tcw = np.zeros((ntimes,ny,nx),dtype=np.float32)

idtb = 0

for iyr in range(38):
    infile = data_path+'TCW.'+str(1981+iyr)+'.nc'
    print(infile)
    nc = Dataset(infile)
    ntyr = len(nc.dimensions['time'])
    idte = idtb + ntyr
    dates_ord[(idtb//6):(idte//6)] = 657072 + nc.variables['time'][::6]/24.
    tcw[(idtb//6):(idte//6),:,:] = nc.variables['TCW'][::6,idx_lat,idx_lon]/9.806
    nc.close()
    idtb = idte


# Upscale to 1-deg grid

nxu = (nx-1)//4
nyu = (ny-1)//4

tcw_1deg = np.zeros((ntimes,nyu,nxu), dtype=np.float32)

for ixd in range(-2,3):
    wx = 0.125*min(3-abs(ixd),2)
    for jyd in range(-2,3):
        wy = 0.125*min(3-abs(jyd),2)
        w = wx*wy
        tcw_1deg += tcw[:,(2+jyd):(ny-2+jyd):4,(2+ixd):(nx-2+ixd):4]*w

lon_1deg = lon[idx_lon][2:nx-2:4]
lat_1deg = lat[idx_lat][2:ny-2:4]


# Accumulate to 7-day averages

f1 = np.load("/Users/mscheuerer/Desktop/CalifAPCP/data/mod_precip_cal.npz")
mod_dates_ord = f1['dates_ord']
f1.close()

ndts, nyrs, nlts = mod_dates_ord.shape

era5_dates_ord = mod_dates_ord[:,:,0]-1.
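# mod_dates_ord[:,:,0] is the valid date of forecast day 1, so subtracting one day gives
# the reforecast initialization date; the ERA5 predictor weeks are anchored to these dates.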
wgt6h = np.r_[0.5,np.ones(27,dtype=np.float32),0.5]

tcw_acc1wk = np.zeros((ndts,nyrs,nyu,nxu), dtype=np.float32)

for idt in range(ndts):
    for iyr in range(nyrs):
        date_init_ord = era5_dates_ord[idt,iyr]   # Initialization date of ECMWF reforecast
        era5_ind = np.where(dates_ord==date_init_ord)[0]
        if len(era5_ind)<1:
            print('Warning! No match found for idt='+str(idt)+', iyr='+str(iyr)+'.\n')
            continue
        idtl = era5_ind[0]
        idtu = era5_ind[0] + 29
        if idtu>ntimes:
            print('Warning! Aggregation period outside the data range for idt='+str(idt)+', iyr='+str(iyr)+'.\n')
            continue
        tcw_acc1wk[idt,iyr,:,:] = np.average(tcw_1deg[idtl:idtu,:,:],axis=0,weights=wgt6h)


### Save out to file
outfilename = "/Users/mscheuerer/Desktop/CalifAPCP/data/z500_tcw_predictors_era5"
np.savez(outfilename, dates_ord=era5_dates_ord,
    longitude=lon_1deg,
    latitude=lat_1deg.data,
    z500_1wk=z500_acc1wk,
    tcw_1wk=tcw_acc1wk)


--------------------------------------------------------------------------------
/CNN-CalculateVerificationMetrics.py:
--------------------------------------------------------------------------------

import numpy as np
import numpy.ma as ma
import scipy.stats as stats
import math
import os, sys
import matplotlib.pyplot as plt
import datetime
import time
import pdb
import pickle

from scipy.stats import gamma
from scipy.interpolate import interp1d

plt.ion()


leadDay = 20       # d works out to being a d+0.5 day forecast
accumulation = 7   # Precipitation accumulation period

clead = 'week'+str((leadDay+8)//7)


f1 = np.load("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz")
#list(f1)
obs_lat = f1['obs_lat']
obs_lon = f1['obs_lon']
obs_dates_ord = f1['obs_dates_ord']
thr_doy = f1['thr_doy']
qtev_doy = f1['qtev_doy']
apcp_obs_cat = f1['apcp_obs_cat']
obs_precip_week = f1['apcp_obs']
f1.close()

ndts, nxy, ncat = apcp_obs_cat.shape


#f2 = np.load("/home/michael/Desktop/CalifAPCP/data/z500_tcw_predictors_era5.npz")
#mod_dates = f2['dates_ord']
#f2.close()

f2 = np.load("/home/michael/Desktop/CalifAPCP/data/mod_precip_cal.npz")
mod_dates = f2['dates_ord'][:,:,leadDay]
f2.close()

ndts, nyrs = mod_dates.shape


doy_dts = np.zeros(ndts,dtype=np.int32)
obs_precip_vdate = np.zeros((ndts,nyrs,nxy),dtype=np.float32)
for idt in range(ndts):
    for iyr in range(nyrs):
        fnd = np.nonzero(obs_dates_ord==mod_dates[idt,iyr])[0][0]
        obs_precip_vdate[idt,iyr,:] = obs_precip_week[fnd,:]
    date_ord = int(mod_dates[idt,-1]-0.5)
    doy_dts[idt] = min(364,(datetime.date.fromordinal(date_ord)-datetime.date(datetime.date.fromordinal(date_ord).year,1,1)).days)


### Calculate skill scores

exc33p = np.zeros(obs_precip_vdate.shape)
brier33pClm = np.zeros(obs_precip_vdate.shape)
pot33pCNN = np.zeros(obs_precip_vdate.shape)
brier33pCNN = np.zeros(obs_precip_vdate.shape)

exc67p = np.zeros(obs_precip_vdate.shape)
brier67pClm = np.zeros(obs_precip_vdate.shape)
pot67pCNN = np.zeros(obs_precip_vdate.shape)
brier67pCNN = np.zeros(obs_precip_vdate.shape)

exc85p = np.zeros(obs_precip_vdate.shape)
brier85pClm = np.zeros(obs_precip_vdate.shape)
pot85pCNN = np.zeros(obs_precip_vdate.shape)
brier85pCNN = np.zeros(obs_precip_vdate.shape)

rpsClm = np.zeros(obs_precip_vdate.shape)
rpsCNN = np.zeros(obs_precip_vdate.shape)

crpsClm = np.zeros(obs_precip_vdate.shape)
crpsCNN = np.zeros(obs_precip_vdate.shape)


wwCl = 15

x = (np.arange(0,101)/5)**2   # evaluation points for numerical approximation of the CRPS
dx = np.diff(x)


imod = 0

for iyr in range(nyrs):
    print(iyr)
    #f5 = np.load("/home/michael/Desktop/CalifAPCP/forecasts/CNN/probfcst_cnn-m"+str(imod)+"-drpt-f48_yr"+str(iyr)+".npz")
    #prob_fcst_cat = f5['prob_fcst_cat_era5']
    #f5.close()
    f5 = np.load("/home/michael/Desktop/CalifAPCP/forecasts/CNN/probfcst_cnn-m"+str(imod)+"-drpt-f48_"+clead+"_yr"+str(iyr)+".npz")
    prob_fcst_cat = f5['prob_fcst_cat']
    f5.close()
    prob_fcst_chf = -np.log(1.-np.cumsum(prob_fcst_cat,axis=2)[:,:,:(ncat-1)])
    prob_over_thr = np.zeros((ndts,nxy,qtev_doy.shape[2]),dtype=np.float32)
    for idt in range(ndts):
        windowClm = np.argsort(np.abs(idt-np.arange(ndts)))[:wwCl]
        ### Calculate exceedance CNN probabilities from interpolated cumulative hazard function
        for ixy in range(nxy):
            itp_fct = interp1d(thr_doy[doy_dts[idt],ixy,:], prob_fcst_chf[idt,ixy,:], kind='linear',fill_value='extrapolate')
            prob_over_thr = np.exp(-itp_fct(qtev_doy[doy_dts[idt],ixy,:]))
            pot33pCNN[idt,iyr,ixy] = prob_over_thr[0]
            pot67pCNN[idt,iyr,ixy] = prob_over_thr[1]
            pot85pCNN[idt,iyr,ixy] = prob_over_thr[2]
            ## Calculate CRPS for CNN
            bs = (1.-np.exp(-itp_fct(x))-1.*(obs_precip_vdate[idt,iyr,ixy]<=x))**2
            crpsCNN[idt,iyr,ixy] = 0.5*np.sum((bs[1:]+bs[:len(dx)])*dx)
        ### Get current year and julian day to use to select climatological percentiles
        currentYear = datetime.date.fromordinal(int(mod_dates[idt,iyr])).year
        currentDay = (datetime.date.fromordinal(int(mod_dates[idt,iyr]))-datetime.date(currentYear,1,1)).days
        obsClm = obs_precip_vdate[windowClm,:,:].reshape((wwCl*nyrs,nxy))
        crps_exc = 1.*np.less_equal.outer(obs_precip_vdate[idt,iyr,:],x)
        ## Calculate CRPS for Clm
        clm_cdf = np.mean(obsClm[:,:,None]<=x[None,None,:],axis=0)
        bs = (clm_cdf-crps_exc)**2
        crpsClm[idt,iyr,:] = 0.5*np.sum((bs[:,1:]+bs[:,:len(dx)])*dx[None,:],axis=1)
        ## Calculate Brier scores for different thresholds
        p33 = qtev_doy[doy_dts[idt],:,0]
        exc33p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p33)
        brier33pClm[idt,iyr,:] = (exc33p[idt,iyr,:]-np.mean(obsClm>p33[None,:],axis=0))**2
        brier33pCNN[idt,iyr,:] = (exc33p[idt,iyr,:]-pot33pCNN[idt,iyr,:])**2
        p67 = qtev_doy[doy_dts[idt],:,1]
        exc67p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p67)
        brier67pClm[idt,iyr,:] = (exc67p[idt,iyr,:]-np.mean(obsClm>p67[None,:],axis=0))**2
        brier67pCNN[idt,iyr,:] = (exc67p[idt,iyr,:]-pot67pCNN[idt,iyr,:])**2
        p85 = qtev_doy[doy_dts[idt],:,2]
        exc85p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p85)
        brier85pClm[idt,iyr,:] = (exc85p[idt,iyr,:]-np.mean(obsClm>p85[None,:],axis=0))**2
        brier85pCNN[idt,iyr,:] = (exc85p[idt,iyr,:]-pot85pCNN[idt,iyr,:])**2

outfilename = "/home/michael/Desktop/CalifAPCP/results/scores-cnn_"+clead
np.savez(outfilename, crpsClm=crpsClm, crpsCNN=crpsCNN, \
    exc33p=exc33p, pot33pCNN=pot33pCNN, Bs33pClm=brier33pClm, Bs33pCNN=brier33pCNN, \
    exc67p=exc67p, pot67pCNN=pot67pCNN, Bs67pClm=brier67pClm, Bs67pCNN=brier67pCNN, \
    exc85p=exc85p, pot85pCNN=pot85pCNN, Bs85pClm=brier85pClm, Bs85pCNN=brier85pCNN)


# calculate ranked probability score
rpsClm = brier33pClm + brier67pClm + brier85pClm
rpsCNN = brier33pCNN + brier67pCNN + brier85pCNN

# rpssAvgCNN
round(1.-np.sum(rpsCNN)/np.sum(rpsClm),4)


--------------------------------------------------------------------------------
/CNN-FindTuningParameters.py:
--------------------------------------------------------------------------------

import numpy as np
import scipy as sp
import math
import os, sys
#import matplotlib.pyplot as plt
import datetime
import time
import keras
import keras.backend as K

from netCDF4 import Dataset
from numpy import ma
from numpy.linalg import solve
from scipy import stats
from scipy.interpolate import interp1d

from keras import models
from keras import layers
from keras import regularizers

from keras.layers import Input, Dense, Dot, Add, Activation, Conv2D, MaxPooling2D, Flatten, Reshape, Dropout
from keras.models import Model
from keras.optimizers import Adam

#plt.ion()


## Load categorical analysis data

f1 = np.load("/Users/mscheuerer/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz")
lat = f1['obs_lat']
lon = f1['obs_lon']
obs_dates_ord = f1['obs_dates_ord']
pop_doy = f1['pop_doy']
thr_doy = f1['thr_doy']
qtev_doy = f1['qtev_doy']
apcp_obs_cat = f1['apcp_obs_cat']
apcp_obs = f1['apcp_obs']
f1.close()

ndts, nxy, ncat = apcp_obs_cat.shape


## Load ERA5 z500 and tcw fields, subset to 22 x 18 image, same for the ensemble forecast fields

ixl = 10
ixu = -6
jyl = 6
jyu = -6

f2 = np.load("/Users/mscheuerer/Desktop/CalifAPCP/data/z500_tcw_predictors_era5.npz")
era5_dates_ord = f2['dates_ord']
era5_lon = f2['longitude'][ixl:ixu]
era5_lat = f2['latitude'][jyl:jyu]
z500 = f2['z500_1wk'][:,:,jyl:jyu,ixl:ixu]
tcw = f2['tcw_1wk'][:,:,jyl:jyu,ixl:ixu]
f2.close()

ndts, nyrs, ny, nx = z500.shape


## Calculate doy for each analysis date

doy_dts = np.zeros(ndts,dtype=np.int32)
apcp_obs_ind = np.zeros((ndts,nyrs),dtype=np.int32)
for idt in range(ndts):
    for iyr in range(nyrs):
        apcp_obs_ind[idt,iyr] = np.where(obs_dates_ord==era5_dates_ord[idt,iyr])[0][0]
    date_ord = int(era5_dates_ord[idt,0]-0.5)
    doy_dts[idt] = min(364,(datetime.date.fromordinal(date_ord)-datetime.date(datetime.date.fromordinal(date_ord).year,1,1)).days)


## Normalize tcw to 10th/90th climatological percentiles at each grid point

tcw_q10 = np.percentile(tcw,10,axis=1)
tcw_q90 = np.percentile(tcw,90,axis=1)
tcw_q10_sm = np.zeros(tcw_q10.shape, dtype=np.float32)
tcw_q90_sm = np.zeros(tcw_q90.shape, dtype=np.float32)

X = np.ones((ndts,3), dtype=np.float32)   # Fit harmonic function to annual cycle of tcw climatology
X[:,1] = np.sin(2.*np.pi*era5_dates_ord[:,0]/365.25)
X[:,2] = np.cos(2.*np.pi*era5_dates_ord[:,0]/365.25)
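# The loop below fits, separately at each grid point, the first-order harmonic model
# q(t) ~ a + b*sin(2*pi*t/365.25) + c*cos(2*pi*t/365.25) to the annual cycle of the
# climatological percentiles by least squares: solve(X^T X, X^T y) solves the normal
# equations, and matmul(X, coef) evaluates the smoothed cycle.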
for ix in range(nx):
    for jy in range(ny):
        coef_q10 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_q10[:,jy,ix]))
        tcw_q10_sm[:,jy,ix] = np.matmul(X,coef_q10)
        coef_q90 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_q90[:,jy,ix]))
        tcw_q90_sm[:,jy,ix] = np.matmul(X,coef_q90)

tcw_ano = -1.+2.*(tcw-tcw_q10_sm[:,None,:,:])/(tcw_q90_sm-tcw_q10_sm)[:,None,:,:]


## Normalize z500 to 1st/99th climatological percentiles across all grid points

z500_q01 = np.percentile(z500,1,axis=(1,2,3))
z500_q99 = np.percentile(z500,99,axis=(1,2,3))

coef_q01 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_q01))
z500_q01_sm = np.matmul(X,coef_q01)
coef_q99 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_q99))
z500_q99_sm = np.matmul(X,coef_q99)

z500_ano = -1.+2.*(z500-z500_q01_sm[:,None,None,None])/(z500_q99_sm-z500_q01_sm)[:,None,None,None]


# Define basis functions

r_basis = 7.
lon_ctr = np.outer(np.arange(-124,-115,3.5),np.ones(3)).reshape(9)[[2,4,5,6,7]]
lat_ctr = np.outer(np.ones(3),np.arange(33,42,3.5)).reshape(9)[[2,4,5,6,7]]

dst_lon = np.abs(np.subtract.outer(lon,lon_ctr))
dst_lat = np.abs(np.subtract.outer(lat,lat_ctr))
dst = np.sqrt(dst_lon**2+dst_lat**2)
basis = np.where(dst>r_basis,0.,(1.-(dst/r_basis)**3)**3)
basis = basis/np.sum(basis,axis=1)[:,None]
nbs = basis.shape[1]
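# The five basis functions are compactly supported bumps (1-(d/r)^3)^3 that vanish
# beyond r_basis = 7 degrees and are renormalized so the weights at each analysis grid
# point sum to one; the CNN below predicts ncat coefficients per basis function, and the
# Dot layer blends them into smoothly varying class probabilities across the domain.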

## Define functions for building a CNN

#def build_cat_model(n_xy, n_bins, n_basis, hidden_nodes, par_reg):
#    inp_imgs = Input(shape=(18,22,2,))
#    inp_basis = Input(shape=(n_xy,n_basis,))
#    inp_cl = Input(shape=(n_xy,n_bins,))
#    c = Conv2D(4, (3,3), activation='elu')(inp_imgs)
#    c = MaxPooling2D((2,2))(c)
#    c = Conv2D(8, (3,3), activation='elu')(c)
#    c = MaxPooling2D((2,2))(c)
#    x = Flatten()(c)
#    #x = Concatenate()([c,inp_aux])
#    for h in hidden_nodes:
#        x = Dense(h, activation='elu', kernel_regularizer=regularizers.l1(par_reg))(x)
#    x = Dense(n_bins*n_basis, activation='elu', kernel_regularizer=regularizers.l1(par_reg))(x)
#    x = Reshape((n_bins,n_basis))(x)
#    z = Dot(axes=2)([inp_basis, x])
#    z = Add()([z, inp_cl])
#    out = Activation('softmax')(z)
#    return Model(inputs=[inp_imgs, inp_basis, inp_cl], outputs=out)


def build_cat_model(n_xy, n_bins, n_basis, hidden_nodes, dropout_rate):
    inp_imgs = Input(shape=(18,22,2,))
    inp_basis = Input(shape=(n_xy,n_basis,))
    inp_cl = Input(shape=(n_xy,n_bins,))
    c = Conv2D(4, (3,3), activation='elu')(inp_imgs)
    c = MaxPooling2D((2,2))(c)
    c = Conv2D(8, (3,3), activation='elu')(c)
    c = MaxPooling2D((2,2))(c)
    x = Flatten()(c)
    for h in hidden_nodes:
        x = Dropout(dropout_rate)(x)
        x = Dense(h, activation='elu')(x)
    x = Dense(n_bins*n_basis, activation='elu')(x)
    x = Reshape((n_bins,n_basis))(x)
    z = Dot(axes=2)([inp_basis, x])
    z = Add()([z, inp_cl])
    out = Activation('softmax')(z)
    return Model(inputs=[inp_imgs, inp_basis, inp_cl], outputs=out)


def modified_categorical_crossentropy(y_mat, prob_fcst):
    prob_obs_cat = K.sum(y_mat*prob_fcst,axis=2)
    return -K.mean(K.log(prob_obs_cat))


nyrs = 20

#reg = 10.**np.arange(-6,-2)
reg = np.arange(0.1,0.6,0.1)
nreg = len(reg)

imod = 0

mod = [[10],[20],[10,10]]


x = (np.arange(0,101)/5)**2   # evaluation points for numerical calculation of the CRPS
dx = np.diff(x)

opt_reg_param = np.zeros(nyrs, dtype=np.float32)
opt_valid_scores = np.zeros((nyrs,5), dtype=np.float32)
opt_valid_crps = np.zeros((nyrs,5), dtype=np.float32)


for iyr in range(nyrs):
    print('year: ',iyr)
    # Calculate image predictors and basis functions
    apcp_obs_ind_cv = np.delete(apcp_obs_ind,iyr,axis=1)
    z500_pred_cv = np.delete(z500_ano,iyr,axis=1).reshape((ndts*(nyrs-1),ny,nx,1))
    tcw_pred_cv = np.delete(tcw_ano,iyr,axis=1).reshape((ndts*(nyrs-1),ny,nx,1))
    pred_imgs_cv = np.concatenate((z500_pred_cv,tcw_pred_cv),axis=3)
    basis_cv = np.repeat(basis[np.newaxis,:,:],ndts*(nyrs-1),axis=0)
    # Calculate climatological log probabilities for each class
    apcp_pop_cl = np.repeat(pop_doy[doy_dts,np.newaxis,:],nyrs-1,axis=1).reshape((ndts*(nyrs-1),nxy,1))
    logp_cl_cv = np.concatenate((np.log(1.-apcp_pop_cl),np.repeat(np.log(apcp_pop_cl),ncat-1,axis=2)-np.log(ncat-1)),axis=2)
    # perform 5-fold cross validation to find optimal regularization
    date_order = np.arange(ndts*(nyrs-1)).reshape(ndts,nyrs-1).T.flatten()
    cv_ind = date_order[np.arange(ndts*(nyrs-1))%232<231]   # remove the date between the 5 cross-validated blocks
    valid_score = np.zeros((nreg,5), dtype=np.float32)
    valid_crps = np.zeros((nreg,5), dtype=np.float32)
    for cvi in range(5):
        train_ind = cv_ind[np.arange(len(cv_ind))//(len(cv_ind)//5)!=cvi]
        valid_ind = cv_ind[np.arange(len(cv_ind))//(len(cv_ind)//5)==cvi]
        pred_imgs_train = pred_imgs_cv[train_ind,:,:,:]
        basis_train = basis_cv[train_ind,:,:]
        logp_cl_train = logp_cl_cv[train_ind,:,:]
        cat_targets_train = apcp_obs_cat[apcp_obs_ind_cv.flatten()[train_ind],:,:].astype(float)
        pred_imgs_valid = pred_imgs_cv[valid_ind,:,:]
        basis_valid = basis_cv[valid_ind,:,:]
        logp_cl_valid = logp_cl_cv[valid_ind,:,:]
        cat_targets_valid = apcp_obs_cat[apcp_obs_ind_cv.flatten()[valid_ind],:,:].astype(float)
        doy_valid = np.repeat(doy_dts[:,np.newaxis],nyrs-1,axis=1).flatten()[valid_ind]
        for ireg in range(nreg):
            # Define and fit CNN model (using batch gradient descent)
            keras.backend.clear_session()
            model = build_cat_model(nxy, ncat, nbs, mod[imod], reg[ireg])
            model.compile(optimizer=Adam(0.01), loss=modified_categorical_crossentropy)
            model.fit([pred_imgs_train,basis_train,logp_cl_train], cat_targets_train, epochs=150, batch_size=len(train_ind), verbose=0)
            valid_score[ireg,cvi] = model.evaluate([pred_imgs_valid,basis_valid,logp_cl_valid], cat_targets_valid, batch_size=len(valid_ind), verbose=0)
            # Calculate CRPS for each cross-validation fold
            prob_fcst_cat = model.predict([pred_imgs_valid,basis_valid,logp_cl_valid])
            prob_fcst_chf = -np.log(np.maximum(1.-np.cumsum(prob_fcst_cat,axis=2)[:,:,:(ncat-1)],1.e-10))
            crps_fold = np.zeros((len(valid_ind),nxy),dtype=np.float32)
            for ivdt in range(len(valid_ind)):
                for ixy in range(nxy):
                    itp_fct = interp1d(thr_doy[doy_valid[ivdt],ixy,:], prob_fcst_chf[ivdt,ixy,:], kind='linear',fill_value='extrapolate')
                    bs = (1.-np.exp(-itp_fct(x))-1.*(apcp_obs[apcp_obs_ind_cv.flatten()[valid_ind[ivdt]],ixy]<=x))**2
                    crps_fold[ivdt,ixy] = 0.5*np.sum((bs[1:]+bs[:len(dx)])*dx)
            valid_crps[ireg,cvi] = np.mean(crps_fold)
    opt_reg_ind = np.argmin(np.mean(valid_score,axis=1))
    opt_reg_param[iyr] = reg[opt_reg_ind]
    opt_valid_scores[iyr,:] = valid_score[opt_reg_ind,:]
    opt_valid_crps[iyr,:] = valid_crps[opt_reg_ind,:]
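# Architecture recap: two Conv2D/MaxPooling2D stages extract features from the 18x22x2
# (z500, tcw) anomaly images, the dense head outputs ncat x nbs coefficients, Dot blends
# them with the spatial basis functions, and Add + softmax applies the climatological
# log-probabilities, as in the ANN scripts.  Note that with the dropout variant of
# build_cat_model, reg holds candidate dropout rates (0.1-0.5), so opt_reg_param stores
# a dropout rate rather than an l1 weight.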
245 | ### Save out to file 246 | outfilename = "/Users/mscheuerer/Desktop/CalifAPCP/tuning/cnn-m"+str(imod)+"-drpt-f48" 247 | np.savez(outfilename, opt_reg_param=opt_reg_param, opt_valid_scores=opt_valid_scores, opt_valid_crps=opt_valid_crps) 248 | 249 | 250 | 251 | 252 | 253 | -------------------------------------------------------------------------------- /CNN-FitConvolutionalNetworkModel.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import scipy as sp 4 | import math 5 | import os, sys 6 | #import matplotlib.pyplot as plt 7 | import datetime 8 | import time 9 | import keras 10 | import keras.backend as K 11 | 12 | from netCDF4 import Dataset 13 | from numpy import ma 14 | from numpy.linalg import solve 15 | from scipy import stats 16 | 17 | from keras import models 18 | from keras import layers 19 | from keras import regularizers 20 | 21 | from keras.layers import Input, Dense, Dot, Add, Activation, Conv2D, MaxPooling2D, Flatten, Reshape, Dropout 22 | from keras.models import Model 23 | from keras.optimizers import Adam 24 | 25 | #plt.ion() 26 | 27 | 28 | ## Load categorical analysis data 29 | 30 | f1 = np.load("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz") 31 | #list(f1) 32 | lat = f1['obs_lat'] 33 | lon = f1['obs_lon'] 34 | obs_dates_ord = f1['obs_dates_ord'] 35 | pop_doy = f1['pop_doy'] 36 | apcp_obs_cat = f1['apcp_obs_cat'] 37 | f1.close() 38 | 39 | ndts, nxy, ncat = apcp_obs_cat.shape 40 | 41 | 42 | 43 | ## Load ERA5 z500 and tcw fields, subset to 22 x 18 image, same for the ensemble forecast fields 44 | 45 | ixl = 10 46 | ixu = -6 47 | jyl = 6 48 | jyu = -6 49 | 50 | f2 = np.load("/home/michael/Desktop/CalifAPCP/data/z500_tcw_predictors_era5.npz") 51 | era5_dates_ord = f2['dates_ord'] 52 | era5_lon = f2['longitude'][ixl:ixu] 53 | era5_lat = f2['latitude'][jyl:jyu] 54 | z500 = f2['z500_1wk'][:,:,jyl:jyu,ixl:ixu] 55 | tcw = f2['tcw_1wk'][:,:,jyl:jyu,ixl:ixu] 56 | f2.close() 57 | 58 | ndts, nyrs, ny, nx = z500.shape 59 | 60 | 61 | z500_fcst = np.zeros((3,ndts,nyrs,11,ny,nx),dtype=np.float32) 62 | tcw_fcst = np.zeros((3,ndts,nyrs,11,ny,nx),dtype=np.float32) 63 | 64 | f3 = np.load("/home/michael/Desktop/CalifAPCP/data/z500_predictor_cnn.npz") 65 | mod_dates_ord = f3['mod_dates_ord'][:,:,6:21:7] 66 | 67 | f4 = np.load("/home/michael/Desktop/CalifAPCP/data/tcw_predictor_cnn.npz") 68 | 69 | for ilt in range(3): 70 | clead = 'week'+str(ilt+2) 71 | z500_fcst[ilt,:,:,:,:,:] = f3['z500_'+clead][:,:,:,jyl:jyu,ixl:ixu] # subset to 22 x 18 image 72 | tcw_fcst[ilt,:,:,:,:,:] = f4['tcw_'+clead][:,:,:,jyl:jyu,ixl:ixu] 73 | 74 | f3.close() 75 | f4.close() 76 | 77 | 78 | 79 | ## Calculate doy for each analysis date and for each forecast valid date 80 | 81 | doy_dts = np.zeros(ndts,dtype=np.int32) 82 | apcp_obs_ind = np.zeros((ndts,nyrs),dtype=np.int32) 83 | for idt in range(ndts): 84 | for iyr in range(nyrs): 85 | apcp_obs_ind[idt,iyr] = np.where(obs_dates_ord==era5_dates_ord[idt,iyr])[0][0] 86 | date_ord = int(era5_dates_ord[idt,0]-0.5) 87 | doy_dts[idt] = min(364,(datetime.date.fromordinal(date_ord)-datetime.date(datetime.date.fromordinal(date_ord).year,1,1)).days) 88 | 89 | doy_fcst = np.zeros((3,ndts),dtype=np.int32) 90 | for idt in range(ndts): 91 | for ilt in range(3): 92 | date_ord = int(int(mod_dates_ord[idt,0,ilt])-0.5) 93 | doy_fcst[ilt,idt] = min(364,(datetime.date.fromordinal(date_ord)-datetime.date(datetime.date.fromordinal(date_ord).year,1,1)).days) 94 | 95 | 96 | 97 | ## Normalize tcw to 
10th/90th climatological percentiles at each grid point 98 | 99 | tcw_q10 = np.percentile(tcw,10,axis=1) 100 | tcw_q90 = np.percentile(tcw,90,axis=1) 101 | tcw_q10_sm = np.zeros(tcw_q10.shape, dtype=np.float32) 102 | tcw_q90_sm = np.zeros(tcw_q90.shape, dtype=np.float32) 103 | 104 | tcw_fcst_q10 = np.percentile(tcw_fcst,10,axis=(2,3)) 105 | tcw_fcst_q90 = np.percentile(tcw_fcst,90,axis=(2,3)) 106 | tcw_fcst_q10_sm = np.zeros(tcw_fcst_q10.shape, dtype=np.float32) 107 | tcw_fcst_q90_sm = np.zeros(tcw_fcst_q90.shape, dtype=np.float32) 108 | 109 | X = np.ones((ndts,3), dtype=np.float32) # Fit harmonic function to annual cycle of tcw climatology 110 | X[:,1] = np.sin(2.*np.pi*era5_dates_ord[:,0]/365.25) 111 | X[:,2] = np.cos(2.*np.pi*era5_dates_ord[:,0]/365.25) 112 | 113 | for ix in range(nx): 114 | for jy in range(ny): 115 | coef_q10 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_q10[:,jy,ix])) 116 | tcw_q10_sm[:,jy,ix] = np.matmul(X,coef_q10) 117 | coef_q90 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_q90[:,jy,ix])) 118 | tcw_q90_sm[:,jy,ix] = np.matmul(X,coef_q90) 119 | for ilt in range(3): 120 | coef_q10 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_fcst_q10[ilt,:,jy,ix])) 121 | tcw_fcst_q10_sm[ilt,:,jy,ix] = np.matmul(X,coef_q10) 122 | coef_q90 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_fcst_q90[ilt,:,jy,ix])) 123 | tcw_fcst_q90_sm[ilt,:,jy,ix] = np.matmul(X,coef_q90) 124 | 125 | tcw_ano = -1.+2.*(tcw-tcw_q10_sm[:,None,:,:])/(tcw_q90_sm-tcw_q10_sm)[:,None,:,:] 126 | tcw_fcst_ano = -1.+2.*(tcw_fcst-tcw_fcst_q10_sm[:,:,None,None,:,:])/(tcw_fcst_q90_sm-tcw_fcst_q10_sm)[:,:,None,None,:,:] 127 | 128 | 129 | 130 | ## Normalize z500 to 1st/99th climatological percentiles across all grid points 131 | 132 | z500_q01 = np.percentile(z500,1,axis=(1,2,3)) 133 | z500_q99 = np.percentile(z500,99,axis=(1,2,3)) 134 | z500_fcst_q01 = np.percentile(z500_fcst,1,axis=(2,3,4,5)) 135 | z500_fcst_q99 = np.percentile(z500_fcst,99,axis=(2,3,4,5)) 136 | 137 | coef_q01 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_q01)) 138 | z500_q01_sm = np.matmul(X,coef_q01) 139 | coef_q99 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_q99)) 140 | z500_q99_sm = np.matmul(X,coef_q99) 141 | 142 | z500_fcst_q01_sm = np.zeros(z500_fcst_q01.shape, dtype=np.float32) 143 | z500_fcst_q99_sm = np.zeros(z500_fcst_q99.shape, dtype=np.float32) 144 | 145 | for ilt in range(3): 146 | coef_q01 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_fcst_q01[ilt,:])) 147 | z500_fcst_q01_sm[ilt,:] = np.matmul(X,coef_q01) 148 | coef_q99 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_fcst_q99[ilt,:])) 149 | z500_fcst_q99_sm[ilt,:] = np.matmul(X,coef_q99) 150 | 151 | z500_ano = -1.+2.*(z500-z500_q01_sm[:,None,None,None])/(z500_q99_sm-z500_q01_sm)[:,None,None,None] 152 | z500_fcst_ano = -1.+2.*(z500_fcst-z500_fcst_q01_sm[:,:,None,None,None,None])/(z500_fcst_q99_sm-z500_fcst_q01_sm)[:,:,None,None,None,None] 153 | 154 | 155 | # Define basis functions 156 | 157 | r_basis = 7. 
158 | lon_ctr = np.outer(np.arange(-124,-115,3.5),np.ones(3)).reshape(9)[[2,4,5,6,7]] 159 | lat_ctr = np.outer(np.ones(3),np.arange(33,42,3.5)).reshape(9)[[2,4,5,6,7]] 160 | 161 | dst_lon = np.abs(np.subtract.outer(lon,lon_ctr)) 162 | dst_lat = np.abs(np.subtract.outer(lat,lat_ctr)) 163 | dst = np.sqrt(dst_lon**2+dst_lat**2) 164 | basis = np.where(dst>r_basis,0.,(1.-(dst/r_basis)**3)**3) 165 | basis = basis/np.sum(basis,axis=1)[:,None] 166 | nbs = basis.shape[1] 167 | 168 | 169 | ## Define functions for building a CNN 170 | 171 | def build_cat_model(n_xy, n_bins, n_basis, hidden_nodes, dropout_rate): 172 | inp_imgs = Input(shape=(18,22,2,)) 173 | #inp_imgs = Input(shape=(18,22,1,)) 174 | inp_basis = Input(shape=(n_xy,n_basis,)) 175 | inp_cl = Input(shape=(n_xy,n_bins,)) 176 | c = Conv2D(4, (3,3), activation='elu')(inp_imgs) 177 | c = MaxPooling2D((2,2))(c) 178 | c = Conv2D(8, (3,3), activation='elu')(c) 179 | c = MaxPooling2D((2,2))(c) 180 | x = Flatten()(c) 181 | for h in hidden_nodes: 182 | x = Dropout(dropout_rate)(x) 183 | x = Dense(h, activation='elu')(x) 184 | x = Dense(n_bins*n_basis, activation='elu')(x) 185 | x = Reshape((n_bins,n_basis))(x) 186 | z = Dot(axes=2)([inp_basis, x]) # Tensor product with basis functions 187 | z = Add()([z, inp_cl]) # Add (log) probability anomalies to log climatological probabilities 188 | out = Activation('softmax')(z) 189 | return Model(inputs=[inp_imgs, inp_basis, inp_cl], outputs=out) 190 | 191 | 192 | def modified_categorical_crossentropy(y_mat, prob_fcst): 193 | prob_obs_cat = K.sum(y_mat*prob_fcst,axis=2) 194 | return -K.mean(K.log(prob_obs_cat)) 195 | 196 | 197 | 198 | imod = 0 199 | 200 | mod = [[10],[20],[10,10]] 201 | 202 | f5 = np.load("/home/michael/Desktop/CalifAPCP/tuning/cnn-m"+str(imod)+"-drpt-f48.npz") 203 | opt_reg_param = f5['opt_reg_param'] 204 | f5.close() 205 | 206 | 207 | for iyr in range(0,20): 208 | print(iyr) 209 | # Split data into training and verification data set 210 | apcp_obs_ind_train = np.delete(apcp_obs_ind,iyr,axis=1) 211 | apcp_obs_ind_verif = apcp_obs_ind[:,iyr] 212 | z500_pred_train = np.delete(z500_ano,iyr,axis=1).reshape((ndts*(nyrs-1),ny,nx,1)) 213 | z500_pred_verif = z500_ano[:,iyr,:,:,None] 214 | z500_pred_fcst_train = np.delete(z500_fcst_ano,iyr,axis=2).reshape((3,ndts*(nyrs-1),11,ny,nx,1)) 215 | z500_pred_fcst_verif = z500_fcst_ano[:,:,iyr,:,:,:,None] 216 | tcw_pred_train = np.delete(tcw_ano,iyr,axis=1).reshape((ndts*(nyrs-1),ny,nx,1)) 217 | tcw_pred_verif = tcw_ano[:,iyr,:,:,None] 218 | tcw_pred_fcst_train = np.delete(tcw_fcst_ano,iyr,axis=2).reshape((3,ndts*(nyrs-1),11,ny,nx,1)) 219 | tcw_pred_fcst_verif = tcw_fcst_ano[:,:,iyr,:,:,:,None] 220 | # Calculate climatological log probabilities for each class 221 | apcp_lgp0_cl_train = np.repeat(np.log(1.-pop_doy[doy_dts,np.newaxis,:]),nyrs-1,axis=1).reshape((ndts*(nyrs-1),nxy,1)) 222 | apcp_lgp0_cl_verif = np.log(1.-pop_doy[doy_dts,:])[:,:,None] 223 | apcp_lgpop_cl_train = np.repeat(np.log(pop_doy[doy_dts,np.newaxis,:]),nyrs-1,axis=1).reshape((ndts*(nyrs-1),nxy,1)) 224 | apcp_lgpop_cl_verif = np.log(pop_doy[doy_dts,:])[:,:,None] 225 | apcp_lgp0_cl_fcst_train = np.zeros((3,ndts*(nyrs-1),nxy,1), dtype=np.float32) 226 | apcp_lgp0_cl_fcst_verif = np.zeros((3,ndts,nxy,1), dtype=np.float32) 227 | apcp_lgpop_cl_fcst_train = np.zeros((3,ndts*(nyrs-1),nxy,1), dtype=np.float32) 228 | apcp_lgpop_cl_fcst_verif = np.zeros((3,ndts,nxy,1), dtype=np.float32) 229 | for ilt in range(3): 230 | apcp_lgp0_cl_fcst_train[ilt,:,:,0] = 
np.repeat(np.log(1.-pop_doy[doy_fcst[ilt,:],np.newaxis,:]),nyrs-1,axis=1).reshape((ndts*(nyrs-1),nxy)) 231 | apcp_lgp0_cl_fcst_verif[ilt,:,:,0] = np.log(1.-pop_doy[doy_fcst[ilt,:],:]) 232 | apcp_lgpop_cl_fcst_train[ilt,:,:,0] = np.repeat(np.log(pop_doy[doy_fcst[ilt,:],np.newaxis,:]),nyrs-1,axis=1).reshape((ndts*(nyrs-1),nxy)) 233 | apcp_lgpop_cl_fcst_verif[ilt,:,:,0] = np.log(pop_doy[doy_fcst[ilt,:],:]) 234 | # Compose training data (large-scale predictors, auxiliary predictors, climatological probabilities, observed categories) 235 | train_pred_imgs = np.concatenate((z500_pred_train,tcw_pred_train),axis=3) 236 | #train_pred_imgs = tcw_pred_train 237 | train_basis = np.repeat(basis[np.newaxis,:,:],ndts*(nyrs-1),axis=0) 238 | train_logp_cl = np.concatenate((apcp_lgp0_cl_train,np.repeat(apcp_lgpop_cl_train,ncat-1,axis=2)-np.log(ncat-1)),axis=2) 239 | train_cat_targets = apcp_obs_cat[apcp_obs_ind_train.flatten(),:,:].astype(float) 240 | # Define and fit CNN model 241 | keras.backend.clear_session() 242 | model = build_cat_model(nxy, ncat, nbs, mod[imod], opt_reg_param[iyr]) 243 | model.compile(optimizer=Adam(0.01), loss=modified_categorical_crossentropy) 244 | model.fit([train_pred_imgs,train_basis,train_logp_cl], train_cat_targets, epochs=150, batch_size=ndts*(nyrs-1), verbose=1) 245 | # Calculate ERA-5 probability forecasts 246 | verif_pred_imgs = np.concatenate((z500_pred_verif,tcw_pred_verif),axis=3) 247 | #verif_pred_imgs = tcw_pred_verif 248 | verif_basis = np.repeat(basis[np.newaxis,:,:],ndts,axis=0) 249 | verif_logp_cl = np.concatenate((apcp_lgp0_cl_verif,np.repeat(apcp_lgpop_cl_verif,ncat-1,axis=2)-np.log(ncat-1)),axis=2) 250 | prob_fcst_cat_era5 = model.predict([verif_pred_imgs,verif_basis,verif_logp_cl]) 251 | # Calculate ensemble-based, mean probability forecasts 252 | logp_ano_ensmean_train = np.zeros((3,ndts*(nyrs-1),nxy,ncat), dtype=np.float32) 253 | logp_ano_ensmean_verif = np.zeros((3,ndts,nxy,ncat), dtype=np.float32) 254 | for ilt in range(3): 255 | train_logp_cl = np.concatenate((apcp_lgp0_cl_fcst_train[ilt,:,:,:],np.repeat(apcp_lgpop_cl_fcst_train[ilt,:,:,:],ncat-1,axis=2)-np.log(ncat-1)),axis=2) 256 | verif_logp_cl = np.concatenate((apcp_lgp0_cl_fcst_verif[ilt,:,:,:],np.repeat(apcp_lgpop_cl_fcst_verif[ilt,:,:,:],ncat-1,axis=2)-np.log(ncat-1)),axis=2) 257 | prob_fcst_cat_ens_train = np.zeros((11,ndts*(nyrs-1),nxy,ncat), dtype=np.float32) 258 | prob_fcst_cat_ens_verif = np.zeros((11,ndts,nxy,ncat), dtype=np.float32) 259 | for imem in range(11): 260 | train_pred_imgs = np.concatenate((z500_pred_fcst_train[ilt,:,imem,:,:,:],tcw_pred_fcst_train[ilt,:,imem,:,:,:]),axis=3) 261 | #train_pred_imgs = tcw_pred_fcst_train[ilt,:,imem,:,:,:] 262 | prob_fcst_cat_ens_train[imem,:,:,:] = model.predict([train_pred_imgs,train_basis,train_logp_cl]) 263 | verif_pred_imgs = np.concatenate((z500_pred_fcst_verif[ilt,:,imem,:,:,:],tcw_pred_fcst_verif[ilt,:,imem,:,:,:]),axis=3) 264 | #verif_pred_imgs = tcw_pred_fcst_verif[ilt,:,imem,:,:,:] 265 | prob_fcst_cat_ens_verif[imem,:,:,:] = model.predict([verif_pred_imgs,verif_basis,verif_logp_cl]) 266 | logp_ano_ensmean_train[ilt,:,:,:] = np.mean(np.log(prob_fcst_cat_ens_train),axis=0) - train_logp_cl # Reconstruct the log probability anomalies 267 | logp_ano_ensmean_verif[ilt,:,:,:] = np.mean(np.log(prob_fcst_cat_ens_verif),axis=0) - verif_logp_cl # for each ensemble member and calculate mean 268 | ### Save out to file 269 | outfilename = "/home/michael/Desktop/CalifAPCP/forecasts/CNN/probfcst_cnn-m"+str(imod)+"-drpt-f48_yr"+str(iyr) 270 | 
np.savez(outfilename, prob_fcst_cat_era5=prob_fcst_cat_era5, \ 271 | logp_ano_ensmean_train=logp_ano_ensmean_train, \ 272 | logp_ano_ensmean_verif=logp_ano_ensmean_verif, \ 273 | apcp_lgp0_cl_fcst_train=apcp_lgp0_cl_fcst_train, \ 274 | apcp_lgp0_cl_fcst_verif=apcp_lgp0_cl_fcst_verif, \ 275 | apcp_lgpop_cl_fcst_train=apcp_lgpop_cl_fcst_train, \ 276 | apcp_lgpop_cl_fcst_verif=apcp_lgpop_cl_fcst_verif) 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | -------------------------------------------------------------------------------- /CNN-GenerateProbabilityForecasts.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import scipy as sp 4 | import math 5 | import os, sys 6 | #import matplotlib.pyplot as plt 7 | import datetime 8 | import time 9 | 10 | from netCDF4 import Dataset 11 | from numpy import ma 12 | from scipy import stats 13 | 14 | from scipy.optimize import minimize_scalar 15 | 16 | 17 | #plt.ion() 18 | 19 | def adjustment_factor_target(par, y_mat, logp_ensmeanano, logp_cl): 20 | # average modified categorical crossentropy for relaxed perfect prog probabilities 21 | prob_fcst_cat_cmb = np.exp(par*logp_ensmeanano+logp_cl) 22 | prob_fcst = prob_fcst_cat_cmb / np.sum(prob_fcst_cat_cmb,axis=2)[:,:,None] 23 | prob_obs_cat = np.sum(y_mat*prob_fcst,axis=2) 24 | return -np.mean(np.log(prob_obs_cat)) 25 | 26 | 27 | f1 = np.load("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz") 28 | #list(f1) 29 | lat = f1['obs_lat'] 30 | lon = f1['obs_lon'] 31 | obs_dates_ord = f1['obs_dates_ord'] 32 | apcp_obs_cat = f1['apcp_obs_cat'] 33 | f1.close() 34 | 35 | ndts, nxy, ncat = apcp_obs_cat.shape 36 | 37 | 38 | f2 = np.load("/home/michael/Desktop/CalifAPCP/data/z500_predictor_cnn.npz") 39 | mod_dates_ord = f2['mod_dates_ord'][:,:,6:21:7] 40 | f2.close() 41 | 42 | ndts, nyrs, nlt = mod_dates_ord.shape 43 | 44 | apcp_obs_ind = np.zeros((ndts,nyrs,nlt),dtype=np.int32) 45 | for idt in range(ndts): 46 | for iyr in range(nyrs): 47 | for ilt in range(3): 48 | apcp_obs_ind[idt,iyr,ilt] = np.where(obs_dates_ord==mod_dates_ord[idt,iyr,ilt])[0][0] 49 | 50 | 51 | imod = 0 52 | 53 | for iyr in range(0,20): 54 | print(iyr) 55 | # Load smoothed ensemble forecast anomalies 56 | f3 = np.load("/home/michael/Desktop/CalifAPCP/forecasts/CNN/probfcst_cnn-m"+str(imod)+"-drpt-f48_yr"+str(iyr)+".npz") 57 | logp_ano_ensmean_train = f3['logp_ano_ensmean_train'] 58 | logp_ano_ensmean_verif = f3['logp_ano_ensmean_verif'] 59 | apcp_lgp0_cl_fcst_train = f3['apcp_lgp0_cl_fcst_train'] 60 | apcp_lgp0_cl_fcst_verif = f3['apcp_lgp0_cl_fcst_verif'] 61 | apcp_lgpop_cl_fcst_train = f3['apcp_lgpop_cl_fcst_train'] 62 | apcp_lgpop_cl_fcst_verif = f3['apcp_lgpop_cl_fcst_verif'] 63 | f3.close() 64 | for ilt in range(3): 65 | # Calculate index for training observations 66 | apcp_obs_ind_train = np.delete(apcp_obs_ind[:,:,ilt],iyr,axis=1) 67 | train_cat_targets = apcp_obs_cat[apcp_obs_ind_train.flatten(),:,:].astype(float) 68 | train_logp_cl = np.concatenate((apcp_lgp0_cl_fcst_train[ilt,:,:,:],np.repeat(apcp_lgpop_cl_fcst_train[ilt,:,:,:],ncat-1,axis=2)-np.log(ncat-1)),axis=2) 69 | verif_logp_cl = np.concatenate((apcp_lgp0_cl_fcst_verif[ilt,:,:,:],np.repeat(apcp_lgpop_cl_fcst_verif[ilt,:,:,:],ncat-1,axis=2)-np.log(ncat-1)),axis=2) 70 | train_logp_ensmeanano = logp_ano_ensmean_train[ilt,:,:,:] 71 | verif_logp_ensmeanano = logp_ano_ensmean_verif[ilt,:,:,:] 72 | a = minimize_scalar(adjustment_factor_target, 
args=(train_cat_targets,train_logp_ensmeanano,train_logp_cl), method='bounded', bounds=(0.,1.)).x 73 | print(a) 74 | prob_fcst_cat_cmb = np.exp(a*verif_logp_ensmeanano+verif_logp_cl) 75 | prob_fcst_cat = prob_fcst_cat_cmb / np.sum(prob_fcst_cat_cmb,axis=2)[:,:,None] 76 | ### Save out to file 77 | outfilename = "/home/michael/Desktop/CalifAPCP/forecasts/CNN/probfcst_cnn-m"+str(imod)+"-drpt-f48_week"+str(2+ilt)+"_yr"+str(iyr) 78 | np.savez(outfilename, prob_fcst_cat=prob_fcst_cat) 79 | 80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /CSGD-FitClimatologicalDistributions.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import scipy as sp 4 | import math 5 | import os, sys 6 | import matplotlib.pyplot as plt 7 | import matplotlib.path as path 8 | import datetime 9 | import time 10 | 11 | from netCDF4 import Dataset 12 | from numpy import ma 13 | from numpy import loadtxt 14 | from scipy import stats 15 | from scipy.stats import gamma 16 | from scipy.special import beta 17 | from scipy.optimize import minimize_scalar 18 | from scipy.optimize import minimize 19 | 20 | plt.ion() 21 | 22 | 23 | def crpsClimoCSGD(shape,obs,mean,pop): 24 | # average CRPS for climatological CSGD as a function of shape (pop and mean fixed) 25 | crps = np.zeros(len(obs),dtype='float64') 26 | Fck = 1.-pop 27 | cstd = gamma.ppf(Fck,shape) 28 | fkp1q0 = gamma.pdf(cstd,shape+1.,scale=1.) 29 | scale = (mean-0.254*pop) / (shape*(pop+fkp1q0)-pop*cstd) # assumes that precipitation amounts < 0.254 mm are considered zero 30 | shift = 0.254-cstd*scale 31 | penalty = max(0.005-shape*scale-shift,0.0) # penalize shifts that would move most of the PDF below zero 32 | betaf = beta(0.5,shape+0.5) 33 | FckP1 = gamma.cdf(cstd,shape+1,scale=1) 34 | F2c2k = gamma.cdf(2*cstd,2*shape,scale=1) 35 | indz = np.less(obs,0.254) 36 | indp = np.greater_equal(obs,0.254) 37 | ystd = (obs[indp]-shift)/scale 38 | Fyk = gamma.cdf(ystd,shape,scale=1) 39 | FykP1 = gamma.cdf(ystd,shape+1,scale=1) 40 | crps[indz] = cstd*(2.*Fck-1.) - cstd*np.square(Fck) \ 41 | + shape*(1.+2.*Fck*FckP1-np.square(Fck)-2*FckP1) \ 42 | - (shape/float(math.pi))*betaf*(1.-F2c2k) 43 | crps[indp] = ystd*(2.*Fyk-1.) - cstd*np.square(Fck) \ 44 | + shape*(1.+2.*Fck*FckP1-np.square(Fck)-2*FykP1) \ 45 | - (shape/float(math.pi))*betaf*(1.-F2c2k) 46 | return scale*ma.mean(crps) + penalty 47 | 48 | 49 | 50 | #============================================================================== 51 | # Load the PRISM gridded precipitation data and fit monthly CSGD distribution 52 | #============================================================================== 53 | 54 | f1 = np.load("/Users/mscheuerer/Desktop/CalifAPCP/data/precip_PRISM_cal_19810101_20171231.npz") 55 | #list(f1) 56 | obs_precip = f1['precip'] 57 | obs_lat = f1['lat'] 58 | obs_lon = f1['lon'] 59 | obs_dates_ord = f1['dates_ord'] 60 | obs_dates = f1['dates'] 61 | f1.close() 62 | 63 | ndts, nxy = obs_precip.shape 64 | 65 | obs_precip_week = np.zeros((ndts-6,nxy), dtype=np.float32) 66 | for iday in range(7): 67 | obs_precip_week += obs_precip[iday:(ndts-6+iday),:] 68 | 69 | ndts, nxy = obs_precip_week.shape 70 | 71 | obs_precip_week[obs_precip_week<0.254] = 0. 
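# ------------------------------------------------------------------------------
# Illustration (a minimal sketch with synthetic data; not part of the original
# script): crpsClimoCSGD defined above can be minimized over the shape parameter
# to fit a climatological CSGD at a single grid point, with PoP and mean held
# fixed at their empirical values. The sample `obs_ex` and the search bracket
# (0.02, 5.) are assumptions made purely for this example.
import numpy as np
from scipy.optimize import minimize_scalar

rng_ex = np.random.default_rng(1)
obs_ex = np.where(rng_ex.random(2000)<0.6, 0., rng_ex.gamma(1.2, 25., 2000))  # synthetic 7-day totals (mm)
obs_ex[obs_ex<0.254] = 0.                        # amounts below 0.254 mm count as zero

pop_ex = np.mean(obs_ex>0.)                      # empirical probability of precipitation
mean_ex = np.mean(obs_ex)                        # empirical mean weekly amount

fit_ex = minimize_scalar(crpsClimoCSGD, bounds=(0.02,5.), args=(obs_ex,mean_ex,pop_ex), method='bounded')
print(pop_ex, mean_ex, fit_ex.x)                 # fit_ex.x is the CRPS-optimal shape parameter
# ------------------------------------------------------------------------------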
72 | obs_dates_ord = obs_dates_ord[:ndts] 73 | obs_dates = obs_dates[:ndts] 74 | 75 | 76 | pop_month = np.zeros((12,nxy), dtype=np.float32) 77 | mean_month = np.zeros((12,nxy), dtype=np.float32) 78 | shape_month = np.zeros((12,nxy), dtype=np.float32) 79 | 80 | mid_mon = [14,45,73,104,134,165,195,226,257,287,318,348] 81 | 82 | for imonth in range(0,12): 83 | date2 = datetime.datetime(2001,1,1)+datetime.timedelta(mid_mon[imonth]) 84 | fnd_month = np.nonzero(obs_dates[:,1]==date2.month)[0] 85 | fnd_day = np.nonzero(obs_dates[fnd_month,2]==date2.day)[0] 86 | day_array = [] 87 | for windowval in range(-30,31): 88 | day_array.extend(fnd_month[fnd_day]+windowval) 89 | day_array = np.sort(np.array(day_array)) 90 | day_array = day_array[day_array>=0] 91 | day_array = day_array[day_array<ndts] -------------------------------------------------------------------------------- /CodeForGraphics.py: -------------------------------------------------------------------------------- 82 | imin = np.where(wnd_ind>=0)[0][0] # are considered to estimate climatological PoP 83 | imax = np.where(wnd_ind<ndts)[0][-1] 84 | 85 | for ixy in range(nxy): 86 | y = obs_precip_week[wnd_ind[imin:imax+1],ixy] 87 | pop[ixy] = np.mean(y>0.254) 88 | mean[ixy] = np.mean(y) 89 | thr[ixy,0] = 0.254 90 | qtlv = 1. + pop[ixy]*((np.arange(1,ncat-1)/float(ncat-1))-1.) 91 | thr[ixy,1:] = np.quantile(y,qtlv) 92 | pctl[ixy,:] = np.percentile(y,np.arange(1,100)) 93 | 94 | 95 | itnf = np.logical_and(obs_lon==-120.625,obs_lat==39.375) # coordinates of our example grid point in Tahoe National Forest 96 | ilat = (obs_lat==39.375) # latitude of our example transect 97 | 98 | 99 | plt.figure(figsize=(10,4)) 100 | 101 | plt.subplot(1, 2, 1, xlim=(-124.9,-113.8), ylim=(31.9,42.5), \ 102 | xticks=[-124,-122,-120,-118,-116,-114], xticklabels=['-124'+'\u00b0','-122'+'\u00b0','-120'+'\u00b0','-118'+'\u00b0','-116'+'\u00b0','-114'+'\u00b0'], \ 103 | yticks=[32,34,36,38,40,42], yticklabels=['32'+'\u00b0','34'+'\u00b0','36'+'\u00b0','38'+'\u00b0','40'+'\u00b0','42'+'\u00b0']) 104 | plt.scatter(obs_lon,obs_lat,c=mean,marker='s',cmap=clmcmp,s=28,lw=.1,vmin=0,vmax=105,edgecolors=[.2,.2,.2]) 105 | cbar = plt.colorbar() 106 | cbar.ax.set_yticklabels(['0 mm','20 mm','40 mm','60 mm','80 mm','100 mm']) 107 | plt.plot([-124.5,-119.],[39.375,39.375],c='black',linewidth=2) 108 | plt.scatter(obs_lon[itnf],obs_lat[itnf],c='red',marker='*',zorder=3) 109 | plt.title(' Average 7-day precipitation amounts in January\n',fontsize=12) 110 | 111 | plt.subplot(1, 2, 2, xlim=(-123.8,-120), \ 112 | xticks=[-123.5,-122.5,-121.5,-120.5], xticklabels=['-123.5'+'\u00b0','-122.5'+'\u00b0','-121.5'+'\u00b0','-120.5'+'\u00b0'], \ 113 | yticks=[0,100,200,300,400], yticklabels=['0 mm','100 mm','200 mm','300 mm','400 mm']) 114 | plt.scatter(np.repeat(obs_lon[ilat,np.newaxis],99,axis=1),pctl[ilat,:],c='DodgerBlue',s=30) 115 | plt.plot(np.repeat(obs_lon[ilat,np.newaxis],ncat-1,axis=1),thr[ilat,:],c='black',linewidth=1.5) 116 | plt.title('Category boundaries along meridional transect\n',fontsize=12) 117 | 118 | plt.tight_layout() 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | ################################################################################################### 127 | # # 128 | # Figure 2: ANN schematic and probability forecasts for case study at Tahoe National Forest # 129 | # # 130 | ################################################################################################### 131 | 132 | 133 | iyyyy = 2017 134 | imm = 1 135 | idd = 8 136 | 137 | itnf = np.logical_and(obs_lon==-120.625,obs_lat==39.375) # coordinates of our example grid point in Tahoe National Forest 138 | 139 | f1 = np.load("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz") 140 | #list(f1) 141 | obs_lat = f1['obs_lat'] 142 | obs_lon = f1['obs_lon'] 143 | obs_dates_ord = f1['obs_dates_ord'] 144 | pop_doy = f1['pop_doy']
145 | thr_doy = f1['thr_doy'] 146 | qtev_doy = f1['qtev_doy'] 147 | obs_precip_week = f1['apcp_obs'] 148 | f1.close() 149 | 150 | ntms, nxy = obs_precip_week.shape 151 | 152 | for ivdate in range(ntms): 153 | if datetime.date.fromordinal(int(obs_dates_ord[ivdate])) == datetime.date(iyyyy,imm,idd): 154 | break 155 | 156 | 157 | f2 = np.load("/home/michael/Desktop/CalifAPCP/data/mod_precip_cal.npz") 158 | mod_dates = f2['dates_ord'] 159 | f2.close() 160 | 161 | ndts, nyrs, nlts = mod_dates.shape 162 | 163 | iidate = np.zeros((3,2),dtype=np.int16) # date and year index for selected date 164 | 165 | for idt in range(ndts): 166 | for iyr in range(nyrs): 167 | for ilt in range(3): 168 | if datetime.date.fromordinal(int(mod_dates[idt,iyr,6+ilt*7])) == datetime.date(iyyyy,imm,idd): 169 | iidate[ilt,0] = idt 170 | iidate[ilt,1] = iyr 171 | 172 | 173 | f3 = np.load("/home/michael/Desktop/CalifAPCP/stats/ensemble_stats_week2_ANN_yr"+str(iidate[0,1])+".npz") 174 | doy_vdate = f3['doy_dts'][iidate[0,0]] 175 | apcp_ens_pit = f3['apcp_ens_pit_verif'][iidate[0,0],:,:] 176 | f3.close() 177 | 178 | 179 | prob_cat_tnf = np.zeros((4,20),dtype=np.float32) # Probability forecast for each category at TNF grid point 180 | 181 | for ilt in range(3): 182 | f5 = np.load("/home/michael/Desktop/CalifAPCP/forecasts/ANN-efi/probfcst_10-l1_week"+str(ilt+2)+"_yr"+str(iidate[ilt,1])+".npz") 183 | prob_cat_tnf[ilt,:] = f5['prob_fcst_cat'][iidate[ilt,0],itnf,:] 184 | f5.close() 185 | 186 | prob_cat_tnf[3,:] = np.append(1.-pop_doy[doy_vdate,itnf],np.repeat(pop_doy[doy_vdate,itnf]/(ncat-1),ncat-1)) # Clim. probabilities 187 | 188 | 189 | ## Set positions for ANN schematic 190 | 191 | npr = 3 192 | nhd = 5 193 | ncl = 4 194 | 195 | size = 450. 196 | 197 | pcl_x = np.full(ncl,2.5,dtype=np.float32) 198 | pcl_y = np.arange(5.5,5.5+ncl) 199 | pcl_c = np.full(ncl,0.2,dtype=np.float32) 200 | 201 | pred_x = np.full(npr,1,dtype=np.float32) 202 | pred_y = np.arange(1,1+npr) 203 | pred_c = np.full(npr,0.4,dtype=np.float32) 204 | 205 | hid1_x = np.full(nhd,2,dtype=np.float32) 206 | hid1_y = np.arange(0,nhd) 207 | hid1_c = np.full(nhd,0.6,dtype=np.float32) 208 | 209 | hid2_x = np.full(ncl,3,dtype=np.float32) 210 | hid2_y = np.arange(0.5,ncl+0.5) 211 | hid2_c = np.full(ncl,0.6,dtype=np.float32) 212 | 213 | out_x = np.full(ncl,4.5,dtype=np.float32) 214 | out_y = np.arange(3,3+ncl) 215 | out_c = np.full(ncl,0.8,dtype=np.float32) 216 | 217 | x = np.concatenate([pcl_x,pred_x,hid1_x,hid2_x,out_x-.5]) 218 | y = np.concatenate([pcl_y,pred_y,hid1_y,hid2_y,out_y]) 219 | colors = np.concatenate([pcl_c,pred_c,hid1_c,hid2_c+.1,out_c]) 220 | 221 | 222 | ## Now: actual plot 223 | 224 | width = 0.2 225 | 226 | plt.figure(figsize=(12,4)) 227 | 228 | plt.subplot(1, 2, 1, xlim=[0.8,4.55]) 229 | plt.scatter(x, y, c=colors, s=size, alpha=0.5) 230 | plt.axis('off') 231 | 232 | for i in range(ncl): 233 | plt.arrow(pcl_x[i]+.15,pcl_y[i],out_x[i]-0.8-pcl_x[i],out_y[i]-pcl_y[i]+.1, head_width=.05, length_includes_head=True, color='k') 234 | plt.arrow(hid2_x[i]+.15,hid2_y[i],out_x[i]-0.8-hid2_x[i],out_y[i]-hid2_y[i]-.1, head_width=.05, length_includes_head=True, color='k') 235 | 236 | for i in range(npr): 237 | for j in range(nhd): 238 | plt.arrow(pred_x[i]+.15,pred_y[i],hid1_x[j]-0.3-pred_x[i],.95*(hid1_y[j]-pred_y[i]), head_width=.05, length_includes_head=True, color='k') 239 | 240 | for i in range(nhd): 241 | for j in range(ncl): 242 | plt.arrow(hid1_x[i]+.15,hid1_y[i],hid2_x[j]-0.3-hid1_x[i],.95*(hid2_y[j]-hid1_y[i]), head_width=.05, length_includes_head=True,
color='k') 243 | 244 | for i in range(nhd): 245 | plt.text(hid1_x[i],hid1_y[i],'ELU',horizontalalignment='center',verticalalignment='center',fontsize=9) 246 | 247 | for i in range(ncl): 248 | plt.text(hid2_x[i],hid2_y[i],'ELU',horizontalalignment='center',verticalalignment='center',fontsize=9) 249 | 250 | for i in range(ncl): 251 | plt.text(out_x[i]-.5,out_y[i],'S',horizontalalignment='center',verticalalignment='center',fontsize=9) 252 | 253 | plt.text(1.35,7.5,'climatological\n log probabilities') 254 | plt.text(.6,-.4,'input layer\n (predictors)') 255 | plt.text(1.4,3.65,r'$W_1$') 256 | plt.text(1.8,-.9,'hidden layer') 257 | plt.text(2.5,3.9,r'$W_2$') 258 | plt.text(2.8,-.8,'preliminary\n output layer') 259 | plt.text(3.85,1.8,'output\n layer') 260 | plt.text(0.7,8.6,'a)',fontsize=18) 261 | 262 | ax = plt.subplot(1, 2, 2, xticks=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19], xticklabels=['1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','20']) 263 | ax.spines['top'].set_visible(False) 264 | ax.spines['right'].set_visible(False) 265 | ax.spines['bottom'].set_visible(False) 266 | ax.spines['left'].set_visible(False) 267 | ax.bar(np.arange(ncat)-3*width/2, prob_cat_tnf[0,:], width, label='week-2 probability forecast', color='orange') 268 | ax.bar(np.arange(ncat)-width/2, prob_cat_tnf[1,:], width, label='week-3 probability forecast', color='seagreen') 269 | ax.bar(np.arange(ncat)+width/2, prob_cat_tnf[2,:], width, label='week-4 probability forecast', color='r') 270 | ax.bar(np.arange(ncat)+3*width/2, prob_cat_tnf[3,:], width, label='climatological probability', color='b') 271 | ax.legend(loc=9,fontsize=11) 272 | plt.text(-0.6,0.21,'b)',fontsize=18) 273 | 274 | plt.tight_layout() 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | ################################################################################################### 283 | # # 284 | # Figure 3: Illustrate conversion of probability forecasts at TNF to predictive CDF # 285 | # # 286 | ################################################################################################### 287 | 288 | cdf_tnf = np.cumsum(prob_cat_tnf,axis=1)[:,:(ncat-1)] 289 | chf_tnf = -np.log(1.-cdf_tnf) 290 | 291 | xx = np.arange(315.) 292 | 293 | cdf_ip_tnf = np.zeros((4,len(xx)),dtype=np.float32) 294 | chf_ip_tnf = np.zeros((4,len(xx)),dtype=np.float32) 295 | 296 | for ithr in range(4): 297 | itp_fct = interp1d(thr_doy[doy_vdate,itnf,:].squeeze(), chf_tnf[ithr,:], kind='linear',fill_value='extrapolate') 298 | chf_ip_tnf[ithr,:] = itp_fct(xx) 299 | cdf_ip_tnf[ithr,:] = 1.-np.exp(-itp_fct(xx)) 300 | 301 | 302 | plt.figure(figsize=(15,4)) 303 | 304 | ax = plt.subplot(1, 3, 1, xlim=[-5,320], xticks=[0,50,100,150,200,250,300], xticklabels=['0 mm','50 mm','100 mm','150 mm','200 mm','250 mm','300 mm'], yticks=[0,.1,.2,.3,.4,.5,.6,.7,.8,.9,1.]) 305 | ax.scatter(thr_doy[doy_vdate,itnf,:], cdf_tnf[0,:], label='week-2 cum. probabilities', color='orange') 306 | ax.scatter(thr_doy[doy_vdate,itnf,:], cdf_tnf[1,:], label='week-3 cum. probabilities', color='seagreen') 307 | ax.scatter(thr_doy[doy_vdate,itnf,:], cdf_tnf[2,:], label='week-4 cum. probabilities', color='r') 308 | ax.scatter(thr_doy[doy_vdate,itnf,:], cdf_tnf[3,:], label='clim. cum. 
probabilities', color='b') 309 | #ax.set_title('Cumulative probabilities\n') 310 | ax.legend(loc=4,fontsize=10) 311 | plt.text(10,0.91,'a)',fontsize=18) 312 | 313 | ax = plt.subplot(1, 3, 2, xlim=[-5,320], xticks=[0,50,100,150,200,250,300], xticklabels=['0 mm','50 mm','100 mm','150 mm','200 mm','250 mm','300 mm'], yticks=[0,.5,1.,1.5,2.,2.5,3.,3.5]) 314 | ax.scatter(thr_doy[doy_vdate,itnf,:], chf_tnf[0,:], label='week-2 cum. hazard', color='orange') 315 | ax.plot(xx, chf_ip_tnf[0,:], color='orange') 316 | ax.scatter(thr_doy[doy_vdate,itnf,:], chf_tnf[1,:], label='week-3 cum. hazard', color='seagreen') 317 | ax.plot(xx, chf_ip_tnf[1,:], color='seagreen') 318 | ax.scatter(thr_doy[doy_vdate,itnf,:], chf_tnf[2,:], label='week-4 cum. hazard', color='r') 319 | ax.plot(xx, chf_ip_tnf[2,:], color='r') 320 | ax.scatter(thr_doy[doy_vdate,itnf,:], chf_tnf[3,:], label='clim. cum. hazard', color='b') 321 | ax.plot(xx, chf_ip_tnf[3,:], color='b') 322 | #ax.set_title('Cumulative hazard function\n') 323 | ax.legend(loc=(.15,.7),fontsize=10) 324 | plt.text(10,3.5,'b)',fontsize=18) 325 | 326 | ax = plt.subplot(1, 3, 3, xlim=[-5,320], xticks=[0,50,100,150,200,250,300], xticklabels=['0 mm','50 mm','100 mm','150 mm','200 mm','250 mm','300 mm'], yticks=[0,.1,.2,.3,.4,.5,.6,.7,.8,.9,1.]) 327 | ax.scatter(thr_doy[doy_vdate,itnf,:], cdf_tnf[0,:], label='week-2 predictive CDF', color='orange') 328 | ax.plot(xx, cdf_ip_tnf[0,:], color='orange') 329 | ax.scatter(thr_doy[doy_vdate,itnf,:], cdf_tnf[1,:], label='week-3 predictive CDF', color='seagreen') 330 | ax.plot(xx, cdf_ip_tnf[1,:], color='seagreen') 331 | ax.scatter(thr_doy[doy_vdate,itnf,:], cdf_tnf[2,:], label='week-4 predictive CDF', color='r') 332 | ax.plot(xx, cdf_ip_tnf[2,:], color='r') 333 | ax.scatter(thr_doy[doy_vdate,itnf,:], cdf_tnf[3,:], label='clim. predictive CDF', color='b') 334 | ax.plot(xx, cdf_ip_tnf[3,:], color='b') 335 | #ax.set_title('Interpolated CDF\n') 336 | ax.legend(loc=4,fontsize=10) 337 | plt.text(10,0.91,'c)',fontsize=18) 338 | 339 | plt.tight_layout() 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | ################################################################################################### 348 | # # 349 | # Figure 4: Illustrate construction of basis functions # 350 | # # 351 | ################################################################################################### 352 | 353 | 354 | f1 = np.load("/home/michael/Desktop/CalifAPCP/data/precip_PRISM_cal_19810101_20171231.npz") 355 | #list(f1) 356 | obs_precip = f1['precip'] 357 | obs_lat = f1['lat'] 358 | obs_lon = f1['lon'] 359 | obs_dates_ord = f1['dates_ord'] 360 | obs_dates = f1['dates'] 361 | f1.close() 362 | 363 | ndts, nxy = obs_precip.shape 364 | 365 | 366 | r_basis = 7. 
367 | lon_ctr = np.outer(np.ones(3),np.arange(-124,-115,3.5)).reshape(9)[[0,1,4,5,8]] 368 | lat_ctr = np.outer(np.arange(33,42,3.5)[::-1],np.ones(3)).reshape(9)[[0,1,4,5,8]] 369 | 370 | dst_lon = np.abs(np.subtract.outer(obs_lon,lon_ctr)) 371 | dst_lat = np.abs(np.subtract.outer(obs_lat,lat_ctr)) 372 | dst = np.sqrt(dst_lon**2+dst_lat**2) 373 | rbf = np.where(dst>r_basis,0.,(1.-(dst/r_basis)**3)**3) 374 | basis = rbf/np.sum(rbf,axis=1)[:,None] 375 | nbs = basis.shape[1] 376 | 377 | 378 | plt.figure(figsize=(18.5,7.5)) 379 | 380 | for ibs in range(5): 381 | plt.subplot(2, 5, ibs+1, xlim=(-124.9,-113.8), ylim=(31.9,42.5), \ 382 | xticks=[-124,-122,-120,-118,-116,-114], xticklabels=['-124'+'\u00b0','-122'+'\u00b0','-120'+'\u00b0','-118'+'\u00b0','-116'+'\u00b0','-114'+'\u00b0'], \ 383 | yticks=[32,34,36,38,40,42], yticklabels=['32'+'\u00b0','34'+'\u00b0','36'+'\u00b0','38'+'\u00b0','40'+'\u00b0','42'+'\u00b0']) 384 | plt.scatter(obs_lon,obs_lat,c=rbf[:,ibs],marker='s',cmap=pcpcmp,s=28,lw=.1,vmin=0.0,vmax=1.0,edgecolors=[.2,.2,.2]) 385 | plt.scatter(lon_ctr[ibs],lat_ctr[ibs],c='black',marker='*',zorder=3) 386 | #cbar = plt.colorbar() 387 | plt.title(' Preliminary basis function '+str(ibs+1)+'\n',fontsize=12) 388 | plt.subplot(2, 5, ibs+6, xlim=(-124.9,-113.8), ylim=(31.9,42.5), \ 389 | xticks=[-124,-122,-120,-118,-116,-114], xticklabels=['-124'+'\u00b0','-122'+'\u00b0','-120'+'\u00b0','-118'+'\u00b0','-116'+'\u00b0','-114'+'\u00b0'], \ 390 | yticks=[32,34,36,38,40,42], yticklabels=['32'+'\u00b0','34'+'\u00b0','36'+'\u00b0','38'+'\u00b0','40'+'\u00b0','42'+'\u00b0']) 391 | plt.scatter(obs_lon,obs_lat,c=basis[:,ibs],marker='s',cmap=pcpcmp,s=28,lw=.1,vmin=0.0,vmax=0.68,edgecolors=[.2,.2,.2]) 392 | #plt.scatter(lon_ctr[ibs],lat_ctr[ibs],c='black',marker='*',zorder=3) 393 | #cbar = plt.colorbar() 394 | plt.title(' Basis function '+str(ibs+1)+'\n',fontsize=12) 395 | 396 | plt.tight_layout() 397 | 398 | 399 | 400 | 401 | 402 | 403 | ################################################################################################### 404 | # # 405 | # Figure 5: Schematic to explain CNN-based modeling approach # 406 | # # 407 | ################################################################################################### 408 | 409 | 410 | plt.figure(figsize=(11,4.5)) 411 | 412 | ax = plt.subplot(2, 1, 1, xlim=[.95,5.15], ylim=[-.8,1.2]) 413 | 414 | plt.text(0.88,0.9,'a)',fontsize=16) 415 | rect = patches.Rectangle((1.15,-.43),3.3,1.6, edgecolor='r', facecolor="none") 416 | ax.add_patch(rect) 417 | plt.axis('off') 418 | plt.text(1.35,.85,'CNN',color='r',fontsize=18) 419 | 420 | plt.scatter(np.full(1,1.), np.zeros(1), marker='s', color='w', s=120., alpha=1., lw=1., edgecolors=[.01,.01,.01]) 421 | plt.scatter(np.full(1,1.)+0.02, np.zeros(1)-0.08, marker='s', color='w', s=120., alpha=1., lw=1., edgecolors=[.01,.01,.01]) 422 | plt.text(.95,.25,'ERA5',fontsize=8) 423 | plt.arrow(1.1,0.,.15,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 424 | 425 | plt.text(1.29,-.1,'Conv2D',fontsize=12) 426 | rect = patches.Rectangle((1.27,-.25),.3,.45, edgecolor='k', facecolor="none") 427 | ax.add_patch(rect) 428 | plt.arrow(1.6,0.,.08,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 429 | plt.text(1.78,0.,'max',fontsize=10) 430 | plt.text(1.75,-.18,'pooling',fontsize=10) 431 | rect = patches.Rectangle((1.72,-.25),.26,.45, edgecolor='k', facecolor="none") 432 | ax.add_patch(rect) 433 | plt.arrow(2.02,0.,.12,0., head_width=.08, head_length=0.02, 
length_includes_head=True, color='k') 434 | 435 | plt.text(2.19,-.1,'Conv2D',fontsize=12) 436 | rect = patches.Rectangle((2.17,-.25),.3,.45, edgecolor='k', facecolor="none") 437 | ax.add_patch(rect) 438 | plt.arrow(2.5,0.,.08,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 439 | plt.text(2.68,0.,'max',fontsize=10) 440 | plt.text(2.65,-.18,'pooling',fontsize=10) 441 | rect = patches.Rectangle((2.62,-.25),.26,.45, edgecolor='k', facecolor="none") 442 | ax.add_patch(rect) 443 | plt.arrow(2.92,0.,.12,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 444 | 445 | plt.text(3.09,0.,'Hidden',fontsize=10) 446 | plt.text(3.1,-0.18,'Layer',fontsize=10) 447 | rect = patches.Rectangle((3.07,-.25),.24,.45, edgecolor='k', facecolor="none") 448 | ax.add_patch(rect) 449 | plt.arrow(3.34,0.,.12,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 450 | 451 | plt.text(3.59,0.,'Basis',fontsize=10) 452 | plt.text(3.5,-0.18,'Coefficients',fontsize=10) 453 | rect = patches.Rectangle((3.48,-.25),.36,.45, edgecolor='k', facecolor="none") 454 | ax.add_patch(rect) 455 | plt.arrow(3.87,0.,.12,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 456 | 457 | plt.text(3.97,.9,'Basis',fontsize=10) 458 | plt.text(3.92,0.72,'Functions',fontsize=10) 459 | rect = patches.Rectangle((3.9,.66),.3,.42, edgecolor='k', facecolor="none") 460 | ax.add_patch(rect) 461 | plt.arrow(4.05,.56,0.,-.35, head_width=.02, head_length=0.08, length_includes_head=True, color='k') 462 | plt.scatter(4.05,0.0, color='w', s=180, alpha=1., lw=1., edgecolors=[.01,.01,.01]) 463 | plt.scatter(4.05,0.0, color='k', s=6) 464 | plt.arrow(4.11,0.,.12,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 465 | 466 | plt.text(4.27,0.,'Preliminary',fontsize=10) 467 | plt.text(4.31,-0.18,'Output',fontsize=10) 468 | rect = patches.Rectangle((4.25,-.25),.35,.45, edgecolor='k', facecolor="none") 469 | ax.add_patch(rect) 470 | 471 | plt.text(4.6,.9,'Log. 
Clim.',fontsize=10) 472 | plt.text(4.55,0.72,'Probabilities',fontsize=10) 473 | rect = patches.Rectangle((4.53,.66),.38,.42, edgecolor='k', facecolor="none") 474 | ax.add_patch(rect) 475 | plt.arrow(4.64,0.,.18,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 476 | plt.arrow(4.73,.56,0.1,-.5, head_width=.02, head_length=0.08, length_includes_head=True, color='k') 477 | 478 | plt.text(4.89,-.1,'Output',fontsize=12) 479 | rect = patches.Rectangle((4.87,-.25),.27,.45, edgecolor='k', facecolor="none") 480 | ax.add_patch(rect) 481 | 482 | 483 | plt.subplot(2, 1, 2, xlim=[0.8,3.55]) 484 | plt.text(0.76,3.,'b)',fontsize=16) 485 | 486 | plt.scatter(np.full(2,1.), np.arange(0,4,3), marker='s', color='w', s=120., alpha=1., lw=1., edgecolors=[.01,.01,.01]) 487 | plt.scatter(np.full(2,1.)+0.02, np.arange(0,4,3)-0.08, marker='s', color='w', s=120., alpha=1., lw=1., edgecolors=[.01,.01,.01]) 488 | plt.scatter(np.full(3,1.), np.arange(1.2,2.6,0.5), color='k', s=10) 489 | plt.axis('off') 490 | plt.text(.92,3.35,'IFS m1',fontsize=8) 491 | plt.text(.92,0.35,'IFS m11',fontsize=8) 492 | 493 | plt.arrow(1.1,0.,.2,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 494 | plt.text(1.15,0.15,'CNN',fontsize=8,color='r') 495 | plt.arrow(1.1,3.,.2,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 496 | plt.text(1.15,3.15,'CNN',fontsize=8,color='r') 497 | plt.arrow(1.1,1.5,.2,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 498 | plt.text(1.15,1.65,'CNN',fontsize=8,color='r') 499 | 500 | plt.text(1.35,3.,r'$x_{s,i}^1$') 501 | plt.text(1.35,0.,r'$x_{s,i}^{11}$') 502 | plt.scatter(np.full(3,1.4), np.arange(1.2,2.6,0.5), color='k', s=10) 503 | 504 | plt.arrow(1.47,0.,.2,1.2, head_width=.025, head_length=0.08, length_includes_head=True, color='k') 505 | plt.arrow(1.47,1.5,.2,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 506 | plt.arrow(1.47,3.,.2,-1.2, head_width=.025, head_length=0.08, length_includes_head=True, color='k') 507 | 508 | plt.text(1.71,1.4,r'${\widebar x}_{s,i}$') 509 | plt.arrow(1.85,1.5,.2,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 510 | plt.text(1.86,1.65,'relaxation',fontsize=8) 511 | 512 | plt.text(2.1,1.4,r'$\eta\/{\widebar x}_{s,i}$') 513 | plt.arrow(2.28,1.5,.2,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 514 | plt.text(2.12,3.,r'$log(p_{cl,s,i})$',fontsize=10) 515 | plt.arrow(2.28,2.8,.2,-1.1, head_width=.025, head_length=0.08, length_includes_head=True, color='k') 516 | 517 | plt.text(2.5,1.4,r'$z_{s,i}(\eta)$') 518 | plt.arrow(2.7,1.5,.2,0., head_width=.08, head_length=0.02, length_includes_head=True, color='k') 519 | plt.text(2.95,1.4,r'$p_{s,i}(\eta)$') 520 | 521 | plt.tight_layout() 522 | 523 | 524 | 525 | 526 | 527 | ################################################################################################### 528 | # # 529 | # Figure 6: Plots of scores for the discussion of tuning parameters # 530 | # # 531 | ################################################################################################### 532 | 533 | 534 | x_wk2 = np.repeat(np.arange(20)[:,np.newaxis],5,axis=1) 535 | x_wk3 = np.repeat(np.arange(22,42)[:,np.newaxis],5,axis=1) 536 | x_wk4 = np.repeat(np.arange(44,64)[:,np.newaxis],5,axis=1) 537 | 538 | crps_10cl_m0 = np.zeros((20,5,3),dtype=np.float32) 539 | crps_20cl_m0 = np.zeros((20,5,3),dtype=np.float32) 540 | crps_30cl_m0 = np.zeros((20,5,3),dtype=np.float32) 541 | 
ccces_20cl_m0 = np.zeros((20,5,3),dtype=np.float32) 542 | ccces_20cl_m1 = np.zeros((20,5,3),dtype=np.float32) 543 | ccces_20cl_m2 = np.zeros((20,5,3),dtype=np.float32) 544 | 545 | 546 | for ilead in range(3): 547 | clead = ['week2','week3','week4'][ilead] 548 | f1 = np.load("/home/michael/Desktop/CalifAPCP/tuning/efi-10cl-m0-l1_"+clead+".npz") 549 | crps_10cl_m0[:,:,ilead] = f1['opt_valid_crps'] 550 | f1.close() 551 | f2 = np.load("/home/michael/Desktop/CalifAPCP/tuning/efi-20cl-m0-l1_"+clead+".npz") 552 | crps_20cl_m0[:,:,ilead] = f2['opt_valid_crps'] 553 | f2.close() 554 | f3 = np.load("/home/michael/Desktop/CalifAPCP/tuning/efi-30cl-m0-l1_"+clead+".npz") 555 | crps_30cl_m0[:,:,ilead] = f3['opt_valid_crps'] 556 | f3.close() 557 | f4 = np.load("/home/michael/Desktop/CalifAPCP/tuning/efi-20cl-m0-l1_"+clead+".npz") 558 | ccces_20cl_m0[:,:,ilead] = f4['opt_valid_scores'] 559 | f4.close() 560 | f5 = np.load("/home/michael/Desktop/CalifAPCP/tuning/efi-20cl-m1-l1_"+clead+".npz") 561 | ccces_20cl_m1[:,:,ilead] = f5['opt_valid_scores'] 562 | f5.close() 563 | f6 = np.load("/home/michael/Desktop/CalifAPCP/tuning/efi-20cl-m2-l1_"+clead+".npz") 564 | ccces_20cl_m2[:,:,ilead] = f6['opt_valid_scores'] 565 | f6.close() 566 | 567 | 568 | y1c_wk2 = 1.-np.sort(crps_10cl_m0[:,:,0]/crps_20cl_m0[:,:,0]) 569 | y2c_wk2 = 1.-np.sort(crps_30cl_m0[:,:,0]/crps_20cl_m0[:,:,0]) 570 | 571 | y1c_wk3 = 1.-np.sort(crps_10cl_m0[:,:,1]/crps_20cl_m0[:,:,1]) 572 | y2c_wk3 = 1.-np.sort(crps_30cl_m0[:,:,1]/crps_20cl_m0[:,:,1]) 573 | 574 | y1c_wk4 = 1.-np.sort(crps_10cl_m0[:,:,2]/crps_20cl_m0[:,:,2]) 575 | y2c_wk4 = 1.-np.sort(crps_30cl_m0[:,:,2]/crps_20cl_m0[:,:,2]) 576 | 577 | y1m_wk2 = 1.-np.sort(ccces_20cl_m1[:,:,0]/ccces_20cl_m0[:,:,0]) 578 | y2m_wk2 = 1.-np.sort(ccces_20cl_m2[:,:,0]/ccces_20cl_m0[:,:,0]) 579 | 580 | y1m_wk3 = 1.-np.sort(ccces_20cl_m1[:,:,1]/ccces_20cl_m0[:,:,1]) 581 | y2m_wk3 = 1.-np.sort(ccces_20cl_m2[:,:,1]/ccces_20cl_m0[:,:,1]) 582 | 583 | y1m_wk4 = 1.-np.sort(ccces_20cl_m1[:,:,2]/ccces_20cl_m0[:,:,2]) 584 | y2m_wk4 = 1.-np.sort(ccces_20cl_m2[:,:,2]/ccces_20cl_m0[:,:,2]) 585 | 586 | 587 | 588 | f1 = np.load("/home/michael/Desktop/CalifAPCP/tuning/cnn-m0-drpt-f48.npz") 589 | ccces_m0f48 = f1['opt_valid_scores'] 590 | f1.close() 591 | 592 | f2 = np.load("/home/michael/Desktop/CalifAPCP/tuning/cnn-m1-drpt-f48.npz") 593 | ccces_m1f48 = f2['opt_valid_scores'] 594 | f2.close() 595 | 596 | f3 = np.load("/home/michael/Desktop/CalifAPCP/tuning/cnn-m2-drpt-f48.npz") 597 | ccces_m2f48 = f3['opt_valid_scores'] 598 | f3.close() 599 | 600 | f4 = np.load("/home/michael/Desktop/CalifAPCP/tuning/cnn-m0-drpt-f44.npz") 601 | ccces_m0f44 = f4['opt_valid_scores'] 602 | f4.close() 603 | 604 | f5 = np.load("/home/michael/Desktop/CalifAPCP/tuning/cnn-m0-drpt-f88.npz") 605 | ccces_m0f88 = f5['opt_valid_scores'] 606 | f5.close() 607 | 608 | f6 = np.load("/home/michael/Desktop/CalifAPCP/tuning/cnn-m0-drpt-f816.npz") 609 | ccces_m0f816 = f6['opt_valid_scores'] 610 | f6.close() 611 | 612 | f7 = np.load("/home/michael/Desktop/CalifAPCP/tuning/cnn-m0-l1-f48.npz") 613 | ccces_m0f48_l1 = f7['opt_valid_scores'] 614 | f7.close() 615 | 616 | 617 | y1m = 1.-np.sort(ccces_m1f48/ccces_m0f48) 618 | y2m = 1.-np.sort(ccces_m2f48/ccces_m0f48) 619 | y3r = 1.-np.sort(ccces_m0f48_l1/ccces_m0f48) 620 | 621 | y1f = 1.-np.sort(ccces_m0f44/ccces_m0f48) 622 | y2f = 1.-np.sort(ccces_m0f88/ccces_m0f48) 623 | y3f = 1.-np.sort(ccces_m0f816/ccces_m0f48) 624 | 625 | 626 | 627 | plt.figure(figsize=(16,12)) 628 | 629 | plt.subplot(3,2,1, ylim=[-0.0077,0.0077]) 
630 | plt.scatter(x_wk2,y1c_wk2,c='orange',label='week-2') 631 | plt.scatter(x_wk3,y1c_wk3,c='seagreen',label='week-3') 632 | plt.scatter(x_wk4,y1c_wk4,c='r',label='week-4') 633 | plt.tick_params(axis='x',which='both',bottom=False,top=False,labelbottom=False) 634 | plt.legend(loc=(0.52,0.68),fontsize=12) 635 | plt.title('CRPSS: 10 vs 20 categories, 1 x 10 nodes',fontsize=14) 636 | plt.text(-1,0.0063,'a)',fontsize=16) 637 | plt.axhline(y=0) 638 | for i in range(20): 639 | plt.plot(x_wk2[i,::4],y1c_wk2[i,::4],c='orange') 640 | plt.plot(x_wk3[i,::4],y1c_wk3[i,::4],c='seagreen') 641 | plt.plot(x_wk4[i,::4],y1c_wk4[i,::4],c='r') 642 | 643 | plt.subplot(3,2,2, ylim=[-0.0077,0.0077]) 644 | plt.scatter(x_wk2,y2c_wk2,c='orange',label='week-2') 645 | plt.scatter(x_wk3,y2c_wk3,c='seagreen',label='week-3') 646 | plt.scatter(x_wk4,y2c_wk4,c='r',label='week-4') 647 | plt.tick_params(axis='x',which='both',bottom=False,top=False,labelbottom=False) 648 | plt.legend(loc=(0.52,0.68),fontsize=12) 649 | plt.title('CRPSS: 30 vs 20 categories, 1 x 10 nodes',fontsize=14) 650 | plt.text(-1,0.0063,'b)',fontsize=16) 651 | plt.axhline(y=0) 652 | for i in range(20): 653 | plt.plot(x_wk2[i,::4],y2c_wk2[i,::4],c='orange') 654 | plt.plot(x_wk3[i,::4],y2c_wk3[i,::4],c='seagreen') 655 | plt.plot(x_wk4[i,::4],y2c_wk4[i,::4],c='r') 656 | 657 | plt.subplot(3,2,3, ylim=[-0.0042,0.0042]) 658 | plt.scatter(x_wk2,y1m_wk2,c='orange',label='week-2') 659 | plt.scatter(x_wk3,y1m_wk3,c='seagreen',label='week-3') 660 | plt.scatter(x_wk4,y1m_wk4,c='r',label='week-4') 661 | plt.tick_params(axis='x',which='both',bottom=False,top=False,labelbottom=False) 662 | plt.legend(loc=(0.52,0.68),fontsize=12) 663 | plt.title('MCCESS: 1 x 10 vs 1 x 20 nodes, 20 categories',fontsize=14) 664 | plt.text(-1,0.0034,'c)',fontsize=16) 665 | plt.axhline(y=0) 666 | for i in range(20): 667 | plt.plot(x_wk2[i,::4],y1m_wk2[i,::4],c='orange') 668 | plt.plot(x_wk3[i,::4],y1m_wk3[i,::4],c='seagreen') 669 | plt.plot(x_wk4[i,::4],y1m_wk4[i,::4],c='r') 670 | 671 | plt.subplot(3,2,4, ylim=[-0.0042,0.0042]) 672 | plt.scatter(x_wk2,y2m_wk2,c='orange',label='week-2') 673 | plt.scatter(x_wk3,y2m_wk3,c='seagreen',label='week-3') 674 | plt.scatter(x_wk4,y2m_wk4,c='r',label='week-4') 675 | plt.tick_params(axis='x',which='both',bottom=False,top=False,labelbottom=False) 676 | plt.legend(loc=(0.52,0.68),fontsize=12) 677 | plt.title('MCCESS: 1 x 10 vs 2 x 10 nodes, 20 categories',fontsize=14) 678 | plt.text(-1,0.0034,'d)',fontsize=16) 679 | plt.axhline(y=0) 680 | for i in range(20): 681 | plt.plot(x_wk2[i,::4],y2m_wk2[i,::4],c='orange') 682 | plt.plot(x_wk3[i,::4],y2m_wk3[i,::4],c='seagreen') 683 | plt.plot(x_wk4[i,::4],y2m_wk4[i,::4],c='r') 684 | 685 | plt.subplot(3,2,5, ylim=[-0.042,0.042]) 686 | plt.scatter(x_wk2,y1m,c='royalblue',label='1 x 10 vs. 1 x 20 nodes, dropout') 687 | plt.scatter(x_wk3,y2m,c='navy',label='1 x 10 vs. 2 x 10 nodes, dropout') 688 | plt.scatter(x_wk4,y3r,c='darkturquoise',label='1 x 10 nodes, dropout vs. l1') 689 | plt.tick_params(axis='x',which='both',bottom=False,top=False,labelbottom=False) 690 | plt.legend(loc=(0.58,0.67),fontsize=12) 691 | plt.title('MCCESS: CNN with 4/8 filters',fontsize=14) 692 | plt.text(-1,0.034,'e)',fontsize=16) 693 | plt.axhline(y=0) 694 | for i in range(20): 695 | plt.plot(x_wk2[i,::4],y1m[i,::4],c='royalblue') 696 | plt.plot(x_wk3[i,::4],y2m[i,::4],c='navy') 697 | plt.plot(x_wk4[i,::4],y3r[i,::4],c='darkturquoise') 698 | 699 | plt.subplot(3,2,6, ylim=[-0.023,0.023]) 700 | plt.scatter(x_wk2,y1f,c='blueviolet',label='4/8 vs. 
4/4 filters') 701 | plt.scatter(x_wk3,y2f,c='lightskyblue',label='4/8 vs. 8/8 filters') 702 | plt.scatter(x_wk4,y3f,c='midnightblue',label='4/8 vs. 8/16 filters') 703 | plt.tick_params(axis='x',which='both',bottom=False,top=False,labelbottom=False) 704 | plt.legend(loc=(0.51,0.02),fontsize=12) 705 | plt.title('MCCESS: CNN for 1 x 10 hidden nodes, dropout',fontsize=14) 706 | plt.text(-1,0.018,'f)',fontsize=16) 707 | plt.axhline(y=0) 708 | for i in range(20): 709 | plt.plot(x_wk2[i,::4],y1f[i,::4],c='blueviolet') 710 | plt.plot(x_wk3[i,::4],y2f[i,::4],c='lightskyblue') 711 | plt.plot(x_wk4[i,::4],y3f[i,::4],c='midnightblue') 712 | 713 | plt.tight_layout() 714 | 715 | 716 | 717 | 718 | 719 | 720 | 721 | ################################################################################################### 722 | # # 723 | # Figure 7: Maps of RPSS (highlighting statistically significant grid points) # 724 | # # 725 | ################################################################################################### 726 | 727 | 728 | f1 = np.load("/home/michael/Desktop/CalifAPCP/data/precip_PRISM_cal_19810101_20171231.npz") 729 | obs_lat = f1['lat'] 730 | obs_lon = f1['lon'] 731 | f1.close() 732 | 733 | nxy = len(obs_lon) 734 | 735 | ndts = 61 736 | nyrs = 20 737 | 738 | 739 | acfANN = np.zeros((3,15),dtype=np.float32) 740 | acfCNN = np.zeros((3,15),dtype=np.float32) 741 | pvalANN = np.zeros((3,nxy),dtype=np.float32) 742 | pvalCNN = np.zeros((3,nxy),dtype=np.float32) 743 | alphaFDR_ANN = np.zeros(3,dtype=np.float32) 744 | alphaFDR_CNN = np.zeros(3,dtype=np.float32) 745 | 746 | rpssMapANN = ma.array(np.zeros((3,nxy),dtype=np.float32),mask=True) 747 | rpssMapCSGD = ma.array(np.zeros((3,nxy),dtype=np.float32),mask=True) 748 | rpssMapCNN = ma.array(np.zeros((3,nxy),dtype=np.float32),mask=True) 749 | 750 | rpssAvgANN = ma.array(np.zeros(3,dtype=np.float32),mask=True) 751 | rpssAvgCSGD = ma.array(np.zeros(3,dtype=np.float32),mask=True) 752 | rpssAvgCNN = ma.array(np.zeros(3,dtype=np.float32),mask=True) 753 | 754 | for ilead in range(3): 755 | f1 = np.load("/home/michael/Desktop/CalifAPCP/results/scores-ann_week"+str(ilead+2)+".npz") 756 | Bs33Clm = f1['Bs33pClm'] 757 | Bs33ANN = f1['Bs33pANN'] 758 | Bs33CSGD = f1['Bs33pCSGD'] 759 | Bs67Clm = f1['Bs67pClm'] 760 | Bs67ANN = f1['Bs67pANN'] 761 | Bs67CSGD = f1['Bs67pCSGD'] 762 | Bs85Clm = f1['Bs85pClm'] 763 | Bs85ANN = f1['Bs85pANN'] 764 | Bs85CSGD = f1['Bs85pCSGD'] 765 | f1.close() 766 | f2 = np.load("/home/michael/Desktop/CalifAPCP/results/scores-cnn_week"+str(ilead+2)+".npz") 767 | Bs33CNN = f2['Bs33pCNN'] 768 | Bs67CNN = f2['Bs67pCNN'] 769 | Bs85CNN = f2['Bs85pCNN'] 770 | f2.close() 771 | rpsClm = Bs33Clm + Bs67Clm + Bs85Clm # calculate ranked probability score 772 | rpsANN = Bs33ANN + Bs67ANN + Bs85ANN 773 | rpsCSGD = Bs33CSGD + Bs67CSGD + Bs85CSGD 774 | rpsCNN = Bs33CNN + Bs67CNN + Bs85CNN 775 | rpssMapANN[ilead,:] = 1.-np.sum(rpsANN,axis=(0,1))/np.sum(rpsClm,axis=(0,1)) 776 | rpssMapCSGD[ilead,:] = 1.-np.sum(rpsCSGD,axis=(0,1))/np.sum(rpsClm,axis=(0,1)) 777 | rpssMapCNN[ilead,:] = 1.-np.sum(rpsCNN,axis=(0,1))/np.sum(rpsClm,axis=(0,1)) 778 | rpssAvgANN[ilead] = 1.-np.sum(rpsANN)/np.sum(rpsClm) 779 | rpssAvgCSGD[ilead] = 1.-np.sum(rpsCSGD)/np.sum(rpsClm) 780 | rpssAvgCNN[ilead] = 1.-np.sum(rpsCNN)/np.sum(rpsClm) 781 | rpsDiffANN = rpsCSGD-rpsANN 782 | rpsDiffCNN = rpsCSGD-rpsCNN 783 | rpsDiffStdzANN = (rpsDiffANN-np.mean(rpsDiffANN,axis=(0,1))[None,None,:])/np.std(rpsDiffANN,axis=(0,1))[None,None,:] 784 | rpsDiffStdzCNN = 
(rpsDiffCNN-np.mean(rpsDiffCNN,axis=(0,1))[None,None,:])/np.std(rpsDiffCNN,axis=(0,1))[None,None,:] 785 | for lg in range(15): 786 | acfANN[ilead,lg] = np.mean(rpsDiffStdzANN[lg:,:,:]*rpsDiffStdzANN[:(ndts-lg),:,:]) # Estimate temporal autocorrelation 787 | acfCNN[ilead,lg] = np.mean(rpsDiffStdzCNN[lg:,:,:]*rpsDiffStdzCNN[:(ndts-lg),:,:]) 788 | rhoANN = acfANN[ilead,1]/acfANN[ilead,0] 789 | rhoCNN = acfCNN[ilead,1]/acfCNN[ilead,0] 790 | print(rhoANN,rhoCNN) 791 | nANN = round(ndts*nyrs*(1-rhoANN)/(1+rhoANN)) # effective sample size under AR(1) dependence 792 | nCNN = round(ndts*nyrs*(1-rhoCNN)/(1+rhoCNN)) 793 | #print(nANN,nCNN) 794 | for ixy in range(nxy): 795 | smplANN = rpsCSGD[:,:,ixy].flatten()-rpsANN[:,:,ixy].flatten() 796 | smplCNN = rpsCSGD[:,:,ixy].flatten()-rpsCNN[:,:,ixy].flatten() 797 | tstatANN = np.mean(smplANN)/np.sqrt(np.var(smplANN)/nANN) # test statistic for paired t-test 798 | tstatCNN = np.mean(smplCNN)/np.sqrt(np.var(smplCNN)/nCNN) 799 | pvalANN[ilead,ixy] = 1.-sp.stats.t.cdf(tstatANN,df=nANN-1) # p-value for one-sided test 800 | pvalCNN[ilead,ixy] = 1.-sp.stats.t.cdf(tstatCNN,df=nCNN-1) 801 | #pval[ilead,ixy] = 2*min(1.-sp.stats.t.cdf(tstat,df=n-1),sp.stats.t.cdf(tstat,df=n-1)) 802 | pvalANN_srt = np.sort(pvalANN[ilead,:]) 803 | iANN = np.where(pvalANN_srt<=0.1*np.arange(1,nxy+1)/nxy)[0] # Benjamini-Hochberg procedure at FDR level 0.1 804 | if len(iANN)>0: 805 | alphaFDR_ANN[ilead] = pvalANN_srt[iANN[-1]] 806 | pvalCNN_srt = np.sort(pvalCNN[ilead,:]) 807 | iCNN = np.where(pvalCNN_srt<=0.1*np.arange(1,nxy+1)/nxy)[0] 808 | if len(iCNN)>0: 809 | alphaFDR_CNN[ilead] = pvalCNN_srt[iCNN[-1]] 810 | plt.figure(); plt.scatter(np.arange(663),0.1*np.arange(1,664)/663); plt.scatter(np.arange(663),pvalANN_srt); plt.scatter(np.arange(663),pvalCNN_srt) # diagnostic plot: sorted p-values against the BH threshold line 811 | 812 | 813 | 814 | ## First figure depicts distribution of RPS differences and autocorrelation function 815 | 816 | fig = plt.figure(figsize=(15,9)) 817 | 818 | for ilead in range(3): 819 | ax1 = fig.add_subplot(2,3,ilead+1) 820 | sp.stats.probplot(rpsDiffStdzANN.flatten(),plot=plt) 821 | plt.title("Q-Q Plot of RPS differences (week "+str(ilead+2)+")",fontsize=14) 822 | ax2 = fig.add_subplot(2,3,ilead+4) 823 | plt.scatter(np.arange(15),acfANN[ilead,:]) 824 | plt.axhline(y=0) 825 | plt.axhline(y=0.05,ls='--') 826 | plt.axhline(y=-0.05,ls='--') 827 | plt.plot(np.arange(15),acfANN[ilead,1]**np.arange(15),c='red') 828 | plt.title("ACF of RPS differences (week "+str(ilead+2)+")",fontsize=14) 829 | 830 | plt.tight_layout() 831 | 832 | 833 | fig = plt.figure(figsize=(11.3,9.)) 834 | 835 | for ilead in range(3): 836 | ylim = np.array([0.26,0.052,0.026])[ilead] 837 | #ylim = np.amax(abs(rpssMapCSGD[ilead,:])) 838 | indSgnfANN = (pvalANN[ilead,:]<alphaFDR_ANN[ilead]) 955 | ################################################################################################### 956 | # # 957 | # Figure 8: ... P(>85th pctl) at Eureka/San Diego # 958 | # # 959 | ################################################################################################### 960 | 961 | 962 | divcmp = diverging_hcl("Green-Brown",rev=True).cmap(name = "Diverging Color Map") 963 | 964 | states_us = np.load('/home/michael/Desktop/CalifAPCP/data/states_us.npz',allow_pickle=True)['polygons'].tolist() 965 | states_mexico = np.load('/home/michael/Desktop/CalifAPCP/data/states_mexico.npz',allow_pickle=True)['polygons'].tolist() 966 | 967 | f1 = np.load("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz") 968 | lat = f1['obs_lat'] 969 | lon = f1['obs_lon'] 970 | f1.close() 971 | 972 | 973 | inc = np.logical_and(lon==-124.125,lat==40.875) # Eureka 974 | isc = np.logical_and(lon==-117.125,lat==32.875) # San Diego 975 | lcns = [np.argmax(inc),np.argmax(isc)] 976 | 977 | iyr = 
np.array([[19,7],[12,3]],dtype=np.int32) # date and year index for lowest/highest P(>85th pctl) 978 | idt = np.array([[3,4],[16,25]],dtype=np.int32) # at Eureka and San Diego, set manually here 979 | 980 | 981 | ## Load ERA5 z500 and tcw fields, subset to 22 x 18 image 982 | 983 | ixl = 10 984 | ixu = -6 985 | jyl = 6 986 | jyu = -6 987 | 988 | f2 = np.load("/home/michael/Desktop/CalifAPCP/data/z500_tcw_predictors_era5.npz") 989 | era5_dates_ord = f2['dates_ord'] 990 | era5_lon = f2['longitude'][ixl:ixu] 991 | era5_lat = f2['latitude'][jyl:jyu] 992 | z500 = f2['z500_1wk'][:,:,jyl:jyu,ixl:ixu] 993 | tcw = f2['tcw_1wk'][:,:,jyl:jyu,ixl:ixu] 994 | f2.close() 995 | 996 | ndts, nyrs, ny, nx = z500.shape 997 | 998 | 999 | ## Normalize tcw to 10th/90th climatological percentiles at each grid point 1000 | 1001 | tcw_q10 = np.percentile(tcw,10,axis=1) 1002 | tcw_q90 = np.percentile(tcw,90,axis=1) 1003 | tcw_q10_sm = np.zeros(tcw_q10.shape, dtype=np.float32) 1004 | tcw_q90_sm = np.zeros(tcw_q90.shape, dtype=np.float32) 1005 | 1006 | X = np.ones((ndts,3), dtype=np.float32) # Fit harmonic function to annual cycle of tcw climatology 1007 | X[:,1] = np.sin(2.*np.pi*era5_dates_ord[:,0]/365.25) 1008 | X[:,2] = np.cos(2.*np.pi*era5_dates_ord[:,0]/365.25) 1009 | 1010 | for ix in range(nx): 1011 | for jy in range(ny): 1012 | coef_q10 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_q10[:,jy,ix])) 1013 | tcw_q10_sm[:,jy,ix] = np.matmul(X,coef_q10) 1014 | coef_q90 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_q90[:,jy,ix])) 1015 | tcw_q90_sm[:,jy,ix] = np.matmul(X,coef_q90) 1016 | 1017 | tcw_ano = -1.+2.*(tcw-tcw_q10_sm[:,None,:,:])/(tcw_q90_sm-tcw_q10_sm)[:,None,:,:] 1018 | 1019 | 1020 | ## Normalize z500 to 1st/99th climatological percentiles across all grid points 1021 | 1022 | z500_q01 = np.percentile(z500,1,axis=(1,2,3)) 1023 | z500_q99 = np.percentile(z500,99,axis=(1,2,3)) 1024 | 1025 | coef_q01 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_q01)) 1026 | z500_q01_sm = np.matmul(X,coef_q01) 1027 | coef_q99 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_q99)) 1028 | z500_q99_sm = np.matmul(X,coef_q99) 1029 | 1030 | z500_ano = -1.+2.*(z500-z500_q01_sm[:,None,None,None])/(z500_q99_sm-z500_q01_sm)[:,None,None,None] 1031 | 1032 | 1033 | ## Make plots 1034 | 1035 | contour_levels_tcw = np.arange(-2.,2.25,0.25) 1036 | x, y = np.meshgrid(era5_lon,era5_lat) 1037 | 1038 | title_str = ['Lowest P(>85th percentile) at Eureka','Highest P(>85th percentile) at Eureka','Lowest P(>85th percentile) at San Diego','Highest P(>85th percentile) at San Diego'] 1039 | 1040 | fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10.,6.5)) 1041 | fig.subplots_adjust(left=0.02, bottom=0.02, right=0.98, top=0.95, hspace=0.15, wspace=0.05) 1042 | for ilc in range(2): 1043 | for iwd in range(2): 1044 | ax = axes.flat[2*ilc+(1-iwd)] 1045 | ax.set_xticks([]) 1046 | ax.set_yticks([]) 1047 | ax.set_title(title_str[2*ilc+(1-iwd)]) 1048 | C1 = ax.contour(x,y,z500_ano[idt[iwd,ilc],iyr[iwd,ilc],:,:],linewidths=0.35,colors='k',zorder=2) 1049 | C2 = ax.contourf(x,y,tcw_ano[idt[iwd,ilc],iyr[iwd,ilc],:,:],levels=contour_levels_tcw,alpha=1,cmap=divcmp,extend='both',zorder=1,corner_mask=True) 1050 | plt.clabel(C1,fontsize=10,inline=1,fmt='%.2f') 1051 | ax.scatter(lon[lcns[ilc]],lat[lcns[ilc]],c='red',marker='*',zorder=3) 1052 | for k in range(len(states_us)): 1053 | pathPolygon = path.Path(states_us[str(k)]) 1054 | ax.add_patch(patches.PathPatch(pathPolygon, facecolor='none', lw=1.)) 1055 | for k in range(len(states_mexico)): 1056 | pathPolygon = 
path.Path(np.squeeze(states_mexico[k])) 1057 | ax.add_patch(patches.PathPatch(pathPolygon, facecolor='none', lw=1.)) 1058 | 1059 | cbar = fig.colorbar(C2,ax=axes.ravel().tolist()) 1060 | cbar.set_label('\n normalized TCW anomalies', fontsize=12) 1061 | 1062 | 1063 | 1064 | 1065 | 1066 | 1067 | 1068 | 1069 | 1070 | ################################################################################################### 1071 | # # 1072 | # Figure for presentations: Examples of resulting exceedance probabilities # 1073 | # # 1074 | ################################################################################################### 1075 | 1076 | 1077 | iyyyy = 2017 1078 | imm = 1 1079 | idd = 8 1080 | 1081 | itnf = np.logical_and(obs_lon==-120.625,obs_lat==39.375) # coordinates of our example grid point in Tahoe National Forest 1082 | 1083 | f1 = np.load("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz") 1084 | #list(f1) 1085 | obs_lat = f1['obs_lat'] 1086 | obs_lon = f1['obs_lon'] 1087 | obs_dates_ord = f1['obs_dates_ord'] 1088 | pop_doy = f1['pop_doy'] 1089 | thr_doy = f1['thr_doy'] 1090 | qtev_doy = f1['qtev_doy'] 1091 | obs_precip_week = f1['apcp_obs'] 1092 | f1.close() 1093 | 1094 | ntms, nxy = obs_precip_week.shape 1095 | 1096 | for ivdate in range(ntms): 1097 | if datetime.date.fromordinal(int(obs_dates_ord[ivdate])) == datetime.date(iyyyy,imm,idd): 1098 | break 1099 | 1100 | 1101 | f2 = np.load("/home/michael/Desktop/CalifAPCP/data/mod_precip_cal.npz") 1102 | mod_dates = f2['dates_ord'] 1103 | f2.close() 1104 | 1105 | ndts, nyrs, nlts = mod_dates.shape 1106 | 1107 | iidate = np.zeros((3,2),dtype=np.int16) # date and year index for selected date 1108 | 1109 | for idt in range(ndts): 1110 | for iyr in range(nyrs): 1111 | for ilt in range(3): 1112 | if datetime.date.fromordinal(int(mod_dates[idt,iyr,6+ilt*7])) == datetime.date(iyyyy,imm,idd): 1113 | iidate[ilt,0] = idt 1114 | iidate[ilt,1] = iyr 1115 | 1116 | 1117 | f3 = np.load("/home/michael/Desktop/CalifAPCP/stats/ensemble_stats_week2_ANN_yr"+str(iidate[0,1])+".npz") 1118 | doy_vdate = f3['doy_dts'][iidate[0,0]] 1119 | apcp_ens_pit = f3['apcp_ens_pit_verif'][iidate[0,0],:,:] 1120 | f3.close() 1121 | 1122 | 1123 | ilt = 0 1124 | 1125 | f5 = np.load("/home/michael/Desktop/CalifAPCP/forecasts/ANN-efi/probfcst_10-l1_week"+str(ilt+2)+"_yr"+str(iidate[ilt,1])+".npz") 1126 | prob_fcst_cat = f5['prob_fcst_cat'][iidate[ilt,0],:,:] 1127 | f5.close() 1128 | 1129 | prob_fcst_chf = -np.log(1.-np.cumsum(prob_fcst_cat,axis=1)[:,:(ncat-1)]) 1130 | 1131 | prob_clm_cat = np.concatenate((1.-pop_doy[doy_vdate,:,np.newaxis],np.repeat(pop_doy[doy_vdate,:,np.newaxis]/(ncat-1),ncat-1,axis=1)),axis=1) 1132 | prob_clm_chf = -np.log(1.-np.cumsum(prob_clm_cat,axis=1)[:,:(ncat-1)]) 1133 | 1134 | pot6in = np.zeros(nxy,dtype=np.float32) 1135 | pot85p = np.zeros(nxy,dtype=np.float32) 1136 | pot6in_cl = np.zeros(nxy,dtype=np.float32) 1137 | pot85p_cl = np.zeros(nxy,dtype=np.float32) 1138 | 1139 | for ixy in range(nxy): 1140 | itp_fct = interp1d(thr_doy[doy_vdate,ixy,:], prob_fcst_chf[ixy,:], kind='linear',fill_value='extrapolate') 1141 | pot6in[ixy] = np.exp(-itp_fct(152.4)) 1142 | pot85p[ixy] = np.exp(-itp_fct(qtev_doy[doy_vdate,ixy,2])) 1143 | itp_fct = interp1d(thr_doy[doy_vdate,ixy,:], prob_clm_chf[ixy,:], kind='linear',fill_value='extrapolate') 1144 | pot6in_cl[ixy] = np.exp(-itp_fct(152.4)) 1145 | pot85p_cl[ixy] = np.exp(-itp_fct(qtev_doy[doy_vdate,ixy,2])) 1146 | 1147 | 1148 | plt.figure(figsize=(10,4)) 1149 | 1150 | plt.subplot(1, 2, 2, 
xlim=(-124.9,-113.8), ylim=(31.9,42.5), \ 1151 | xticks=[-124,-122,-120,-118,-116,-114], xticklabels=['-124'+'\u00b0','-122'+'\u00b0','-120'+'\u00b0','-118'+'\u00b0','-116'+'\u00b0','-114'+'\u00b0'], \ 1152 | yticks=[32,34,36,38,40,42], yticklabels=['32'+'\u00b0','34'+'\u00b0','36'+'\u00b0','38'+'\u00b0','40'+'\u00b0','42'+'\u00b0']) 1153 | plt.scatter(obs_lon,obs_lat,c=pot6in,marker='s',cmap=pcpcmp,s=28,lw=.1,vmin=0.0,vmax=0.64,edgecolors=[.2,.2,.2]) 1154 | #plt.scatter(obs_lon,obs_lat,c=pot6in,marker='s',cmap=pcpcmp,s=28,lw=.1,vmin=0.0,vmax=0.32,edgecolors=[.2,.2,.2]) 1155 | cbar = plt.colorbar() 1156 | plt.title(' Probability for exceeding 6 inches of precipitation\n',fontsize=12) 1157 | 1158 | plt.subplot(1, 2, 1, xlim=(-124.9,-113.8), ylim=(31.9,42.5), \ 1159 | xticks=[-124,-122,-120,-118,-116,-114], xticklabels=['-124'+'\u00b0','-122'+'\u00b0','-120'+'\u00b0','-118'+'\u00b0','-116'+'\u00b0','-114'+'\u00b0'], \ 1160 | yticks=[32,34,36,38,40,42], yticklabels=['32'+'\u00b0','34'+'\u00b0','36'+'\u00b0','38'+'\u00b0','40'+'\u00b0','42'+'\u00b0']) 1161 | plt.scatter(obs_lon,obs_lat,c=pot85p,marker='s',cmap=pcpcmp,s=28,lw=.1,vmin=0.,vmax=1.,edgecolors=[.2,.2,.2]) 1162 | #plt.scatter(obs_lon,obs_lat,c=pot85p,marker='s',cmap=pcpcmp,s=28,lw=.1,vmin=0.13,vmax=0.23,edgecolors=[.2,.2,.2]) 1163 | cbar = plt.colorbar() 1164 | plt.title(' Probability for exceeding 85th climat. percentile\n',fontsize=12) 1165 | 1166 | plt.tight_layout() 1167 | 1168 | 1169 | plt.figure(figsize=(10,4)) 1170 | 1171 | plt.subplot(1, 2, 1, xlim=(-124.9,-113.8), ylim=(31.9,42.5), \ 1172 | xticks=[-124,-122,-120,-118,-116,-114], xticklabels=['-124'+'\u00b0','-122'+'\u00b0','-120'+'\u00b0','-118'+'\u00b0','-116'+'\u00b0','-114'+'\u00b0'], \ 1173 | yticks=[32,34,36,38,40,42], yticklabels=['32'+'\u00b0','34'+'\u00b0','36'+'\u00b0','38'+'\u00b0','40'+'\u00b0','42'+'\u00b0']) 1174 | plt.scatter(obs_lon,obs_lat,c=pot6in,marker='s',cmap=pcpcmp,s=28,lw=.1,vmin=0.0,vmax=0.65,edgecolors=[.2,.2,.2]) 1175 | cbar = plt.colorbar() 1176 | plt.title(' Probability for exceeding 6 inches of precipitation\n',fontsize=12) 1177 | 1178 | plt.subplot(1, 2, 2, xlim=(-124.9,-113.8), ylim=(31.9,42.5), \ 1179 | xticks=[-124,-122,-120,-118,-116,-114], xticklabels=['-124'+'\u00b0','-122'+'\u00b0','-120'+'\u00b0','-118'+'\u00b0','-116'+'\u00b0','-114'+'\u00b0'], \ 1180 | yticks=[32,34,36,38,40,42], yticklabels=['32'+'\u00b0','34'+'\u00b0','36'+'\u00b0','38'+'\u00b0','40'+'\u00b0','42'+'\u00b0']) 1181 | plt.scatter(obs_lon,obs_lat,c=np.log10(pot6in/pot6in_cl),marker='s',cmap=divcmp,s=28,lw=.1,vmin=-2.2,vmax=2.2,edgecolors=[.2,.2,.2]) 1182 | cbar = plt.colorbar(ticks=[-2,-1,0,1,2]) 1183 | cbar.ax.set_yticklabels(['0.01','0.1','1','10','100']) 1184 | plt.title(' Ratio of forecast probability to climat. probability\n',fontsize=12) 1185 | 1186 | plt.tight_layout() 1187 | 1188 | 1189 | plt.figure(figsize=(10,4)) 1190 | 1191 | plt.subplot(1, 2, 1, xlim=(-124.9,-113.8), ylim=(31.9,42.5), \ 1192 | xticks=[-124,-122,-120,-118,-116,-114], xticklabels=['-124'+'\u00b0','-122'+'\u00b0','-120'+'\u00b0','-118'+'\u00b0','-116'+'\u00b0','-114'+'\u00b0'], \ 1193 | yticks=[32,34,36,38,40,42], yticklabels=['32'+'\u00b0','34'+'\u00b0','36'+'\u00b0','38'+'\u00b0','40'+'\u00b0','42'+'\u00b0']) 1194 | plt.scatter(obs_lon,obs_lat,c=pot85p,marker='s',cmap=pcpcmp,s=28,lw=.1,vmin=0.0,vmax=1.0,edgecolors=[.2,.2,.2]) 1195 | cbar = plt.colorbar() 1196 | plt.title(' Probability for exceeding 85th climat. 
percentile\n',fontsize=12) 1197 | 1198 | plt.subplot(1, 2, 2, xlim=(-124.9,-113.8), ylim=(31.9,42.5), \ 1199 | xticks=[-124,-122,-120,-118,-116,-114], xticklabels=['-124'+'\u00b0','-122'+'\u00b0','-120'+'\u00b0','-118'+'\u00b0','-116'+'\u00b0','-114'+'\u00b0'], \ 1200 | yticks=[32,34,36,38,40,42], yticklabels=['32'+'\u00b0','34'+'\u00b0','36'+'\u00b0','38'+'\u00b0','40'+'\u00b0','42'+'\u00b0']) 1201 | plt.scatter(obs_lon,obs_lat,c=np.log10(pot85p/pot85p_cl),marker='s',cmap=divcmp,s=28,lw=.1,vmin=-0.7,vmax=0.7,edgecolors=[.2,.2,.2]) 1202 | cbar = plt.colorbar(ticks=[np.log10(0.25),np.log10(0.5),0.,np.log10(2.),np.log10(4.)]) 1203 | cbar.ax.set_yticklabels(['0.25','0.5','1','2','4']) 1204 | plt.title(' Ratio of forecast probability to climat. probability\n',fontsize=12) 1205 | 1206 | plt.tight_layout() 1207 | 1208 | 1209 | 1210 | 1211 | ################################################################################################### 1212 | # # 1213 | # Figure for presentations: Reliability diagrams # 1214 | # # 1215 | ################################################################################################### 1216 | 1217 | 1218 | #fct = 8 1219 | #p = 0.8 1220 | nmin = 50 1221 | 1222 | #cat33u = np.arange(np.round(-fct*0.67**p),np.round(fct*0.33**p)) 1223 | #cat67u = np.arange(np.round(-fct*0.33**p),np.round(fct*0.67**p)) 1224 | #cat85u = np.arange(np.round(-fct*0.15**p),np.round(fct*0.85**p)) 1225 | 1226 | cat33u = np.arange(11) 1227 | cat67u = np.arange(11) 1228 | cat85u = np.arange(11) 1229 | 1230 | x33 = ma.array(np.zeros((3,3,len(cat33u)),dtype=np.float32),mask=True) 1231 | x67 = ma.array(np.zeros((3,3,len(cat67u)),dtype=np.float32),mask=True) 1232 | x85 = ma.array(np.zeros((3,3,len(cat85u)),dtype=np.float32),mask=True) 1233 | y33 = ma.array(np.zeros((3,3,len(cat33u)),dtype=np.float32),mask=True) 1234 | y67 = ma.array(np.zeros((3,3,len(cat67u)),dtype=np.float32),mask=True) 1235 | y85 = ma.array(np.zeros((3,3,len(cat85u)),dtype=np.float32),mask=True) 1236 | freq33 = ma.array(np.zeros((3,3,len(cat33u)),dtype=np.float32),mask=True) 1237 | freq67 = ma.array(np.zeros((3,3,len(cat67u)),dtype=np.float32),mask=True) 1238 | freq85 = ma.array(np.zeros((3,3,len(cat85u)),dtype=np.float32),mask=True) 1239 | 1240 | for ilead in range(3): 1241 | f1 = np.load("/home/michael/Desktop/CalifAPCP/results/scores-ann_week"+str(ilead+2)+".npz") 1242 | exc33p = f1['exc33p'] 1243 | exc67p = f1['exc67p'] 1244 | exc85p = f1['exc85p'] 1245 | pot33pCSGD = f1['pot33pCSGD'] 1246 | pot67pCSGD = f1['pot67pCSGD'] 1247 | pot85pCSGD = f1['pot85pCSGD'] 1248 | pot33pANN = f1['pot33pANN'] 1249 | pot67pANN = f1['pot67pANN'] 1250 | pot85pANN = f1['pot85pANN'] 1251 | f1.close() 1252 | cat33csgd = np.round(pot33pCSGD*10).flatten() 1253 | cat67csgd = np.round(pot67pCSGD*10).flatten() 1254 | cat85csgd = np.round(pot85pCSGD*10).flatten() 1255 | cat33ann = np.round(pot33pANN*10).flatten() 1256 | cat67ann = np.round(pot67pANN*10).flatten() 1257 | cat85ann = np.round(pot85pANN*10).flatten() 1258 | #cat33 = np.round(fct*np.sign(pot33pANN-0.67)*abs(pot33pANN-0.67)**p).flatten() 1259 | #cat67 = np.round(fct*np.sign(pot67pANN-0.33)*abs(pot67pANN-0.33)**p).flatten() 1260 | #cat85 = np.round(fct*np.sign(pot85pANN-0.15)*abs(pot85pANN-0.15)**p).flatten() 1261 | f2 = np.load("/home/michael/Desktop/CalifAPCP/results/scores-cnn_week"+str(ilead+2)+".npz") 1262 | pot33pCNN = f2['pot33pCNN'] 1263 | pot67pCNN = f2['pot67pCNN'] 1264 | pot85pCNN = f2['pot85pCNN'] 1265 | f2.close() 1266 | cat33cnn = np.round(pot33pCNN*10).flatten() 1267 | 
cat67cnn = np.round(pot67pCNN*10).flatten() 1268 | cat85cnn = np.round(pot85pCNN*10).flatten() 1269 | for i in range(len(cat33u)): 1270 | freq33[0,ilead,i] = np.sum(cat33csgd==cat33u[i]) 1271 | if freq33[0,ilead,i]>nmin: 1272 | x33[0,ilead,i] = np.mean(pot33pCSGD.flatten()[cat33csgd==cat33u[i]]) 1273 | y33[0,ilead,i] = np.mean(exc33p.flatten()[cat33csgd==cat33u[i]]) 1274 | freq33[1,ilead,i] = np.sum(cat33ann==cat33u[i]) 1275 | if freq33[1,ilead,i]>nmin: 1276 | x33[1,ilead,i] = np.mean(pot33pANN.flatten()[cat33ann==cat33u[i]]) 1277 | y33[1,ilead,i] = np.mean(exc33p.flatten()[cat33ann==cat33u[i]]) 1278 | freq33[2,ilead,i] = np.sum(cat33cnn==cat33u[i]) 1279 | if freq33[2,ilead,i]>nmin: 1280 | x33[2,ilead,i] = np.mean(pot33pCNN.flatten()[cat33cnn==cat33u[i]]) 1281 | y33[2,ilead,i] = np.mean(exc33p.flatten()[cat33cnn==cat33u[i]]) 1282 | freq67[0,ilead,i] = np.sum(cat67csgd==cat67u[i]) 1283 | if freq67[0,ilead,i]>nmin: 1284 | x67[0,ilead,i] = np.mean(pot67pCSGD.flatten()[cat67csgd==cat67u[i]]) 1285 | y67[0,ilead,i] = np.mean(exc67p.flatten()[cat67csgd==cat67u[i]]) 1286 | freq67[1,ilead,i] = np.sum(cat67ann==cat67u[i]) 1287 | if freq67[1,ilead,i]>nmin: 1288 | x67[1,ilead,i] = np.mean(pot67pANN.flatten()[cat67ann==cat67u[i]]) 1289 | y67[1,ilead,i] = np.mean(exc67p.flatten()[cat67ann==cat67u[i]]) 1290 | freq67[2,ilead,i] = np.sum(cat67cnn==cat67u[i]) 1291 | if freq67[2,ilead,i]>nmin: 1292 | x67[2,ilead,i] = np.mean(pot67pCNN.flatten()[cat67cnn==cat67u[i]]) 1293 | y67[2,ilead,i] = np.mean(exc67p.flatten()[cat67cnn==cat67u[i]]) 1294 | freq85[0,ilead,i] = np.sum(cat85csgd==cat85u[i]) 1295 | if freq85[0,ilead,i]>nmin: 1296 | x85[0,ilead,i] = np.mean(pot85pCSGD.flatten()[cat85csgd==cat85u[i]]) 1297 | y85[0,ilead,i] = np.mean(exc85p.flatten()[cat85csgd==cat85u[i]]) 1298 | freq85[1,ilead,i] = np.sum(cat85ann==cat85u[i]) 1299 | if freq85[1,ilead,i]>nmin: 1300 | x85[1,ilead,i] = np.mean(pot85pANN.flatten()[cat85ann==cat85u[i]]) 1301 | y85[1,ilead,i] = np.mean(exc85p.flatten()[cat85ann==cat85u[i]]) 1302 | freq85[2,ilead,i] = np.sum(cat85cnn==cat85u[i]) 1303 | if freq85[2,ilead,i]>nmin: 1304 | x85[2,ilead,i] = np.mean(pot85pCNN.flatten()[cat85cnn==cat85u[i]]) 1305 | y85[2,ilead,i] = np.mean(exc85p.flatten()[cat85cnn==cat85u[i]]) 1306 | 1307 | 1308 | fig = plt.figure(figsize=(14,9)) 1309 | 1310 | for ilt in range(3): 1311 | ax1 = fig.add_subplot(2,3,1+ilt) 1312 | relCSGD = plt.plot(x33[0,ilt,:],y33[0,ilt,:],'-o',c='blueviolet') 1313 | relANN = plt.plot(x33[1,ilt,:],y33[1,ilt,:],'-o',c='royalblue') 1314 | relCNN = plt.plot(x33[2,ilt,:],y33[2,ilt,:],'-o',c='indigo') 1315 | plt.plot([0,1],[0,1],c='k') 1316 | plt.axvline(0.67,c='k',ls=':',lw=1,ymin=0.05,ymax=0.95) 1317 | plt.title("Reliability for P(> 33rd pctl), week-"+str(ilt+2)+"\n",fontsize=14) 1318 | plt.legend((relCSGD[0],relANN[0],relCNN[0]),('CSGD','ANN','CNN'),loc=4,fontsize=12) 1319 | ins1 = ax1.inset_axes([0.03,0.68,0.4,0.3]) 1320 | ins1.tick_params(axis='both',which='both',bottom=False,top=False,labelbottom=False,right=False,left=False,labelleft=False) 1321 | ins1.set_xlabel('Frequency of usage',fontsize=11) 1322 | ins1.bar(cat33u-0.25,freq33[0,ilt,:],0.23,color='blueviolet') 1323 | ins1.bar(cat33u-0.0,freq33[1,ilt,:],0.23,color='royalblue') 1324 | ins1.bar(cat33u+0.25,freq33[2,ilt,:],0.23,color='indigo') 1325 | #ins1.axvline(0.0,c='k',ls=':',lw=1) 1326 | ins1.axvline(6.67,c='k',ls=':',lw=1) 1327 | ax2 = fig.add_subplot(2,3,4+ilt) 1328 | relCSGD = plt.plot(x67[0,ilt,:],y67[0,ilt,:],'-o',c='blueviolet') 1329 | relANN = 
plt.plot(x67[1,ilt,:],y67[1,ilt,:],'-o',c='royalblue') 1330 | relCNN = plt.plot(x67[2,ilt,:],y67[2,ilt,:],'-o',c='indigo') 1331 | plt.plot([0,1],[0,1],c='k') 1332 | plt.axvline(0.33,c='k',ls=':',lw=1,ymin=0.05,ymax=0.5) 1333 | plt.title("Reliability for P(> 67th pctl), week-"+str(ilt+2)+"\n",fontsize=14) 1334 | plt.legend((relCSGD[0],relANN[0],relCNN[0]),('CSGD','ANN','CNN'),loc=4,fontsize=12) 1335 | ins2 = ax2.inset_axes([0.03,0.68,0.4,0.3]) 1336 | ins2.tick_params(axis='both',which='both',bottom=False,top=False,labelbottom=False,right=False,left=False,labelleft=False) 1337 | ins2.set_xlabel('Frequency of usage',fontsize=11) 1338 | ins2.bar(cat67u-0.25,freq67[0,ilt,:],0.23,color='blueviolet') 1339 | ins2.bar(cat67u-0.0,freq67[1,ilt,:],0.23,color='royalblue') 1340 | ins2.bar(cat67u+0.25,freq67[2,ilt,:],0.23,color='indigo') 1341 | #ins2.axvline(0.0,c='k',ls=':',lw=1) 1342 | ins2.axvline(3.33,c='k',ls=':',lw=1) 1343 | 1344 | plt.tight_layout() 1345 | 1346 | 1347 | 1348 | 1349 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NeuralNetworkS2S 2 | This repository contains files with Python code for the algorithms discussed in the paper 'Using Artificial Neural Networks for Generating Probabilistic Subseasonal Precipitation Forecasts over California', submitted to Monthly Weather Review. The following gives a brief description of the individual files: 3 | 4 | - ANN-CalculateEnsembleStatistics.py: Reads the ensemble forecasts and calculates, separately for each member, the probability integral transform relative to the model climatology 5 | 6 | - ANN-CalculateObsCategories.py: Reads the analysis data, calculates the climatology-dependent category boundaries, and uses them to categorize the analyzed precipitation amounts 7 | 8 | - ANN-CalculateVerificationMetrics.py: Calculates various verification metrics for the ANN, CSGD, raw ensemble, and climatological probabilistic forecasts 9 | 10 | - ANN-FindTuningParameters.py: Calculates cross-validated scores for different ANN architectures and selects the optimal regularization parameters 11 | 12 | - ANN-GenerateProbabilityForecasts.py: Calculates probability forecasts based on the selected ANN model with optimal regularization parameters 13 | 14 | - CNN-CalculateLargeScalePredictors.py: Reads ensemble forecast and analyzed Z500 and TCW data, upscales them to 1 degree and 7-day averages, and saves as .npz file 15 | 16 | - CNN-CalculateVerificationMetrics.py: Calculates various verification metrics for the CNN probabilistic forecasts, based on either analyzed or forecast Z500/TCW fields 17 | 18 | - CNN-FindTuningParameters.py: Calculates cross-validated scores for different CNN architectures and selects the optimal dropout rate for each of them 19 | 20 | - CNN-FitConvolutionalNetworkModel.py: Fits a CNN model based on the selected CNN architecture with optimal dropout rate 21 | 22 | - CNN-GenerateProbabilityForecasts.py: Estimates the adjustment factor and calculates adjusted probability forecasts based on the forecast Z500/TCW fields 23 | 24 | - CodeForGraphics.py: Python code used to generate the figures in the MWR paper and a few additional figures used for presentations 25 | 26 | - CSGD-FitClimatologicalDistributions.py: Fits climatological censored, shifted gamma distributions to the analyzed precipitation amounts 27 | 28 | - CSGD-GenerateForecastDistributions.py: Fits a simplified CSGD model that links forecast and 
analyzed precipitation data, and generates probabilistic forecasts 29 | 30 | - S-ANN-GenerateProbabilityForecasts.py: Calculates probability forecasts for the ANN model discussed in 'SupplementB.pdf' 31 | 32 | - S-CalculateVerificationMetrics.py: Calculates various verification metrics for the additional experiments with ANN, CNN, and CSGD in the supplements 33 | 34 | - S-CNN-FindTuningParameters.py: Calculates cross-validated scores for different CNN architectures discussed in 'SupplementC.pdf' 35 | 36 | - S-CNN-FitConvolutionalNetworkModel.py: Fits a CNN model with optimal dropout rate for the additional experiments in 'SupplementC.pdf' 37 | 38 | - S-CodeForGraphics.py: Python code used to generate the figures in 'SupplementA.pdf', 'SupplementB.pdf', and 'SupplementC.pdf' 39 | 40 | - S-CSGD-GenerateForecastDistributions.py: Fits other variants of the CSGD model discussed in 'SupplementA.pdf' and generates probabilistic forecasts 41 | 42 | 43 | -------------------------------------------------------------------------------- /S-ANN-GenerateProbabilityForecasts.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import scipy as sp 4 | import math 5 | import os, sys 6 | #import matplotlib.pyplot as plt 7 | import datetime 8 | import time 9 | import keras 10 | import keras.backend as K 11 | 12 | from netCDF4 import Dataset 13 | from numpy import ma 14 | 15 | from scipy import stats 16 | 17 | from keras import models 18 | from keras import layers 19 | from keras import regularizers 20 | 21 | from keras.layers import Input, Dense, Add, Activation, Dropout 22 | from keras.layers.merge import Concatenate 23 | from keras.models import Model 24 | from keras.optimizers import Adam 25 | 26 | #plt.ion() 27 | 28 | 29 | def build_cat_model(n_features, hidden_nodes, n_bins, par_reg): 30 | inp = Input(shape=(n_features,)) 31 | x = Dense(hidden_nodes[0], activation='elu', kernel_regularizer=regularizers.l1(par_reg))(inp) 32 | if len(hidden_nodes) > 1: 33 | for h in hidden_nodes[1:]: 34 | x = Dense(h, activation='elu', kernel_regularizer=regularizers.l1(par_reg))(x) 35 | x = Dense(n_bins, activation='elu', kernel_regularizer=regularizers.l1(par_reg))(x) 36 | out = Activation('softmax')(x) 37 | return Model(inputs=inp, outputs=out) 38 | 39 | 40 | def modified_categorical_crossentropy(y_mat, prob_fcst): 41 | prob_obs_cat = K.sum(y_mat*prob_fcst,axis=1) 42 | return -K.mean(K.log(prob_obs_cat)) 43 | 44 | 45 | f1 = np.load("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz") 46 | #list(f1) 47 | lat = f1['obs_lat'] 48 | lon = f1['obs_lon'] 49 | obs_dates_ord = f1['obs_dates_ord'] 50 | pop_doy = f1['pop_doy'] 51 | thr_doy = f1['thr_doy'] 52 | qtev_doy = f1['qtev_doy'] 53 | apcp_obs_cat = f1['apcp_obs_cat'] 54 | f1.close() 55 | 56 | ncat = apcp_obs_cat.shape[2] 57 | 58 | 59 | 60 | clead = 'week4' 61 | 62 | f3 = np.load("/home/michael/Desktop/CalifAPCP/tuning/efi-20cl-m0-l1_"+clead+".npz") 63 | opt_reg_param = f3['opt_reg_param'] 64 | f3.close() 65 | 66 | 67 | for iyr in range(20): 68 | print(iyr) 69 | # Load smoothed ensemble forecast PIT values 70 | f2 = np.load("/home/michael/Desktop/CalifAPCP/stats/ensemble_stats_"+clead+"_ANN_yr"+str(iyr)+".npz") 71 | doy_dts = f2['doy_dts'] 72 | apcp_obs_ind_train = f2['apcp_obs_ind_train'] 73 | apcp_obs_ind_verif = f2['apcp_obs_ind_verif'] 74 | apcp_ens_pit_train = f2['apcp_ens_pit_train'] 75 | apcp_ens_pit_verif = f2['apcp_ens_pit_verif'] 76 | f2.close() 77 | ndts, nyrs_tr, nxy, nmem = 
apcp_ens_pit_train.shape 78 | # Calculate normalized coordinates and climatological probability of precipitation 79 | lon_train = np.repeat(-1.+2.*(lon[np.newaxis,:]-lon[0])/(lon[-1]-lon[0]),ndts*nyrs_tr,axis=0).reshape((ndts,nyrs_tr,nxy,1)) 80 | lon_verif = np.repeat(-1.+2.*(lon[np.newaxis,:]-lon[0])/(lon[-1]-lon[0]),ndts,axis=0).reshape((ndts,nxy,1)) 81 | lat_train = np.repeat(-1.+2.*(lat[np.newaxis,:]-lat[-1])/(lat[0]-lat[-1]),ndts*nyrs_tr,axis=0).reshape((ndts,nyrs_tr,nxy,1)) 82 | lat_verif = np.repeat(-1.+2.*(lat[np.newaxis,:]-lat[-1])/(lat[0]-lat[-1]),ndts,axis=0).reshape((ndts,nxy,1)) 83 | # Calculate predictors and classification targets 84 | apcp_efi_train = -1.+(2./np.pi)*np.mean(np.arccos(1.-2.*apcp_ens_pit_train),axis=3)[:,:,:,None] 85 | apcp_efi_verif = -1.+(2./np.pi)*np.mean(np.arccos(1.-2.*apcp_ens_pit_verif),axis=2)[:,:,None] 86 | train_predictors = np.concatenate((lon_train,lat_train,apcp_efi_train),axis=3).reshape((-1,3)) 87 | train_cat_targets = apcp_obs_cat[apcp_obs_ind_train.flatten(),:,:].reshape((-1,ncat)).astype(float) 88 | # Define and fit ANN model 89 | keras.backend.clear_session() 90 | model = build_cat_model(train_predictors.shape[-1], [10], ncat, opt_reg_param[iyr]) 91 | model.compile(optimizer=Adam(0.05), loss=modified_categorical_crossentropy) 92 | model.fit(train_predictors, train_cat_targets, epochs=100, batch_size=ndts*nyrs_tr*nxy, verbose=0) 93 | # Calculate probability forecasts 94 | verif_predictors = np.concatenate((lon_verif,lat_verif,apcp_efi_verif),axis=2).reshape((-1,3)) 95 | prob_fcst_cat = model.predict(verif_predictors).reshape((ndts,nxy,ncat)) 96 | ### Save out to file 97 | outfilename = "/home/michael/Desktop/CalifAPCP/forecasts/ANN-rv/probfcst_10-l1_"+clead+"_yr"+str(iyr) 98 | np.savez(outfilename, prob_fcst_cat=prob_fcst_cat) 99 | 100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /S-CNN-FindTuningParameters.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import scipy as sp 4 | import math 5 | import os, sys 6 | #import matplotlib.pyplot as plt 7 | import datetime 8 | import time 9 | import keras 10 | import keras.backend as K 11 | 12 | from netCDF4 import Dataset 13 | from numpy import ma 14 | from numpy.linalg import solve 15 | from scipy import stats 16 | from scipy.interpolate import interp1d 17 | 18 | from keras import models 19 | from keras import layers 20 | from keras import regularizers 21 | 22 | from keras.layers import Input, Dense, Dot, Add, Activation, Conv2D, MaxPooling2D, Flatten, Reshape, Dropout 23 | from keras.models import Model 24 | from keras.optimizers import Adam 25 | 26 | #plt.ion() 27 | 28 | 29 | ## Load categorical analysis data 30 | 31 | f1 = np.load("/Users/mscheuerer/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz") 32 | lat = f1['obs_lat'] 33 | lon = f1['obs_lon'] 34 | obs_dates_ord = f1['obs_dates_ord'] 35 | pop_doy = f1['pop_doy'] 36 | thr_doy = f1['thr_doy'] 37 | qtev_doy = f1['qtev_doy'] 38 | apcp_obs_cat = f1['apcp_obs_cat'] 39 | apcp_obs = f1['apcp_obs'] 40 | f1.close() 41 | 42 | ndts, nxy, ncat = apcp_obs_cat.shape 43 | 44 | 45 | 46 | ## Load ERA5 z500 and tcw fields, subset to 22 x 18 image, same for the ensemble forecast fields 47 | 48 | ixl = 10 49 | ixu = -6 50 | jyl = 6 51 | jyu = -6 52 | 53 | f2 = np.load("/Users/mscheuerer/Desktop/CalifAPCP/data/z500_tcw_predictors_era5.npz") 54 | era5_dates_ord = f2['dates_ord'] 55 | era5_lon = f2['longitude'][ixl:ixu] 56 | era5_lat = 
f2['latitude'][jyl:jyu] 57 | z500 = f2['z500_1wk'][:,:,jyl:jyu,ixl:ixu] 58 | tcw = f2['tcw_1wk'][:,:,jyl:jyu,ixl:ixu] 59 | f2.close() 60 | 61 | ndts, nyrs, ny, nx = z500.shape 62 | 63 | 64 | 65 | ########################################################################################################################################################################## 66 | # 67 | # Upscale to 2 degrees 68 | # 69 | z500_1deg = z500 70 | tcw_1deg = tcw 71 | z500 = (z500_1deg[:,:,0:ny:2,0:nx:2]+z500_1deg[:,:,1:ny:2,0:nx:2]+z500_1deg[:,:,0:ny:2,1:nx:2]+z500_1deg[:,:,1:ny:2,1:nx:2])/4. 72 | tcw = (tcw_1deg[:,:,0:ny:2,0:nx:2]+tcw_1deg[:,:,1:ny:2,0:nx:2]+tcw_1deg[:,:,0:ny:2,1:nx:2]+tcw_1deg[:,:,1:ny:2,1:nx:2])/4. 73 | ny = ny//2 74 | nx = nx//2 75 | # 76 | ########################################################################################################################################################################## 77 | 78 | 79 | ## Calculate doy for each analysis date 80 | 81 | doy_dts = np.zeros(ndts,dtype=np.int32) 82 | apcp_obs_ind = np.zeros((ndts,nyrs),dtype=np.int32) 83 | for idt in range(ndts): 84 | for iyr in range(nyrs): 85 | apcp_obs_ind[idt,iyr] = np.where(obs_dates_ord==era5_dates_ord[idt,iyr])[0][0] 86 | date_ord = int(era5_dates_ord[idt,0]-0.5) 87 | doy_dts[idt] = min(364,(datetime.date.fromordinal(date_ord)-datetime.date(datetime.date.fromordinal(date_ord).year,1,1)).days) 88 | 89 | 90 | 91 | ## Normalize tcw to 10th/90th climatological percentiles at each grid point 92 | 93 | tcw_q10 = np.percentile(tcw,10,axis=1) 94 | tcw_q90 = np.percentile(tcw,90,axis=1) 95 | tcw_q10_sm = np.zeros(tcw_q10.shape, dtype=np.float32) 96 | tcw_q90_sm = np.zeros(tcw_q90.shape, dtype=np.float32) 97 | 98 | X = np.ones((ndts,3), dtype=np.float32) # Fit harmonic function to annual cycle of tcw climatology 99 | X[:,1] = np.sin(2.*np.pi*era5_dates_ord[:,0]/365.25) 100 | X[:,2] = np.cos(2.*np.pi*era5_dates_ord[:,0]/365.25) 101 | 102 | for ix in range(nx): 103 | for jy in range(ny): 104 | coef_q10 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_q10[:,jy,ix])) 105 | tcw_q10_sm[:,jy,ix] = np.matmul(X,coef_q10) 106 | coef_q90 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_q90[:,jy,ix])) 107 | tcw_q90_sm[:,jy,ix] = np.matmul(X,coef_q90) 108 | 109 | tcw_ano = -1.+2.*(tcw-tcw_q10_sm[:,None,:,:])/(tcw_q90_sm-tcw_q10_sm)[:,None,:,:] 110 | 111 | 112 | 113 | ## Normalize z500 to 1st/99th climatological percentiles across all grid points 114 | 115 | z500_q01 = np.percentile(z500,1,axis=(1,2,3)) 116 | z500_q99 = np.percentile(z500,99,axis=(1,2,3)) 117 | 118 | coef_q01 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_q01)) 119 | z500_q01_sm = np.matmul(X,coef_q01) 120 | coef_q99 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_q99)) 121 | z500_q99_sm = np.matmul(X,coef_q99) 122 | 123 | z500_ano = -1.+2.*(z500-z500_q01_sm[:,None,None,None])/(z500_q99_sm-z500_q01_sm)[:,None,None,None] 124 | 125 | 126 | 127 | # Define basis functions 128 | 129 | r_basis = 7. 
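# (Tricube weights, (1-(d/r_basis)^3)^3 for center distances d within r_basis degrees and zero
#  beyond; after the normalization below, the basis function values sum to one at each grid point.)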
130 | lon_ctr = np.outer(np.arange(-124,-115,3.5),np.ones(3)).reshape(9)[[2,4,5,6,7]] 131 | lat_ctr = np.outer(np.ones(3),np.arange(33,42,3.5)).reshape(9)[[2,4,5,6,7]] 132 | 133 | dst_lon = np.abs(np.subtract.outer(lon,lon_ctr)) 134 | dst_lat = np.abs(np.subtract.outer(lat,lat_ctr)) 135 | dst = np.sqrt(dst_lon**2+dst_lat**2) 136 | basis = np.where(dst>r_basis,0.,(1.-(dst/r_basis)**3)**3) 137 | basis = basis/np.sum(basis,axis=1)[:,None] 138 | nbs = basis.shape[1] 139 | 140 | 141 | 142 | ## Define functions for building a CNN 143 | 144 | def build_cat_model(n_xy, n_bins, n_basis, hidden_nodes, dropout_rate): 145 | #inp_imgs = Input(shape=(18,22,2,)) 146 | inp_imgs = Input(shape=(9,11,2,)) 147 | inp_basis = Input(shape=(n_xy,n_basis,)) 148 | inp_cl = Input(shape=(n_xy,n_bins,)) 149 | c = Conv2D(4, (3,3), activation='elu')(inp_imgs) 150 | #c = MaxPooling2D((2,2))(c) 151 | c = Conv2D(8, (3,3), activation='elu')(c) 152 | #c = MaxPooling2D((2,2))(c) 153 | x = Flatten()(c) 154 | for h in hidden_nodes: 155 | x = Dropout(dropout_rate)(x) 156 | x = Dense(h, activation='elu')(x) 157 | x = Dense(n_bins*n_basis, activation='elu')(x) 158 | x = Reshape((n_bins,n_basis))(x) 159 | z = Dot(axes=2)([inp_basis, x]) 160 | z = Add()([z, inp_cl]) 161 | out = Activation('softmax')(z) 162 | return Model(inputs=[inp_imgs, inp_basis, inp_cl], outputs=out) 163 | 164 | 165 | def modified_categorical_crossentropy(y_mat, prob_fcst): 166 | prob_obs_cat = K.sum(y_mat*prob_fcst,axis=2) 167 | return -K.mean(K.log(prob_obs_cat)) 168 | 169 | 170 | nyrs = 20 171 | 172 | #reg = 10.**np.arange(-6,-2) 173 | reg = np.arange(0.1,0.6,0.1) 174 | nreg = len(reg) 175 | 176 | imod = 0 177 | 178 | mod = [[10],[20],[10,10]] 179 | 180 | 181 | x = (np.arange(0,101)/5)**2 # evaluation points for numerical calculation of the CRPS 182 | dx = np.diff(x) 183 | 184 | opt_reg_param = np.zeros(nyrs, dtype=np.float32) 185 | opt_valid_scores = np.zeros((nyrs,5), dtype=np.float32) 186 | opt_valid_crps = np.zeros((nyrs,5), dtype=np.float32) 187 | 188 | 189 | for iyr in range(nyrs): 190 | print('year: ',iyr) 191 | # Calculate image predictors and basis functions 192 | apcp_obs_ind_cv = np.delete(apcp_obs_ind,iyr,axis=1) 193 | z500_pred_cv = np.delete(z500_ano,iyr,axis=1).reshape((ndts*(nyrs-1),ny,nx,1)) 194 | tcw_pred_cv = np.delete(tcw_ano,iyr,axis=1).reshape((ndts*(nyrs-1),ny,nx,1)) 195 | pred_imgs_cv = np.concatenate((z500_pred_cv,tcw_pred_cv),axis=3) 196 | basis_cv = np.repeat(basis[np.newaxis,:,:],ndts*(nyrs-1),axis=0) 197 | # Calculate climatological log probabilities for each class 198 | apcp_pop_cl = np.repeat(pop_doy[doy_dts,np.newaxis,:],nyrs-1,axis=1).reshape((ndts*(nyrs-1),nxy,1)) 199 | logp_cl_cv = np.concatenate((np.log(1.-apcp_pop_cl),np.repeat(np.log(apcp_pop_cl),ncat-1,axis=2)-np.log(ncat-1)),axis=2) 200 | # perform 5-fold cross validation to find optimal regularization 201 | date_order = np.arange(ndts*(nyrs-1)).reshape(ndts,nyrs-1).T.flatten() 202 | cv_ind = date_order[np.arange(ndts*(nyrs-1))%232<231] # remove the date between the 5 cross-validated blocks 203 | valid_score = np.zeros((nreg,5), dtype=np.float32) 204 | valid_crps = np.zeros((nreg,5), dtype=np.float32) 205 | for cvi in range(5): 206 | train_ind = cv_ind[np.arange(len(cv_ind))//(len(cv_ind)//5)!=cvi] 207 | valid_ind = cv_ind[np.arange(len(cv_ind))//(len(cv_ind)//5)==cvi] 208 | pred_imgs_train = pred_imgs_cv[train_ind,:,:,:] 209 | basis_train = basis_cv[train_ind,:,:] 210 | logp_cl_train = logp_cl_cv[train_ind,:,:] 211 | cat_targets_train = 
apcp_obs_cat[apcp_obs_ind_cv.flatten()[train_ind],:,:].astype(float) 212 | pred_imgs_valid = pred_imgs_cv[valid_ind,:,:] 213 | basis_valid = basis_cv[valid_ind,:,:] 214 | logp_cl_valid = logp_cl_cv[valid_ind,:,:] 215 | cat_targets_valid = apcp_obs_cat[apcp_obs_ind_cv.flatten()[valid_ind],:,:].astype(float) 216 | doy_valid = np.repeat(doy_dts[:,np.newaxis],nyrs-1,axis=1).flatten()[valid_ind] 217 | for ireg in range(nreg): 218 | # Define and fit CNN model (using batch gradient descent) 219 | keras.backend.clear_session() 220 | model = build_cat_model(nxy, ncat, nbs, mod[imod], reg[ireg]) 221 | model.compile(optimizer=Adam(0.01), loss=modified_categorical_crossentropy) 222 | model.fit([pred_imgs_train,basis_train,logp_cl_train], cat_targets_train, epochs=150, batch_size=len(train_ind), verbose=0) 223 | valid_score[ireg,cvi] = model.evaluate([pred_imgs_valid,basis_valid,logp_cl_valid], cat_targets_valid, batch_size=len(valid_ind), verbose=0) 224 | # Calculate CRPS for each cross-validation fold 225 | prob_fcst_cat = model.predict([pred_imgs_valid,basis_valid,logp_cl_valid]) 226 | prob_fcst_chf = -np.log(np.maximum(1.-np.cumsum(prob_fcst_cat,axis=2)[:,:,:(ncat-1)],1.e-10)) 227 | crps_fold = np.zeros((len(valid_ind),nxy),dtype=np.float32) 228 | for ivdt in range(len(valid_ind)): 229 | for ixy in range(nxy): 230 | itp_fct = interp1d(thr_doy[doy_valid[ivdt],ixy,:], prob_fcst_chf[ivdt,ixy,:], kind='linear',fill_value='extrapolate') 231 | bs = (1.-np.exp(-itp_fct(x))-1.*(apcp_obs[apcp_obs_ind_cv.flatten()[valid_ind[ivdt]],ixy]<=x))**2 232 | crps_fold[ivdt,ixy] = 0.5*np.sum((bs[1:]+bs[:len(dx)])*dx) 233 | valid_crps[ireg,cvi] = np.mean(crps_fold) 234 | opt_reg_ind = np.argmin(np.mean(valid_score,axis=1)) 235 | opt_reg_param[iyr] = reg[opt_reg_ind] 236 | opt_valid_scores[iyr,:] = valid_score[opt_reg_ind,:] 237 | opt_valid_crps[iyr,:] = valid_crps[opt_reg_ind,:] 238 | print(np.mean(valid_score,axis=1).round(3)) 239 | print(np.mean(valid_crps,axis=1).round(2)) 240 | print(opt_reg_param[iyr]) 241 | 242 | ### Save out to file 243 | outfilename = "/Users/mscheuerer/Desktop/CalifAPCP/tuning/cnn-2deg-m"+str(imod)+"-drpt-f48" 244 | np.savez(outfilename, opt_reg_param=opt_reg_param, opt_valid_scores=opt_valid_scores, opt_valid_crps=opt_valid_crps) 245 | 246 | 247 | 248 | 249 | 250 | -------------------------------------------------------------------------------- /S-CNN-FitConvolutionalNetworkModel.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import scipy as sp 4 | import math 5 | import os, sys 6 | #import matplotlib.pyplot as plt 7 | import datetime 8 | import time 9 | import keras 10 | import keras.backend as K 11 | 12 | from netCDF4 import Dataset 13 | from numpy import ma 14 | from numpy.linalg import solve 15 | from scipy import stats 16 | 17 | from keras import models 18 | from keras import layers 19 | from keras import regularizers 20 | 21 | from keras.layers import Input, Dense, Dot, Add, Activation, Conv2D, MaxPooling2D, Flatten, Reshape, Dropout 22 | from keras.models import Model 23 | from keras.optimizers import Adam 24 | 25 | #plt.ion() 26 | 27 | 28 | ## Load categorical analysis data 29 | 30 | f1 = np.load("/home/michael/Desktop/CalifAPCP/data/categorical_precip_obs_20cl.npz") 31 | #list(f1) 32 | lat = f1['obs_lat'] 33 | lon = f1['obs_lon'] 34 | obs_dates_ord = f1['obs_dates_ord'] 35 | pop_doy = f1['pop_doy'] 36 | apcp_obs_cat = f1['apcp_obs_cat'] 37 | f1.close() 38 | 39 | ndts, nxy, ncat = apcp_obs_cat.shape 40 | 41 | 
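## Illustrative aside (not part of the original pipeline): the climatological reference
## distribution constructed further below ('train_logp_cl') assigns probability 1-PoP to the
## dry category and splits PoP evenly across the remaining ncat-1 wet categories. A minimal
## sketch with a hypothetical PoP value, just to make that construction explicit:

pop_example = 0.4                                                    # hypothetical PoP, for illustration only
p_cl_example = np.append(1.-pop_example, np.full(ncat-1, pop_example/(ncat-1)))
assert np.isclose(np.sum(p_cl_example), 1.)                          # class probabilities sum to one
logp_cl_example = np.log(p_cl_example)                               # log probabilities, analogous to the 'inp_cl' input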
42 | 43 | ## Load ERA5 z500 and tcw fields, subset to 22 x 18 image, same for the ensemble forecast fields 44 | 45 | ixl = 10 46 | ixu = -6 47 | jyl = 6 48 | jyu = -6 49 | 50 | f2 = np.load("/home/michael/Desktop/CalifAPCP/data/z500_tcw_predictors_era5.npz") 51 | era5_dates_ord = f2['dates_ord'] 52 | era5_lon = f2['longitude'][ixl:ixu] 53 | era5_lat = f2['latitude'][jyl:jyu] 54 | z500 = f2['z500_1wk'][:,:,jyl:jyu,ixl:ixu] 55 | tcw = f2['tcw_1wk'][:,:,jyl:jyu,ixl:ixu] 56 | f2.close() 57 | 58 | ndts, nyrs, ny, nx = z500.shape 59 | 60 | 61 | z500_fcst = np.zeros((3,ndts,nyrs,11,ny,nx),dtype=np.float32) 62 | tcw_fcst = np.zeros((3,ndts,nyrs,11,ny,nx),dtype=np.float32) 63 | 64 | f3 = np.load("/home/michael/Desktop/CalifAPCP/data/z500_predictor_cnn.npz") 65 | mod_dates_ord = f3['mod_dates_ord'][:,:,6:21:7] 66 | 67 | f4 = np.load("/home/michael/Desktop/CalifAPCP/data/tcw_predictor_cnn.npz") 68 | 69 | for ilt in range(3): 70 | clead = 'week'+str(ilt+2) 71 | z500_fcst[ilt,:,:,:,:,:] = f3['z500_'+clead][:,:,:,jyl:jyu,ixl:ixu] # subset to 22 x 18 image 72 | tcw_fcst[ilt,:,:,:,:,:] = f4['tcw_'+clead][:,:,:,jyl:jyu,ixl:ixu] 73 | 74 | f3.close() 75 | f4.close() 76 | 77 | 78 | ## Upscale to 2 degrees 79 | 80 | z500_1deg = z500 81 | tcw_1deg = tcw 82 | z500 = (z500_1deg[:,:,0:ny:2,0:nx:2]+z500_1deg[:,:,1:ny:2,0:nx:2]+z500_1deg[:,:,0:ny:2,1:nx:2]+z500_1deg[:,:,1:ny:2,1:nx:2])/4. 83 | tcw = (tcw_1deg[:,:,0:ny:2,0:nx:2]+tcw_1deg[:,:,1:ny:2,0:nx:2]+tcw_1deg[:,:,0:ny:2,1:nx:2]+tcw_1deg[:,:,1:ny:2,1:nx:2])/4. 84 | z500_fcst_1deg = z500_fcst 85 | tcw_fcst_1deg = tcw_fcst 86 | z500_fcst = (z500_fcst_1deg[:,:,:,:,0:ny:2,0:nx:2]+z500_fcst_1deg[:,:,:,:,1:ny:2,0:nx:2]+z500_fcst_1deg[:,:,:,:,0:ny:2,1:nx:2]+z500_fcst_1deg[:,:,:,:,1:ny:2,1:nx:2])/4. 87 | tcw_fcst = (tcw_fcst_1deg[:,:,:,:,0:ny:2,0:nx:2]+tcw_fcst_1deg[:,:,:,:,1:ny:2,0:nx:2]+tcw_fcst_1deg[:,:,:,:,0:ny:2,1:nx:2]+tcw_fcst_1deg[:,:,:,:,1:ny:2,1:nx:2])/4. 
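## (The four terms above are the 2x2 block means of the 1-degree fields, i.e. a simple
##  2-degree upscaling; the grid dimensions ny and nx are halved accordingly below.)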
88 | ny = ny//2 89 | nx = nx//2 90 | 91 | 92 | 93 | ## Calculate doy for each analysis date and for each forecast valid date 94 | 95 | doy_dts = np.zeros(ndts,dtype=np.int32) 96 | apcp_obs_ind = np.zeros((ndts,nyrs),dtype=np.int32) 97 | for idt in range(ndts): 98 | for iyr in range(nyrs): 99 | apcp_obs_ind[idt,iyr] = np.where(obs_dates_ord==era5_dates_ord[idt,iyr])[0][0] 100 | date_ord = int(era5_dates_ord[idt,0]-0.5) 101 | doy_dts[idt] = min(364,(datetime.date.fromordinal(date_ord)-datetime.date(datetime.date.fromordinal(date_ord).year,1,1)).days) 102 | 103 | doy_fcst = np.zeros((3,ndts),dtype=np.int32) 104 | for idt in range(ndts): 105 | for ilt in range(3): 106 | date_ord = int(int(mod_dates_ord[idt,0,ilt])-0.5) 107 | doy_fcst[ilt,idt] = min(364,(datetime.date.fromordinal(date_ord)-datetime.date(datetime.date.fromordinal(date_ord).year,1,1)).days) 108 | 109 | 110 | 111 | ## Normalize tcw to 10th/90th climatological percentiles at each grid point 112 | 113 | tcw_q10 = np.percentile(tcw,10,axis=1) 114 | tcw_q90 = np.percentile(tcw,90,axis=1) 115 | tcw_q10_sm = np.zeros(tcw_q10.shape, dtype=np.float32) 116 | tcw_q90_sm = np.zeros(tcw_q90.shape, dtype=np.float32) 117 | 118 | tcw_fcst_q10 = np.percentile(tcw_fcst,10,axis=(2,3)) 119 | tcw_fcst_q90 = np.percentile(tcw_fcst,90,axis=(2,3)) 120 | tcw_fcst_q10_sm = np.zeros(tcw_fcst_q10.shape, dtype=np.float32) 121 | tcw_fcst_q90_sm = np.zeros(tcw_fcst_q90.shape, dtype=np.float32) 122 | 123 | X = np.ones((ndts,3), dtype=np.float32) # Fit harmonic function to annual cycle of tcw climatology 124 | X[:,1] = np.sin(2.*np.pi*era5_dates_ord[:,0]/365.25) 125 | X[:,2] = np.cos(2.*np.pi*era5_dates_ord[:,0]/365.25) 126 | 127 | for ix in range(nx): 128 | for jy in range(ny): 129 | coef_q10 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_q10[:,jy,ix])) 130 | tcw_q10_sm[:,jy,ix] = np.matmul(X,coef_q10) 131 | coef_q90 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_q90[:,jy,ix])) 132 | tcw_q90_sm[:,jy,ix] = np.matmul(X,coef_q90) 133 | for ilt in range(3): 134 | coef_q10 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_fcst_q10[ilt,:,jy,ix])) 135 | tcw_fcst_q10_sm[ilt,:,jy,ix] = np.matmul(X,coef_q10) 136 | coef_q90 = solve(np.matmul(X.T,X),np.matmul(X.T,tcw_fcst_q90[ilt,:,jy,ix])) 137 | tcw_fcst_q90_sm[ilt,:,jy,ix] = np.matmul(X,coef_q90) 138 | 139 | tcw_ano = -1.+2.*(tcw-tcw_q10_sm[:,None,:,:])/(tcw_q90_sm-tcw_q10_sm)[:,None,:,:] 140 | tcw_fcst_ano = -1.+2.*(tcw_fcst-tcw_fcst_q10_sm[:,:,None,None,:,:])/(tcw_fcst_q90_sm-tcw_fcst_q10_sm)[:,:,None,None,:,:] 141 | 142 | 143 | 144 | ## Normalize z500 to 1st/99th climatological percentiles across all grid points 145 | 146 | z500_q01 = np.percentile(z500,1,axis=(1,2,3)) 147 | z500_q99 = np.percentile(z500,99,axis=(1,2,3)) 148 | z500_fcst_q01 = np.percentile(z500_fcst,1,axis=(2,3,4,5)) 149 | z500_fcst_q99 = np.percentile(z500_fcst,99,axis=(2,3,4,5)) 150 | 151 | coef_q01 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_q01)) 152 | z500_q01_sm = np.matmul(X,coef_q01) 153 | coef_q99 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_q99)) 154 | z500_q99_sm = np.matmul(X,coef_q99) 155 | 156 | z500_fcst_q01_sm = np.zeros(z500_fcst_q01.shape, dtype=np.float32) 157 | z500_fcst_q99_sm = np.zeros(z500_fcst_q99.shape, dtype=np.float32) 158 | 159 | for ilt in range(3): 160 | coef_q01 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_fcst_q01[ilt,:])) 161 | z500_fcst_q01_sm[ilt,:] = np.matmul(X,coef_q01) 162 | coef_q99 = solve(np.matmul(X.T,X),np.matmul(X.T,z500_fcst_q99[ilt,:])) 163 | z500_fcst_q99_sm[ilt,:] = np.matmul(X,coef_q99) 164 | 165 | z500_ano = 
-1.+2.*(z500-z500_q01_sm[:,None,None,None])/(z500_q99_sm-z500_q01_sm)[:,None,None,None] 166 | z500_fcst_ano = -1.+2.*(z500_fcst-z500_fcst_q01_sm[:,:,None,None,None,None])/(z500_fcst_q99_sm-z500_fcst_q01_sm)[:,:,None,None,None,None] 167 | 168 | 169 | # Define basis functions 170 | 171 | r_basis = 7. 172 | lon_ctr = np.outer(np.arange(-124,-115,3.5),np.ones(3)).reshape(9)[[2,4,5,6,7]] 173 | lat_ctr = np.outer(np.ones(3),np.arange(33,42,3.5)).reshape(9)[[2,4,5,6,7]] 174 | 175 | dst_lon = np.abs(np.subtract.outer(lon,lon_ctr)) 176 | dst_lat = np.abs(np.subtract.outer(lat,lat_ctr)) 177 | dst = np.sqrt(dst_lon**2+dst_lat**2) 178 | basis = np.where(dst>r_basis,0.,(1.-(dst/r_basis)**3)**3) 179 | basis = basis/np.sum(basis,axis=1)[:,None] 180 | nbs = basis.shape[1] 181 | 182 | 183 | ## Define functions for building a CNN 184 | 185 | def build_cat_model(n_xy, n_bins, n_basis, hidden_nodes, dropout_rate): 186 | inp_imgs = Input(shape=(9,11,2,)) 187 | inp_basis = Input(shape=(n_xy,n_basis,)) 188 | inp_cl = Input(shape=(n_xy,n_bins,)) 189 | c = Conv2D(4, (3,3), activation='elu')(inp_imgs) 190 | c = Conv2D(8, (3,3), activation='elu')(c) 191 | x = Flatten()(c) 192 | for h in hidden_nodes: 193 | x = Dropout(dropout_rate)(x) 194 | x = Dense(h, activation='elu')(x) 195 | x = Dense(n_bins*n_basis, activation='elu')(x) 196 | x = Reshape((n_bins,n_basis))(x) 197 | z = Dot(axes=2)([inp_basis, x]) # Tensor product with basis functions 198 | z = Add()([z, inp_cl]) # Add (log) probability anomalies to log climatological probabilities 199 | out = Activation('softmax')(z) 200 | return Model(inputs=[inp_imgs, inp_basis, inp_cl], outputs=out) 201 | 202 | 203 | def modified_categorical_crossentropy(y_mat, prob_fcst): 204 | prob_obs_cat = K.sum(y_mat*prob_fcst,axis=2) 205 | return -K.mean(K.log(prob_obs_cat)) 206 | 207 | 208 | 209 | imod = 0 210 | 211 | mod = [[10],[20],[10,10]] 212 | 213 | f5 = np.load("/home/michael/Desktop/CalifAPCP/tuning/cnn-2deg-m"+str(imod)+"-drpt-f48.npz") 214 | opt_reg_param = f5['opt_reg_param'] 215 | f5.close() 216 | 217 | 218 | for iyr in range(0,20): 219 | print(iyr) 220 | # Split data into training and verification data set 221 | apcp_obs_ind_train = np.delete(apcp_obs_ind,iyr,axis=1) 222 | apcp_obs_ind_verif = apcp_obs_ind[:,iyr] 223 | z500_pred_train = np.delete(z500_ano,iyr,axis=1).reshape((ndts*(nyrs-1),ny,nx,1)) 224 | z500_pred_verif = z500_ano[:,iyr,:,:,None] 225 | z500_pred_fcst_train = np.delete(z500_fcst_ano,iyr,axis=2).reshape((3,ndts*(nyrs-1),11,ny,nx,1)) 226 | z500_pred_fcst_verif = z500_fcst_ano[:,:,iyr,:,:,:,None] 227 | tcw_pred_train = np.delete(tcw_ano,iyr,axis=1).reshape((ndts*(nyrs-1),ny,nx,1)) 228 | tcw_pred_verif = tcw_ano[:,iyr,:,:,None] 229 | tcw_pred_fcst_train = np.delete(tcw_fcst_ano,iyr,axis=2).reshape((3,ndts*(nyrs-1),11,ny,nx,1)) 230 | tcw_pred_fcst_verif = tcw_fcst_ano[:,:,iyr,:,:,:,None] 231 | # Calculate climatological log probabilities for each class 232 | apcp_lgp0_cl_train = np.repeat(np.log(1.-pop_doy[doy_dts,np.newaxis,:]),nyrs-1,axis=1).reshape((ndts*(nyrs-1),nxy,1)) 233 | apcp_lgp0_cl_verif = np.log(1.-pop_doy[doy_dts,:])[:,:,None] 234 | apcp_lgpop_cl_train = np.repeat(np.log(pop_doy[doy_dts,np.newaxis,:]),nyrs-1,axis=1).reshape((ndts*(nyrs-1),nxy,1)) 235 | apcp_lgpop_cl_verif = np.log(pop_doy[doy_dts,:])[:,:,None] 236 | apcp_lgp0_cl_fcst_train = np.zeros((3,ndts*(nyrs-1),nxy,1), dtype=np.float32) 237 | apcp_lgp0_cl_fcst_verif = np.zeros((3,ndts,nxy,1), dtype=np.float32) 238 | apcp_lgpop_cl_fcst_train = np.zeros((3,ndts*(nyrs-1),nxy,1), 
dtype=np.float32) 239 | apcp_lgpop_cl_fcst_verif = np.zeros((3,ndts,nxy,1), dtype=np.float32) 240 | for ilt in range(3): 241 | apcp_lgp0_cl_fcst_train[ilt,:,:,0] = np.repeat(np.log(1.-pop_doy[doy_fcst[ilt,:],np.newaxis,:]),nyrs-1,axis=1).reshape((ndts*(nyrs-1),nxy)) 242 | apcp_lgp0_cl_fcst_verif[ilt,:,:,0] = np.log(1.-pop_doy[doy_fcst[ilt,:],:]) 243 | apcp_lgpop_cl_fcst_train[ilt,:,:,0] = np.repeat(np.log(pop_doy[doy_fcst[ilt,:],np.newaxis,:]),nyrs-1,axis=1).reshape((ndts*(nyrs-1),nxy)) 244 | apcp_lgpop_cl_fcst_verif[ilt,:,:,0] = np.log(pop_doy[doy_fcst[ilt,:],:]) 245 | # Compose training data (large-scale predictors, auxiliary predictors, climatological probabilities, observed categories) 246 | train_pred_imgs = np.concatenate((z500_pred_train,tcw_pred_train),axis=3) 247 | train_basis = np.repeat(basis[np.newaxis,:,:],ndts*(nyrs-1),axis=0) 248 | train_logp_cl = np.concatenate((apcp_lgp0_cl_train,np.repeat(apcp_lgpop_cl_train,ncat-1,axis=2)-np.log(ncat-1)),axis=2) 249 | train_cat_targets = apcp_obs_cat[apcp_obs_ind_train.flatten(),:,:].astype(float) 250 | # Define and fit CNN model 251 | keras.backend.clear_session() 252 | model = build_cat_model(nxy, ncat, nbs, mod[imod], opt_reg_param[iyr]) 253 | model.compile(optimizer=Adam(0.01), loss=modified_categorical_crossentropy) 254 | model.fit([train_pred_imgs,train_basis,train_logp_cl], train_cat_targets, epochs=150, batch_size=ndts*(nyrs-1), verbose=1) 255 | # Calculate ERA-5 probability forecasts 256 | verif_pred_imgs = np.concatenate((z500_pred_verif,tcw_pred_verif),axis=3) 257 | verif_basis = np.repeat(basis[np.newaxis,:,:],ndts,axis=0) 258 | verif_logp_cl = np.concatenate((apcp_lgp0_cl_verif,np.repeat(apcp_lgpop_cl_verif,ncat-1,axis=2)-np.log(ncat-1)),axis=2) 259 | prob_fcst_cat_era5 = model.predict([verif_pred_imgs,verif_basis,verif_logp_cl]) 260 | # Calculate ensemble-based, mean probability forecasts 261 | logp_ano_ensmean_train = np.zeros((3,ndts*(nyrs-1),nxy,ncat), dtype=np.float32) 262 | logp_ano_ensmean_verif = np.zeros((3,ndts,nxy,ncat), dtype=np.float32) 263 | for ilt in range(3): 264 | train_logp_cl = np.concatenate((apcp_lgp0_cl_fcst_train[ilt,:,:,:],np.repeat(apcp_lgpop_cl_fcst_train[ilt,:,:,:],ncat-1,axis=2)-np.log(ncat-1)),axis=2) 265 | verif_logp_cl = np.concatenate((apcp_lgp0_cl_fcst_verif[ilt,:,:,:],np.repeat(apcp_lgpop_cl_fcst_verif[ilt,:,:,:],ncat-1,axis=2)-np.log(ncat-1)),axis=2) 266 | prob_fcst_cat_ens_train = np.zeros((11,ndts*(nyrs-1),nxy,ncat), dtype=np.float32) 267 | prob_fcst_cat_ens_verif = np.zeros((11,ndts,nxy,ncat), dtype=np.float32) 268 | for imem in range(11): 269 | train_pred_imgs = np.concatenate((z500_pred_fcst_train[ilt,:,imem,:,:,:],tcw_pred_fcst_train[ilt,:,imem,:,:,:]),axis=3) 270 | prob_fcst_cat_ens_train[imem,:,:,:] = model.predict([train_pred_imgs,train_basis,train_logp_cl]) 271 | verif_pred_imgs = np.concatenate((z500_pred_fcst_verif[ilt,:,imem,:,:,:],tcw_pred_fcst_verif[ilt,:,imem,:,:,:]),axis=3) 272 | prob_fcst_cat_ens_verif[imem,:,:,:] = model.predict([verif_pred_imgs,verif_basis,verif_logp_cl]) 273 | logp_ano_ensmean_train[ilt,:,:,:] = np.mean(np.log(prob_fcst_cat_ens_train),axis=0) - train_logp_cl # Reconstruct the log probability anomalies 274 | logp_ano_ensmean_verif[ilt,:,:,:] = np.mean(np.log(prob_fcst_cat_ens_verif),axis=0) - verif_logp_cl # for each ensemble member and calculate mean 275 | ### Save out to file 276 | outfilename = "/home/michael/Desktop/CalifAPCP/forecasts/CNN-rv/probfcst_cnn-m"+str(imod)+"-drpt-2deg_yr"+str(iyr) 277 | np.savez(outfilename, 
prob_fcst_cat_era5=prob_fcst_cat_era5, \ 278 | logp_ano_ensmean_train=logp_ano_ensmean_train, \ 279 | logp_ano_ensmean_verif=logp_ano_ensmean_verif, \ 280 | apcp_lgp0_cl_fcst_train=apcp_lgp0_cl_fcst_train, \ 281 | apcp_lgp0_cl_fcst_verif=apcp_lgp0_cl_fcst_verif, \ 282 | apcp_lgpop_cl_fcst_train=apcp_lgpop_cl_fcst_train, \ 283 | apcp_lgpop_cl_fcst_verif=apcp_lgpop_cl_fcst_verif) 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | -------------------------------------------------------------------------------- /S-CSGD-GenerateForecastDistributions.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import scipy as sp 4 | import math 5 | import os, sys 6 | import datetime 7 | import time 8 | import matplotlib.pyplot as plt 9 | 10 | from netCDF4 import Dataset 11 | from numpy import ma 12 | from numpy.random import random_sample 13 | from numpy.linalg import solve 14 | from scipy import stats 15 | from scipy.stats import kendalltau 16 | from scipy.stats import gamma 17 | from scipy.special import beta 18 | from scipy.optimize import minimize 19 | from scipy.interpolate import * 20 | 21 | 22 | 23 | #plt.ion() 24 | 25 | rho = 3 # neighborhood radius (degrees) 26 | rho2 = rho**2 27 | 28 | #r = 300. # neighborhood radius (kilometers) 29 | #R = 6373. # earth radius (kilometers) 30 | 31 | 32 | leadDay = 20 # Start of the forecast period 33 | accumulation = 7 # Precipitation accumulation period 34 | 35 | 36 | f1 = np.load("/home/michael/Desktop/CalifAPCP/data/precip_PRISM_cal_19810101_20171231.npz") 37 | #list(f1) 38 | obs_precip = f1['precip'] 39 | obs_lat = f1['lat'] 40 | obs_lon = f1['lon'] 41 | obs_dates_ord = f1['dates_ord'] 42 | obs_dates = f1['dates'] 43 | f1.close() 44 | 45 | ndays, nxy = obs_precip.shape 46 | 47 | obs_precip_week = np.zeros((ndays-6,nxy), dtype=np.float32) 48 | for iday in range(7): 49 | obs_precip_week += obs_precip[iday:(ndays-6+iday),:] 50 | 51 | nwks, nxy = obs_precip_week.shape 52 | 53 | obs_precip_week[obs_precip_week<0.254] = 0. 
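# (0.254 mm = 0.01 in; weekly totals below this trace amount are set to zero above.)

## Aside, not part of the original script: the 7-day accumulation loop above can equivalently
## be computed with a cumulative sum along the time axis. A minimal sketch that reproduces the
## weekly totals prior to the trace thresholding:

csum = np.cumsum(obs_precip, axis=0, dtype=np.float64)
obs_precip_week_alt = (csum[6:,:]-np.concatenate((np.zeros((1,nxy)),csum[:-7,:]),axis=0)).astype(np.float32)
#np.allclose(obs_precip_week_alt[obs_precip_week>0.], obs_precip_week[obs_precip_week>0.])  # -> True up to rounding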
54 | obs_dates_ord = obs_dates_ord[:nwks] 55 | obs_dates = obs_dates[:nwks] 56 | 57 | 58 | f2 = np.load("/home/michael/Desktop/CalifAPCP/data/precip_climatological_csgd.npz") 59 | pop_cl = f2['pop_cl_doy'] 60 | mean_cl = f2['mean_cl_doy'] 61 | shape_cl = f2['shape_cl_doy'] 62 | scale_cl = f2['scale_cl_doy'] 63 | shift_cl = f2['shift_cl_doy'] 64 | f2.close() 65 | 66 | 67 | 68 | f3 = np.load("/home/michael/Desktop/CalifAPCP/data/mod_precip_calplus.npz") 69 | ### Modeled precip is (reforecast time, member, year, lead time, lat, lon) 70 | mod_precip = f3['precip'] 71 | #mod_dates_ord = f3['datesOrd'] 72 | mod_lon = f3['lon'] 73 | mod_lat = f3['lat'] 74 | f3.close() 75 | 76 | f3 = np.load("/home/michael/Desktop/CalifAPCP/data/mod_precip_cal.npz") 77 | mod_dates_ord = f3['dates_ord'] # Need to load dates from other file since dates in 'mod_precip_calplus.npz' are incorrect 78 | f3.close() 79 | 80 | ndts, nmem, nyrs, nlts, nlat, nlon = mod_precip.shape 81 | 82 | ### Modeled precip accumulated over forecast period (reforecast time, year, ensembles, space) 83 | mod_precip_fcstperiod = np.sum(mod_precip[:,:,:,leadDay:leadDay+accumulation,:,:],axis=3).reshape((ndts,nmem,nyrs,nlon*nlat)) 84 | mod_dates_fcstperiod = mod_dates_ord[:,:,leadDay] 85 | 86 | 87 | ### Calculate day of the year ('doy') for each reforecast date 88 | doy = np.zeros(ndts,dtype=np.int32) 89 | for idt in range(ndts): 90 | yyyy = datetime.date.fromordinal(int(mod_dates_fcstperiod[idt,0])).year 91 | doy[idt] = (datetime.date.fromordinal(int(mod_dates_fcstperiod[idt,0]))-datetime.date(yyyy,1,1)).days 92 | 93 | 94 | ## Define function for calculating weighted mean absolute difference of a sample 95 | def wgt_meandiff(ensfcst, weights): 96 | n, m, k = ensfcst.shape 97 | d = m*k 98 | res = np.zeros(n, dtype=np.float32) 99 | inz = np.where(np.greater(np.sum(ensfcst>0.0,axis=(1,2)),0))[0] 100 | x = ensfcst[inz,:,:].reshape(len(inz),d) 101 | w = weights.reshape(d) 102 | x_ord = np.argsort(x,axis=1) 103 | for i in range(len(inz)): 104 | x_sort = x[i,x_ord[i,]] 105 | W = np.cumsum(w[x_ord[i,]]) 106 | res[inz[i]] = 2*sum(W[0:(d-1)]*(1.0-W[0:(d-1)])*np.diff(x_sort)) 107 | return res 108 | 109 | 110 | def crpsCondCSGD(par,obs,ensmeanano,ensmeandiffano,muc,sigmac,shiftc): 111 | # average CRPS for CSGD conditional on the ensemble statistics 112 | logarg = par[1] + par[2]*ensmeanano 113 | mu = muc * np.log1p(np.expm1(par[0])*logarg) / par[0] 114 | # sigma = sigmac * (par[3]*np.sqrt(mu/muc)) 115 | sigma = sigmac * (par[3]*np.sqrt(mu/muc)+par[4]*ensmeandiffano) 116 | shape = np.square(mu/sigma) 117 | scale = np.square(sigma)/mu 118 | shift = shiftc 119 | betaf = beta(0.5,shape+0.5) 120 | cstd = (0.254-shift)/scale 121 | ystd = np.maximum(obs-shift,0.0)/scale 122 | Fyk = sp.stats.gamma.cdf(ystd,shape,scale=1) 123 | Fck = sp.stats.gamma.cdf(cstd,shape,scale=1) 124 | FykP1 = sp.stats.gamma.cdf(ystd,shape+1,scale=1) 125 | FckP1 = sp.stats.gamma.cdf(cstd,shape+1,scale=1) 126 | F2c2k = sp.stats.gamma.cdf(2*cstd,2*shape,scale=1) 127 | crps = ystd*(2.*Fyk-1.) 
- cstd*np.square(Fck) + shape*(1.+2.*Fck*FckP1-np.square(Fck)-2*FykP1) - (shape/float(math.pi))*betaf*(1.-F2c2k) 128 | return ma.mean(scale*crps) 129 | 130 | 131 | param_initial = [0.05,0.5,0.5,0.7,0.5] 132 | param_ranges = ((0.001,1.0), (0.01,1.0), (0.0,3.0), (0.1,1.0), (0.0,3.0)) 133 | 134 | par_reg = np.zeros((nyrs,nxy,5), dtype=np.float32) 135 | csgd_pars_fcst = np.zeros((ndts,nyrs,nxy,3), dtype=np.float32) 136 | 137 | for iyr in range(nyrs): 138 | print(iyr) 139 | ### Split data into training and verification data, save day index of observational data 140 | doy_train = np.outer(doy,np.ones(19,dtype=np.int32)).flatten() 141 | apcp_obs_ind_train = np.zeros((ndts,nyrs),dtype=np.int32) 142 | apcp_obs_ind_verif = np.zeros(ndts,dtype=np.int32) 143 | for idt in range(ndts): 144 | apcp_obs_ind_verif[idt] = np.nonzero(mod_dates_fcstperiod[idt,iyr]==obs_dates_ord)[0][0] 145 | for jyr in range(0,nyrs): 146 | apcp_obs_ind_train[idt,jyr] = np.nonzero(mod_dates_fcstperiod[idt,jyr]==obs_dates_ord)[0][0] 147 | apcp_obs_ind_train = np.delete(apcp_obs_ind_train,iyr,axis=1) 148 | ensfcst_train = np.delete(mod_precip_fcstperiod,iyr,axis=2) 149 | ensfcst_clavg = np.mean(ensfcst_train,axis=(1,2)) 150 | ensfcst_clavg_sm = np.zeros((ndts,nlon*nlat), dtype=np.float32) 151 | for idt in range(ndts): 152 | wnd_ind = np.minimum(np.minimum(abs(doy[idt]-doy),abs(doy[idt]-365-doy)),abs(doy[idt]+365-doy))<31 153 | ensfcst_clavg_sm[idt,:] = np.mean(ensfcst_clavg[wnd_ind,:],axis=0) 154 | ensfcst_ano_train = ensfcst_train / ensfcst_clavg_sm[:,None,None,:] 155 | ensfcst_ano_verif = mod_precip_fcstperiod[:,:,iyr,:] / ensfcst_clavg_sm[:,None,:] 156 | for ixy in range(nxy): 157 | dx2 = np.square(obs_lon[ixy]-mod_lon) 158 | dy2 = np.square(obs_lat[ixy]-mod_lat) 159 | dst2 = np.add.outer(dy2,dx2).reshape(nlon*nlat) 160 | use = (dst2p33) 111 | pot33pCSGD[idt,iyr,:] = 1.-gamma.cdf((p33-shift)/scale,shape) 112 | brier33pCSGD[idt,iyr,:] = (exc33p[idt,iyr,:]-pot33pCSGD[idt,iyr,:])**2 113 | p67 = qtev_doy[doy_dts[idt],:,1] 114 | exc67p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p67) 115 | pot67pCSGD[idt,iyr,:] = 1.-gamma.cdf((p67-shift)/scale,shape) 116 | brier67pCSGD[idt,iyr,:] = (exc67p[idt,iyr,:]-pot67pCSGD[idt,iyr,:])**2 117 | p85 = qtev_doy[doy_dts[idt],:,2] 118 | exc85p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p85) 119 | pot85pCSGD[idt,iyr,:] = 1.-gamma.cdf((p85-shift)/scale,shape) 120 | brier85pCSGD[idt,iyr,:] = (exc85p[idt,iyr,:]-pot85pCSGD[idt,iyr,:])**2 121 | 122 | 123 | outfilename = "/home/michael/Desktop/CalifAPCP/results/scores-rv2_"+clead 124 | np.savez(outfilename, crpsCSGD=crpsCSGD, \ 125 | exc33p=exc33p, pot33pCSGD=pot33pCSGD, Bs33pCSGD=brier33pCSGD, \ 126 | exc67p=exc67p, pot67pCSGD=pot67pCSGD, Bs67pCSGD=brier67pCSGD, \ 127 | exc85p=exc85p, pot85pCSGD=pot85pCSGD, Bs85pCSGD=brier85pCSGD) 128 | 129 | 130 | 131 | 132 | 133 | 134 | ### Calculate skill scores for ANN predictions w/o climatology probabilities 135 | 136 | exc33p = np.zeros(obs_precip_vdate.shape) 137 | pot33pANN = np.zeros(obs_precip_vdate.shape) 138 | brier33pANN = np.zeros(obs_precip_vdate.shape) 139 | 140 | exc67p = np.zeros(obs_precip_vdate.shape) 141 | pot67pANN = np.zeros(obs_precip_vdate.shape) 142 | brier67pANN = np.zeros(obs_precip_vdate.shape) 143 | 144 | exc85p = np.zeros(obs_precip_vdate.shape) 145 | pot85pANN = np.zeros(obs_precip_vdate.shape) 146 | brier85pANN = np.zeros(obs_precip_vdate.shape) 147 | 148 | rpsANN = np.zeros(obs_precip_vdate.shape) 149 | crpsANN = np.zeros(obs_precip_vdate.shape) 150 | 151 | 152 | x = 
(np.arange(0,101)/5)**2 # evaluation points for numerical approximation of the CRPS 153 | dx = np.diff(x) 154 | 155 | for iyr in range(nyrs): 156 | print(iyr) 157 | f4 = np.load("/home/michael/Desktop/CalifAPCP/stats/ensemble_stats_"+clead+"_ANN_yr"+str(iyr)+".npz") 158 | doy_dts = f4['doy_dts'] 159 | f4.close() 160 | f5 = np.load("/home/michael/Desktop/CalifAPCP/forecasts/ANN-rv/probfcst_10-l1_"+clead+"_yr"+str(iyr)+".npz") 161 | prob_fcst_cat = f5['prob_fcst_cat'] 162 | f5.close() 163 | prob_fcst_chf = -np.log(1.-np.cumsum(prob_fcst_cat,axis=2)[:,:,:(ncat-1)]) 164 | prob_over_thr = np.zeros((ndts,nxy,qtev_doy.shape[2]),dtype=np.float32) 165 | for idt in range(ndts): 166 | ### Calculate exceedance ANN probabilities from interpolated cumulative hazard function 167 | for ixy in range(nxy): 168 | itp_fct = interp1d(thr_doy[doy_dts[idt],ixy,:], prob_fcst_chf[idt,ixy,:], kind='linear',fill_value='extrapolate') 169 | prob_over_thr = np.exp(-itp_fct(qtev_doy[doy_dts[idt],ixy,:])) 170 | pot33pANN[idt,iyr,ixy] = prob_over_thr[0] 171 | pot67pANN[idt,iyr,ixy] = prob_over_thr[1] 172 | pot85pANN[idt,iyr,ixy] = prob_over_thr[2] 173 | ## Calculate CRPS for ANN 174 | bs = (1.-np.exp(-itp_fct(x))-1.*(obs_precip_vdate[idt,iyr,ixy]<=x))**2 175 | crpsANN[idt,iyr,ixy] = 0.5*np.sum((bs[1:]+bs[:len(dx)])*dx) 176 | ### Calculate threshold exceedances for the Brier scores used to approximate the CRPS 177 | crps_exc = 1.*np.less_equal.outer(obs_precip_vdate[idt,iyr,:],x) 178 | ## Calculate Brier scores for different thresholds 179 | p33 = qtev_doy[doy_dts[idt],:,0] 180 | exc33p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p33) 181 | brier33pANN[idt,iyr,:] = (exc33p[idt,iyr,:]-pot33pANN[idt,iyr,:])**2 182 | p67 = qtev_doy[doy_dts[idt],:,1] 183 | exc67p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p67) 184 | brier67pANN[idt,iyr,:] = (exc67p[idt,iyr,:]-pot67pANN[idt,iyr,:])**2 185 | p85 = qtev_doy[doy_dts[idt],:,2] 186 | exc85p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p85) 187 | brier85pANN[idt,iyr,:] = (exc85p[idt,iyr,:]-pot85pANN[idt,iyr,:])**2 188 | 189 | 190 | outfilename = "/home/michael/Desktop/CalifAPCP/results/scores-rv3_"+clead 191 | np.savez(outfilename, crpsANN=crpsANN, \ 192 | exc33p=exc33p, pot33pANN=pot33pANN, Bs33pANN=brier33pANN, \ 193 | exc67p=exc67p, pot67pANN=pot67pANN, Bs67pANN=brier67pANN, \ 194 | exc85p=exc85p, pot85pANN=pot85pANN, Bs85pANN=brier85pANN) 195 | 196 | 197 | 198 | 199 | ### Calculate skill scores for CNN predictions with different architectures for the convolutional layers 200 | 201 | f2 = np.load("/home/michael/Desktop/CalifAPCP/data/z500_tcw_predictors_era5.npz") 202 | mod_dates = f2['dates_ord'] 203 | f2.close() 204 | 205 | ndts, nyrs = mod_dates.shape 206 | 207 | 208 | doy_dts = np.zeros(ndts,dtype=np.int32) 209 | obs_precip_vdate = np.zeros((ndts,nyrs,nxy),dtype=np.float32) 210 | for idt in range(ndts): 211 | for iyr in range(nyrs): 212 | fnd = np.nonzero(obs_dates_ord==mod_dates[idt,iyr])[0][0] 213 | obs_precip_vdate[idt,iyr,:] = obs_precip_week[fnd,:] 214 | date_ord = int(mod_dates[idt,-1]-0.5) 215 | doy_dts[idt] = min(364,(datetime.date.fromordinal(date_ord)-datetime.date(datetime.date.fromordinal(date_ord).year,1,1)).days) 216 | 217 | 218 | ### Calculate skill scores 219 | 220 | exc33p = np.zeros(obs_precip_vdate.shape) 221 | brier33pClm = np.zeros(obs_precip_vdate.shape) 222 | pot33pCNN = np.zeros(obs_precip_vdate.shape) 223 | brier33pCNN = np.zeros(obs_precip_vdate.shape) 224 | 225 | exc67p = np.zeros(obs_precip_vdate.shape) 226 | brier67pClm = np.zeros(obs_precip_vdate.shape) 
227 | pot67pCNN = np.zeros(obs_precip_vdate.shape) 228 | brier67pCNN = np.zeros(obs_precip_vdate.shape) 229 | 230 | exc85p = np.zeros(obs_precip_vdate.shape) 231 | brier85pClm = np.zeros(obs_precip_vdate.shape) 232 | pot85pCNN = np.zeros(obs_precip_vdate.shape) 233 | brier85pCNN = np.zeros(obs_precip_vdate.shape) 234 | 235 | rpsClm = np.zeros(obs_precip_vdate.shape) 236 | rpsCNN = np.zeros(obs_precip_vdate.shape) 237 | 238 | crpsClm = np.zeros(obs_precip_vdate.shape) 239 | crpsCNN = np.zeros(obs_precip_vdate.shape) 240 | 241 | 242 | wwCl = 15 243 | 244 | x = (np.arange(0,101)/5)**2 # evaluation points for numerical approximation of the CRPS 245 | dx = np.diff(x) 246 | 247 | 248 | imod = 0 249 | 250 | for iyr in range(nyrs): 251 | print(iyr) 252 | f5 = np.load("/home/michael/Desktop/CalifAPCP/forecasts/CNN-rv/probfcst_cnn-m"+str(imod)+"-drpt-2deg_yr"+str(iyr)+".npz") 253 | prob_fcst_cat = f5['prob_fcst_cat_era5'] 254 | f5.close() 255 | #f5 = np.load("/home/michael/Desktop/CalifAPCP/forecasts/CNN/probfcst_cnn-m"+str(imod)+"-drpt-f48_"+clead+"_yr"+str(iyr)+".npz") 256 | #prob_fcst_cat = f5['prob_fcst_cat'] 257 | #f5.close() 258 | prob_fcst_chf = -np.log(1.-np.cumsum(prob_fcst_cat,axis=2)[:,:,:(ncat-1)]) 259 | prob_over_thr = np.zeros((ndts,nxy,qtev_doy.shape[2]),dtype=np.float32) 260 | for idt in range(ndts): 261 | windowClm = np.argsort(np.abs(idt-np.arange(ndts)))[:wwCl] 262 | ### Calculate exceedance ANN probabilities from interpolated cumulative hazard function 263 | for ixy in range(nxy): 264 | itp_fct = interp1d(thr_doy[doy_dts[idt],ixy,:], prob_fcst_chf[idt,ixy,:], kind='linear',fill_value='extrapolate') 265 | prob_over_thr = np.exp(-itp_fct(qtev_doy[doy_dts[idt],ixy,:])) 266 | pot33pCNN[idt,iyr,ixy] = prob_over_thr[0] 267 | pot67pCNN[idt,iyr,ixy] = prob_over_thr[1] 268 | pot85pCNN[idt,iyr,ixy] = prob_over_thr[2] 269 | ## Calculate CRPS for CNN 270 | bs = (1.-np.exp(-itp_fct(x))-1.*(obs_precip_vdate[idt,iyr,ixy]<=x))**2 271 | crpsCNN[idt,iyr,ixy] = 0.5*np.sum((bs[1:]+bs[:len(dx)])*dx) 272 | ### Get current year and julian day to use to select climatological percentiles 273 | currentYear = datetime.date.fromordinal(int(mod_dates[idt,iyr])).year 274 | currentDay = (datetime.date.fromordinal(int(mod_dates[idt,iyr]))-datetime.date(currentYear,1,1)).days 275 | obsClm = obs_precip_vdate[windowClm,:,:].reshape((wwCl*nyrs,nxy)) 276 | crps_exc = 1.*np.less_equal.outer(obs_precip_vdate[idt,iyr,:],x) 277 | ## Calculate CRPS for Clm 278 | clm_cdf = np.mean(obsClm[:,:,None]<=x[None,None,:],axis=0) 279 | bs = (clm_cdf-crps_exc)**2 280 | crpsClm[idt,iyr,:] = 0.5*np.sum((bs[:,1:]+bs[:,:len(dx)])*dx[None,:],axis=1) 281 | ## Calculate Brier scores for different thresholds 282 | p33 = qtev_doy[doy_dts[idt],:,0] 283 | exc33p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p33) 284 | brier33pClm[idt,iyr,:] = (exc33p[idt,iyr,:]-np.mean(obsClm>p33[None,:],axis=0))**2 285 | brier33pCNN[idt,iyr,:] = (exc33p[idt,iyr,:]-pot33pCNN[idt,iyr,:])**2 286 | p67 = qtev_doy[doy_dts[idt],:,1] 287 | exc67p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p67) 288 | brier67pClm[idt,iyr,:] = (exc67p[idt,iyr,:]-np.mean(obsClm>p67[None,:],axis=0))**2 289 | brier67pCNN[idt,iyr,:] = (exc67p[idt,iyr,:]-pot67pCNN[idt,iyr,:])**2 290 | p85 = qtev_doy[doy_dts[idt],:,2] 291 | exc85p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p85) 292 | brier85pClm[idt,iyr,:] = (exc85p[idt,iyr,:]-np.mean(obsClm>p85[None,:],axis=0))**2 293 | brier85pCNN[idt,iyr,:] = (exc85p[idt,iyr,:]-pot85pCNN[idt,iyr,:])**2 294 | 295 | 296 | outfilename = 
"/home/michael/Desktop/CalifAPCP/results/scores-rv5" 297 | np.savez(outfilename, crpsClm=crpsClm, crpsCNN=crpsCNN, \ 298 | exc33p=exc33p, pot33pCNN=pot33pCNN, Bs33pClm=brier33pClm, Bs33pCNN=brier33pCNN, \ 299 | exc67p=exc67p, pot67pCNN=pot67pCNN, Bs67pClm=brier67pClm, Bs67pCNN=brier67pCNN, \ 300 | exc85p=exc85p, pot85pCNN=pot85pCNN, Bs85pClm=brier85pClm, Bs85pCNN=brier85pCNN) 301 | 302 | 303 | 304 | 305 | # calculate ranked probability score 306 | rpsClm = brier33pClm + brier67pClm + brier85pClm 307 | rpsCNN = brier33pCNN + brier67pCNN + brier85pCNN 308 | 309 | # rpssAvgCNN 310 | round(1.-np.sum(rpsCNN)/np.sum(rpsClm),4) 311 | 312 | 313 | 0.4183 # 1deg, max pooling 314 | 0.4081 # 1deg, no max pooling 315 | 0.4172 # 2deg, no max pooling 316 | 317 | 318 | 319 | 320 | 321 | 322 | ## Now based on IFS ensemble forecasts 323 | 324 | leadDay = 20 # d works out to being a d+0.5 day forecast 325 | accumulation = 7 # Precipitation accumulation period 326 | 327 | clead = 'week'+str((leadDay+8)//7) 328 | 329 | 330 | f2 = np.load("/home/michael/Desktop/CalifAPCP/data/mod_precip_cal.npz") 331 | mod_dates = f2['dates_ord'][:,:,leadDay] 332 | f2.close() 333 | 334 | ndts, nyrs = mod_dates.shape 335 | 336 | 337 | doy_dts = np.zeros(ndts,dtype=np.int32) 338 | obs_precip_vdate = np.zeros((ndts,nyrs,nxy),dtype=np.float32) 339 | for idt in range(ndts): 340 | for iyr in range(nyrs): 341 | fnd = np.nonzero(obs_dates_ord==mod_dates[idt,iyr])[0][0] 342 | obs_precip_vdate[idt,iyr,:] = obs_precip_week[fnd,:] 343 | date_ord = int(mod_dates[idt,-1]-0.5) 344 | doy_dts[idt] = min(364,(datetime.date.fromordinal(date_ord)-datetime.date(datetime.date.fromordinal(date_ord).year,1,1)).days) 345 | 346 | 347 | ### Calculate skill scores 348 | 349 | exc33p = np.zeros(obs_precip_vdate.shape) 350 | brier33pClm = np.zeros(obs_precip_vdate.shape) 351 | pot33pCNN = np.zeros(obs_precip_vdate.shape) 352 | brier33pCNN = np.zeros(obs_precip_vdate.shape) 353 | 354 | exc67p = np.zeros(obs_precip_vdate.shape) 355 | brier67pClm = np.zeros(obs_precip_vdate.shape) 356 | pot67pCNN = np.zeros(obs_precip_vdate.shape) 357 | brier67pCNN = np.zeros(obs_precip_vdate.shape) 358 | 359 | exc85p = np.zeros(obs_precip_vdate.shape) 360 | brier85pClm = np.zeros(obs_precip_vdate.shape) 361 | pot85pCNN = np.zeros(obs_precip_vdate.shape) 362 | brier85pCNN = np.zeros(obs_precip_vdate.shape) 363 | 364 | rpsClm = np.zeros(obs_precip_vdate.shape) 365 | rpsCNN = np.zeros(obs_precip_vdate.shape) 366 | 367 | crpsClm = np.zeros(obs_precip_vdate.shape) 368 | crpsCNN = np.zeros(obs_precip_vdate.shape) 369 | 370 | 371 | wwCl = 15 372 | 373 | x = (np.arange(0,101)/5)**2 # evaluation points for numerical approximation of the CRPS 374 | dx = np.diff(x) 375 | 376 | 377 | imod = 0 378 | 379 | for iyr in range(nyrs): 380 | print(iyr) 381 | f5 = np.load("/home/michael/Desktop/CalifAPCP/forecasts/CNN-rv/probfcst_cnn-m"+str(imod)+"-drpt-2deg_"+clead+"_yr"+str(iyr)+".npz") 382 | prob_fcst_cat = f5['prob_fcst_cat'] 383 | f5.close() 384 | prob_fcst_chf = -np.log(1.-np.cumsum(prob_fcst_cat,axis=2)[:,:,:(ncat-1)]) 385 | prob_over_thr = np.zeros((ndts,nxy,qtev_doy.shape[2]),dtype=np.float32) 386 | for idt in range(ndts): 387 | windowClm = np.argsort(np.abs(idt-np.arange(ndts)))[:wwCl] 388 | ### Calculate exceedance ANN probabilities from interpolated cumulative hazard function 389 | for ixy in range(nxy): 390 | itp_fct = interp1d(thr_doy[doy_dts[idt],ixy,:], prob_fcst_chf[idt,ixy,:], kind='linear',fill_value='extrapolate') 391 | prob_over_thr = 
np.exp(-itp_fct(qtev_doy[doy_dts[idt],ixy,:])) 392 | pot33pCNN[idt,iyr,ixy] = prob_over_thr[0] 393 | pot67pCNN[idt,iyr,ixy] = prob_over_thr[1] 394 | pot85pCNN[idt,iyr,ixy] = prob_over_thr[2] 395 | ## Calculate CRPS for CNN 396 | bs = (1.-np.exp(-itp_fct(x))-1.*(obs_precip_vdate[idt,iyr,ixy]<=x))**2 397 | crpsCNN[idt,iyr,ixy] = 0.5*np.sum((bs[1:]+bs[:len(dx)])*dx) 398 | ### Get current year and julian day to use to select climatological percentiles 399 | currentYear = datetime.date.fromordinal(int(mod_dates[idt,iyr])).year 400 | currentDay = (datetime.date.fromordinal(int(mod_dates[idt,iyr]))-datetime.date(currentYear,1,1)).days 401 | obsClm = obs_precip_vdate[windowClm,:,:].reshape((wwCl*nyrs,nxy)) 402 | crps_exc = 1.*np.less_equal.outer(obs_precip_vdate[idt,iyr,:],x) 403 | ## Calculate CRPS for Clm 404 | clm_cdf = np.mean(obsClm[:,:,None]<=x[None,None,:],axis=0) 405 | bs = (clm_cdf-crps_exc)**2 406 | crpsClm[idt,iyr,:] = 0.5*np.sum((bs[:,1:]+bs[:,:len(dx)])*dx[None,:],axis=1) 407 | ## Calculate Brier scores for different thresholds 408 | p33 = qtev_doy[doy_dts[idt],:,0] 409 | exc33p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p33) 410 | brier33pClm[idt,iyr,:] = (exc33p[idt,iyr,:]-np.mean(obsClm>p33[None,:],axis=0))**2 411 | brier33pCNN[idt,iyr,:] = (exc33p[idt,iyr,:]-pot33pCNN[idt,iyr,:])**2 412 | p67 = qtev_doy[doy_dts[idt],:,1] 413 | exc67p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p67) 414 | brier67pClm[idt,iyr,:] = (exc67p[idt,iyr,:]-np.mean(obsClm>p67[None,:],axis=0))**2 415 | brier67pCNN[idt,iyr,:] = (exc67p[idt,iyr,:]-pot67pCNN[idt,iyr,:])**2 416 | p85 = qtev_doy[doy_dts[idt],:,2] 417 | exc85p[idt,iyr,:] = (obs_precip_vdate[idt,iyr,:]>p85) 418 | brier85pClm[idt,iyr,:] = (exc85p[idt,iyr,:]-np.mean(obsClm>p85[None,:],axis=0))**2 419 | brier85pCNN[idt,iyr,:] = (exc85p[idt,iyr,:]-pot85pCNN[idt,iyr,:])**2 420 | 421 | 422 | outfilename = "/home/michael/Desktop/CalifAPCP/results/scores-rv5_"+clead 423 | np.savez(outfilename, crpsClm=crpsClm, crpsCNN=crpsCNN, \ 424 | exc33p=exc33p, pot33pCNN=pot33pCNN, Bs33pClm=brier33pClm, Bs33pCNN=brier33pCNN, \ 425 | exc67p=exc67p, pot67pCNN=pot67pCNN, Bs67pClm=brier67pClm, Bs67pCNN=brier67pCNN, \ 426 | exc85p=exc85p, pot85pCNN=pot85pCNN, Bs85pClm=brier85pClm, Bs85pCNN=brier85pCNN) 427 | 428 | 429 | 430 | # calculate ranked probability score 431 | rpsClm = brier33pClm + brier67pClm + brier85pClm 432 | rpsCNN = brier33pCNN + brier67pCNN + brier85pCNN 433 | 434 | 435 | # rpssAvgCNN 436 | round(1.-np.sum(rpsCNN)/np.sum(rpsClm),4) 437 | 438 | 439 | 440 | -------------------------------------------------------------------------------- /S-CodeForGraphics.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import numpy.ma as ma 4 | import scipy as sp 5 | import math 6 | import os, sys 7 | import matplotlib.pyplot as plt 8 | import matplotlib.path as path 9 | import matplotlib.patches as patches 10 | 11 | import datetime 12 | import time 13 | import pandas as pd 14 | import statsmodels.api as sm 15 | import statsmodels.formula.api as smf 16 | 17 | from netCDF4 import Dataset 18 | from numpy import ma, loadtxt 19 | from numpy.linalg import solve 20 | from scipy import stats 21 | from scipy.interpolate import interp1d 22 | from scipy.stats import kendalltau 23 | from colorspace import diverging_hcl, sequential_hcl 24 | 25 | plt.ion() 26 | 27 | divcmp = diverging_hcl("Tropic",rev=True).cmap(name = "Diverging Color Map") 28 | 29 | 30 | f1 = 
np.load("/home/michael/Desktop/CalifAPCP/data/precip_PRISM_cal_19810101_20171231.npz") 31 | obs_lat = f1['lat'] 32 | obs_lon = f1['lon'] 33 | f1.close() 34 | 35 | nxy = len(obs_lon) 36 | 37 | ndts = 61 38 | nyrs = 20 39 | 40 | 41 | ################################################################################################### 42 | # # 43 | # Figure S1: Maps of RPSS comparing different CSGD implementations # 44 | # # 45 | ################################################################################################### 46 | 47 | 48 | acfRv1 = np.zeros((3,15),dtype=np.float32) 49 | acfRv2 = np.zeros((3,15),dtype=np.float32) 50 | pvalRv1 = np.zeros((3,nxy),dtype=np.float32) 51 | pvalRv2 = np.zeros((3,nxy),dtype=np.float32) 52 | alphaFDRrv1 = np.zeros(3,dtype=np.float32) 53 | alphaFDRrv2 = np.zeros(3,dtype=np.float32) 54 | 55 | rpssMapCSGD = ma.array(np.zeros((3,nxy),dtype=np.float32),mask=True) 56 | rpssMapCSGDrv1 = ma.array(np.zeros((3,nxy),dtype=np.float32),mask=True) 57 | rpssMapCSGDrv2 = ma.array(np.zeros((3,nxy),dtype=np.float32),mask=True) 58 | 59 | rpssAvgCSGD = ma.array(np.zeros(3,dtype=np.float32),mask=True) 60 | rpssAvgCSGDrv1 = ma.array(np.zeros(3,dtype=np.float32),mask=True) 61 | rpssAvgCSGDrv2 = ma.array(np.zeros(3,dtype=np.float32),mask=True) 62 | 63 | for ilead in range(3): 64 | f1 = np.load("/home/michael/Desktop/CalifAPCP/results/scores-ann_week"+str(ilead+2)+".npz") 65 | Bs33Clm = f1['Bs33pClm'] 66 | Bs33CSGD = f1['Bs33pCSGD'] 67 | Bs67Clm = f1['Bs67pClm'] 68 | Bs67CSGD = f1['Bs67pCSGD'] 69 | Bs85Clm = f1['Bs85pClm'] 70 | Bs85CSGD = f1['Bs85pCSGD'] 71 | f1.close() 72 | f2 = np.load("/home/michael/Desktop/CalifAPCP/results/scores-rv1_week"+str(ilead+2)+".npz") 73 | Bs33CSGDrv1 = f2['Bs33pCSGD'] 74 | Bs67CSGDrv1 = f2['Bs67pCSGD'] 75 | Bs85CSGDrv1 = f2['Bs85pCSGD'] 76 | f2.close() 77 | f3 = np.load("/home/michael/Desktop/CalifAPCP/results/scores-rv2_week"+str(ilead+2)+".npz") 78 | Bs33CSGDrv2 = f3['Bs33pCSGD'] 79 | Bs67CSGDrv2 = f3['Bs67pCSGD'] 80 | Bs85CSGDrv2 = f3['Bs85pCSGD'] 81 | f3.close() 82 | rpsClm = Bs33Clm + Bs67Clm + Bs85Clm # calculate ranked probability score 83 | rpsCSGD = Bs33CSGD + Bs67CSGD + Bs85CSGD 84 | rpsCSGDrv1 = Bs33CSGDrv1 + Bs67CSGDrv1 + Bs85CSGDrv1 85 | rpsCSGDrv2 = Bs33CSGDrv2 + Bs67CSGDrv2 + Bs85CSGDrv2 86 | rpssMapCSGD[ilead,:] = 1.-np.sum(rpsCSGD,axis=(0,1))/np.sum(rpsClm,axis=(0,1)) 87 | rpssMapCSGDrv1[ilead,:] = 1.-np.sum(rpsCSGDrv1,axis=(0,1))/np.sum(rpsClm,axis=(0,1)) 88 | rpssMapCSGDrv2[ilead,:] = 1.-np.sum(rpsCSGDrv2,axis=(0,1))/np.sum(rpsClm,axis=(0,1)) 89 | rpssAvgCSGD[ilead] = 1.-np.sum(rpsCSGD)/np.sum(rpsClm) 90 | rpssAvgCSGDrv1[ilead] = 1.-np.sum(rpsCSGDrv1)/np.sum(rpsClm) 91 | rpssAvgCSGDrv2[ilead] = 1.-np.sum(rpsCSGDrv2)/np.sum(rpsClm) 92 | rpsDiffCSGDrv1 = rpsCSGD-rpsCSGDrv1 93 | rpsDiffCSGDrv2 = rpsCSGD-rpsCSGDrv2 94 | rpsDiffStdzCSGDrv1 = (rpsDiffCSGDrv1-np.mean(rpsDiffCSGDrv1,axis=(0,1))[None,None,:])/np.std(rpsDiffCSGDrv1,axis=(0,1))[None,None,:] 95 | rpsDiffStdzCSGDrv2 = (rpsDiffCSGDrv2-np.mean(rpsDiffCSGDrv2,axis=(0,1))[None,None,:])/np.std(rpsDiffCSGDrv2,axis=(0,1))[None,None,:] 96 | for lg in range(15): 97 | acfRv1[ilead,lg] = np.mean(rpsDiffStdzCSGDrv1[lg:,:,:]*rpsDiffStdzCSGDrv1[:(ndts-lg),:,:]) # Estimate temporal autocorrelation 98 | acfRv2[ilead,lg] = np.mean(rpsDiffStdzCSGDrv2[lg:,:,:]*rpsDiffStdzCSGDrv2[:(ndts-lg),:,:]) 99 | rhoCSGDrv1 = acfRv1[ilead,1]/acfRv1[ilead,0] 100 | rhoCSGDrv2 = acfRv2[ilead,1]/acfRv2[ilead,0] 101 | print(rhoCSGDrv1,rhoCSGDrv2) 102 | nCSGDrv1 = 
round(ndts*nyrs*(1-rhoCSGDrv1)/(1+rhoCSGDrv1)) 103 | nCSGDrv2 = round(ndts*nyrs*(1-rhoCSGDrv2)/(1+rhoCSGDrv2)) 104 | for ixy in range(nxy): 105 | smplCSGDrv1 = rpsCSGD[:,:,ixy].flatten()-rpsCSGDrv1[:,:,ixy].flatten() 106 | smplCSGDrv2 = rpsCSGD[:,:,ixy].flatten()-rpsCSGDrv2[:,:,ixy].flatten() 107 | tstatCSGDrv1 = np.mean(smplCSGDrv1)/np.sqrt(np.var(smplCSGDrv1)/nCSGDrv1) # test statistic for paired t-test 108 | tstatCSGDrv2 = np.mean(smplCSGDrv2)/np.sqrt(np.var(smplCSGDrv2)/nCSGDrv2) 109 | pvalRv1[ilead,ixy] = 1.-sp.stats.t.cdf(tstatCSGDrv1,df=nCSGDrv1-1) # p-value for one-sided test 110 | pvalRv2[ilead,ixy] = 1.-sp.stats.t.cdf(tstatCSGDrv2,df=nCSGDrv2-1) 111 | #pval[ilead,ixy] = 2*min(1.-sp.stats.t.cdf(tstat,df=n-1),sp.stats.t.cdf(tstat,df=n-1)) 112 | pvalRv1_srt = np.sort(pvalRv1[ilead,:]) 113 | iCSGDrv1 = np.where(pvalRv1_srt<=0.1*np.arange(1,nxy+1)/nxy)[0] 114 | if len(iCSGDrv1)>0: 115 | alphaFDRrv1[ilead] = pvalRv1_srt[iCSGDrv1[-1]] 116 | pvalRv2_srt = np.sort(pvalRv2[ilead,:]) 117 | iCSGDrv2 = np.where(pvalRv2_srt<=0.1*np.arange(1,nxy+1)/nxy)[0] 118 | if len(iCSGDrv2)>0: 119 | alphaFDRrv2[ilead] = pvalRv2_srt[iCSGDrv2[-1]] 120 | plt.figure(); plt.scatter(np.arange(663),0.1*np.arange(1,664)/663); plt.scatter(np.arange(663),pvalRv1_srt); plt.scatter(np.arange(663),pvalRv2_srt) 121 | 122 | 123 | 124 | 125 | fig = plt.figure(figsize=(11.3,9.)) 126 | 127 | for ilead in range(3): 128 | ylim = np.array([0.26,0.052,0.026])[ilead] 129 | #ylim = np.amax(abs(rpssMapCSGD[ilead,:])) 130 | indSgnfRv1 = (pvalRv1[ilead,:]0: 214 | alphaFDRrv[ilead] = pvalRv_srt[iANNrv[-1]] 215 | plt.figure(); plt.scatter(np.arange(663),0.1*np.arange(1,664)/663); plt.scatter(np.arange(663),pvalRv_srt) 216 | 217 | 218 | 219 | 220 | fig = plt.figure(figsize=(11.8,6.)) 221 | 222 | for ilead in range(3): 223 | ylim = np.array([0.26,0.052,0.026])[ilead] 224 | #ylim = np.amax(abs(rpssMapCSGD[ilead,:])) 225 | indSgnfRv = (pvalRv[ilead,:]0: 295 | alphaFDR_CNN[ilead] = pvalCNN_srt[iCNN[-1]] 296 | plt.figure(); plt.scatter(np.arange(663),0.1*np.arange(1,664)/663); plt.scatter(np.arange(663),pvalCNN_srt) 297 | 298 | 299 | 300 | fig = plt.figure(figsize=(11.3,3.1)) 301 | 302 | for ilead in range(3): 303 | ylim = np.array([0.26,0.052,0.026])[ilead] 304 | indSgnfCNN = (pvalCNN[ilead,:]