├── .gitattributes ├── .gitignore ├── Code ├── dataReader.py ├── main.py ├── modelEstimation.py └── scenarioGeneration.py ├── RE-Europe_dataset_package └── Put_data_here.txt ├── Results └── Results_to_be_stored_here.txt └── licence.txt /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Windows image file caches 2 | Thumbs.db 3 | ehthumbs.db 4 | 5 | # Folder config file 6 | Desktop.ini 7 | 8 | # Recycle Bin used on file shares 9 | $RECYCLE.BIN/ 10 | 11 | # Windows Installer files 12 | *.cab 13 | *.msi 14 | *.msm 15 | *.msp 16 | 17 | # Windows shortcuts 18 | *.lnk 19 | 20 | # ========================= 21 | # Operating System Files 22 | # ========================= 23 | 24 | # OSX 25 | # ========================= 26 | 27 | .DS_Store 28 | .AppleDouble 29 | .LSOverride 30 | 31 | # Thumbnails 32 | ._* 33 | 34 | # Files that might appear in the root of a volume 35 | .DocumentRevisions-V100 36 | .fseventsd 37 | .Spotlight-V100 38 | .TemporaryItems 39 | .Trashes 40 | .VolumeIcon.icns 41 | 42 | # Directories potentially created on remote AFP share 43 | .AppleDB 44 | .AppleDesktop 45 | Network Trash Folder 46 | Temporary Items 47 | .apdisk 48 | -------------------------------------------------------------------------------- /Code/dataReader.py: -------------------------------------------------------------------------------- 1 | # License: BSD_3_clause 2 | # 3 | # Copyright (c) 2015, Jan Emil Banning Iversen, Pierre Pinson, Igor Arduin 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are 7 | # met: 8 | # 9 | # Redistributions of source code must retain the above copyright 10 | # notice, this list of conditions and the following disclaimer. 11 | # 12 | # Redistributions in binary form must reproduce the above copyright 13 | # notice, this list of conditions and the following disclaimer in 14 | # the documentation and/or other materials provided with the 15 | # distribution. 16 | # 17 | # Neither the name of the Technical University of Denmark (DTU) 18 | # nor the names of its contributors may be used to endorse or 19 | # promote products derived from this software without specific 20 | # prior written permission. 21 | # 22 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 26 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | 34 | 35 | import os 36 | import sys 37 | import pandas as pd 38 | import numpy as np 39 | from datetime import datetime, time 40 | import gc 41 | import math 42 | import random 43 | 44 | class expando: 45 | pass 46 | 47 | 48 | #Function that computes the great-circle distance between nodes from their longitudes and latitudes 49 | def distance_from_long_lat(lat1, long1, lat2, long2): 50 | # Convert latitude and longitude to spherical coordinates in radians. 51 | degrees_to_radians = math.pi/180.0 52 | # phi = 90 - latitude 53 | phi1 = (90.0 - lat1)*degrees_to_radians 54 | phi2 = (90.0 - lat2)*degrees_to_radians 55 | # theta = longitude 56 | theta1 = long1*degrees_to_radians 57 | theta2 = long2*degrees_to_radians 58 | # Compute spherical distance from spherical coordinates. 59 | # For two locations in spherical coordinates (1, theta, phi) and (1, theta', phi') 60 | # cosine( arc length ) = sin phi sin phi' cos(theta-theta') + cos phi cos phi' 61 | # distance = rho * arc length 62 | cos = (math.sin(phi1)*math.sin(phi2)*math.cos(theta1 - theta2) + 63 | math.cos(phi1)*math.cos(phi2)) 64 | if cos>1:#floating-point rounding can yield a value slightly above 1 65 | cos=1 66 | arc = math.acos( cos ) 67 | # Remember to multiply arc by the radius of the earth 68 | # in your favorite set of units to get length. 69 | R_earth = 6371 #km 70 | arc = arc * R_earth 71 | return arc 72 | 73 | 74 | 75 | class dataReader: 76 | 77 | def __init__(self, countries,max_number_loc,renewable_type,data_type,start_time, 78 | end_time,fore_start_time,fore_end_time,nbr_leadTimes,folder_location): 79 | self._set_attributes(countries,max_number_loc,renewable_type,data_type,start_time, 80 | end_time,fore_start_time,fore_end_time,nbr_leadTimes,folder_location) 81 | self._check_countries() 82 | self._load_observations() 83 | self._tod_observations() 84 | self._load_forecasts() 85 | self._get_distances() 86 | print('Data has been imported!') 87 | 88 | pass 89 | 90 | #Function that stores all inputs as attributes of the output 91 | def _set_attributes(self, countries,max_number_loc,renewable_type,data_type,start_time, 92 | end_time,fore_start_time,fore_end_time,nbr_leadTimes,folder_location): 93 | 94 | self.attributes = expando() 95 | self.attributes.renew_type = renewable_type 96 | self.attributes.data_type = data_type 97 | self.attributes.folder_loc = folder_location 98 | self.attributes.start_time = start_time 99 | self.attributes.end_time = end_time 100 | self.attributes.fore_start_time = fore_start_time 101 | self.attributes.fore_end_time = fore_end_time 102 | self.attributes.nbr_leadT = nbr_leadTimes 103 | self.attributes.countries = countries 104 | self.attributes.max_number_loc = max_number_loc 105 | 106 | self.metadata = expando() 107 | 108 | pass 109 | 110 | 111 | #Function that checks the input countries and displays an error message if they 112 | #are not available. Returns the available countries and the indices of their nodes.
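# [Illustrative aside, not part of the original file] The country validation
# below is plain set algebra; a minimal sketch with toy country codes
# ('available' normally comes from Metadata/network_nodes.csv):
requested = {'FRA', 'XYZ'}
available = {'FRA', 'DEU', 'ESP'}
missing = requested - available.intersection(requested)   # {'XYZ'} -> triggers the warning print
kept = available.intersection(requested)                  # {'FRA'} -> kept for the run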
113 | def _check_countries(self): 114 | 115 | self.metadata.network_nodes = pd.read_csv(self.attributes.folder_loc+'/Metadata/network_nodes.csv', 116 | sep=',') 117 | available_countries = set(self.metadata.network_nodes.country) 118 | countries = self.attributes.countries 119 | if bool(countries-available_countries.intersection(countries)): 120 | print(', '.join(countries-available_countries.intersection(countries)) + \ 121 | ' are not in the country list. ' + 'Available countries: ' + ', '.join(available_countries)) 122 | self.attributes.countries = list(available_countries.intersection(countries)) 123 | 124 | ix_net_nodes_bool = np.in1d(self.metadata.network_nodes.country, self.attributes.countries) 125 | self.metadata.ix_nodes = np.where(ix_net_nodes_bool)[0]+1 126 | if self.attributes.max_number_loc is not None and len(self.metadata.ix_nodes)>self.attributes.max_number_loc: 127 | self.metadata.ix_nodes = np.sort(random.sample(list(self.metadata.ix_nodes), 128 | self.attributes.max_number_loc)) 129 | print('The number of nodes selected was higher than the maximum number of locations (' +\ 130 | str(self.attributes.max_number_loc) + ') and was therefore reduced.') 131 | 132 | pass 133 | 134 | #Function that loads observations and stores them in the 'obs' attribute of output 135 | def _load_observations(self): 136 | 137 | filename = self.attributes.folder_loc + '/Nodal_TS/' + self.attributes.renew_type + \ 138 | '_signal_' + self.attributes.data_type + '.csv' 139 | data_observations_aux = pd.read_csv(filename, sep=',') 140 | 141 | #Getting observations of training period 142 | ix_time_bool = np.in1d(data_observations_aux.Time, 143 | [self.attributes.start_time,self.attributes.end_time]) 144 | ix_time = np.where(ix_time_bool)[0] 145 | if len(ix_time) == 1: 146 | sys.exit('Training period contains only one element. '+ \ 147 | 'There must be an error in the definition of starting/ending dates. '+\ 148 | 'Check day, month and year selected. 
Remember that data are available hourly only.') 149 | ix_net_nodes = np.append(0, self.metadata.ix_nodes) 150 | data_observations = data_observations_aux.ix[ix_time[0]:ix_time[len(ix_time)-1], 151 | ix_net_nodes] 152 | data_observations.Time = pd.to_datetime(data_observations.Time) 153 | del ix_time_bool, ix_time 154 | 155 | #Getting observations of testing period 156 | ix_time_bool = np.in1d(data_observations_aux.Time, 157 | [self.attributes.fore_start_time,self.attributes.fore_end_time]) 158 | ix_time = np.where(ix_time_bool)[0] 159 | data_observations_cf = data_observations_aux.ix[ix_time[0]:ix_time[len(ix_time)-1], 160 | ix_net_nodes] 161 | data_observations_cf.Time = pd.to_datetime(data_observations_cf.Time) 162 | 163 | #Define colnames with locations 164 | new_col_names = [None] * len(data_observations.columns) 165 | new_col_names[0] = 'Time' 166 | for icol, col_name in enumerate(data_observations.columns[1:], start=1): 167 | new_col_names[icol] = 'id_' + col_name 168 | self.metadata.id_nodes = new_col_names[1:] 169 | 170 | data_observations.columns = new_col_names 171 | data_observations_cf.columns = new_col_names 172 | 173 | data_observations.reset_index(drop=True, inplace=True) 174 | data_observations_cf.reset_index(drop=True, inplace=True) 175 | 176 | del data_observations_aux, filename 177 | 178 | self.obs = data_observations 179 | self.current_fore = expando() 180 | self.current_fore.obs = data_observations_cf 181 | 182 | pass 183 | 184 | #Function that defines the time of day horizon of predictions/observations 185 | #Dataset contains only hourly information but it can be adapted for other 186 | #markets 187 | def _tod_observations(self): 188 | 189 | #Assumption of an hourly day discretisation, to be adapted better if 190 | #intraday market or other kinds are to be considered 191 | time_of_day = [time(ih,0,0,0) for ih in range(24)] 192 | tod_name = [None] * len(time_of_day) 193 | #defining the repartition in day for later climatology application 194 | for index,itime in enumerate(time_of_day): 195 | if itime.hour<10: h_name= '0' + str(itime.hour) 196 | else: h_name = str(itime.hour) 197 | if itime.minute<10: min_name= '0' + str(itime.minute) 198 | else: min_name = str(itime.minute) 199 | tod_name[index] = 'h_'+ h_name + '_' + min_name 200 | 201 | self.metadata.tod = time_of_day 202 | self.metadata.tod_label = tod_name 203 | 204 | pass 205 | 206 | #Function that loads predictions and stores them in the 'fore' attribute of output 207 | def _load_forecasts(self): 208 | 209 | #Define lead times labels 210 | forecast_ahead = [None] * self.attributes.nbr_leadT 211 | for leadT in range(1,self.attributes.nbr_leadT+1): 212 | if leadT<10: nb_name= '0' + str(leadT) 213 | else: nb_name = str(leadT) 214 | forecast_ahead[leadT-1] = 'leadT_' + nb_name 215 | self.metadata.fore_leadT = forecast_ahead 216 | 217 | #loading of forecasts data under data_forecasts 218 | data_forecasts = expando() 219 | data_forecasts_cf = expando() 220 | empty_df = pd.DataFrame(columns=self.obs.columns) 221 | for leadT_name in self.metadata.fore_leadT: 222 | setattr(data_forecasts, leadT_name, empty_df) 223 | setattr(data_forecasts_cf, leadT_name, empty_df) 224 | 225 | for iforecast in os.listdir(self.attributes.folder_loc + '/Nodal_FC/'): 226 | iforecast_asDate = datetime(int(iforecast[:4]), int(iforecast[4:6]), int(iforecast[6:8]), 227 | int(iforecast[8:]),0,0) 228 | iforecast_asDate = iforecast_asDate.strftime("%Y-%m-%d %H:%M:%S") 229 | if iforecast_asDate>=self.attributes.start_time and 
iforecast_asDate<=self.attributes.end_time: 230 | filename = self.attributes.folder_loc + '/Nodal_FC/' + iforecast + \ 231 | '/' + self.attributes.renew_type + '_forecast.csv' 232 | data_forecasts_aux = pd.read_csv(filename, sep=',') 233 | for leadT, leadT_name in enumerate(self.metadata.fore_leadT, start = 1): 234 | temp_df = pd.DataFrame(np.nan, index=[0],columns=self.obs.columns) 235 | temp_df.loc[0,('Time')] = iforecast_asDate 236 | for iloc, location in enumerate(self.metadata.id_nodes): 237 | temp_df.loc[0,(location)] = data_forecasts_aux.ix[leadT,self.metadata.ix_nodes[iloc]] 238 | setattr(data_forecasts, leadT_name, 239 | getattr(data_forecasts, leadT_name).append(temp_df, ignore_index=True)) 240 | del temp_df 241 | del data_forecasts_aux, filename 242 | 243 | if iforecast_asDate>=self.attributes.fore_start_time and iforecast_asDate<=self.attributes.fore_end_time: 244 | filename = self.attributes.folder_loc + '/Nodal_FC/' + iforecast + \ 245 | '/' + self.attributes.renew_type + '_forecast.csv' 246 | data_forecasts_aux = pd.read_csv(filename, sep=',') 247 | for leadT, leadT_name in enumerate(self.metadata.fore_leadT, start = 1): 248 | temp_df = pd.DataFrame(np.nan, index=[0],columns=self.obs.columns) 249 | temp_df.loc[0,('Time')] = iforecast_asDate 250 | for iloc, location in enumerate(self.metadata.id_nodes): 251 | temp_df.loc[0,(location)] = data_forecasts_aux.ix[leadT,self.metadata.ix_nodes[iloc]] 252 | setattr(data_forecasts_cf, leadT_name, 253 | getattr(data_forecasts_cf, leadT_name).append(temp_df, ignore_index=True)) 254 | del temp_df 255 | del data_forecasts_aux, filename 256 | 257 | gc.collect() 258 | 259 | for leadT_name in self.metadata.fore_leadT: 260 | getattr(data_forecasts, leadT_name).Time = \ 261 | pd.to_datetime(getattr(data_forecasts, leadT_name).Time) 262 | getattr(data_forecasts_cf, leadT_name).Time = \ 263 | pd.to_datetime(getattr(data_forecasts_cf, leadT_name).Time) 264 | 265 | self.fore = data_forecasts 266 | self.current_fore.fore = data_forecasts_cf 267 | 268 | pass 269 | 270 | 271 | #Function that calculates and stores distances between nodes 272 | def _get_distances(self): 273 | 274 | dist_df = pd.DataFrame(index=self.metadata.id_nodes, columns=self.metadata.id_nodes) 275 | for loc_ref in self.metadata.id_nodes: 276 | id_ref = int(loc_ref[3:]) 277 | ix_ref = np.where(self.metadata.network_nodes.ID == id_ref)[0] 278 | for loc_comp in self.metadata.id_nodes: 279 | id_comp = int(loc_comp[3:]) 280 | ix_comp = np.where(self.metadata.network_nodes.ID == id_comp)[0] 281 | dist_df.loc[(loc_ref, loc_comp)] = \ 282 | distance_from_long_lat(self.metadata.network_nodes.latitude.values[ix_ref], 283 | self.metadata.network_nodes.longitude.values[ix_ref], 284 | self.metadata.network_nodes.latitude.values[ix_comp], 285 | self.metadata.network_nodes.longitude.values[ix_comp]) 286 | self.metadata.distances = dist_df 287 | del dist_df 288 | 289 | pass 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | -------------------------------------------------------------------------------- /Code/main.py: -------------------------------------------------------------------------------- 1 | # License: BSD_3_clause 2 | # 3 | # Copyright (c) 2015, Jan Emil Banning Iversen, Pierre Pinson, Igor Arduin 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are 7 | # met: 8 | # 9 | # Redistributions of source code must retain the above copyright 10 | # notice, this list of 
conditions and the following disclaimer. 11 | # 12 | # Redistributions in binary form must reproduce the above copyright 13 | # notice, this list of conditions and the following disclaimer in 14 | # the documentation and/or other materials provided with the 15 | # distribution. 16 | # 17 | # Neither the name of the Technical University of Denmark (DTU) 18 | # nor the names of its contributors may be used to endorse or 19 | # promote products derived from this software without specific 20 | # prior written permission. 21 | # 22 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 26 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | 34 | 35 | 36 | ##INPUTS TO BE MODIFIED 37 | #Path where all .py files are stored 38 | folder_code = '.' 39 | #Path to data 40 | folder_data = '../RE-Europe_dataset_package' 41 | #Output path to store scenarios in csv 42 | folder_output = '../Results' 43 | #Renewable type to be studied: 'wind' or 'solar' 44 | renewable_type = 'wind' 45 | #Data type: 'COSMO' or 'ECMWF' (COSMO recommended) 46 | data_type = 'COSMO' 47 | #Countries to be studied - see documentation for the list of country keywords 48 | countries = {'FRA'} 49 | #Maximum number of locations (random selection among all selected countries) 50 | max_number_loc = 10 51 | #Number of lead times to be studied (up to 91) 52 | nbr_leadTimes = 10 53 | #Starting and ending time of training period ('YYYY-MM-DD HH:MM:SS') 54 | start_time = '2012-01-02 00:00:00' 55 | end_time = '2012-12-31 00:00:00' 56 | #Starting and ending time of testing period - when scenarios will be generated ('YYYY-MM-DD HH:MM:SS') 57 | fore_start_time = '2014-09-01 00:00:00' 58 | fore_end_time = '2014-09-10 00:00:00' 59 | #Use of the improved forecast model (0:no - 1:yes) - only relevant for the wind case 60 | improv_forecast = 1 61 | #Number of scenarios to be computed 62 | nb_scenarios = 50 63 | 64 | 65 | ##CODE STRUCTURE - DO NOT MODIFY FOR STANDARD USE 66 | import sys 67 | sys.path.insert(0, folder_code) 68 | from dataReader import dataReader 69 | from modelEstimation import modelEstimation 70 | from scenarioGeneration import scenarioGeneration, save_scenarios 71 | 72 | data = dataReader(countries,max_number_loc,renewable_type,data_type,start_time, 73 | end_time,fore_start_time,fore_end_time,nbr_leadTimes,folder_data) 74 | model = modelEstimation(data) 75 | scenarios = scenarioGeneration(model, data, improv_forecast, nb_scenarios) 76 | save_scenarios(scenarios, folder_output) 77 | 78 | -------------------------------------------------------------------------------- /Code/modelEstimation.py: -------------------------------------------------------------------------------- 1 | # License: BSD_3_clause 2 | # 3 | # Copyright (c) 2015, Jan Emil Banning Iversen, Pierre Pinson, Igor Arduin 4 | # 5 | # Redistribution and use in source and 
binary forms, with or without 6 | # modification, are permitted provided that the following conditions are 7 | # met: 8 | # 9 | # Redistributions of source code must retain the above copyright 10 | # notice, this list of conditions and the following disclaimer. 11 | # 12 | # Redistributions in binary form must reproduce the above copyright 13 | # notice, this list of conditions and the following disclaimer in 14 | # the documentation and/or other materials provided with the 15 | # distribution. 16 | # 17 | # Neither the name of the Technical University of Denmark (DTU) 18 | # nor the names of its contributors may be used to endorse or 19 | # promote products derived from this software without specific 20 | # prior written permission. 21 | # 22 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 26 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | 34 | 35 | import numpy as np 36 | import pandas as pd 37 | import gc 38 | import warnings 39 | 40 | from scipy.stats import norm 41 | from scipy.interpolate import interp1d 42 | from scipy.optimize import curve_fit 43 | from sklearn import linear_model 44 | import statsmodels.formula.api as smf 45 | 46 | class expando: 47 | pass 48 | 49 | #Combined exp and Cauchy function for correlation matrix fitting 50 | def mix_func(X, beta, tau, a, b): 51 | x,y = X 52 | return np.exp(-(tau*x)/(1+a*y**(2*b))**beta)/(1+a*y**(2*b)) 53 | 54 | #climatology cdf/inv_cdf function 55 | def clim_cdf(input_data_t, loc_NC, max_factor, cdf_keyword): 56 | probabilities = np.arange(1,len(input_data_t)+1)/(float(len(input_data_t))+1) 57 | quantiles = np.array(sorted(input_data_t)) 58 | quantiles[quantiles < 0] = 0. 59 | if (quantiles == np.zeros(len(quantiles))).all(): 60 | #solar night case (all observations equal zero) 61 | quantiles_extended = np.array([0.0, loc_NC]) 62 | probabilities_extended = np.zeros(len(quantiles_extended)) 63 | else: 64 | #Extension of the quantiles to reach nominal capacity. The climatology 65 | #functions are built from observations; this extension prevents cases 66 | #where forecasts are higher than any observation and therefore out of range. 67 | #The value 1.2 is the lowest suitable factor found so far; this could be 68 | #generalized by using the real nominal capacity directly 69 | quantiles_extended = \ 70 | np.concatenate([[-1e-5,0], quantiles, [quantiles.max()*max_factor, loc_NC*max_factor]]) 71 | probabilities_extended = np.concatenate([[-1e-5,0.],probabilities,[1,1+1e-5]]) 72 | 73 | if cdf_keyword == 'cdf': 74 | interpolation = interp1d(quantiles_extended, probabilities_extended) 75 | elif cdf_keyword == 'inv_cdf': 76 | interpolation = interp1d(probabilities_extended, quantiles_extended) 77 | 78 | return interpolation 79 | 80 | #Function to label the hours at which climatology will be applied. 81 | #Format 'h_HH_MM', e.g. 'h_18_45' in an intraday setting. 
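# [Illustrative aside, not part of the original file] clim_cdf above in use,
# with toy numbers (loc_NC=1.0 and max_factor=1.05 as used for wind; the
# returned object is a scipy interp1d):
import numpy as np
toy_obs = np.array([0.1, 0.4, 0.2, 0.6])     # observed power at one time of day
cdf = clim_cdf(toy_obs, 1.0, 1.05, 'cdf')
u = float(cdf(0.35))                         # power value -> pseudo-probability (0.55 here)
inv_cdf = clim_cdf(toy_obs, 1.0, 1.05, 'inv_cdf')
back = float(inv_cdf(u))                     # round trip recovers 0.35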
82 | def get_label_time(time): 83 | if time.hour<10: name_hour= '0' + str(time.hour) 84 | else: name_hour = str(time.hour) 85 | if time.minute<10: name_minute= '0' + str(time.minute) 86 | else: name_minute = str(time.minute) 87 | label_time = 'h_'+name_hour+'_'+name_minute 88 | return label_time 89 | 90 | 91 | #cdf from conditional quantile regression 92 | def cqr_cdf(prediction, betas, cdf_keyword): 93 | prob = betas.loc[:,('probabilities')].values 94 | quantiles = np.zeros(len(prob)) 95 | for i in range(len(prob)): 96 | quantiles[i] = float(betas.loc[i,('intercept')] + betas.loc[i,('coefficient')]*prediction) 97 | quantiles[quantiles < 0] = 0 98 | quantiles[quantiles > 1] = 1 99 | quantiles_extended = np.concatenate([[0], sorted(quantiles), [1]]) 100 | probabilities_extended = np.concatenate([[0],prob,[1]]) 101 | if cdf_keyword == 'cdf': 102 | interpolation = interp1d(quantiles_extended, probabilities_extended) 103 | elif cdf_keyword == 'inv_cdf': 104 | interpolation = interp1d(probabilities_extended, quantiles_extended) 105 | return interpolation 106 | 107 | 108 | 109 | class modelEstimation: 110 | 111 | def __init__(self, data): 112 | print('Climatology transformation') 113 | self._set_climatology(data) 114 | self._apply_climatology(data) 115 | self._get_concurrent_clim_set(data) 116 | print('Getting the estimators to improve forecast') 117 | self._improvement_forecast(data) 118 | print('Quantiles calculation') 119 | self._set_quantiles(data) 120 | self._set_cdf(data) 121 | print('CDF transformation') 122 | self._apply_cdf(data) 123 | print('Generalization of correlation matrix') 124 | self._get_corr(data) 125 | self._generalize_corr(data) 126 | print('Estimation finished!') 127 | pass 128 | 129 | #Function that defines climatology cdf for every corresponding time horizon, 130 | #using training data. 
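# [Illustrative aside, not part of the original file] The climatology below is
# fitted per (location, time of day). The grouping step in miniature, assuming
# hourly data as in the dataset:
import pandas as pd
ts = pd.Series(range(48), index=pd.date_range('2012-01-02', periods=48, freq='H'))
by_tod = {t: ts[ts.index.time == t] for t in set(ts.index.time)}
# len(by_tod) == 24; each entry holds the two values observed at that hour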
131 | def _set_climatology(self, data): 132 | #time of day 133 | tod = data.metadata.tod 134 | #Making Climatology time-of-day transformation 135 | climatology = expando() 136 | for location in data.metadata.id_nodes: 137 | setattr(climatology, location, expando()) 138 | for index,itime in enumerate(tod): 139 | setattr(getattr(climatology, location), data.metadata.tod_label[index], 140 | getattr(data.obs,location)[data.obs.Time.dt.time == itime]) 141 | 142 | #Making the cdf and the inv_cdf for the climatology to use as a transformation 143 | self.clim = expando() 144 | self.clim.cdf = expando() 145 | self.clim.inv_cdf = expando() 146 | #As observations might not reach nominal capacity of the farms while forecasts 147 | #might predict it, necessity to define a factor to multiply to the maximum 148 | #of observations in the definition of climatology cdf/inv_cdf 149 | #This factor will not be needed if NC of farms are included/used 150 | if data.attributes.renew_type == 'wind': 151 | max_factor = 1.05 152 | elif data.attributes.renew_type == 'solar': 153 | max_factor = 1.2 154 | #For each location and time of day, creation of climatology cdf/inv_cdf 155 | for location in data.metadata.id_nodes: 156 | setattr(self.clim.cdf, location, expando()) 157 | setattr(self.clim.inv_cdf, location, expando()) 158 | loc_NC = max(getattr(data.obs,location)) #close to nominal capacity of farm 159 | for itime in data.metadata.tod_label: 160 | clim_loc_t = getattr(getattr(climatology,location),itime) 161 | setattr(getattr(self.clim.cdf, location), itime, 162 | clim_cdf(clim_loc_t, loc_NC, max_factor, 'cdf')) 163 | setattr(getattr(self.clim.inv_cdf, location), itime, 164 | clim_cdf(clim_loc_t, loc_NC, max_factor, 'inv_cdf')) 165 | pass 166 | 167 | 168 | #Function that applies climatology transformations to all observations and predictions. 
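# [Illustrative aside, not part of the original file] Per value, the
# transformation below maps each power observation through the climatology cdf
# of its own time of day. A sketch, where cdf_by_tod is a hypothetical dict
# from 'h_HH_MM' label to a fitted cdf (the real code stores them as expando
# attributes) and get_label_time is the module-level helper above:
def deseasonalise(values, timestamps, cdf_by_tod):
    # returns the values on the seasonality-free [0, 1] scale
    return [float(cdf_by_tod[get_label_time(t.time())](v))
            for t, v in zip(timestamps, values)]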
169 | def _apply_climatology(self, data): 170 | 171 | #Transforming observations and predictions (by climatology) to get rid of seasonality 172 | #First transforming the observed power 173 | self.clim.obs = pd.DataFrame(np.nan, index=range(len(data.obs)), 174 | columns=['t_actual']+data.metadata.id_nodes) 175 | self.clim.obs.loc[:,('t_actual')] = data.obs.Time 176 | 177 | for location in data.metadata.id_nodes: 178 | for itime, time in data.obs.Time.dt.time.iteritems(): 179 | label_time = get_label_time(time) 180 | clim_cdf_loc_t = getattr(getattr(self.clim.cdf,location), label_time) 181 | self.clim.obs.loc[itime,(location)] = float(clim_cdf_loc_t(data.obs.loc[itime,(location)])) 182 | 183 | 184 | #Second, the predicted power is transformed 185 | self.clim.fore = expando() 186 | for ileadT, leadT in enumerate(data.metadata.fore_leadT, start = 1): 187 | data_fore_leadT = getattr(data.fore, leadT) 188 | 189 | init_df = pd.DataFrame(np.nan, index=range(len(data_fore_leadT.Time)), 190 | columns=['t_issue','t_actual']+data.metadata.id_nodes) 191 | init_df.loc[:,('t_issue')] = data.fore.leadT_01.Time 192 | init_df.loc[:,('t_actual')] = init_df.loc[:,('t_issue')] + data.obs.Time.dt.freq * ileadT 193 | 194 | for location in data.metadata.id_nodes: 195 | for idate in range(len(data_fore_leadT.Time)): 196 | power_to_be_transformed = getattr(data_fore_leadT,location)[idate] 197 | label_time = get_label_time(init_df.loc[idate,('t_actual')]) 198 | clim_cdf_loc_t = getattr(getattr(self.clim.cdf, location),label_time) 199 | init_df.loc[idate,(location)] = float(clim_cdf_loc_t(power_to_be_transformed)) 200 | 201 | setattr(self.clim.fore, leadT, init_df) 202 | del init_df 203 | 204 | pass 205 | 206 | #Function that organizes data per lead time in order to have an easy access to 207 | #predictions and corresponding observations. Persistence values are also stored 208 | #for the improvement forecast phase. 
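# [Illustrative aside, not part of the original file] The alignment below
# hinges on matching timestamps with np.in1d; the same pattern in isolation,
# with toy timestamps:
import numpy as np
import pandas as pd
obs_t = pd.to_datetime(['2012-01-02 00:00', '2012-01-02 01:00', '2012-01-02 02:00'])
fore_t = pd.to_datetime(['2012-01-02 01:00', '2012-01-02 03:00'])
ix = np.where(np.in1d(obs_t, fore_t))[0]   # array([1]): the only shared timestamp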
209 | def _get_concurrent_clim_set(self, data): 210 | 211 | self.clim.concurr = expando() 212 | for location in data.metadata.id_nodes: 213 | setattr(self.clim.concurr, location, expando()) 214 | clim_concurr_loc = getattr(self.clim.concurr, location) 215 | for leadT in data.metadata.fore_leadT: 216 | clim_fore_leadT = getattr(self.clim.fore, leadT) 217 | 218 | #Getting observations corresponding to this leadT predictions 219 | ix_match_obs_bool = np.in1d(self.clim.obs.t_actual, clim_fore_leadT.t_actual) 220 | ix_match_obs = np.where(ix_match_obs_bool)[0] 221 | observations = self.clim.obs.loc[ix_match_obs,(location)] 222 | observations.reset_index(drop=True, inplace=True) 223 | 224 | t_actual = self.clim.obs.t_actual[ix_match_obs] 225 | t_actual.reset_index(drop=True, inplace=True) 226 | 227 | #Getting prediction of this leadT, in accordance with previous observations 228 | ix_match_pred_bool = np.in1d(clim_fore_leadT.t_actual, self.clim.obs.t_actual) 229 | ix_match_pred = np.where(ix_match_pred_bool)[0] 230 | predictions = clim_fore_leadT.loc[ix_match_pred, (location)] 231 | predictions.reset_index(drop=True, inplace=True) 232 | 233 | #Getting the observation at time of issue, corresponding to the persistence value 234 | ix_match_per_bool = np.in1d(self.clim.obs.t_actual, clim_fore_leadT.loc[ix_match_pred,('t_issue')]) 235 | ix_match_per = np.where(ix_match_per_bool)[0] 236 | persistences = self.clim.obs.loc[ix_match_per,(location)] 237 | persistences.reset_index(drop=True, inplace=True) 238 | 239 | concurr_temp = pd.DataFrame({'observations':observations, \ 240 | 'predictions':predictions, 'persistences':persistences, 't_actual':t_actual}) 241 | setattr(clim_concurr_loc, leadT, concurr_temp) 242 | 243 | pass 244 | 245 | #Function that fits coefficients for a weighted combination of persistence 246 | #and actual forecasts in order to generate better point forecasts. Calculations 247 | #are made on the training sample 248 | def _improvement_forecast(self, data): 249 | self.imp_fore = expando() 250 | 251 | for location in data.metadata.id_nodes: 252 | betas = pd.DataFrame(columns = data.metadata.fore_leadT, 253 | index = ['intercept','beta_pers','beta_fore']) 254 | 255 | for leadT in data.metadata.fore_leadT: 256 | clim_concurr_loc_leadT = getattr(getattr(self.clim.concurr, location), leadT) 257 | #Fitting observations with persistence and predicted values 258 | for_fitting = np.zeros((len(clim_concurr_loc_leadT.observations),2)) 259 | for_fitting[:,0] = clim_concurr_loc_leadT.persistences 260 | for_fitting[:,1] = clim_concurr_loc_leadT.predictions 261 | regr_leadT = linear_model.LinearRegression() 262 | regr_leadT.fit(for_fitting.reshape((len(for_fitting),2)), 263 | clim_concurr_loc_leadT.observations) 264 | betas.loc[('intercept', leadT)] = regr_leadT.intercept_ 265 | betas.loc[('beta_pers', leadT)] = regr_leadT.coef_[0] 266 | betas.loc[('beta_fore', leadT)] = regr_leadT.coef_[1] 267 | del for_fitting, regr_leadT 268 | setattr(self.imp_fore, location, betas) 269 | pass 270 | 271 | 272 | def _set_quantiles(self, data): 273 | 274 | #Compute quantiles for the transformed power conditional on the transformed power prediction 275 | #for a specific location and a specific lead time. 
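# [Illustrative aside, not part of the original file] The core smf.quantreg
# pattern used below, on synthetic data (column names mirror the concurrent
# DataFrame built above):
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
rng = np.random.RandomState(0)
toy = pd.DataFrame({'predictions': rng.rand(200)})
toy['observations'] = 0.8*toy['predictions'] + 0.1*rng.rand(200)
res = smf.quantreg('observations ~ predictions', toy).fit(q=0.5)
# res.params['Intercept'] and res.params['predictions'] fill one row of betas_aux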
276 | 277 | #smf.quantreg generates warnings - see its documentation for more details 278 | #warnings off just for this section 279 | warnings.filterwarnings("ignore") 280 | #Performs the actual quantile regression and stores the fitted coefficients 281 | prob = np.concatenate([[0.001],np.arange(0.05,0.951,0.05),[0.999]]) 282 | self.betas = expando() 283 | for location in data.metadata.id_nodes: 284 | print(location) 285 | setattr(self.betas, location, expando()) 286 | for ileadT, leadT in enumerate(data.metadata.fore_leadT, start = 1): 287 | 288 | clim_concurr_loc_leadT = getattr(getattr(self.clim.concurr, location), leadT) 289 | 290 | betas_aux = pd.DataFrame(0, columns = ['probabilities','intercept', 'coefficient'], 291 | index = range(len(prob))) 292 | betas_aux.loc[:,('probabilities')] = prob 293 | #For solar night cases, all quantiles are kept at zero 294 | if not np.all(clim_concurr_loc_leadT.observations == 0.): 295 | mod = smf.quantreg('observations ~ predictions', clim_concurr_loc_leadT) 296 | for iq,q in enumerate(prob): 297 | res = mod.fit(q=q) 298 | betas_aux.loc[iq,('intercept')] = res.params['Intercept'] 299 | betas_aux.loc[iq,('coefficient')] = res.params['predictions'] 300 | del res 301 | del mod 302 | 303 | setattr(getattr(self.betas,location), leadT, betas_aux) 304 | del betas_aux 305 | gc.collect() 306 | #warnings on 307 | warnings.filterwarnings("always") 308 | pass 309 | 310 | def _set_cdf(self, data): 311 | 312 | #In order to use the copula approach we need to transform to uniform marginal distributions. 313 | #This is achieved by using the predictive marginal densities on the transformed domain to 314 | #do a second transformation to get uniformly distributed marginals. For a complete and 315 | #accessible introduction see the wikipedia page on Copulas. 316 | 317 | #First define the marginal cumulative density functions. They are stored as the cumulative 318 | #density function (cdf) and for easy use we also define the inverse cumulative density 319 | #function inv_cdf. Each is defined for every location and every lead time. 320 | 321 | self.cdf = expando() 322 | self.inv_cdf = expando() 323 | 324 | for location in data.metadata.id_nodes: 325 | setattr(self.cdf, location, expando()) 326 | setattr(self.inv_cdf, location, expando()) 327 | 328 | for leadT in data.metadata.fore_leadT: 329 | betas_loc_leadT = getattr(getattr(self.betas,location),leadT) 330 | 331 | cdf_loc_leadT = \ 332 | lambda prediction, betas=betas_loc_leadT, cdf_keyword='cdf': \ 333 | cqr_cdf(prediction, betas, cdf_keyword) 334 | setattr(getattr(self.cdf, location), leadT, cdf_loc_leadT) 335 | 336 | inv_cdf_loc_leadT = \ 337 | lambda prediction, betas=betas_loc_leadT, cdf_keyword='inv_cdf': \ 338 | cqr_cdf(prediction, betas, cdf_keyword) 339 | setattr(getattr(self.inv_cdf, location), leadT, inv_cdf_loc_leadT) 340 | 341 | pass 342 | 343 | 344 | def _apply_cdf(self, data): 345 | 346 | #Using the defined cumulative density function (cdf) we can now convert every observation 347 | #into the uniform domain. This is done for every location and every lead time. 
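# [Illustrative aside, not part of the original file] The step below is the
# probability integral transform: if Y has cdf F, then F(Y) is uniform on
# [0, 1]. A quick standalone check:
import numpy as np
from scipy.stats import norm
u = norm.cdf(norm.rvs(size=10000, random_state=0))
counts = np.histogram(u, bins=10)[0]   # roughly 1000 counts per bin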
348 | 349 | self.uniform = expando() 350 | for location in data.metadata.id_nodes: 351 | print(location) 352 | setattr(self.uniform, location, expando()) 353 | 354 | for ileadT, leadT in enumerate(data.metadata.fore_leadT, start = 1): 355 | cdf_loc_leadT = getattr(getattr(self.cdf, location), leadT) 356 | 357 | observations = getattr(getattr(self.clim.concurr, location), leadT).observations 358 | predictions = getattr(getattr(self.clim.concurr, location), leadT).predictions 359 | t_actual = getattr(getattr(self.clim.concurr, location), leadT).t_actual 360 | 361 | unif_aux = {} 362 | unif_aux['value'] = {} 363 | unif_aux['time'] = {} 364 | unif_aux['date'] = {} 365 | 366 | unif_aux['t'] = t_actual 367 | unif_aux['t'].index = range(len(observations)) 368 | 369 | for index in unif_aux['t'].keys(): 370 | conditional_cdf_loc_leadT = cdf_loc_leadT(predictions[index]) 371 | unif_aux['value'][index] = float(conditional_cdf_loc_leadT(observations[index])) 372 | unif_aux['time'][index] = unif_aux['t'][index].time() 373 | unif_aux['date'][index] = unif_aux['t'][index].date() 374 | del conditional_cdf_loc_leadT 375 | unif_aux = pd.DataFrame(unif_aux,columns=['t','value','time','date']) 376 | 377 | setattr(getattr(self.uniform, location), leadT, unif_aux) 378 | 379 | del unif_aux, observations, predictions 380 | gc.collect() 381 | 382 | pass 383 | 384 | 385 | def _get_corr(self, data): 386 | 387 | #Next we estimate the correlation matrix for the uniform variables. To facilitate this the 388 | #uniform variables are put on an appropriate form for computing a correlation matrix. This 389 | #is done using a pivot table 390 | uniform_df = pd.DataFrame({'location': [], 't': [], 'value': [], 'ltname': [],\ 391 | 'date': [], 'time': []}) 392 | for location in data.metadata.id_nodes: 393 | for leadT in data.metadata.fore_leadT: 394 | uniform_loc_leadT = getattr(getattr(self.uniform, location), leadT) 395 | 396 | df_loc_leadT_temp = pd.DataFrame({'location': location, 't': uniform_loc_leadT.t, \ 397 | 'value': uniform_loc_leadT.value, 'ltname': leadT, 'date': uniform_loc_leadT.date, \ 398 | 'time': uniform_loc_leadT.time}) 399 | 400 | uniform_df = pd.concat([uniform_df, df_loc_leadT_temp]) 401 | del df_loc_leadT_temp 402 | 403 | 404 | uniform_df['value']=uniform_df['value'].astype(float) 405 | uniform_pivot = uniform_df.pivot_table(index='date',columns=('location','ltname'),values='value') 406 | 407 | norm_df = uniform_df.copy() #copy so that the uniform values are not overwritten below 408 | norm_df['value'] = norm.ppf(uniform_df['value']) 409 | norm_pivot = norm_df.pivot_table(index='date',columns=('location','ltname'),values='value') 410 | 411 | #From the observations in the uniform domain we can now compute the correlation matrix. 412 | #The correlation matrix specifies the Gaussian copula used for combining the different models. 413 | #Where the computed correlation is NaN we set it to zero. 414 | correlation_matrix_na = norm_pivot.corr() 415 | where_are_NaNs = np.isnan(correlation_matrix_na) 416 | correlation_matrix = correlation_matrix_na 417 | correlation_matrix[where_are_NaNs] = 0. 418 | if not np.all(np.diag(correlation_matrix) == 1.): 419 | print('Warning: not all diagonal values of the correlation matrix are 1!') 420 | np.fill_diagonal(correlation_matrix.values, 1.) 
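# [Illustrative aside, not part of the original file] The pivot -> norm.ppf ->
# corr chain above, in miniature (toy frame with two locations over three dates):
import numpy as np
import pandas as pd
from scipy.stats import norm
df = pd.DataFrame({'date': ['d1','d1','d2','d2','d3','d3'],
                   'location': ['a','b','a','b','a','b'],
                   'value': [0.2, 0.3, 0.8, 0.7, 0.5, 0.6]})
pivot = df.pivot_table(index='date', columns='location', values='value')
gauss = pd.DataFrame(norm.ppf(pivot), columns=pivot.columns)
corr = gauss.corr()   # the Gaussian-copula correlation matrix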
421 | 422 | self.corr = expando() 423 | self.corr.correlation_matrix = correlation_matrix 424 | self.corr.pivot_columns = uniform_pivot.columns 425 | 426 | pass 427 | 428 | 429 | 430 | def _generalize_corr(self, data): 431 | #The purpose of this function is to extrapolate the correlation values to 432 | #different distances and delta lead times. In order to do so, 433 | #an exponential function is used for fitting. 434 | self.corr.fit = expando() 435 | self.corr.fit.combined = expando() 436 | corr_original = [] 437 | loc_to_compare_with = data.metadata.id_nodes[:] 438 | for id_ref in data.metadata.id_nodes: 439 | for id_loc in loc_to_compare_with: 440 | dist_locs = data.metadata.distances.loc[(id_ref,id_loc)] 441 | leadT_to_compare_with = data.metadata.fore_leadT[:] 442 | for leadT_ref in data.metadata.fore_leadT: 443 | for leadT in leadT_to_compare_with: 444 | dleadT = abs(int(leadT[6:]) - int(leadT_ref[6:])) 445 | new_corr = self.corr.correlation_matrix[(id_ref, leadT_ref)][(id_loc, leadT)] 446 | new_value = np.matrix([[dist_locs, dleadT, new_corr]]) 447 | temp_values = corr_original 448 | try: 449 | corr_original = np.concatenate((temp_values,new_value)) 450 | except: 451 | corr_original = new_value 452 | if id_ref == id_loc: 453 | leadT_to_compare_with.remove(leadT_ref) 454 | loc_to_compare_with.remove(id_ref) 455 | 456 | self.corr.fit.original = pd.DataFrame(corr_original, columns = ['distances','dt','correlation']) 457 | 458 | #Fitting part, using curve_fit 459 | x = np.squeeze(np.asarray(corr_original[:,0])) 460 | y = np.squeeze(np.asarray(corr_original[:,1])) 461 | z = np.squeeze(np.asarray(corr_original[:,2])) 462 | coeff, pcov = curve_fit(mix_func, (x, y), z) 463 | self.corr.fit.combined.coeff = coeff 464 | self.corr.fit.combined.func = lambda X, beta=coeff[0], tau=coeff[1], a=coeff[2], b=coeff[3]: \ 465 | mix_func(X, coeff[0], coeff[1], coeff[2], coeff[3]) 466 | 467 | pass 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | -------------------------------------------------------------------------------- /Code/scenarioGeneration.py: -------------------------------------------------------------------------------- 1 | # License: BSD_3_clause 2 | # 3 | # Copyright (c) 2015, Jan Emil Banning Iversen, Pierre Pinson, Igor Arduin 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are 7 | # met: 8 | # 9 | # Redistributions of source code must retain the above copyright 10 | # notice, this list of conditions and the following disclaimer. 11 | # 12 | # Redistributions in binary form must reproduce the above copyright 13 | # notice, this list of conditions and the following disclaimer in 14 | # the documentation and/or other materials provided with the 15 | # distribution. 16 | # 17 | # Neither the name of the Technical University of Denmark (DTU) 18 | # nor the names of its contributors may be used to endorse or 19 | # promote products derived from this software without specific 20 | # prior written permission. 21 | # 22 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 26 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | 34 | import os 35 | import numpy as np 36 | import pandas as pd 37 | from scipy.stats import multivariate_normal, norm 38 | from modelEstimation import get_label_time 39 | 40 | #Combined exp and Cauchy for correlation matrice fitting 41 | def mix_func(X, beta, tau, a, b): 42 | x,y = X 43 | return np.exp(-(tau*x)/(1+a*y**(2*b))**beta)/(1+a*y**(2*b)) 44 | 45 | 46 | #Function to define the forecast name from time 47 | def get_time_label(time): 48 | if time.month<10: name_month= '0' + str(time.month) 49 | else: name_month = str(time.month) 50 | if time.day<10: name_day= '0' + str(time.day) 51 | else: name_day = str(time.day) 52 | if time.hour<10: name_hour= '0' + str(time.hour) 53 | else: name_hour = str(time.hour) 54 | if time.minute<10: name_minute= '0' + str(time.minute) 55 | else: name_minute = str(time.minute) 56 | time_name = '_'+str(time.year)+name_month+name_day+name_hour+name_minute 57 | return time_name 58 | 59 | 60 | #Function to save scenarios in csv files 61 | def save_scenarios(scenarios,folder_output): 62 | for idate in scenarios.simulation.__dict__: 63 | if not os.path.isdir(folder_output+'/'+idate): 64 | os.makedirs(folder_output+'/'+idate) 65 | for iloc in getattr(scenarios.simulation, idate).__dict__: 66 | if iloc != 't_actual': 67 | getattr(getattr(scenarios.simulation, idate), iloc).to_csv(folder_output+'/'+idate+'/'+iloc+idate+'.csv') 68 | 69 | 70 | 71 | 72 | class expando: 73 | pass 74 | 75 | class scenarioGeneration: 76 | 77 | def __init__(self, model, data, improv_forecast, nb_scenarios): 78 | self._set_attributes(improv_forecast, nb_scenarios) 79 | self._get_covariance(model, data) 80 | self._get_scenarios(model, data) 81 | print('Scenarios computed!') 82 | pass 83 | 84 | 85 | def _set_attributes(self, improv_forecast, nb_scenarios): 86 | self.attributes = expando() 87 | self.attributes.improv_forecast = improv_forecast 88 | self.attributes.nb_scenarios = nb_scenarios 89 | pass 90 | 91 | 92 | def _get_covariance(self, model, data): 93 | 94 | self.correlation_matrix = pd.DataFrame(columns = model.corr.pivot_columns, 95 | index = model.corr.pivot_columns) 96 | for id_ref in data.metadata.id_nodes: 97 | for id_loc in data.metadata.id_nodes: 98 | for leadT_ref in data.metadata.fore_leadT: 99 | for leadT_loc in data.metadata.fore_leadT: 100 | dist_loc = getattr(getattr(data.metadata.distances, id_ref), id_loc) 101 | dleadT = abs(int(leadT_loc[6:]) - int(leadT_ref[6:])) 102 | self.correlation_matrix.loc[(id_ref,leadT_ref),(id_loc,leadT_loc)] = \ 103 | model.corr.fit.combined.func((dist_loc, dleadT)) 104 | 105 | 106 | self.correlation_matrix = self.correlation_matrix.astype(float) 107 | pass 108 | 109 | 110 | 111 | 112 | def _get_scenarios(self, model, data): 113 | 114 | self.simulation = expando() 115 | dates_of_issue = getattr(data.current_fore.fore, data.metadata.fore_leadT[0]).Time 116 | 117 | for i_date_issue, date_issue in enumerate(dates_of_issue): 118 | 119 | date_issue_name = get_time_label(date_issue) 120 | 
print(date_issue_name) 121 | setattr(self.simulation, date_issue_name, expando()) 122 | 123 | t_actual = expando() 124 | for ileadT, leadT in enumerate(data.metadata.fore_leadT, start=1): 125 | t_actual_temp = date_issue + data.current_fore.obs.Time.dt.freq * ileadT 126 | setattr(t_actual, leadT, t_actual_temp) 127 | setattr(getattr(self.simulation, date_issue_name), 't_actual', t_actual) 128 | mean = np.zeros(model.corr.correlation_matrix.shape[1]) 129 | #First we simulate uniform variables with the appropriate interdependence structure. 130 | #This is easily done by first simulating Gaussian variables with the same interdependence 131 | #structure and then transforming them to the uniform domain by their marginals. 132 | #The commented lines would instead use the fitted correlation matrix built in _get_covariance: 133 | # rv_mvnorm = multivariate_normal(mean, self.correlation_matrix) 134 | # simulation_mvnorm = rv_mvnorm.rvs(self.attributes.nb_scenarios) 135 | rv_mvnorm = multivariate_normal(mean, model.corr.correlation_matrix) 136 | simulation_mvnorm = rv_mvnorm.rvs(self.attributes.nb_scenarios) 137 | simulation_uniform = pd.DataFrame(data = norm.cdf(simulation_mvnorm), 138 | columns = model.corr.pivot_columns) 139 | 140 | 141 | #Having obtained the simulated variables in the uniform domain, we need to get them into the transformed 142 | #domain. This is done by using the inverse cumulative density function (inv_cdf) for each region and 143 | #lead time. As the marginals depend on the predicted values, the predictions are required. 144 | #Here the predictions that came with the data are used. 145 | 146 | #First we put the transformed predictions on the appropriate form. To do this we need a set of 147 | #multi horizon point predictions spanning the locations considered and the prediction horizons. 148 | #Further we need a starting time. In this implementation we simply choose a starting time from 149 | #the forecast data and choose the associated forecasts. 
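# [Illustrative aside, not part of the original file] The copula sampling step
# in isolation: draw correlated Gaussians, then map them to uniforms through
# the standard normal cdf (toy 3x3 correlation matrix):
import numpy as np
from scipy.stats import multivariate_normal, norm
toy_corr = np.array([[1.0, 0.6, 0.3],
                     [0.6, 1.0, 0.6],
                     [0.3, 0.6, 1.0]])
gauss = multivariate_normal(np.zeros(3), toy_corr).rvs(50, random_state=0)
uniforms = norm.cdf(gauss)   # 50 scenarios with correlated uniform marginals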
150 | scen_label = [None] * self.attributes.nb_scenarios 151 | for iscen in range(1, self.attributes.nb_scenarios+1): 152 | scen_label[iscen-1] = 'scen_' + str(iscen) 153 | scen_label.insert(0,'forecasts') 154 | scen_label.insert(0,'init_forecasts') 155 | self.attributes.scen_label = scen_label 156 | 157 | for id_loc in data.metadata.id_nodes: 158 | simulation_loc = pd.DataFrame(0, columns = data.metadata.fore_leadT, 159 | index = scen_label) 160 | 161 | for leadT in data.metadata.fore_leadT: 162 | 163 | predict_simulation = getattr(data.current_fore.fore, leadT).loc[i_date_issue,(id_loc)] 164 | simulation_loc.loc[(scen_label[0],leadT)] = predict_simulation 165 | 166 | label_time = get_label_time(getattr(t_actual, leadT).time()) 167 | 168 | #Get the prediction out of seasonality effects 169 | clim_cdf = \ 170 | getattr(getattr(model.clim.cdf, id_loc), label_time) 171 | predict_transf_simulation = clim_cdf(predict_simulation) 172 | #Improve the forecast with the weighted persistence-climatology model 173 | if self.attributes.improv_forecast == 1: 174 | #Makes sense only for wind predictions so far 175 | #Get the observation at time of issue out of seasonality effects, 176 | #which will represent the persistence value 177 | label_time_issue = get_label_time(date_issue.time()) 178 | clim_cdf = \ 179 | getattr(getattr(model.clim.cdf, id_loc), label_time_issue) 180 | fore_obs_loc = getattr(data.current_fore.obs, id_loc) 181 | pers_obs_loc_trans = \ 182 | clim_cdf(fore_obs_loc[np.where(data.current_fore.obs.Time == date_issue)[0][0]]) 183 | # 184 | coeff_imp = \ 185 | getattr(getattr(model.imp_fore, id_loc), leadT) 186 | 187 | predict_transf_simulation = coeff_imp.intercept + \ 188 | coeff_imp.beta_pers * pers_obs_loc_trans + \ 189 | coeff_imp.beta_fore * predict_transf_simulation 190 | 191 | #After the improvement step, the prediction might exceed 1, which is unrealistic 192 | if predict_transf_simulation > 1: 193 | predict_transf_simulation = 1 194 | 195 | #If the prediction is 0, all scenarios are set to zero (solar night case) 196 | if predict_transf_simulation > 0: 197 | #Having obtained the predictions on the transformed domain, we can simulate on the 198 | #transformed domain. This is done by converting the simulated uniforms to the appropriate 199 | #transformed values through the inv_cdf transformation. 200 | inv_cdf = getattr(getattr(model.inv_cdf, id_loc), leadT) 201 | conditional_inv_cdf = inv_cdf(predict_transf_simulation) 202 | clim_inv_cdf = getattr(getattr(model.clim.inv_cdf,id_loc),label_time) 203 | for iscen in range(self.attributes.nb_scenarios): 204 | simulation_transformed_temp = \ 205 | float(conditional_inv_cdf(getattr(getattr(simulation_uniform, id_loc), leadT)[iscen])) 206 | #Transforms the simulations on the transformed domain back to the original domain. 
Here we 207 | #need to define an initial time to transform the simulation to the original power domain: 208 | simulation_loc.loc[(scen_label[iscen+1],leadT)] = clim_inv_cdf(simulation_transformed_temp) 209 | #Save the modified input forecast 210 | simulation_loc.loc[(scen_label[1],leadT)] = clim_inv_cdf(predict_transf_simulation) 211 | 212 | setattr(getattr(self.simulation, date_issue_name), id_loc, simulation_loc) 213 | 214 | pass 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | -------------------------------------------------------------------------------- /RE-Europe_dataset_package/Put_data_here.txt: -------------------------------------------------------------------------------- 1 | Expected data structure is 2 | Forecasts in 3 | ./Nodal_FC/YYYYMMDDHHmm/*.csv 4 | Observations in 5 | ./Nodal_TS/*.csv 6 | 7 | These directories will be automatically generated by extracting the data package here. -------------------------------------------------------------------------------- /Results/Results_to_be_stored_here.txt: -------------------------------------------------------------------------------- 1 | A folder will be created for each prediction horizon of the testing period under the name _YYYYMMDDHHMM. 2 | In these folders, csv files contain the generated scenarios per location. 3 | Ex: id_114_201406010000.csv -------------------------------------------------------------------------------- /licence.txt: -------------------------------------------------------------------------------- 1 | License: BSD_3_clause 2 | 3 | Copyright (c) 2015, Jan Emil Banning Iversen, Pierre Pinson, Igor Arduin 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in 14 | the documentation and/or other materials provided with the 15 | distribution. 16 | 17 | Neither the name of the Technical University of Denmark (DTU) 18 | nor the names of its contributors may be used to endorse or 19 | promote products derived from this software without specific 20 | prior written permission. 21 | 22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 26 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. --------------------------------------------------------------------------------