├── .gitattributes
├── .gitignore
├── Code
    ├── dataReader.py
    ├── main.py
    ├── modelEstimation.py
    └── scenarioGeneration.py
├── RE-Europe_dataset_package
    └── Put_data_here.txt
├── Results
    └── Results_to_be_stored_here.txt
└── licence.txt


/.gitattributes:
--------------------------------------------------------------------------------
 1 | # Auto detect text files and perform LF normalization
 2 | * text=auto
 3 | 
 4 | # Custom for Visual Studio
 5 | *.cs     diff=csharp
 6 | 
 7 | # Standard to msysgit
 8 | *.doc	 diff=astextplain
 9 | *.DOC	 diff=astextplain
10 | *.docx diff=astextplain
11 | *.DOCX diff=astextplain
12 | *.dot  diff=astextplain
13 | *.DOT  diff=astextplain
14 | *.pdf  diff=astextplain
15 | *.PDF	 diff=astextplain
16 | *.rtf	 diff=astextplain
17 | *.RTF	 diff=astextplain
18 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Windows image file caches
 2 | Thumbs.db
 3 | ehthumbs.db
 4 | 
 5 | # Folder config file
 6 | Desktop.ini
 7 | 
 8 | # Recycle Bin used on file shares
 9 | $RECYCLE.BIN/
10 | 
11 | # Windows Installer files
12 | *.cab
13 | *.msi
14 | *.msm
15 | *.msp
16 | 
17 | # Windows shortcuts
18 | *.lnk
19 | 
20 | # =========================
21 | # Operating System Files
22 | # =========================
23 | 
24 | # OSX
25 | # =========================
26 | 
27 | .DS_Store
28 | .AppleDouble
29 | .LSOverride
30 | 
31 | # Thumbnails
32 | ._*
33 | 
34 | # Files that might appear in the root of a volume
35 | .DocumentRevisions-V100
36 | .fseventsd
37 | .Spotlight-V100
38 | .TemporaryItems
39 | .Trashes
40 | .VolumeIcon.icns
41 | 
42 | # Directories potentially created on remote AFP share
43 | .AppleDB
44 | .AppleDesktop
45 | Network Trash Folder
46 | Temporary Items
47 | .apdisk
48 | 


--------------------------------------------------------------------------------
/Code/dataReader.py:
--------------------------------------------------------------------------------
  1 | # License: BSD_3_clause
  2 | #
  3 | # Copyright (c) 2015, Jan Emil Banning Iversen, Pierre Pinson, Igor Arduin
  4 | #
  5 | # Redistribution and use in source and binary forms, with or without
  6 | # modification, are permitted provided that the following conditions are
  7 | # met:
  8 | #
  9 | # Redistributions of source code must retain the above copyright
 10 | # notice, this list of conditions and the following disclaimer.
 11 | #
 12 | # Redistributions in binary form must reproduce the above copyright
 13 | # notice, this list of conditions and the following disclaimer in
 14 | # the documentation and/or other materials provided with the
 15 | # distribution.
 16 | #
 17 | # Neither the name of the Technical University of Denmark (DTU)
 18 | # nor the names of its contributors may be used to endorse or
 19 | # promote products derived from this software without specific
 20 | # prior written permission.
 21 | #
 22 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 23 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 24 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 25 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 26 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 27 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 28 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 29 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 30 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 31 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 32 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 33 | 
 34 | 
 35 | import os
 36 | import sys
 37 | import pandas as pd
 38 | import numpy as np    
 39 | from datetime import datetime, time
 40 | import gc
 41 | import math
 42 | import random
 43 | 
 44 | class expando:
 45 |     pass 
 46 | 
 47 | 
 48 | #Function needed to define distances between nodes from longitudes and latitudes
 49 | def distance_from_long_lat(lat1, long1, lat2, long2):
 50 |     # Convert latitude and longitude to spherical coordinates in radians.
 51 |     degrees_to_radians = math.pi/180.0  
 52 |     # phi = 90 - latitude
 53 |     phi1 = (90.0 - lat1)*degrees_to_radians
 54 |     phi2 = (90.0 - lat2)*degrees_to_radians
 55 |     # theta = longitude
 56 |     theta1 = long1*degrees_to_radians
 57 |     theta2 = long2*degrees_to_radians
 58 |     # Compute spherical distance from spherical coordinates.
 59 |     # For two locations in spherical coordinates (1, theta, phi) and (1, theta', phi')
 60 |     # cosine( arc length ) = sin phi sin phi' cos(theta-theta') + cos phi cos phi'
 61 |     # distance = rho * arc length
 62 |     cos = (math.sin(phi1)*math.sin(phi2)*math.cos(theta1 - theta2) + 
 63 |            math.cos(phi1)*math.cos(phi2))
 64 |     if cos>1:#numerical approximations can bring to a number slightly >1
 65 |         cos=1
 66 |     arc = math.acos( cos )
 67 |     # Remember to multiply arc by the radius of the earth 
 68 |     # in your favorite set of units to get length.
 69 |     R_earth = 6371 #km
 70 |     arc = arc * R_earth
 71 |     return arc
 72 | 
 73 | 
 74 | 
 75 | class dataReader:
 76 |     
 77 |      def __init__(self, countries,max_number_loc,renewable_type,data_type,start_time,
 78 |                   end_time,fore_start_time,fore_end_time,nbr_leadTimes,folder_location):         
 79 |         self._set_attributes(countries,max_number_loc,renewable_type,data_type,start_time,
 80 |                              end_time,fore_start_time,fore_end_time,nbr_leadTimes,folder_location)
 81 |         self._check_countries()  
 82 |         self._load_observations()
 83 |         self._tod_observations()
 84 |         self._load_forecasts()
 85 |         self._get_distances()
 86 |         print('Data has been imported!')  
 87 |         
 88 |         pass
 89 |         
 90 |      #Function that stores all inputs as attributes of the output
 91 |      def _set_attributes(self, countries,max_number_loc,renewable_type,data_type,start_time,
 92 |                          end_time,fore_start_time,fore_end_time,nbr_leadTimes,folder_location):
 93 |                             
 94 |         self.attributes = expando()
 95 |         self.attributes.renew_type = renewable_type
 96 |         self.attributes.data_type = data_type
 97 |         self.attributes.folder_loc = folder_location
 98 |         self.attributes.start_time = start_time
 99 |         self.attributes.end_time = end_time
100 |         self.attributes.fore_start_time = fore_start_time
101 |         self.attributes.fore_end_time = fore_end_time
102 |         self.attributes.nbr_leadT = nbr_leadTimes
103 |         self.attributes.countries = countries
104 |         self.attributes.max_number_loc = max_number_loc
105 |         
106 |         self.metadata = expando()
107 |         
108 |         pass
109 |     
110 |     
111 |      #Function that check input countries and display an error message if they
112 |      #don't correspond. Returns the available countries and indices from nodes
113 |      def _check_countries(self):
114 | 
115 |          self.metadata.network_nodes = pd.read_csv(self.attributes.folder_loc+'/Metadata/network_nodes.csv', 
116 |                                                    sep=',')
117 |          available_countries = set(self.metadata.network_nodes.country)
118 |          countries = self.attributes.countries
119 |          if bool(countries-available_countries.intersection(countries)): 
120 |              print(', '.join(countries-available_countries.intersection(countries)) + \
121 |              ' are not in the country list. ' + 'See in:' + ', '.join(available_countries))
122 |          self.attributes.countries = list(available_countries.intersection(countries))
123 |          
124 |          ix_net_nodes_bool = np.in1d(self.metadata.network_nodes.country, self.attributes.countries)
125 |          self.metadata.ix_nodes = np.where(ix_net_nodes_bool)[0]+1
126 |          if self.attributes.max_number_loc != None and len(self.metadata.ix_nodes)>self.attributes.max_number_loc: 
127 |              self.metadata.ix_nodes = np.sort(random.sample(list(self.metadata.ix_nodes), 
128 |                                                             self.attributes.max_number_loc))
129 |              print('The number of nodes selected was higher than the maximum number of locations (' +\
130 |              str(self.attributes.max_number_loc) + ') and therefore reduced.')
131 |          
132 |          pass
133 |     
134 |      #Function that loads observations and stores them in the 'obs' attribute of output
135 |      def _load_observations(self):
136 |         
137 |         filename = self.attributes.folder_loc + '/Nodal_TS/' + self.attributes.renew_type + \
138 |         '_signal_' + self.attributes.data_type + '.csv'
139 |         data_observations_aux = pd.read_csv(filename, sep=',')
140 |         
141 |         #Getting observations of training period
142 |         ix_time_bool = np.in1d(data_observations_aux.Time, 
143 |                                [self.attributes.start_time,self.attributes.end_time])
144 |         ix_time = np.where(ix_time_bool)[0]
145 |         if len(ix_time) == 1:
146 |             sys.exit('Training period contains only one element.'+ \
147 |             'There must be an error in the definition of starting/ending dates.'+\
148 |             'Check day, month and year selected. Remember that data are available hourly only.')
149 |         ix_net_nodes = np.append(0, self.metadata.ix_nodes)
150 |         data_observations = data_observations_aux.ix[ix_time[0]:ix_time[len(ix_time)-1], 
151 |                                                      ix_net_nodes]
152 |         data_observations.Time = pd.to_datetime(data_observations.Time)  
153 |         del ix_time_bool, ix_time   
154 |         
155 |         #Getting observations of testing period
156 |         ix_time_bool = np.in1d(data_observations_aux.Time, 
157 |                                [self.attributes.fore_start_time,self.attributes.fore_end_time])
158 |         ix_time = np.where(ix_time_bool)[0]
159 |         data_observations_cf = data_observations_aux.ix[ix_time[0]:ix_time[len(ix_time)-1],
160 |                                                         ix_net_nodes]
161 |         data_observations_cf.Time = pd.to_datetime(data_observations_cf.Time)  
162 |         
163 |         #Define colnames with locations
164 |         new_col_names = [None] * len(data_observations.columns)
165 |         new_col_names[0] = 'Time'
166 |         for icol, col_name in enumerate(data_observations.columns[1:], start=1):
167 |             new_col_names[icol] = 'id_' + col_name
168 |         self.metadata.id_nodes = new_col_names[1:]
169 |         
170 |         data_observations.columns = new_col_names  
171 |         data_observations_cf.columns = new_col_names
172 |         
173 |         data_observations.reset_index(drop=True, inplace=True)
174 |         data_observations_cf.reset_index(drop=True, inplace=True)
175 |         
176 |         del data_observations_aux, filename
177 |         
178 |         self.obs = data_observations
179 |         self.current_fore = expando()
180 |         self.current_fore.obs = data_observations_cf
181 |         
182 |         pass
183 |     
184 |      #Function that defines the time of day horizon of predictions/observations
185 |      #Dataset contains only hourly information but it can be adapted for other
186 |      #markets
187 |      def _tod_observations(self):
188 |         
189 |         #Assumption of an hourly day discretisation, to be adapted better if 
190 |         #intraday market or other kinds are to be considered
191 |         time_of_day = [time(ih,0,0,0) for ih in range(24)]
192 |         tod_name = [None] * len(time_of_day)
193 |         #defining the repartition in day for later climatology application
194 |         for index,itime in enumerate(time_of_day):
195 |             if itime.hour<10: h_name= '0' + str(itime.hour)
196 |             else: h_name = str(itime.hour)
197 |             if itime.minute<10: min_name= '0' + str(itime.minute)
198 |             else: min_name = str(itime.minute)
199 |             tod_name[index] = 'h_'+ h_name + '_' + min_name
200 |             
201 |         self.metadata.tod = time_of_day
202 |         self.metadata.tod_label = tod_name
203 | 
204 |         pass
205 |  
206 |      #Function that loads predictions and stores them in the 'fore' attribute of output
207 |      def _load_forecasts(self):
208 |         
209 |         #Define lead times labels
210 |         forecast_ahead = [None] * self.attributes.nbr_leadT
211 |         for leadT in range(1,self.attributes.nbr_leadT+1):
212 |            if leadT<10: nb_name= '0' + str(leadT)
213 |            else: nb_name = str(leadT)
214 |            forecast_ahead[leadT-1] = 'leadT_' + nb_name
215 |         self.metadata.fore_leadT = forecast_ahead
216 |         
217 |         #loading of forecasts data under data_forecasts
218 |         data_forecasts = expando()
219 |         data_forecasts_cf = expando()
220 |         empty_df = pd.DataFrame(columns=self.obs.columns)
221 |         for leadT_name in  self.metadata.fore_leadT:
222 |             setattr(data_forecasts, leadT_name, empty_df)
223 |             setattr(data_forecasts_cf, leadT_name, empty_df)
224 |              
225 |         for iforecast in os.listdir(self.attributes.folder_loc + '/Nodal_FC/'):
226 |             iforecast_asDate = datetime(int(iforecast[:4]), int(iforecast[4:6]), int(iforecast[6:8]),
227 |                                         int(iforecast[8:]),0,0)
228 |             iforecast_asDate = iforecast_asDate.strftime("%Y-%m-%d %H:%M:%S")
229 |             if iforecast_asDate>=self.attributes.start_time and iforecast_asDate<=self.attributes.end_time:
230 |                 filename = self.attributes.folder_loc + '/Nodal_FC/' + iforecast + \
231 |                 '/' + self.attributes.renew_type + '_forecast.csv'
232 |                 data_forecasts_aux = pd.read_csv(filename, sep=',')
233 |                 for leadT, leadT_name in  enumerate(self.metadata.fore_leadT, start = 1):
234 |                     temp_df = pd.DataFrame(np.nan, index=[0],columns=self.obs.columns)
235 |                     temp_df.loc[0,('Time')] = iforecast_asDate
236 |                     for iloc, location in enumerate(self.metadata.id_nodes):
237 |                         temp_df.loc[0,(location)] = data_forecasts_aux.ix[leadT,self.metadata.ix_nodes[iloc]]
238 |                     setattr(data_forecasts, leadT_name, 
239 |                             getattr(data_forecasts, leadT_name).append(temp_df, ignore_index=True))
240 |                     del temp_df
241 |                 del data_forecasts_aux, filename
242 |                 
243 |             if iforecast_asDate>=self.attributes.fore_start_time and iforecast_asDate<=self.attributes.fore_end_time:
244 |                 filename = self.attributes.folder_loc + '/Nodal_FC/' + iforecast + \
245 |                 '/' + self.attributes.renew_type + '_forecast.csv'
246 |                 data_forecasts_aux = pd.read_csv(filename, sep=',')
247 |                 for leadT, leadT_name in  enumerate(self.metadata.fore_leadT, start = 1):
248 |                     temp_df = pd.DataFrame(np.nan, index=[0],columns=self.obs.columns)
249 |                     temp_df.loc[0,('Time')] = iforecast_asDate
250 |                     for iloc, location in enumerate(self.metadata.id_nodes):
251 |                         temp_df.loc[0,(location)] = data_forecasts_aux.ix[leadT,self.metadata.ix_nodes[iloc]]
252 |                     setattr(data_forecasts_cf, leadT_name, 
253 |                             getattr(data_forecasts_cf, leadT_name).append(temp_df, ignore_index=True))
254 |                     del temp_df
255 |                 del data_forecasts_aux, filename
256 |                 
257 |         gc.collect
258 |         
259 |         for leadT_name in  self.metadata.fore_leadT:
260 |             getattr(data_forecasts, leadT_name).Time = \
261 |             pd.to_datetime(getattr(data_forecasts, leadT_name).Time)  
262 |             getattr(data_forecasts_cf, leadT_name).Time = \
263 |             pd.to_datetime(getattr(data_forecasts_cf, leadT_name).Time)  
264 |             
265 |         self.fore = data_forecasts
266 |         self.current_fore.fore = data_forecasts_cf
267 |         
268 |         pass
269 |     
270 |     
271 |      #Function that calculates and stores distances between nodes 
272 |      def _get_distances(self):
273 |         
274 |         dist_df = pd.DataFrame(index=self.metadata.id_nodes, columns=self.metadata.id_nodes)
275 |         for loc_ref in self.metadata.id_nodes:
276 |             id_ref = int(loc_ref[3:])
277 |             ix_ref = np.where(self.metadata.network_nodes.ID == id_ref)[0]
278 |             for loc_comp in self.metadata.id_nodes:
279 |                 id_comp = int(loc_comp[3:])
280 |                 ix_comp = np.where(self.metadata.network_nodes.ID == id_comp)[0]
281 |                 dist_df.loc[(loc_ref, loc_comp)] = \
282 |                 distance_from_long_lat(self.metadata.network_nodes.latitude.values[ix_ref],
283 |                                        self.metadata.network_nodes.longitude.values[ix_ref],
284 |                                        self.metadata.network_nodes.latitude.values[ix_comp],
285 |                                        self.metadata.network_nodes.longitude.values[ix_comp])
286 |         self.metadata.distances = dist_df
287 |         del dist_df
288 |         
289 |         pass
290 |     
291 |     
292 |     
293 |      
294 |     
295 |     
296 |     
297 | 
298 |     
299 |     
300 |     
301 | 


--------------------------------------------------------------------------------
/Code/main.py:
--------------------------------------------------------------------------------
 1 | # License: BSD_3_clause
 2 | #
 3 | # Copyright (c) 2015, Jan Emil Banning Iversen, Pierre Pinson, Igor Arduin
 4 | #
 5 | # Redistribution and use in source and binary forms, with or without
 6 | # modification, are permitted provided that the following conditions are
 7 | # met:
 8 | #
 9 | # Redistributions of source code must retain the above copyright
10 | # notice, this list of conditions and the following disclaimer.
11 | #
12 | # Redistributions in binary form must reproduce the above copyright
13 | # notice, this list of conditions and the following disclaimer in
14 | # the documentation and/or other materials provided with the
15 | # distribution.
16 | #
17 | # Neither the name of the Technical University of Denmark (DTU)
18 | # nor the names of its contributors may be used to endorse or
19 | # promote products derived from this software without specific
20 | # prior written permission.
21 | #
22 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 | 
34 | 
35 | 
36 | ##INPUTS TO BE MODIFIED
37 | #Path where all .py files are stored
38 | folder_code = '.'
39 | #Path to data
40 | folder_data = '../RE-Europe_dataset_package'
41 | #Output path to store scenarios in csv
42 | folder_output = '../Results'
43 | #Renewable type to be studied: 'wind' or 'solar'
44 | renewable_type = 'wind' #
45 | #Data type: 'COSMO' or 'ECMWF' (COSMO recommended)
46 | data_type = 'COSMO'
47 | #Countries to be studied - see in documentation for list of countries keywords
48 | countries = {'FRA'}
49 | #Mawimum number of locations (random selection among all selected countries)
50 | max_number_loc = 10
51 | #Number of lead times to be studied (up to 91)
52 | nbr_leadTimes = 10
53 | #Starting and ending time of training period ('YYYY-MM-DD HH:MM:SS')
54 | start_time = '2012-01-02 00:00:00'
55 | end_time = '2012-12-31 00:00:00' 
56 | #Starting and ending time of testing period - when scenarios will be generated ('YYYY-MM-DD HH:MM:SS')
57 | fore_start_time = '2014-09-01 00:00:00'
58 | fore_end_time = '2014-09-10 00:00:00'
59 | #Use of the improved forecast model (0:no - 1:yes) - only relevant for wind case
60 | improv_forecast = 1
61 | #Number of scenarios to be computed
62 | nb_scenarios = 50 
63 | 
64 | 
65 | ##CODE STRUCTURE - DON'T MODIFY IF ONLY USE
66 | import sys
67 | sys.path.insert(0, folder_code)
68 | from dataReader import dataReader
69 | from modelEstimation import modelEstimation
70 | from scenarioGeneration import scenarioGeneration, save_scenarios
71 | 
72 | data = dataReader(countries,max_number_loc,renewable_type,data_type,start_time,
73 |                   end_time,fore_start_time,fore_end_time,nbr_leadTimes,folder_data)
74 | model = modelEstimation(data)
75 | scenarios = scenarioGeneration(model, data, improv_forecast, nb_scenarios)
76 | save_scenarios(scenarios, folder_output)
77 |     
78 | 


--------------------------------------------------------------------------------
/Code/modelEstimation.py:
--------------------------------------------------------------------------------
  1 | # License: BSD_3_clause
  2 | #
  3 | # Copyright (c) 2015, Jan Emil Banning Iversen, Pierre Pinson, Igor Arduin
  4 | #
  5 | # Redistribution and use in source and binary forms, with or without
  6 | # modification, are permitted provided that the following conditions are
  7 | # met:
  8 | #
  9 | # Redistributions of source code must retain the above copyright
 10 | # notice, this list of conditions and the following disclaimer.
 11 | #
 12 | # Redistributions in binary form must reproduce the above copyright
 13 | # notice, this list of conditions and the following disclaimer in
 14 | # the documentation and/or other materials provided with the
 15 | # distribution.
 16 | #
 17 | # Neither the name of the Technical University of Denmark (DTU)
 18 | # nor the names of its contributors may be used to endorse or
 19 | # promote products derived from this software without specific
 20 | # prior written permission.
 21 | #
 22 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 23 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 24 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 25 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 26 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 27 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 28 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 29 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 30 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 31 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 32 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 33 | 
 34 | 
 35 | import numpy as np                 
 36 | import pandas as pd  
 37 | import gc
 38 | import warnings
 39 | 
 40 | from scipy.stats import norm
 41 | from scipy.interpolate import interp1d
 42 | from scipy.optimize import curve_fit        
 43 | from sklearn import linear_model  
 44 | import statsmodels.formula.api as smf
 45 |           
 46 | class expando:
 47 |     pass 
 48 |     
 49 | #Combined exp and Cauchy for correlation matrice fitting
 50 | def mix_func(X, beta, tau, a, b):
 51 |     x,y = X
 52 |     return np.exp(-(tau*x)/(1+a*y**(2*b))**beta)/(1+a*y**(2*b))
 53 | 
 54 | #climatology cdf/inv_cdf function 
 55 | def clim_cdf(input_data_t, loc_NC, max_factor, cdf_keyword):
 56 |     probabilities = np.arange(1,len(input_data_t)+1)/(float(len(input_data_t))+1)     
 57 |     quantiles = np.array(sorted(input_data_t))
 58 |     quantiles[quantiles < 0] = 0.
 59 |     if (quantiles == np.zeros(len(quantiles))).all():
 60 |         #night of solar cases
 61 |         quantiles_extended = np.array([0.0, loc_NC])
 62 |         probabilities_extended = np.zeros(len(quantiles_extended))
 63 |     else:
 64 |         #Extension of quantiles to reach nominal capacity. The climatology 
 65 |         #functions are built with observations. This extension is to prevent the cases
 66 |         #when forecast are higher than observations and therefore out of range.
 67 |         #The value 1.2 is the lowest fit found so far. Could be generalize using directly 
 68 |         #the real nominal capacity
 69 |         quantiles_extended = \
 70 |         np.concatenate([[-1e-5,0], quantiles, [quantiles.max()*max_factor, loc_NC*max_factor]])
 71 |         probabilities_extended = np.concatenate([[-1e-5,0.],probabilities,[1,1+1e-5]])
 72 |     
 73 |     if cdf_keyword == 'cdf':
 74 |         interpolation = interp1d(quantiles_extended, probabilities_extended)
 75 |     elif cdf_keyword == 'inv_cdf':
 76 |         interpolation = interp1d(probabilities_extended, quantiles_extended)
 77 |     
 78 |     return interpolation
 79 | 
 80 | #Function to labelize the hours when climatology will be applied. 
 81 | #Format 'h_HH_MM'. Ex: in intraday 'h_18_45'
 82 | def get_label_time(time):
 83 |     if time.hour<10: name_hour= '0' + str(time.hour)
 84 |     else: name_hour = str(time.hour)
 85 |     if time.minute<10: name_minute= '0' + str(time.minute)
 86 |     else: name_minute = str(time.minute)
 87 |     label_time = 'h_'+name_hour+'_'+name_minute
 88 |     return label_time
 89 | 
 90 | 
 91 | #cdf from conditional quantile regression
 92 | def cqr_cdf(prediction, betas, cdf_keyword): 
 93 |     prob = betas.loc[:,('probabilities')].values
 94 |     quantiles = np.zeros(len(prob))
 95 |     for i in range(len(prob)):
 96 |         quantiles[i] = float(betas.loc[i,('intercept')] + betas.loc[i,('coefficient')]*prediction)
 97 |     quantiles[quantiles < 0] = 0
 98 |     quantiles[quantiles > 1] = 1
 99 |     quantiles_extended = np.concatenate([[0], sorted(quantiles), [1]])
100 |     probabilities_extended = np.concatenate([[0],prob,[1]])
101 |     if cdf_keyword == 'cdf':
102 |         interpolation = interp1d(quantiles_extended, probabilities_extended)
103 |     elif cdf_keyword == 'inv_cdf':
104 |         interpolation = interp1d(probabilities_extended, quantiles_extended)
105 |     return interpolation
106 |  
107 | 
108 | 
109 | class modelEstimation:
110 | 
111 |     def __init__(self, data): 
112 |         print('Climatology transformation')
113 |         self._set_climatology(data)
114 |         self._apply_climatology(data)
115 |         self._get_concurrent_clim_set(data)
116 |         print('Getting the estimators to improve forecast')
117 |         self._improvement_forecast(data)   
118 |         print('Quantiles calculation')
119 |         self._set_quantiles(data)
120 |         self._set_cdf(data)
121 |         print('CDF transformation')
122 |         self._apply_cdf(data)
123 |         print('Generalization of correlation matrice')
124 |         self._get_corr(data)
125 |         self._generalize_corr(data)
126 |         print('Estimation finished!')
127 |         pass
128 |     
129 |     #Function that defines climatology cdf for every corresponding time horizon,
130 |     #using training data.
131 |     def _set_climatology(self, data):       
132 |         #time of day
133 |         tod = data.metadata.tod 
134 |         #Making Climatology time-of-day transformation
135 |         climatology = expando()
136 |         for location in data.metadata.id_nodes:
137 |             setattr(climatology, location, expando())
138 |             for index,itime in enumerate(tod):
139 |                 setattr(getattr(climatology, location), data.metadata.tod_label[index], 
140 |                         getattr(data.obs,location)[data.obs.Time.dt.time == itime])
141 |                 
142 |         #Making the cdf and the inv_cdf for the climatology to use as a transformation
143 |         self.clim = expando()
144 |         self.clim.cdf = expando()
145 |         self.clim.inv_cdf = expando()
146 |         #As observations might not reach nominal capacity of the farms while forecasts 
147 |         #might predict it, necessity to define a factor to multiply to the maximum 
148 |         #of observations in the definition of climatology cdf/inv_cdf
149 |         #This factor will not be needed if NC of farms are included/used
150 |         if data.attributes.renew_type == 'wind':
151 |             max_factor = 1.05
152 |         elif data.attributes.renew_type == 'solar':
153 |             max_factor = 1.2
154 |         #For each location and time of day, creation of climatology cdf/inv_cdf   
155 |         for location in data.metadata.id_nodes:
156 |             setattr(self.clim.cdf, location, expando())
157 |             setattr(self.clim.inv_cdf, location, expando())
158 |             loc_NC = max(getattr(data.obs,location)) #close to nominal capacity of farm
159 |             for itime in data.metadata.tod_label:  
160 |                 clim_loc_t = getattr(getattr(climatology,location),itime)
161 |                 setattr(getattr(self.clim.cdf, location), itime, 
162 |                         clim_cdf(clim_loc_t, loc_NC, max_factor, 'cdf'))
163 |                 setattr(getattr(self.clim.inv_cdf, location), itime, 
164 |                         clim_cdf(clim_loc_t, loc_NC, max_factor, 'inv_cdf'))
165 |         pass    
166 |     
167 |     
168 |     #Function that applies climatology transformations to all observations and predictions.
169 |     def _apply_climatology(self, data):    
170 |         
171 |         #Transforming observations and predictions (by climatology) to get rid of seasonality
172 |         #First transforming the observed power
173 |         self.clim.obs = pd.DataFrame(np.nan, index=range(len(data.obs)),
174 |                                      columns=['t_actual']+data.metadata.id_nodes)                            
175 |         self.clim.obs.loc[:,('t_actual')] = data.obs.Time      
176 |         
177 |         for location in data.metadata.id_nodes:
178 |             for itime, time in data.obs.Time.dt.time.iteritems():
179 |                 label_time = get_label_time(time)
180 |                 clim_cdf_loc_t = getattr(getattr(self.clim.cdf,location), label_time)  
181 |                 self.clim.obs.loc[itime,(location)] = float(clim_cdf_loc_t(data.obs.loc[itime,(location)]))                       
182 |                                               
183 |     
184 |         #Second, the predicted power is transformed
185 |         self.clim.fore = expando()
186 |         for ileadT, leadT in enumerate(data.metadata.fore_leadT, start = 1):
187 |             data_fore_leadT = getattr(data.fore, leadT)
188 |             
189 |             init_df = pd.DataFrame(np.nan, index=range(len(data_fore_leadT.Time)),
190 |                                    columns=['t_issue','t_actual']+data.metadata.id_nodes)
191 |             init_df.loc[:,('t_issue')] = data.fore.leadT_01.Time
192 |             init_df.loc[:,('t_actual')] = init_df.loc[:,('t_issue')] + data.obs.Time.dt.freq * ileadT
193 | 
194 |             for location in data.metadata.id_nodes: 
195 |                 for idate in range(len(data_fore_leadT.Time)):
196 |                     power_to_be_transformed = getattr(data_fore_leadT,location)[idate]
197 |                     label_time = get_label_time(init_df.loc[idate,('t_actual')])
198 |                     clim_cdf_loc_t = getattr(getattr(self.clim.cdf, location),label_time)
199 |                     init_df.loc[idate,(location)] = float(clim_cdf_loc_t(power_to_be_transformed))         
200 |                     
201 |             setattr(self.clim.fore, leadT, init_df)
202 |             del init_df   
203 | 
204 |         pass
205 |     
206 |     #Function that organizes data per lead time in order to have an easy access to 
207 |     #predictions and corresponding observations. Persistence values are also stored
208 |     #for the improvement forecast phase.
209 |     def _get_concurrent_clim_set(self, data):   
210 |         
211 |         self.clim.concurr = expando()
212 |         for location in data.metadata.id_nodes:   
213 |             setattr(self.clim.concurr, location, expando())
214 |             clim_concurr_loc = getattr(self.clim.concurr, location)
215 |             for leadT in data.metadata.fore_leadT:
216 |                 clim_fore_leadT = getattr(self.clim.fore, leadT)
217 |                 
218 |                 #Getting observations corresponding to this leadT predictions
219 |                 ix_match_obs_bool = np.in1d(self.clim.obs.t_actual, clim_fore_leadT.t_actual)   
220 |                 ix_match_obs = np.where(ix_match_obs_bool)[0]
221 |                 observations = self.clim.obs.loc[ix_match_obs,(location)]
222 |                 observations.reset_index(drop=True, inplace=True)
223 |                 
224 |                 t_actual = self.clim.obs.t_actual[ix_match_obs]
225 |                 t_actual.reset_index(drop=True, inplace=True)
226 |                 
227 |                 #Getting prediction of this leadT, in accordance with previous observations
228 |                 ix_match_pred_bool = np.in1d(clim_fore_leadT.t_actual, self.clim.obs.t_actual)   
229 |                 ix_match_pred = np.where(ix_match_pred_bool)[0]
230 |                 predictions = clim_fore_leadT.loc[ix_match_pred, (location)]
231 |                 predictions.reset_index(drop=True, inplace=True)
232 |                 
233 |                 #Getting the observation at time of issue, corresponding to the persistence value
234 |                 ix_match_per_bool = np.in1d(self.clim.obs.t_actual, clim_fore_leadT.loc[ix_match_pred,('t_issue')])   
235 |                 ix_match_per = np.where(ix_match_per_bool)[0]
236 |                 persistences = self.clim.obs.loc[ix_match_per,(location)]
237 |                 persistences.reset_index(drop=True, inplace=True)
238 |                 
239 |                 concurr_temp = pd.DataFrame({'observations':observations, \
240 |                 'predictions':predictions, 'persistences':persistences, 't_actual':t_actual})
241 |                 setattr(clim_concurr_loc, leadT, concurr_temp)                
242 | 
243 |         pass
244 | 
245 |     #Function that fits coefficients for a weighted combination of persistence 
246 |     #and actual forecasts in order to generate better point forecasts. Calculations
247 |     #are made on the training sample
248 |     def _improvement_forecast(self, data):
249 |         self.imp_fore = expando()
250 |         
251 |         for location in data.metadata.id_nodes:  
252 |             betas = pd.DataFrame(columns = data.metadata.fore_leadT, 
253 |                                  index = ['intercept','beta_pers','beta_fore'])
254 |                               
255 |             for leadT in data.metadata.fore_leadT:
256 |                 clim_concurr_loc_leadT = getattr(getattr(self.clim.concurr, location), leadT)  
257 |                 #Fitting observations with persistence and predicted values
258 |                 for_fitting = np.zeros((len(clim_concurr_loc_leadT.observations),2))
259 |                 for_fitting[:,0] = clim_concurr_loc_leadT.persistences
260 |                 for_fitting[:,1] = clim_concurr_loc_leadT.predictions
261 |                 regr_leadT = linear_model.LinearRegression()
262 |                 regr_leadT.fit(for_fitting.reshape((len(for_fitting),2)), 
263 |                                clim_concurr_loc_leadT.observations)
264 |                 betas.loc[('intercept', leadT)] = regr_leadT.intercept_
265 |                 betas.loc[('beta_pers', leadT)] = regr_leadT.coef_[0]
266 |                 betas.loc[('beta_fore', leadT)] = regr_leadT.coef_[1]
267 |                 del for_fitting, regr_leadT
268 |             setattr(self.imp_fore, location, betas)
269 |         pass
270 | 
271 | 
272 |     def _set_quantiles(self, data): 
273 |         
274 |         #Compute quantiles for the transformed power conditional on the transformed power prediction
275 |         #for a specific location and a specific lead time.
276 |         
277 |         #smf.quantreg generates warning - see documentation for more details
278 |         #warning off just for this section
279 |         warnings.filterwarnings("ignore")
280 |         #Performs the actual quantile regression and stores the variables of 
281 |         prob = np.concatenate([[0.001],np.arange(0.05,0.951,0.05),[0.999]])
282 |         self.betas = expando()
283 |         for location in data.metadata.id_nodes: 
284 |             print(location)
285 |             setattr(self.betas, location, expando())
286 |             for ileadT, leadT in enumerate(data.metadata.fore_leadT, start = 1):
287 |                 
288 |                 clim_concurr_loc_leadT = getattr(getattr(self.clim.concurr, location), leadT)                
289 |                 
290 |                 betas_aux = pd.DataFrame(0, columns = ['probabilities','intercept', 'coefficient'], 
291 |                                          index = range(len(prob)))  
292 |                 betas_aux.loc[:,('probabilities')] = prob                            
293 |                 #For solar cases, all quantiles are kepts to zeros
294 |                 if not np.all(clim_concurr_loc_leadT.observations == 0.): 
295 |                     mod = smf.quantreg('observations ~ predictions', clim_concurr_loc_leadT)
296 |                     for iq,q in enumerate(prob):
297 |                         res = mod.fit(q=q)
298 |                         betas_aux.loc[iq,('intercept')] =  res.params['Intercept']
299 |                         betas_aux.loc[iq,('coefficient')] = res.params['predictions']
300 |                         del res
301 |                     del mod
302 | 
303 |                 setattr(getattr(self.betas,location), leadT, betas_aux)
304 |                 del betas_aux
305 |                 gc.collect()
306 |         #warning on
307 |         warnings.filterwarnings("always")
308 |         pass
309 |     
310 |     def _set_cdf(self, data):    
311 |         
312 |         #In order to use the copula approach we need to transform to uniform marginal distributions.
313 |         #This is achieved by using the predictive marginal densities on the transformed domain to 
314 |         #do a second transformation to get uniformly distributed marginals. For a complete and 
315 |         #acessable introduction see the wikipedia page on Copulas.
316 |     
317 |         #First define the marginal cummulative density functions. They are stored as the cummulative 
318 |         #density function (cdf) and for easy use we also define the inverse cumulative density 
319 |         #function inv_cdf. Each is defined for every location and every lead time.
320 | 
321 |         self.cdf = expando()
322 |         self.inv_cdf = expando()
323 |         
324 |         for location in data.metadata.id_nodes:
325 |             setattr(self.cdf, location, expando())
326 |             setattr(self.inv_cdf, location, expando())
327 |     
328 |             for leadT in data.metadata.fore_leadT:
329 |                 betas_loc_leadT = getattr(getattr(self.betas,location),leadT)
330 | 
331 |                 cdf_loc_leadT = \
332 |                 lambda prediction, betas=betas_loc_leadT, cdf_keyword='cdf': \
333 |                 cqr_cdf(prediction, betas, cdf_keyword)
334 |                 setattr(getattr(self.cdf, location), leadT, cdf_loc_leadT)
335 |                    
336 |                 inv_cdf_loc_leadT = \
337 |                 lambda prediction, betas=betas_loc_leadT, cdf_keyword='inv_cdf': \
338 |                 cqr_cdf(prediction, betas, cdf_keyword)
339 |                 setattr(getattr(self.inv_cdf, location), leadT, inv_cdf_loc_leadT)
340 |             
341 |         pass
342 | 
343 | 
344 |     def _apply_cdf(self, data):    
345 |         
346 |         #Using the defined cummulative density function (cdf) we can now convert every observation 
347 |         #into the uniform domain. This is done for every location and every lead time.
348 |         
349 |         self.uniform = expando()
350 |         for location in data.metadata.id_nodes:
351 |             print(location)
352 |             setattr(self.uniform, location, expando())
353 | 
354 |             for ileadT, leadT in enumerate(data.metadata.fore_leadT, start = 1):
355 |                 cdf_loc_leadT = getattr(getattr(self.cdf, location), leadT)
356 |                 
357 |                 observations = getattr(getattr(self.clim.concurr, location), leadT).observations
358 |                 predictions = getattr(getattr(self.clim.concurr, location), leadT).predictions
359 |                 t_actual = getattr(getattr(self.clim.concurr, location), leadT).t_actual
360 |     
361 |                 unif_aux = {}    
362 |                 unif_aux['value'] = {}
363 |                 unif_aux['time'] = {}
364 |                 unif_aux['date'] = {}
365 |                 
366 |                 unif_aux['t'] = t_actual
367 |                 unif_aux['t'].index = range(len(observations))
368 |                 
369 |                 for index in unif_aux['t'].keys():  
370 |                     conditional_cdf_loc_leadT = cdf_loc_leadT(predictions[index])
371 |                     unif_aux['value'][index] = float(conditional_cdf_loc_leadT(observations[index]))
372 |                     unif_aux['time'][index] = unif_aux['t'][index].time()
373 |                     unif_aux['date'][index] = unif_aux['t'][index].date()
374 |                     del conditional_cdf_loc_leadT
375 |                 unif_aux = pd.DataFrame(unif_aux,columns=['t','value','time','date'])
376 |                 
377 |                 setattr(getattr(self.uniform, location), leadT, unif_aux)
378 |                 
379 |             del unif_aux, observations, predictions
380 |             gc.collect()
381 |             
382 |         pass
383 | 
384 | 
385 |     def _get_corr(self, data):    
386 |         
387 |         #Next we estimate the correlation matrix for the uniform variables. To facilitate this the
388 |         #uniform variables are put on an appropriate form for computing a correlation matrix. This
389 |         #is done through using a pivot table      
390 |         uniform_df = pd.DataFrame({'location': [], 't': [], 'value': [], 'ltname': [],\
391 |         'date': [], 'time': []})
392 |         for location in data.metadata.id_nodes:
393 |             for leadT in data.metadata.fore_leadT:
394 |                 uniform_loc_leadT = getattr(getattr(self.uniform, location), leadT)
395 |                 
396 |                 df_loc_leadT_temp = pd.DataFrame({'location': location, 't': uniform_loc_leadT.t, \
397 |                 'value': uniform_loc_leadT.value, 'ltname': leadT, 'date': uniform_loc_leadT.date, \
398 |                 'time': uniform_loc_leadT.time})      
399 |                 
400 |                 uniform_df = pd.concat([uniform_df, df_loc_leadT_temp])
401 |                 del df_loc_leadT_temp
402 |     
403 |     
404 |         uniform_df['value']=uniform_df['value'].astype(float)
405 |         uniform_pivot = uniform_df.pivot_table(index='date',columns=('location','ltname'),values='value')
406 |         
407 |         norm_df =  uniform_df
408 |         norm_df['value'] = norm.ppf(uniform_df['value'])
409 |         norm_pivot = norm_df.pivot_table(index='date',columns=('location','ltname'),values='value')
410 |                
411 |         #From the observations in the uniform domain we can now compute the correlation matrix. 
412 |         #The correlation matrix specifies the Gaussian copula used for combining the different models. 
413 |         #Where the computed correlation is NaN we set it to zero.
414 |         correlation_matrix_na = norm_pivot.corr()
415 |         where_are_NaNs = np.isnan(correlation_matrix_na)
416 |         correlation_matrix = correlation_matrix_na
417 |         correlation_matrix[where_are_NaNs] = 0.
418 |         if not np.all(np.diag(correlation_matrix) == 1.):
419 |             print('All diagonal values of correlation matrix are not 1!')
420 |             np.fill_diagonal(correlation_matrix.values, 1.)
421 |         
422 |         self.corr = expando()
423 |         self.corr.correlation_matrix = correlation_matrix
424 |         self.corr.pivot_columns = uniform_pivot.columns
425 |             
426 |         pass
427 | 
428 | 
429 | 
430 |     def _generalize_corr(self, data):    
431 |         #The purpose of this function is to extrapolate the correlation values to
432 |         #different distances and delta lead times. In order to do so, 
433 |         #an exponential function is used for fitting.
434 |         self.corr.fit = expando()
435 |         self.corr.fit.combined = expando()
436 |         corr_original = []
437 |         loc_to_compare_with = data.metadata.id_nodes[:]
438 |         for id_ref in data.metadata.id_nodes:
439 |             for id_loc in loc_to_compare_with: 
440 |                 dist_locs = data.metadata.distances.loc[(id_ref,id_loc)]
441 |                 leadT_to_compare_with = data.metadata.fore_leadT[:]
442 |                 for leadT_ref in data.metadata.fore_leadT:
443 |                     for leadT in leadT_to_compare_with:
444 |                         dleadT = abs(int(leadT[6:]) - int(leadT_ref[6:]))
445 |                         new_corr = self.corr.correlation_matrix[(id_ref, leadT_ref)][(id_loc, leadT)]
446 |                         new_value = np.matrix([[dist_locs, dleadT, new_corr]])                
447 |                         temp_values = corr_original
448 |                         try:
449 |                             corr_original = np.concatenate((temp_values,new_value))
450 |                         except:
451 |                             corr_original = new_value
452 |                     if id_ref == id_loc:
453 |                         leadT_to_compare_with.remove(leadT_ref) 
454 |             loc_to_compare_with.remove(id_ref)
455 |     
456 |         self.corr.fit.original = pd.DataFrame(corr_original, columns = ['distances','dt','correlation'])
457 |         
458 |         #Fitting part, using curve_fit 
459 |         x = np.squeeze(np.asarray(corr_original[:,0])) 
460 |         y = np.squeeze(np.asarray(corr_original[:,1])) 
461 |         z = np.squeeze(np.asarray(corr_original[:,2]))   
462 |         coeff, pcov = curve_fit(mix_func, (x, y), z)
463 |         self.corr.fit.combined.coeff = coeff
464 |         self.corr.fit.combined.func = lambda X, beta=coeff[0], tau=coeff[1], a=coeff[2], b=coeff[3]: \
465 |         mix_func(X, coeff[0], coeff[1], coeff[2], coeff[3])
466 |           
467 |         pass
468 | 
469 | 
470 |     
471 |     
472 |     
473 |         
474 | 
475 | 


--------------------------------------------------------------------------------
/Code/scenarioGeneration.py:
--------------------------------------------------------------------------------
  1 | # License: BSD_3_clause
  2 | #
  3 | # Copyright (c) 2015, Jan Emil Banning Iversen, Pierre Pinson, Igor Arduin
  4 | #
  5 | # Redistribution and use in source and binary forms, with or without
  6 | # modification, are permitted provided that the following conditions are
  7 | # met:
  8 | #
  9 | # Redistributions of source code must retain the above copyright
 10 | # notice, this list of conditions and the following disclaimer.
 11 | #
 12 | # Redistributions in binary form must reproduce the above copyright
 13 | # notice, this list of conditions and the following disclaimer in
 14 | # the documentation and/or other materials provided with the
 15 | # distribution.
 16 | #
 17 | # Neither the name of the Technical University of Denmark (DTU)
 18 | # nor the names of its contributors may be used to endorse or
 19 | # promote products derived from this software without specific
 20 | # prior written permission.
 21 | #
 22 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 23 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 24 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 25 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 26 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 27 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 28 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 29 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 30 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 31 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 32 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 33 | 
 34 | import os
 35 | import numpy as np
 36 | import pandas as pd
 37 | from scipy.stats import multivariate_normal, norm
 38 | from modelEstimation import get_label_time
 39 | 
 40 | #Combined exp and Cauchy for correlation matrice fitting
 41 | def mix_func(X, beta, tau, a, b):
 42 |     x,y = X
 43 |     return np.exp(-(tau*x)/(1+a*y**(2*b))**beta)/(1+a*y**(2*b))
 44 | 
 45 | 
 46 | #Function to define the forecast name from time
 47 | def get_time_label(time):
 48 |     if time.month<10: name_month= '0' + str(time.month)
 49 |     else: name_month = str(time.month)
 50 |     if time.day<10: name_day= '0' + str(time.day)
 51 |     else: name_day = str(time.day)
 52 |     if time.hour<10: name_hour= '0' + str(time.hour)
 53 |     else: name_hour = str(time.hour)
 54 |     if time.minute<10: name_minute= '0' + str(time.minute)
 55 |     else: name_minute = str(time.minute)
 56 |     time_name = '_'+str(time.year)+name_month+name_day+name_hour+name_minute
 57 |     return time_name
 58 |     
 59 | 
 60 | #Function to save scenarios in csv files
 61 | def save_scenarios(scenarios,folder_output):
 62 |     for idate in scenarios.simulation.__dict__:
 63 |         if not os.path.isdir(folder_output+'/'+idate):
 64 |             os.makedirs(folder_output+'/'+idate)
 65 |         for iloc in getattr(scenarios.simulation, idate).__dict__:
 66 |             if iloc != 't_actual':
 67 |                 getattr(getattr(scenarios.simulation, idate), iloc).to_csv(folder_output+'/'+idate+'/'+iloc+idate+'.csv')
 68 |         
 69 |     
 70 |     
 71 | 
 72 | class expando:
 73 |     pass
 74 | 
 75 | class scenarioGeneration:
 76 |     
 77 |     def __init__(self, model, data, improv_forecast, nb_scenarios):         
 78 |         self._set_attributes(improv_forecast, nb_scenarios)  
 79 |         self._get_covariance(model, data)
 80 |         self._get_scenarios(model, data)  
 81 |         print('Scenarios computed!')       
 82 |         pass
 83 |     
 84 |     
 85 |     def _set_attributes(self, improv_forecast, nb_scenarios):
 86 |         self.attributes = expando()
 87 |         self.attributes.improv_forecast = improv_forecast
 88 |         self.attributes.nb_scenarios = nb_scenarios
 89 |         pass
 90 | 
 91 | 
 92 |     def _get_covariance(self, model, data):
 93 |         
 94 |         self.correlation_matrix = pd.DataFrame(columns = model.corr.pivot_columns, 
 95 |                                                index = model.corr.pivot_columns)
 96 |         for id_ref in data.metadata.id_nodes:
 97 |             for id_loc in data.metadata.id_nodes: 
 98 |                 for leadT_ref in data.metadata.fore_leadT:
 99 |                     for leadT_loc in data.metadata.fore_leadT:
100 |                         dist_loc = getattr(getattr(data.metadata.distances, id_ref), id_loc)
101 |                         dleadT = abs(int(leadT_loc[6:]) - int(leadT_ref[6:]))
102 |                         self.correlation_matrix.loc[(id_ref,leadT_ref),(id_loc,leadT_loc)] = \
103 |                         model.corr.fit.combined.func((dist_loc, dleadT))
104 | 
105 |         
106 |         self.correlation_matrix = self.correlation_matrix.astype(float)
107 |         pass
108 | 
109 | 
110 |     
111 | 
112 |     def _get_scenarios(self, model, data):     
113 |         
114 |         self.simulation = expando()      
115 |         dates_of_issue = getattr(data.current_fore.fore, data.metadata.fore_leadT[0]).Time
116 | 
117 |         for i_date_issue, date_issue in enumerate(dates_of_issue):
118 |             
119 |             date_issue_name = get_time_label(date_issue)
120 |             print(date_issue_name)
121 |             setattr(self.simulation, date_issue_name, expando())
122 |                                            
123 |             t_actual = expando()    
124 |             for ileadT, leadT in enumerate(data.metadata.fore_leadT, start=1):
125 |                 t_actual_temp = date_issue + data.current_fore.obs.Time.dt.freq * ileadT
126 |                 setattr(t_actual, leadT, t_actual_temp)
127 |             setattr(getattr(self.simulation, date_issue_name), 't_actual', t_actual)  
128 |             mean = np.zeros(model.corr.correlation_matrix.shape[1])     
129 |             #First we simulate uniform variables with the appropriate interdependence structure. 
130 |             #This is easily done by first simulating Gaussian varialbes with the same interdependence
131 |             #sturcture and the transforming them to the uniform domain by their marginals.
132 |             
133 | #            rv_mvnorm = multivariate_normal(mean, self.correlation_matrix)
134 | #            simulation_mvnorm = rv_mvnorm.rvs(self.attributes.nb_scenarios)
135 |             rv_mvnorm = multivariate_normal(mean, model.corr.correlation_matrix)
136 |             simulation_mvnorm = rv_mvnorm.rvs(self.attributes.nb_scenarios)
137 |             simulation_uniform = pd.DataFrame(data = norm.cdf(simulation_mvnorm), 
138 |                                               columns = model.corr.pivot_columns) 
139 | 
140 | 
141 |             #Having obtained the simulated variables in the uniform domain, we need to get them into the transformed 
142 |             #domain. This is done by using the inverse cummulative density function (inv_cdf) for each region and 
143 |             #lead time. As the marginals depend on the predicted values, the predictions are required. 
144 |             #Here the predictions that came with the data are used.
145 |         
146 |             #first we put the transformed predictions on the appropriate form. To do this we need a set of 
147 |             #multi horizon point predictions spanning the locations considered and the prediction horizons.
148 |             #Futher we need a starting time. In this implementation we simply choose a starting time from
149 |             #the forecast data and choose the associated forecasts.
150 |             scen_label = [None] * self.attributes.nb_scenarios
151 |             for iscen in range(1, self.attributes.nb_scenarios+1):
152 |                scen_label[iscen-1] = 'scen_' + str(iscen)
153 |             scen_label.insert(0,'forecasts')
154 |             scen_label.insert(0,'init_forecasts')
155 |             self.attributes.scen_label = scen_label
156 | 
157 |             for id_loc in data.metadata.id_nodes:
158 |                 simulation_loc = pd.DataFrame(0, columns = data.metadata.fore_leadT, 
159 |                                               index = scen_label)
160 |                 
161 |                 for leadT in data.metadata.fore_leadT:
162 |                     
163 |                     predict_simulation = getattr(data.current_fore.fore, leadT).loc[i_date_issue,(id_loc)]
164 |                     simulation_loc.loc[(scen_label[0],leadT)] = predict_simulation
165 |                     
166 |                     label_time = get_label_time(getattr(t_actual, leadT).time())
167 | 
168 |                     #Get the prediction out of seasonality effects
169 |                     clim_cdf = \
170 |                     getattr(getattr(model.clim.cdf, id_loc), label_time)
171 |                     predict_transf_simulation = clim_cdf(predict_simulation)
172 |                     #Improve the forecast from the weighted persistence-climatology model
173 |                     if self.attributes.improv_forecast == 1: 
174 |                         #Makes sense only for wind predictions so far
175 |                         #Get the observation at time of issue out of seasonality effects, 
176 |                         #which will represent persistence value
177 |                         label_time_issue = get_label_time(date_issue.time())
178 |                         clim_cdf = \
179 |                         getattr(getattr(model.clim.cdf, id_loc), label_time_issue)
180 |                         fore_obs_loc = getattr(data.current_fore.obs, id_loc)
181 |                         pers_obs_loc_trans = \
182 |                         clim_cdf(fore_obs_loc[np.where(data.current_fore.obs.Time == date_issue)[0][0]])
183 |                         #
184 |                         coeff_imp = \
185 |                         getattr(getattr(model.imp_fore, id_loc), leadT)
186 |                         
187 |                         predict_transf_simulation = coeff_imp.intercept + \
188 |                         coeff_imp.beta_pers * pers_obs_loc_trans+ \
189 |                         coeff_imp.beta_fore* predict_transf_simulation
190 |                         predict_transf_simulation = predict_transf_simulation
191 |                         #When the transformation is made, the prediction might exceed 1 which is unrealistic
192 |                         if predict_transf_simulation > 1:
193 |                             predict_transf_simulation = 1
194 |                             
195 |                     #If the prediction is 0, we attributes scenarios to be zeros (for solar night)
196 |                     if predict_transf_simulation> 0:
197 |                         #Having obtained the predictions on the transformed domain, we can simulate on the 
198 |                         #transformed domain. This is done by converting the simulated uniforms to the appropriate
199 |                         #transformed values through the inv_cdf transformation.
200 |                         inv_cdf = getattr(getattr(model.inv_cdf, id_loc), leadT)  
201 |                         conditional_inv_cdf = inv_cdf(predict_transf_simulation)
202 |                         clim_inv_cdf = getattr(getattr(model.clim.inv_cdf,id_loc),label_time)
203 |                         for iscen in range(self.attributes.nb_scenarios):
204 |                             simulation_transformed_temp = \
205 |                             float(conditional_inv_cdf(getattr(getattr(simulation_uniform, id_loc), leadT)[iscen]))
206 |                             #Transformes the simulations on the transformed domain back to the original domain. Here we
207 |                             #need to define an initial time to transform the simulation to the original power domain:
208 |                             simulation_loc.loc[(scen_label[iscen+1],leadT)] = clim_inv_cdf(simulation_transformed_temp)  
209 |                         #Save the modified input forecast
210 |                         simulation_loc.loc[(scen_label[1],leadT)] = clim_inv_cdf(predict_transf_simulation)
211 |                         
212 |                 setattr(getattr(self.simulation, date_issue_name), id_loc, simulation_loc)   
213 |                 
214 |         pass
215 | 
216 | 
217 | 
218 | 
219 | 
220 | 
221 | 
222 | 


--------------------------------------------------------------------------------
/RE-Europe_dataset_package/Put_data_here.txt:
--------------------------------------------------------------------------------
1 | Expected data structure is
2 | Forecasts in
3 | ./Nodal_FC/YYYYMMDDHHmm/*.csv
4 | Observations in
5 | ./Nodal_TS/*.csv
6 | 
7 | These directories will be automatically generated by extracting the data package here.


--------------------------------------------------------------------------------
/Results/Results_to_be_stored_here.txt:
--------------------------------------------------------------------------------
1 | A folder will be created for each prediction horizon of the testing period under the name _YYYYMMDDHHMM.
2 | In these folders, csv files contain the generated scenarios per location.
3 | Ex: id_114_201406010000.csv


--------------------------------------------------------------------------------
/licence.txt:
--------------------------------------------------------------------------------
 1 | License: BSD_3_clause
 2 | 
 3 | Copyright (c) 2015, Jan Emil Banning Iversen, Pierre Pinson, Igor Arduin
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are
 7 | met:
 8 | 
 9 | Redistributions of source code must retain the above copyright
10 | notice, this list of conditions and the following disclaimer.
11 | 
12 | Redistributions in binary form must reproduce the above copyright
13 | notice, this list of conditions and the following disclaimer in
14 | the documentation and/or other materials provided with the
15 | distribution.
16 | 
17 | Neither the name of the Technical University of Denmark (DTU)
18 | nor the names of its contributors may be used to endorse or
19 | promote products derived from this software without specific
20 | prior written permission.
21 | 
22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


--------------------------------------------------------------------------------