├── Machine_Learning_Research.pdf
├── Wind Power Forecast  With Machine Learning Algorithms.pdf
├── README.md
├── feature_build.py
├── parameter_gen.py
├── read.py
├── main.py
├── main3.py
└── main2.py


/Machine_Learning_Research.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daehyunk927/Wind-Power-Generation-with-Machine-Learning/HEAD/Machine_Learning_Research.pdf


--------------------------------------------------------------------------------
/Wind Power Forecast  With Machine Learning Algorithms.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daehyunk927/Wind-Power-Generation-with-Machine-Learning/HEAD/Wind Power Forecast  With Machine Learning Algorithms.pdf


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Wind-Power-Generation-with-Machine-Learning
2 | Filters CSV files of wind sites and generates the parameters and features used to predict wind power, using NumPy in Python. Evaluates the performance of 7 different ML algorithms using scikit-learn and visualizes the results using Matplotlib.
3 | 


--------------------------------------------------------------------------------
/feature_build.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | def feature_build(power, speed, para):
 4 | #  build the input vector and output from known time series without spatial
 5 | #  ===========================================================  
 6 | #  skip the first part that don't have the data
 7 | #  input features include the most recent power and near future prediction
 8 | #  output target is the the actual power output 
 9 | #  features are in the recent order, the prediction, the more recent, 
10 | #  and nearby sites the index is smaller
11 | #  input: 
12 | #     power: nLoc*1 cell, each with wind generation, also as input feature
13 | #     speed: nLoc*1 cell, each with wind speed, as input feature
14 | #     para:  parameters to decide the whole model information
15 | #  output:
16 | #     feature:  nLoc*1 cell, each contains m_sample*nFeatures
17 | #     target:   nLoc*1 cell, each contains m_sample*2,col1_true,col2_pred 
18 | 
19 |     nFarm = para.nFarm
20 | 
21 |     nDrop = para.drop_length           #  length of dropped data = fea_hist + horizon
22 |     nSample = para.nSample             #  number of whole sample excluding dropped data
23 | 
24 |     nFeaTotal = para.nFeature          #  total feature length = fea_hist+fea_pred if no space
25 | 
26 |     nFeaHist = para.fea_hist*para.resolution
27 |     #  feature length for power series (fea_hist)
28 |                                    
29 |     nFeaSpeed = nFeaHist//2             #  feature length for speed series (fea_pred)
30 | 
31 |     feature = []
32 |     target = []
33 |                             
34 |     # building features   
35 |     for iFarm in range(nFarm):
36 |         fea_temp = np.empty((nSample, nFeaTotal))
37 |         # set up input feature
38 |         for iFea1 in range(nFeaHist):
39 |             # add history as input feature
40 |             fea_temp[:,iFea1] = power[iFarm][nDrop-para.horizon-iFea1 : para.nSeries-para.horizon-iFea1]
41 |         
42 |         for iFea2 in range(nFeaSpeed):
43 |             fea_temp[:,nFeaHist+iFea2] = speed[iFarm][nDrop-iFea2 : para.nSeries-iFea2] 
44 |         
45 |         # set up target output, throw away the drop_length data
46 |         temp = [power[iFarm][nDrop:para.nSeries]]
47 |         target.append(np.transpose(temp))
48 |         feature.append(fea_temp)
49 |     
50 |     feature = np.array(feature)
51 |     target= np.array(target)
52 |         
53 |     return feature, target
54 | 


--------------------------------------------------------------------------------
/parameter_gen.py:
--------------------------------------------------------------------------------
 1 | from collections import namedtuple
 2 | import numpy as np
 3 | 
 4 | def parameter_gen(x, t, t_scale, t_lead, space_bool):
 5 | 
 6 | #  obtain program parameters based on given input
 7 | #  ==============================================
 8 | #  input:
 9 | #     x: a cell array, each cell is a data series
10 | #     t: number of days considering for the feature
11 | #     t_scale: number of points in one hour
12 | #     t_lead:  leading time for prediction = t_horizon
13 | #     space_bool: 1 if space considered, 0 otherwise
14 | #  output:
15 | #     para: a structure indicating many parameters
16 | 
17 | # ========= the system parameters ==================
18 |     # initialize a structure of parameters called para
19 |     para = namedtuple("para", "nFarm nSeries horizon resolution fea_hist fea_pred fea_type spa_hist spa_pred spa_nloc drop_length nSample nFeature evaluation")
20 |     [nFarm, nSeries] = np.shape(x) # number of wind farms and overall datapoints
21 |     horizon = t_lead               # forecast horizon, lead time
22 |     resolution = t_scale           # hourly data
23 |     
24 | # ========== feature building ====================
25 |     fea_hist = 24*t                # input feature length for history hours before prediction
26 |     fea_pred = 24*t//2     # input feature length for day-ahead predictions
27 |     fea_type = 1                   # number of features type include power and speed
28 |     
29 |     if (space_bool == 0):
30 |         spa_hist = 0                   # input feature length for nearby farm history days
31 |         spa_pred = 0                   # input feature length for nearby farm day-ahead predictions
32 |         spa_nloc = 0                   # number of extra locations builds 
33 |     elif (space_bool == 1):
34 |         spa_hist = 24*t
35 |         spa_pred = 24*t//2
36 |         spa_nloc = 3
37 |         
38 |     drop_length = resolution*fea_hist + horizon 
39 |     # dropped data length
40 |         
41 |     nSample = nSeries-drop_length  # total sample size
42 |     nFeature = ((fea_hist+fea_pred)*fea_type+(spa_hist+spa_pred)*spa_nloc)*resolution
43 |     # total length for each input vector
44 |                                    
45 | # =========== evaluation criteria ==================
46 |     evaluation = 'RMSE'            # evaluation criteria: MAE or RMSE
47 | 
48 |     p = para(nFarm, nSeries, horizon, resolution, fea_hist, fea_pred, fea_type, spa_hist, spa_pred, spa_nloc, drop_length, nSample, nFeature, evaluation)
49 |     return p
50 |     


--------------------------------------------------------------------------------
/read.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import csv
 3 | 
 4 | def readData(wf_name, wf_idx, resolution, year):
 5 | #     get the wind turbine parameters from NREL west wind dataset
 6 | #     each csv file contains a 10-min wind output in a year with 30MW
 7 | #     input:
 8 | #         wf_name: the cell matrix for wind farm info, nLocation*1
 9 | #                  matrix, each cell contains the wf name list in csv
10 | #         wf_idx: the matrix store the numbered folder contans csv
11 | #         year: if year is 2006 or 2005, have 52560 measurements
12 | #         resolution: the resolution within one hour
13 | #     output:
14 | #         speed: averaged wind speed among local area, nLocation*1 cell in
15 | #                desired resolution
16 | #         gen: total wind power generation among local area, nLocation*1 cell 
17 | #              in desired resolution
18 | #         wind_param: the output wind csv data file in the cell format, each
19 | #                     cell is one Location, contains n turbine
20 | #         capacity: the capacity of each wind farm, nLocation*1 matrix
21 | #  data:
22 | #       load data under the local folder '2006/'
23 | #  ========================================================================
24 | 
25 |     nLocation = len(wf_name) # number of sites
26 | 
27 |     if year % 4 == 0:        # number of measurements
28 |         nRow = 6*24*366      # lunar year
29 |     else:
30 |         nRow = 6*24*365
31 |     
32 |     # initialization
33 |     wind_param = []
34 |     speed_temp = []
35 |     speed = []
36 |     gen = []
37 |     gen_temp = []
38 |     capacity = []
39 |     
40 |     for iLocation in range(nLocation):
41 |         wf_id = wf_idx[iLocation] # the name (number) of iLocation in RTS
42 |         farm_idx = wf_name[iLocation] # pick wind sites in ith Location
43 |         nSite = len(farm_idx) # number of sites in iLocation
44 |         turbine_param = np.zeros((nSite, nRow, 4)) # parameters in each farm
45 |         
46 |         # copy from csv files into the matrices
47 |         for iSite in range(nSite):
48 |             with open('./2006/2006/' + str(wf_id) + '/' + str(farm_idx[iSite]) + '.csv') as f:
49 |                 reader = csv.reader(f)
50 |                 next(reader)
51 |                 count = 0
52 |                 for row in reader:
53 |                     turbine_param[iSite, count, :] = row[1:]
54 |                     count += 1  
55 |         # capacity
56 |         loc_capacity = 30*nSite
57 |         capacity.append(loc_capacity)            
58 |         wind_param.append(turbine_param)
59 |         speed_temp.append(np.mean(turbine_param[:,:,0],axis=0))
60 |         gen_temp.append(np.sum(turbine_param[:,:,3], axis=0)/(loc_capacity))
61 |         
62 |         # 1-hr resolution
63 |         if resolution == 1:
64 |             speed_per_hour = np.reshape(speed_temp[iLocation], (nRow//6, 6))
65 |             gen_per_hour = np.reshape(gen_temp[iLocation], (nRow//6, 6))
66 |             speed.append(np.mean(speed_per_hour, axis=1)/30)
67 |             gen.append(np.mean(gen_per_hour, axis=1)) 
68 |         # 10-min resolution
69 |         elif resolution == 6:
70 |             speed.append(speed_temp[iLocation]/30)
71 |             gen.append(gen_temp[iLocation])
72 |         else:
73 |             print ("desired resolution is not valid.")
74 |         
75 |     speed = np.array(speed)
76 |     gen = np.array(gen)
77 |     wind_param = np.array(wind_param)
78 |     capacity = np.array(capacity)
79 |     
80 |     return speed, gen, wind_param, capacity


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np
  3 | import math
  4 | import time
  5 | import matplotlib.pyplot as plt
  6 | from Input.read import readData
  7 | from Input.parameter_gen import parameter_gen
  8 | from Input.feature_build import feature_build
  9 | 
 10 | from sklearn import linear_model
 11 | from sklearn import svm
 12 | from sklearn.ensemble import RandomForestRegressor
 13 | from sklearn.ensemble import GradientBoostingRegressor
 14 | from sklearn import neural_network
 15 | from sklearn import kernel_ridge
 16 | from sklearn import neighbors
 17 | 
 18 | # WIND FORECAST CORRECTIVE SCENARIOS for 19 wind farms
 19 | # This script ends with graphical prediction results for 
 20 | # three testing days and three testing farms.
 21 | #   =================================================================
 22 | #   generate wind scenarios based on historic data
 23 | #   considering spatial and temporal correlation
 24 | #   provide scenarios with better forecast
 25 | #   provide scenarios with uncertainty quantification
 26 | #   provide reasonable boundary with scenarios
 27 | #   combining multiple data mining techniques
 28 | #   including Random Forest, SVM, Linear Regression, KNN, NN
 29 | #   the data is based on the NREL Western Wind Dataset
 30 | 
 31 | # Load Data
 32 | start_time = time.time()
 33 | year = 2006
 34 | resolution = 1  # 1 hr resolution
 35 | speed = []
 36 | gen = []
 37 | # directory where the data is stored
 38 | dataDir = os.listdir('./' + str(year) + '/' + str(year))
 39 | # number of wind farms in the directory
 40 | nLocation = len(dataDir)
 41 | 
 42 | # list storing names of wind farms  
 43 | wf_idx = []
 44 | for dirname in dataDir:
 45 |     wf_idx.append(dirname)
 46 | 
 47 | nSites = 0 
 48 | # list storing names of wind sites in each wind farm 
 49 | wf_name = []
 50 | for dirname in dataDir:
 51 |     temp = [] 
 52 |     for filename in os.listdir('./' + str(year) + '/' + str(year) + '/' + dirname):
 53 |         temp.append(os.path.splitext(filename)[0])
 54 |         nSites = nSites+1
 55 |     wf_name.append(temp)
 56 |      
 57 | wf_idx = np.array(wf_idx)
 58 | wf_name = np.array(wf_name)
 59 | print(nSites) # 230 in total
 60 | 
 61 | # Output cleaned wind speed and power based on the given data
 62 | speed, gen, wind_param, capacity = readData(wf_name, wf_idx, resolution, year)
 63 | # Load Parameters
 64 | para = parameter_gen(gen, 5, resolution, 1, 0)
 65 |    
 66 | # Build Feature and Target
 67 | feature, target = feature_build(gen, speed, para)
 68 | print(np.shape(feature[0]))
 69 | print(np.shape(target[0]))
 70 |    
 71 | # Build Training and Test sets
 72 | days = [124, 221, 306] # testing days: can be manipulated
 73 | farms = [0, 3, 6] # testing farms: can be manipulated
 74 | farm_axis = np.arange(nLocation)
 75 |   
 76 | for f in range(len(farms)):
 77 |     fig = plt.figure()
 78 |     for i in range(len(days)):
 79 |         # prediction hours: 7 days
 80 |         test_hour = np.arange((days[i]-1) * 24, (days[i]+6) * 24) - para.drop_length
 81 |         test_time = np.transpose(test_hour)
 82 |         train_length = 2160 # length of training sets
 83 |        
 84 |         nFarm = nLocation
 85 |         xTr = []
 86 |         yTr = []
 87 |         xTe = []
 88 |         yTe = []
 89 |         
 90 |         # build training and testing sets here
 91 |         for iFarm in range(nFarm):
 92 |             xTr1 = feature[iFarm][test_time[0]-train_length : test_time[0]]
 93 |             yTr1 = target[iFarm][test_time[0]-train_length : test_time[0]]
 94 |             xTe1 = feature[iFarm][test_time[0]:test_time[len(test_time)-1]+1]
 95 |             yTe1 = target[iFarm][test_time[0]:test_time[len(test_time)-1]+1]
 96 |           
 97 |             xTr.append(xTr1)
 98 |             yTr.append(yTr1)
 99 |             xTe.append(xTe1)
100 |             yTe.append(yTe1)
101 |            
102 |         xTr = np.array(xTr)
103 |         yTr = np.array(yTr)
104 |         xTe = np.array(xTe)
105 |         yTe = np.array(yTe)
106 |           
107 |         print(np.shape(xTr[0]))
108 |         print(np.shape(yTr[0]))
109 |         print(np.shape(xTe[0]))
110 |         print(np.shape(yTe[0]))
111 |         
112 |         # Scikit-Learn commands for multiple algorithms
113 |         Estimators = {
114 |                     "Linear Regression": linear_model.LinearRegression(),
115 |                     "Support Vector Machine": svm.LinearSVR(),
116 |                     "Kernel Ridge": kernel_ridge.KernelRidge(),
117 |                     "Random Forest": RandomForestRegressor(),
118 |                     "Gradient Boosting": GradientBoostingRegressor(),
119 |                     "Neural Network": neural_network.MLPRegressor(),
120 |                     "Nearest Neighbor": neighbors.KNeighborsRegressor() 
121 |         }
122 |         
123 |         # dictionary form to store prediction results
124 |         y_test_predict = dict()
125 |           
126 |         for name, estimator in Estimators.items():
127 |             t1 = time.time() # for computing time
128 |             print (name, "------")
129 |             # fit the training sets
130 |             estimator.fit(xTr[farms[f]], yTr[farms[f]].reshape(len(yTr[farms[f]]),))
131 |             # predict using each algorithm
132 |             y_test_predict[name] = estimator.predict(xTe[farms[f]])
133 |             
134 |             # the wind power should be in the range of 0 to 1, so outliers should be taken care of here.   
135 |             for h in range(len(y_test_predict[name])):
136 |                 if (y_test_predict[name][h] < 0):
137 |                     y_test_predict[name][h] = 0
138 |                 elif (y_test_predict[name][h] > 1):
139 |                     y_test_predict[name][h] = 1  
140 |             
141 |             # root mean squared error  
142 |             rmse = math.sqrt(np.mean((y_test_predict[name] - yTe[farms[f]].reshape(len(yTe[farms[f]]),))**2))
143 |             # mean absolute error
144 |             mae = np.mean(abs(y_test_predict[name] - yTe[farms[f]].reshape(len(yTe[farms[f]]),)))
145 |             t2 = time.time()
146 |             # Print the results of the performance of each algorithm
147 |             print ("Coefficient of Determination:", estimator.score(xTe[farms[f]], yTe[farms[f]].reshape(len(yTe[farms[f]]),)))
148 |             print ("Root-Mean-Squared Error:", rmse)
149 |             print ("Mean Absolute Error:", mae)
150 |             print ("Time for each algorithm:", t2-t1)
151 |             print()
152 |         
153 |         # Visualize the prediction results using MatplotLib      
154 |         ax = plt.subplot('%d%d%d' %(len(days),1,i+1))
155 |         for name, estimator in Estimators.items():
156 |             ax.plot(y_test_predict[name], label=name)
157 |   
158 |         ax.plot(yTe[farms[f]], label="Real Data", linestyle='--')
159 |         ax.set_title('Day %d' % days[i], fontsize=15)
160 |         ax.set_xlim(0,167)
161 |         ax.set_ylim(0,1)
162 |     fig.suptitle('Prediction Result for Farm %s' % wf_idx[farms[f]], fontsize=30)
163 |     plt.xlabel('7 days since the requested day (hrs)', fontsize=20)
164 |     plt.ylabel('Power Generated', fontsize=20)
165 |     plt.legend(loc='center left', bbox_to_anchor=(0.9, 1), 
166 |                  fancybox=True, shadow=True)  
167 | plt.show()
168 | end_time = time.time()
169 | print("Entire Program time: ", end_time - start_time)


--------------------------------------------------------------------------------
/main3.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np
  3 | import math
  4 | import time
  5 | import matplotlib.pyplot as plt
  6 | from Input.read import readData
  7 | from Input.parameter_gen import parameter_gen
  8 | from Input.feature_build import feature_build
  9 | 
 10 | from sklearn import linear_model
 11 | from sklearn import svm
 12 | from sklearn.ensemble import RandomForestRegressor
 13 | from sklearn.ensemble import GradientBoostingRegressor
 14 | from sklearn import neural_network
 15 | from sklearn import kernel_ridge
 16 | from sklearn import neighbors
 17 | 
 18 | 
 19 | # WIND FORECAST CORRECTIVE SCENARIOS for 19 wind farms with VARIATION in time_lead
 20 | # This script ends with graphical prediction results for three testing days and
 21 | # three testing farms but with different time_leads. It also provides
 22 | # errors and coefficients of determination for different time_leads. 
 23 | #   =================================================================
 24 | #   generate wind scenarios based on historic data
 25 | #   considering spatial and temporal correlation
 26 | #   provide scenarios with better forecast
 27 | #   provide scenarios with uncertainty quantification
 28 | #   provide reasonable boundary with scenarios
 29 | #   combining multiple data mining techniques
 30 | #   including Random Forest, SVM, Linear Regression, KNN, NN
 31 | #   the data is based on the NREL Western Wind Dataset
 32 | 
 33 | # Load Data
 34 | start_time = time.time()
 35 | year = 2006
 36 | resolution = 1  # 1 hr resolution
 37 | speed = []
 38 | gen = []
 39 | # directory where the data is stored
 40 | dataDir = os.listdir('./' + str(year) + '/' + str(year))
 41 | # number of wind farms in the directory
 42 | nLocation = len(dataDir)
 43 | 
 44 | # list storing names of wind farms    
 45 | wf_idx = []
 46 | for dirname in dataDir:
 47 |     wf_idx.append(dirname)
 48 | 
 49 | # list storing names of wind sites in each wind farm     
 50 | wf_name = []
 51 | for dirname in dataDir:
 52 |     temp = [] 
 53 |     for filename in os.listdir('./' + str(year) + '/' + str(year) + '/' + dirname):
 54 |         temp.append(os.path.splitext(filename)[0])
 55 |     wf_name.append(temp)
 56 |      
 57 | wf_idx = np.array(wf_idx)
 58 | wf_name = np.array(wf_name)
 59 | 
 60 | # Output cleaned wind speed and power based on the given data
 61 | speed, gen, wind_param, capacity = readData(wf_name, wf_idx, resolution, year)
 62 | 
 63 | # varied Time Leads
 64 | t_lead = [1, 2, 4, 6, 12, 24]
 65 | rmse_avg = dict()
 66 | mae_avg = dict()
 67 | CoDet_avg = dict()
 68 | days = [124, 221, 306] # testing days: can be manipulated
 69 | farms = [0, 3, 6] # testing farms: can be manipulated
 70 | # x-axis with 10 farms
 71 | farm_axis = np.arange(10)
 72 | 
 73 | # Scikit-Learn commands for multiple algorithms
 74 | Estimators = {
 75 |                           "Linear Regression": linear_model.LinearRegression(),
 76 |                           "Support Vector Machine": svm.LinearSVR(),
 77 |                           "Kernel Ridge": kernel_ridge.KernelRidge(),
 78 |                           "Random Forest": RandomForestRegressor(),
 79 |                           "Gradient Boosting": GradientBoostingRegressor(),
 80 |                           "Neural Network": neural_network.MLPRegressor(),
 81 |                           "Nearest Neighbor": neighbors.KNeighborsRegressor(), 
 82 | }
 83 | for name, estimator in Estimators.items():
 84 |     rmse_avg[name] = np.empty((len(t_lead), len(days)))
 85 |     mae_avg[name] = np.empty((len(t_lead), len(days)))
 86 |     CoDet_avg[name] = np.empty((len(t_lead), len(days)))
 87 | for t in range(len(t_lead)):
 88 | # Load Parameters
 89 |     para = parameter_gen(gen, 5, resolution, t_lead[t], 0)
 90 |      
 91 |     # Build Feature and Target
 92 |     feature, target = feature_build(gen, speed, para)
 93 | 
 94 |     # Build Training, Validation, and Test sets
 95 |     for i in range(len(days)):
 96 |         # prediction hours: 7 days
 97 |         test_hour = np.arange((days[i]-1) * 24, (days[i]+6) * 24) - para.drop_length
 98 |         test_time = np.transpose(test_hour)
 99 |         train_length = 2160 # length of training sets
100 |          
101 |         nFarm = nLocation
102 |         xTr = []
103 |         yTr = []
104 |         xTe = []
105 |         yTe = []
106 |         
107 |         # build training and testing sets here
108 |         for iFarm in range(nFarm):
109 |             xTr1 = feature[iFarm][test_time[0]-train_length : test_time[0]]
110 |             yTr1 = target[iFarm][test_time[0]-train_length : test_time[0]]
111 |             xTe1 = feature[iFarm][test_time[0]:test_time[len(test_time)-1]+1]
112 |             yTe1 = target[iFarm][test_time[0]:test_time[len(test_time)-1]+1]
113 |             
114 |             xTr.append(xTr1)
115 |             yTr.append(yTr1)
116 |             xTe.append(xTe1)
117 |             yTe.append(yTe1)
118 |              
119 |         xTr = np.array(xTr)
120 |         yTr = np.array(yTr)
121 |         xTe = np.array(xTe)
122 |         yTe = np.array(yTe)
123 |          
124 |         # dictionary form to store prediction results
125 |         y_test_predict = dict()
126 |         for name, estimator in Estimators.items():
127 |             temprmse = []
128 |             tempmae = []
129 |             tempcoef = []
130 |             temptime = []
131 |             for f in range(len(farm_axis)):
132 |                 # fit the training sets
133 |                 estimator.fit(xTr[f], yTr[f].reshape(len(yTr[f]),))
134 |                 # predict using each algorithm
135 |                 y_test_predict[name] = estimator.predict(xTe[f])
136 |                 
137 |                 # the wind power should be in the range of 0 to 1, so outliers should be taken care of here.
138 |                 for h in range(len(y_test_predict[name])):
139 |                     if (y_test_predict[name][h] < 0):
140 |                         y_test_predict[name][h] = 0
141 |                     elif (y_test_predict[name][h] > 1):
142 |                         y_test_predict[name][h] = 1  
143 |                 
144 |                 # root mean squared error         
145 |                 rmse = math.sqrt(np.mean((y_test_predict[name] - yTe[f].reshape(len(yTe[f]),))**2))
146 |                 # mean absolute error
147 |                 mae = np.mean(abs(y_test_predict[name] - yTe[f].reshape(len(yTe[f]),)))
148 |                 temprmse.append(rmse)
149 |                 tempmae.append(mae)
150 |                 tempcoef.append(estimator.score(xTe[f], yTe[f].reshape(len(yTe[f]),)))
151 |             rmse_avg[name][t][i] = np.mean(temprmse)
152 |             mae_avg[name][t][i] = np.mean(tempmae)
153 |             CoDet_avg[name][t][i] = np.mean(tempcoef)
154 |     
155 | # Visualize the error results for different time leads using MatplotLib                
156 | fig1 = plt.figure()
157 | plt.title("RMSE", fontsize=30)
158 | plt.xlabel("T_Leads", fontsize=20)
159 | plt.ylabel("Values", fontsize=20)
160 | for name, estimator in Estimators.items():
161 |     plt.plot(t_lead, np.mean(rmse_avg[name], axis=1), label=name)
162 | plt.legend(loc='center left', bbox_to_anchor=(0.95, 0.3), 
163 |                  fancybox=True, shadow=True) 
164 | plt.xlim(1,24)
165 | fig2 = plt.figure()
166 | plt.title("MAE", fontsize=30)
167 | plt.xlabel("T_Leads", fontsize=20)
168 | plt.ylabel("Values", fontsize=20)
169 | for name, estimator in Estimators.items():
170 |     plt.plot(t_lead, np.mean(mae_avg[name], axis=1), label=name)
171 | plt.legend(loc='center left', bbox_to_anchor=(0.95, 0.3), 
172 |                  fancybox=True, shadow=True)
173 | plt.xlim(1,24) 
174 | fig3 = plt.figure()
175 | plt.title("Coefficient of Determination", fontsize=30)
176 | plt.xlabel("T_Leads", fontsize=20)
177 | plt.ylabel("Values", fontsize=20)
178 | for name, estimator in Estimators.items():
179 |     plt.plot(t_lead, np.mean(CoDet_avg[name], axis=1), label=name)
180 | plt.legend(loc='center left', bbox_to_anchor=(0.95, 0.3), 
181 |                  fancybox=True, shadow=True)
182 | plt.xlim(1,24)
183 |  
184 | plt.show()
185 | end_time = time.time()
186 | print("Entire Program time: ", end_time - start_time)


--------------------------------------------------------------------------------
/main2.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np
  3 | import math
  4 | import time
  5 | import matplotlib.pyplot as plt
  6 | from Input.read import readData
  7 | from Input.parameter_gen import parameter_gen
  8 | from Input.feature_build import feature_build
  9 | 
 10 | from sklearn import linear_model
 11 | from sklearn import svm
 12 | from sklearn.ensemble import RandomForestRegressor
 13 | from sklearn.ensemble import GradientBoostingRegressor
 14 | from sklearn import neural_network
 15 | from sklearn import kernel_ridge
 16 | from sklearn import neighbors
 17 | 
 18 | # WIND FORECAST CORRECTIVE SCENARIOS for 19 wind farms and ERRORS
 19 | # This script ends with graphical results for errors,
 20 | # coefficients of determination, and computing time for all 19 farms
 21 | #   =================================================================
 22 | #   generate wind scenarios based on historic data
 23 | #   considering spatial and temporal correlation
 24 | #   provide scenarios with better forecast
 25 | #   provide scenarios with uncertainty quantification
 26 | #   provide reasonable boundary with scenarios
 27 | #   combining multiple data mining techniques
 28 | #   including Random Forest, SVM, Linear Regression, KNN, NN
 29 | #   the data is based on the NREL Western Wind Dataset
 30 | 
 31 | # Load Data
 32 | start_time = time.time()
 33 | year = 2006
 34 | resolution = 1  # 1 hr resolution
 35 | speed = []
 36 | gen = []
 37 | # directory where the data is stored
 38 | dataDir = os.listdir('./' + str(year) + '/' + str(year))
 39 | # number of wind farms in the directory
 40 | nLocation = len(dataDir)
 41 | 
 42 | # list storing names of wind farms  
 43 | wf_idx = []
 44 | for dirname in dataDir:
 45 |     wf_idx.append(dirname)
 46 | 
 47 | # list storing names of wind sites in each wind farm   
 48 | wf_name = []
 49 | for dirname in dataDir:
 50 |     temp = [] 
 51 |     for filename in os.listdir('./' + str(year) + '/' + str(year) + '/' + dirname):
 52 |         temp.append(os.path.splitext(filename)[0])
 53 |     wf_name.append(temp)
 54 |      
 55 | wf_idx = np.array(wf_idx)
 56 | wf_name = np.array(wf_name)
 57 | 
 58 | # Output cleaned wind speed and power based on the given data
 59 | speed, gen, wind_param, capacity = readData(wf_name, wf_idx, resolution, year)
 60 | # Load Parameters
 61 | para = parameter_gen(gen, 5, resolution, 1, 0)
 62 |  
 63 | # Build Feature and Target
 64 | feature, target = feature_build(gen, speed, para)
 65 | print(np.shape(feature[0]))
 66 | print(np.shape(target[0]))
 67 |  
 68 | # Build Training and Test sets
 69 | days = [124, 221, 306] # testing days: can be manipulated
 70 | farms = [0, 3, 6] # testing farms: can be manipulated
 71 | # x-axis with all 19 farms
 72 | farm_axis = np.arange(nLocation)
 73 | 
 74 | # Scikit-Learn commands for multiple algorithms
 75 | Estimators = {
 76 |                       "Linear Regression": linear_model.LinearRegression(),
 77 |                       "Support Vector Machine": svm.LinearSVR(),
 78 |                       "Kernel Ridge": kernel_ridge.KernelRidge(),
 79 |                       "Random Forest": RandomForestRegressor(),
 80 |                       "Gradient Boosting": GradientBoostingRegressor(),
 81 |                       "Neural Network": neural_network.MLPRegressor(),
 82 |                       "Nearest Neighbor": neighbors.KNeighborsRegressor(), 
 83 | }
 84 | rmse_avg = dict()
 85 | mae_avg = dict()
 86 | CoDet_avg = dict()
 87 | time_avg = dict()
 88 | for name, estimator in Estimators.items():
 89 |     rmse_avg[name] = np.empty((len(days),len(farm_axis)))
 90 |     mae_avg[name] = np.empty((len(days),len(farm_axis)))
 91 |     CoDet_avg[name] = np.empty((len(days),len(farm_axis)))
 92 |     time_avg[name] = np.empty((len(days),len(farm_axis)))
 93 | 
 94 | 
 95 | for i in range(len(days)):
 96 |     # prediction hours: 7 days
 97 |     test_hour = np.arange((days[i]-1) * 24, (days[i]+6) * 24) - para.drop_length
 98 |     test_time = np.transpose(test_hour)
 99 |     train_length = 2160  # length of training sets
100 |      
101 |     nFarm = nLocation
102 |     xTr = []
103 |     yTr = []
104 |     xTe = []
105 |     yTe = []
106 | 
107 |     # build training and testing sets here
108 |     for iFarm in range(nFarm):
109 |         xTr1 = feature[iFarm][test_time[0]-train_length : test_time[0]]
110 |         yTr1 = target[iFarm][test_time[0]-train_length : test_time[0]]
111 |         xTe1 = feature[iFarm][test_time[0]:test_time[len(test_time)-1]+1]
112 |         yTe1 = target[iFarm][test_time[0]:test_time[len(test_time)-1]+1]
113 |         
114 |         xTr.append(xTr1)
115 |         yTr.append(yTr1)
116 |         xTe.append(xTe1)
117 |         yTe.append(yTe1)
118 |          
119 |     xTr = np.array(xTr)
120 |     yTr = np.array(yTr)
121 |     xTe = np.array(xTe)
122 |     yTe = np.array(yTe)
123 |      
124 |     # dictionary form to store prediction results
125 |     y_test_predict = dict()
126 |     for name, estimator in Estimators.items():
127 |         temprmse = []
128 |         tempmae = []
129 |         tempcoef = []
130 |         temptime = []
131 |         for f in range(len(farm_axis)):
132 |             t1 = time.time()  # for computing time
133 |             # fit the training sets
134 |             estimator.fit(xTr[f], yTr[f].reshape(len(yTr[f]),))
135 |             # predict using each algorithm
136 |             y_test_predict[name] = estimator.predict(xTe[f])
137 |             
138 |             # the wind power should be in the range of 0 to 1, so outliers should be taken care of here.
139 |             for h in range(len(y_test_predict[name])):
140 |                 if (y_test_predict[name][h] < 0):
141 |                     y_test_predict[name][h] = 0
142 |                 elif (y_test_predict[name][h] > 1):
143 |                     y_test_predict[name][h] = 1  
144 |                     
145 |             # root mean squared error 
146 |             rmse = math.sqrt(np.mean((y_test_predict[name] - yTe[f].reshape(len(yTe[f]),))**2))
147 |             # mean absolute error
148 |             mae = np.mean(abs(y_test_predict[name] - yTe[f].reshape(len(yTe[f]),)))
149 |             t2 = time.time()
150 |             temprmse.append(rmse)
151 |             tempmae.append(mae)
152 |             tempcoef.append(estimator.score(xTe[f], yTe[f].reshape(len(yTe[f]),)))
153 |             temptime.append(t2-t1)
154 |         rmse_avg[name][i] = np.array(temprmse)
155 |         mae_avg[name][i] = np.array(tempmae)
156 |         CoDet_avg[name][i] = np.array(tempcoef)
157 |         time_avg[name][i] = np.array(temptime)
158 | 
159 | # Visualize the error results for 19 farms using MatplotLib                 
160 | # Root Mean Squared Error
161 | fig1 = plt.figure()
162 | plt.title("RMSE", fontsize=30)
163 | plt.xlabel("Farms", fontsize=20)
164 | plt.ylabel("Values", fontsize=20)
165 | for name, estimator in Estimators.items():
166 |     plt.plot(farm_axis, np.mean(rmse_avg[name], axis=0), label=name)
167 | plt.legend(loc='center left', bbox_to_anchor=(0.95, 0.3), 
168 |            fancybox=True, shadow=True) 
169 | 
170 | # Mean Absolute Error
171 | fig2 = plt.figure()
172 | plt.title("MAE", fontsize=30)
173 | plt.xlabel("Farms", fontsize=20)
174 | plt.ylabel("Values", fontsize=20)
175 | for name, estimator in Estimators.items():
176 |     plt.plot(farm_axis, np.mean(mae_avg[name], axis=0), label=name)
177 | plt.legend(loc='center left', bbox_to_anchor=(0.95, 0.3), 
178 |            fancybox=True, shadow=True)
179 | 
180 | # Coefficient of Determination 
181 | fig3 = plt.figure()
182 | plt.title("Coefficient of Determination", fontsize=30)
183 | plt.xlabel("Farms", fontsize=20)
184 | plt.ylabel("Values", fontsize=20)
185 | for name, estimator in Estimators.items():
186 |     plt.plot(farm_axis, np.mean(CoDet_avg[name], axis=0), label=name)
187 | plt.legend(loc='center left', bbox_to_anchor=(0.95, 0.3), 
188 |            fancybox=True, shadow=True)
189 | 
190 | # Computing Time 
191 | fig4 = plt.figure()
192 | plt.title("Computing Time", fontsize=30)
193 | plt.xlabel("Farms", fontsize=20)
194 | plt.ylabel("Values", fontsize=20)
195 | for name, estimator in Estimators.items():
196 |     plt.plot(farm_axis, np.mean(time_avg[name], axis=0), label=name)
197 | plt.legend(loc='center left', bbox_to_anchor=(0.95, 0.3), 
198 |            fancybox=True, shadow=True) 
199 | plt.show()
200 | end_time = time.time()
201 | print("Entire Program time: ", end_time - start_time)


--------------------------------------------------------------------------------