├── Machine_Learning_Research.pdf ├── Wind Power Forecast With Machine Learning Algorithms.pdf ├── README.md ├── feature_build.py ├── parameter_gen.py ├── read.py ├── main.py ├── main3.py └── main2.py /Machine_Learning_Research.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daehyunk927/Wind-Power-Generation-with-Machine-Learning/HEAD/Machine_Learning_Research.pdf -------------------------------------------------------------------------------- /Wind Power Forecast With Machine Learning Algorithms.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daehyunk927/Wind-Power-Generation-with-Machine-Learning/HEAD/Wind Power Forecast With Machine Learning Algorithms.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Wind-Power-Generation-with-Machine-Learning 2 | Filters CSV files of wind sites and generates parameters and features used in predicting wind power using NumPy in Python. Evaluates performance of 7 different ML algorithms using Scikit-Learn and creates a visualization of the results using MatplotLib in Python. 
# feature_build.py
import numpy as np


def feature_build(power, speed, para):
    """Build per-farm feature matrices and targets from known time series.

    No spatial (neighbouring-farm) features are produced here.  The first
    ``para.drop_length`` points of every series are skipped because they do
    not have a full history window behind them.

    Column layout of each feature matrix (sample ``j`` targets time step
    ``para.drop_length + j``):

    * columns ``0 .. nFeaHist-1``: generation lagged by ``horizon + k``
      steps, most recent first (``k`` is the column index);
    * columns ``nFeaHist .. nFeaHist+nFeaSpeed-1``: wind speed lagged by
      ``k`` steps, starting with the speed at the target time itself;
    * any remaining columns up to ``para.nFeature`` are left at zero.

    Args:
        power: per-farm generation series; ``power[i]`` is a 1-D sequence of
            length ``para.nSeries``.
        speed: per-farm wind-speed series, same layout as ``power``.
        para: parameter record exposing ``nFarm``, ``nSeries``, ``horizon``,
            ``resolution``, ``fea_hist``, ``drop_length``, ``nSample`` and
            ``nFeature`` (as returned by ``parameter_gen``).

    Returns:
        ``(feature, target)`` as NumPy arrays: ``feature[i]`` has shape
        ``(nSample, nFeature)`` and ``target[i]`` has shape ``(nSample, 1)``
        holding the actual generation.
    """
    nFarm = para.nFarm
    nSeries = para.nSeries
    horizon = para.horizon
    nDrop = para.drop_length      # dropped prefix = history window + horizon
    nSample = para.nSample        # samples left after dropping the prefix
    nFeaTotal = para.nFeature     # total width of one feature vector

    # Feature length taken from the power history.
    nFeaHist = para.fea_hist * para.resolution
    # Feature length taken from the speed series (half the history length).
    nFeaSpeed = nFeaHist // 2

    feature = []
    target = []
    for iFarm in range(nFarm):
        farm_power = np.asarray(power[iFarm])
        farm_speed = np.asarray(speed[iFarm])

        # Zero-initialised (not np.empty): when para.nFeature reserves room
        # for spatial features, those columns are never written below and
        # must not contain uninitialised memory.
        fea_temp = np.zeros((nSample, nFeaTotal))

        # Power history: column k holds the series shifted by horizon + k.
        for lag in range(nFeaHist):
            start = nDrop - horizon - lag
            stop = nSeries - horizon - lag
            fea_temp[:, lag] = farm_power[start:stop]

        # Wind speed: column k holds the series shifted by k.
        for lag in range(nFeaSpeed):
            fea_temp[:, nFeaHist + lag] = farm_speed[nDrop - lag:nSeries - lag]

        feature.append(fea_temp)
        # Target is the actual output after the dropped prefix, as a column.
        target.append(farm_power[nDrop:nSeries].reshape(-1, 1))

    return np.array(feature), np.array(target)
# parameter_gen.py
from collections import namedtuple

import numpy as np

# Record type returned by parameter_gen; created once so that every call
# returns instances of the same class.
_Para = namedtuple(
    "para",
    "nFarm nSeries horizon resolution fea_hist fea_pred fea_type "
    "spa_hist spa_pred spa_nloc drop_length nSample nFeature evaluation",
)


def parameter_gen(x, t, t_scale, t_lead, space_bool):
    """Derive the model parameters from the data shape and the user settings.

    Args:
        x: 2-D data, one row per wind farm and one column per time step.
        t: number of days of history considered for the features.
        t_scale: number of data points in one hour (resolution).
        t_lead: lead time of the prediction (forecast horizon).
        space_bool: 1 if spatial (nearby-farm) features are considered,
            0 otherwise.

    Returns:
        A ``para`` namedtuple with the fields ``nFarm``, ``nSeries``,
        ``horizon``, ``resolution``, ``fea_hist``, ``fea_pred``,
        ``fea_type``, ``spa_hist``, ``spa_pred``, ``spa_nloc``,
        ``drop_length``, ``nSample``, ``nFeature`` and ``evaluation``.

    Raises:
        ValueError: if ``space_bool`` is neither 0 nor 1.
    """
    # ========= the system parameters ==================
    nFarm, nSeries = np.shape(x)  # number of farms and points per farm
    horizon = t_lead              # forecast horizon, lead time
    resolution = t_scale          # points per hour

    # ========== feature building ====================
    fea_hist = 24 * t             # history hours used as input features
    fea_pred = 24 * t // 2        # day-ahead prediction feature length
    fea_type = 1                  # number of feature types

    if space_bool == 0:
        spa_hist = 0              # nearby-farm history feature length
        spa_pred = 0              # nearby-farm prediction feature length
        spa_nloc = 0              # number of extra locations
    elif space_bool == 1:
        spa_hist = 24 * t
        spa_pred = 24 * t // 2
        spa_nloc = 3
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on spa_hist.
        raise ValueError(
            "space_bool must be 0 or 1, got %r" % (space_bool,))

    # Leading data points that cannot be used as samples.
    drop_length = resolution * fea_hist + horizon
    # Number of usable samples per farm.
    nSample = nSeries - drop_length
    # Total length of each input vector.
    nFeature = ((fea_hist + fea_pred) * fea_type
                + (spa_hist + spa_pred) * spa_nloc) * resolution

    # =========== evaluation criteria ==================
    evaluation = 'RMSE'           # evaluation criterion: MAE or RMSE

    return _Para(nFarm, nSeries, horizon, resolution, fea_hist, fea_pred,
                 fea_type, spa_hist, spa_pred, spa_nloc, drop_length,
                 nSample, nFeature, evaluation)
# read.py
import calendar
import csv
import os

import numpy as np


def readData(wf_name, wf_idx, resolution, year):
    """Load per-site wind CSV files and aggregate them per location.

    Files are read from ``./<year>/<year>/<wf_idx[i]>/<site>.csv``.  Each
    file is expected to have one header row followed by one row per 10-minute
    interval of the year; the first column of every row is skipped and the
    next four are stored.  Of those four, column 0 is averaged over the sites
    as wind speed and column 3 is summed over the sites as generation.

    Args:
        wf_name: per location, the list of site names (CSV base names).
        wf_idx: per location, the name of the folder holding its CSV files.
        resolution: points per hour of the output; 1 (hourly mean) or
            6 (native 10-minute data).
        year: year of the data; selects the folder and the number of
            10-minute rows (365 or 366 days).

    Returns:
        ``(speed, gen, wind_param, capacity)``:
        ``speed`` is the site-averaged wind speed per location divided by 30,
        ``gen`` is the summed generation per location divided by the location
        capacity, ``wind_param`` holds the raw ``(nSite, nRow, 4)`` table of
        every location, and ``capacity`` is ``30 * nSite`` per location.

    Raises:
        ValueError: if ``resolution`` is not 1 or 6.
    """
    if resolution not in (1, 6):
        # Previously this only printed a message and returned empty series.
        raise ValueError(
            "desired resolution is not valid. (expected 1 or 6, got %r)"
            % (resolution,))

    nLocation = len(wf_name)  # number of locations

    # Six 10-minute measurements per hour; use the full Gregorian leap rule.
    nDays = 366 if calendar.isleap(year) else 365
    nRow = 6 * 24 * nDays

    # Honour the requested year instead of a hard-coded 2006 folder.
    data_root = os.path.join('.', str(year), str(year))

    wind_param = []
    speed = []
    gen = []
    capacity = []

    for iLocation in range(nLocation):
        wf_id = wf_idx[iLocation]       # folder name of this location
        farm_idx = wf_name[iLocation]   # site names in this location
        nSite = len(farm_idx)
        turbine_param = np.zeros((nSite, nRow, 4))

        # Copy every site's CSV into the table; rows missing from a short
        # file stay at zero.
        for iSite in range(nSite):
            csv_path = os.path.join(
                data_root, str(wf_id), str(farm_idx[iSite]) + '.csv')
            with open(csv_path, newline='') as f:
                reader = csv.reader(f)
                next(reader)            # skip the header row
                for count, row in enumerate(reader):
                    turbine_param[iSite, count, :] = row[1:]

        loc_capacity = 30 * nSite       # 30 per site
        capacity.append(loc_capacity)
        wind_param.append(turbine_param)

        loc_speed = np.mean(turbine_param[:, :, 0], axis=0)
        loc_gen = np.sum(turbine_param[:, :, 3], axis=0) / loc_capacity

        if resolution == 1:
            # Average each block of six 10-minute values into one hour.
            loc_speed = np.mean(np.reshape(loc_speed, (nRow // 6, 6)), axis=1)
            loc_gen = np.mean(np.reshape(loc_gen, (nRow // 6, 6)), axis=1)

        speed.append(loc_speed / 30)
        gen.append(loc_gen)

    speed = np.array(speed)
    gen = np.array(gen)
    capacity = np.array(capacity)

    # Locations may have different numbers of sites; np.array() on such a
    # ragged list raises on current NumPy, so fall back to an object array.
    if len({table.shape[0] for table in wind_param}) <= 1:
        wind_param = np.array(wind_param)
    else:
        ragged = np.empty(nLocation, dtype=object)
        for iLocation, table in enumerate(wind_param):
            ragged[iLocation] = table
        wind_param = ragged

    return speed, gen, wind_param, capacity
# main.py
import math
import os
import time

import matplotlib.pyplot as plt
import numpy as np
from sklearn import kernel_ridge
from sklearn import linear_model
from sklearn import neighbors
from sklearn import neural_network
from sklearn import svm
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor

from Input.feature_build import feature_build
from Input.parameter_gen import parameter_gen
from Input.read import readData

# WIND FORECAST CORRECTIVE SCENARIOS for 19 wind farms
# This script ends with graphical prediction results for
# three testing days and three testing farms.
# =================================================================
# generate wind scenarios based on historic data
# considering spatial and temporal correlation
# provide scenarios with better forecast
# provide scenarios with uncertainty quantification
# provide reasonable boundary with scenarios
# combining multiple data mining techniques
# including Random Forest, SVM, Linear Regression, KNN, NN
# the data is based on the NREL Western Wind Dataset


def build_estimators():
    """Return a fresh, unfitted instance of every regressor being compared."""
    return {
        "Linear Regression": linear_model.LinearRegression(),
        "Support Vector Machine": svm.LinearSVR(),
        "Kernel Ridge": kernel_ridge.KernelRidge(),
        "Random Forest": RandomForestRegressor(),
        "Gradient Boosting": GradientBoostingRegressor(),
        "Neural Network": neural_network.MLPRegressor(),
        "Nearest Neighbor": neighbors.KNeighborsRegressor(),
    }


# Load Data
start_time = time.time()
year = 2006
resolution = 1  # 1 hr resolution

# Directory where the data is stored: one sub-folder per wind farm.
data_root = os.path.join('.', str(year), str(year))
# os.listdir returns entries in arbitrary order; sort so that the farm
# indices used below refer to the same farms on every run and platform.
dataDir = sorted(os.listdir(data_root))
nLocation = len(dataDir)  # number of wind farms in the directory

# Names of the wind farms.
wf_idx = np.array(dataDir)

# Names of the wind sites in each wind farm.  Kept as a list of lists:
# farms have different numbers of sites, and np.array() on such a ragged
# list raises on current NumPy.
wf_name = [
    [os.path.splitext(filename)[0]
     for filename in sorted(os.listdir(os.path.join(data_root, dirname)))]
    for dirname in dataDir
]
nSites = sum(len(site_names) for site_names in wf_name)
print(nSites)  # 230 in total

# Output cleaned wind speed and power based on the given data
speed, gen, wind_param, capacity = readData(wf_name, wf_idx, resolution, year)
# Load Parameters
para = parameter_gen(gen, 5, resolution, 1, 0)

# Build Feature and Target
feature, target = feature_build(gen, speed, para)
print(np.shape(feature[0]))
print(np.shape(target[0]))

# Build Training and Test sets
days = [124, 221, 306]  # testing days: can be manipulated
farms = [0, 3, 6]       # testing farms: can be manipulated
farm_axis = np.arange(nLocation)
train_length = 2160     # length of training sets

for farm in farms:
    fig = plt.figure()
    for i, day in enumerate(days):
        # Prediction window: 7 days starting at the requested day, shifted
        # by the dropped prefix so it indexes the sample arrays.
        test_start = (day - 1) * 24 - para.drop_length
        test_stop = (day + 6) * 24 - para.drop_length
        train_start = test_start - train_length
        if train_start < 0:
            # A negative start would silently wrap around when slicing.
            raise ValueError(
                "day %d leaves fewer than %d training samples"
                % (day, train_length))

        # Only the farm being plotted is fitted, so only its sets are built.
        xTr = feature[farm][train_start:test_start]
        yTr = target[farm][train_start:test_start]
        xTe = feature[farm][test_start:test_stop]
        yTe = target[farm][test_start:test_stop]

        print(np.shape(xTr))
        print(np.shape(yTr))
        print(np.shape(xTe))
        print(np.shape(yTe))

        # Scikit-Learn commands for multiple algorithms
        Estimators = build_estimators()

        # dictionary form to store prediction results
        y_test_predict = dict()

        for name, estimator in Estimators.items():
            t1 = time.time()  # for computing time
            print(name, "------")
            # fit the training sets
            estimator.fit(xTr, yTr.ravel())
            # Predict; wind power is normalised to [0, 1], so clip outliers.
            y_test_predict[name] = np.clip(estimator.predict(xTe), 0, 1)

            error = y_test_predict[name] - yTe.ravel()
            rmse = math.sqrt(np.mean(error ** 2))  # root mean squared error
            mae = np.mean(np.abs(error))           # mean absolute error
            t2 = time.time()
            # Print the results of the performance of each algorithm.
            # NOTE: score() re-predicts internally, i.e. without clipping.
            print("Coefficient of Determination:",
                  estimator.score(xTe, yTe.ravel()))
            print("Root-Mean-Squared Error:", rmse)
            print("Mean Absolute Error:", mae)
            print("Time for each algorithm:", t2 - t1)
            print()

        # Visualize the prediction results using Matplotlib.  The grid is
        # given as three integers; a '311'-style string is rejected by
        # current Matplotlib.
        ax = plt.subplot(len(days), 1, i + 1)
        for name, prediction in y_test_predict.items():
            ax.plot(prediction, label=name)

        ax.plot(yTe, label="Real Data", linestyle='--')
        ax.set_title('Day %d' % day, fontsize=15)
        ax.set_xlim(0, 167)
        ax.set_ylim(0, 1)

    fig.suptitle('Prediction Result for Farm %s' % wf_idx[farm], fontsize=30)
    plt.xlabel('7 days since the requested day (hrs)', fontsize=20)
    plt.ylabel('Power Generated', fontsize=20)
    plt.legend(loc='center left', bbox_to_anchor=(0.9, 1),
               fancybox=True, shadow=True)

plt.show()
end_time = time.time()
print("Entire Program time: ", end_time - start_time)
# main3.py
import math
import os
import time

import matplotlib.pyplot as plt
import numpy as np
from sklearn import kernel_ridge
from sklearn import linear_model
from sklearn import neighbors
from sklearn import neural_network
from sklearn import svm
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor

from Input.feature_build import feature_build
from Input.parameter_gen import parameter_gen
from Input.read import readData

# WIND FORECAST CORRECTIVE SCENARIOS for 19 wind farms with VARIATION in time_lead
# This script ends with graphs of the errors and coefficients of
# determination for different time_leads, averaged over the testing days
# and the evaluated farms.
# =================================================================
# generate wind scenarios based on historic data
# considering spatial and temporal correlation
# provide scenarios with better forecast
# provide scenarios with uncertainty quantification
# provide reasonable boundary with scenarios
# combining multiple data mining techniques
# including Random Forest, SVM, Linear Regression, KNN, NN
# the data is based on the NREL Western Wind Dataset


def plot_metric(title, metric_by_name, lead_times):
    """Open a new figure plotting one metric against the lead times.

    ``metric_by_name`` maps an estimator name to a (lead time x day) array;
    each curve is the mean over the days.
    """
    plt.figure()
    plt.title(title, fontsize=30)
    plt.xlabel("T_Leads", fontsize=20)
    plt.ylabel("Values", fontsize=20)
    for name, values in metric_by_name.items():
        plt.plot(lead_times, np.mean(values, axis=1), label=name)
    plt.legend(loc='center left', bbox_to_anchor=(0.95, 0.3),
               fancybox=True, shadow=True)
    plt.xlim(min(lead_times), max(lead_times))


# Load Data
start_time = time.time()
year = 2006
resolution = 1  # 1 hr resolution

# Directory where the data is stored: one sub-folder per wind farm.
data_root = os.path.join('.', str(year), str(year))
# os.listdir returns entries in arbitrary order; sort so that the farms
# evaluated below are the same on every run and platform.
dataDir = sorted(os.listdir(data_root))
nLocation = len(dataDir)  # number of wind farms in the directory

# Names of the wind farms.
wf_idx = np.array(dataDir)

# Names of the wind sites in each wind farm.  Kept as a list of lists:
# farms have different numbers of sites, and np.array() on such a ragged
# list raises on current NumPy.
wf_name = [
    [os.path.splitext(filename)[0]
     for filename in sorted(os.listdir(os.path.join(data_root, dirname)))]
    for dirname in dataDir
]

# Output cleaned wind speed and power based on the given data
speed, gen, wind_param, capacity = readData(wf_name, wf_idx, resolution, year)

# varied Time Leads
t_lead = [1, 2, 4, 6, 12, 24]
days = [124, 221, 306]  # testing days: can be manipulated
train_length = 2160     # length of training sets
# Evaluate the first 10 farms (or all of them if fewer are available).
farm_axis = np.arange(min(10, nLocation))

# Scikit-Learn commands for multiple algorithms
Estimators = {
    "Linear Regression": linear_model.LinearRegression(),
    "Support Vector Machine": svm.LinearSVR(),
    "Kernel Ridge": kernel_ridge.KernelRidge(),
    "Random Forest": RandomForestRegressor(),
    "Gradient Boosting": GradientBoostingRegressor(),
    "Neural Network": neural_network.MLPRegressor(),
    "Nearest Neighbor": neighbors.KNeighborsRegressor(),
}

# One (lead time x day) table per estimator and metric.
rmse_avg = {name: np.empty((len(t_lead), len(days))) for name in Estimators}
mae_avg = {name: np.empty((len(t_lead), len(days))) for name in Estimators}
CoDet_avg = {name: np.empty((len(t_lead), len(days))) for name in Estimators}

for t, lead in enumerate(t_lead):
    # Load Parameters
    para = parameter_gen(gen, 5, resolution, lead, 0)

    # Build Feature and Target
    feature, target = feature_build(gen, speed, para)

    for i, day in enumerate(days):
        # Prediction window: 7 days starting at the requested day, shifted
        # by the dropped prefix so it indexes the sample arrays.
        test_start = (day - 1) * 24 - para.drop_length
        test_stop = (day + 6) * 24 - para.drop_length
        train_start = test_start - train_length
        if train_start < 0:
            # A negative start would silently wrap around when slicing.
            raise ValueError(
                "day %d leaves fewer than %d training samples"
                % (day, train_length))

        for name, estimator in Estimators.items():
            temprmse = []
            tempmae = []
            tempcoef = []
            for f in farm_axis:
                xTr = feature[f][train_start:test_start]
                yTr = target[f][train_start:test_start].ravel()
                xTe = feature[f][test_start:test_stop]
                yTe = target[f][test_start:test_stop].ravel()

                # fit the training sets
                estimator.fit(xTr, yTr)
                # Predict; wind power is normalised to [0, 1], so clip
                # outliers.
                prediction = np.clip(estimator.predict(xTe), 0, 1)

                error = prediction - yTe
                temprmse.append(math.sqrt(np.mean(error ** 2)))
                tempmae.append(np.mean(np.abs(error)))
                # NOTE: score() re-predicts internally, without clipping.
                tempcoef.append(estimator.score(xTe, yTe))

            # Average over the evaluated farms.
            rmse_avg[name][t][i] = np.mean(temprmse)
            mae_avg[name][t][i] = np.mean(tempmae)
            CoDet_avg[name][t][i] = np.mean(tempcoef)

# Visualize the error results for different time leads using Matplotlib
plot_metric("RMSE", rmse_avg, t_lead)
plot_metric("MAE", mae_avg, t_lead)
plot_metric("Coefficient of Determination", CoDet_avg, t_lead)

plt.show()
end_time = time.time()
print("Entire Program time: ", end_time - start_time)
# main2.py
import math
import os
import time

import matplotlib.pyplot as plt
import numpy as np
from sklearn import kernel_ridge
from sklearn import linear_model
from sklearn import neighbors
from sklearn import neural_network
from sklearn import svm
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor

from Input.feature_build import feature_build
from Input.parameter_gen import parameter_gen
from Input.read import readData

# WIND FORECAST CORRECTIVE SCENARIOS for 19 wind farms and ERRORS
# This script ends with graphical results for errors,
# coefficients of determination, and computing time for all 19 farms
# =================================================================
# generate wind scenarios based on historic data
# considering spatial and temporal correlation
# provide scenarios with better forecast
# provide scenarios with uncertainty quantification
# provide reasonable boundary with scenarios
# combining multiple data mining techniques
# including Random Forest, SVM, Linear Regression, KNN, NN
# the data is based on the NREL Western Wind Dataset


def plot_metric(title, metric_by_name, x_values):
    """Open a new figure plotting one per-farm metric.

    ``metric_by_name`` maps an estimator name to a (day x farm) array; each
    curve is the mean over the days.
    """
    plt.figure()
    plt.title(title, fontsize=30)
    plt.xlabel("Farms", fontsize=20)
    plt.ylabel("Values", fontsize=20)
    for name, values in metric_by_name.items():
        plt.plot(x_values, np.mean(values, axis=0), label=name)
    plt.legend(loc='center left', bbox_to_anchor=(0.95, 0.3),
               fancybox=True, shadow=True)


# Load Data
start_time = time.time()
year = 2006
resolution = 1  # 1 hr resolution

# Directory where the data is stored: one sub-folder per wind farm.
data_root = os.path.join('.', str(year), str(year))
# os.listdir returns entries in arbitrary order; sort so that the farm axis
# of the plots is the same on every run and platform.
dataDir = sorted(os.listdir(data_root))
nLocation = len(dataDir)  # number of wind farms in the directory

# Names of the wind farms.
wf_idx = np.array(dataDir)

# Names of the wind sites in each wind farm.  Kept as a list of lists:
# farms have different numbers of sites, and np.array() on such a ragged
# list raises on current NumPy.
wf_name = [
    [os.path.splitext(filename)[0]
     for filename in sorted(os.listdir(os.path.join(data_root, dirname)))]
    for dirname in dataDir
]

# Output cleaned wind speed and power based on the given data
speed, gen, wind_param, capacity = readData(wf_name, wf_idx, resolution, year)
# Load Parameters
para = parameter_gen(gen, 5, resolution, 1, 0)

# Build Feature and Target
feature, target = feature_build(gen, speed, para)
print(np.shape(feature[0]))
print(np.shape(target[0]))

# Build Training and Test sets
days = [124, 221, 306]  # testing days: can be manipulated
train_length = 2160     # length of training sets
# x-axis with all farms
farm_axis = np.arange(nLocation)

# Scikit-Learn commands for multiple algorithms
Estimators = {
    "Linear Regression": linear_model.LinearRegression(),
    "Support Vector Machine": svm.LinearSVR(),
    "Kernel Ridge": kernel_ridge.KernelRidge(),
    "Random Forest": RandomForestRegressor(),
    "Gradient Boosting": GradientBoostingRegressor(),
    "Neural Network": neural_network.MLPRegressor(),
    "Nearest Neighbor": neighbors.KNeighborsRegressor(),
}

# One (day x farm) table per estimator and metric.
table_shape = (len(days), len(farm_axis))
rmse_avg = {name: np.empty(table_shape) for name in Estimators}
mae_avg = {name: np.empty(table_shape) for name in Estimators}
CoDet_avg = {name: np.empty(table_shape) for name in Estimators}
time_avg = {name: np.empty(table_shape) for name in Estimators}

for i, day in enumerate(days):
    # Prediction window: 7 days starting at the requested day, shifted by
    # the dropped prefix so it indexes the sample arrays.
    test_start = (day - 1) * 24 - para.drop_length
    test_stop = (day + 6) * 24 - para.drop_length
    train_start = test_start - train_length
    if train_start < 0:
        # A negative start would silently wrap around when slicing.
        raise ValueError(
            "day %d leaves fewer than %d training samples"
            % (day, train_length))

    for name, estimator in Estimators.items():
        temprmse = []
        tempmae = []
        tempcoef = []
        temptime = []
        for f in farm_axis:
            xTr = feature[f][train_start:test_start]
            yTr = target[f][train_start:test_start].ravel()
            xTe = feature[f][test_start:test_stop]
            yTe = target[f][test_start:test_stop].ravel()

            t1 = time.time()  # for computing time
            # fit the training sets
            estimator.fit(xTr, yTr)
            # Predict; wind power is normalised to [0, 1], so clip outliers.
            prediction = np.clip(estimator.predict(xTe), 0, 1)

            error = prediction - yTe
            rmse = math.sqrt(np.mean(error ** 2))  # root mean squared error
            mae = np.mean(np.abs(error))           # mean absolute error
            t2 = time.time()

            temprmse.append(rmse)
            tempmae.append(mae)
            # NOTE: score() re-predicts internally, without clipping.
            tempcoef.append(estimator.score(xTe, yTe))
            temptime.append(t2 - t1)

        rmse_avg[name][i] = np.array(temprmse)
        mae_avg[name][i] = np.array(tempmae)
        CoDet_avg[name][i] = np.array(tempcoef)
        time_avg[name][i] = np.array(temptime)

# Visualize the error results for all farms using Matplotlib
plot_metric("RMSE", rmse_avg, farm_axis)
plot_metric("MAE", mae_avg, farm_axis)
plot_metric("Coefficient of Determination", CoDet_avg, farm_axis)
plot_metric("Computing Time", time_avg, farm_axis)

plt.show()
end_time = time.time()
print("Entire Program time: ", end_time - start_time)