├── Methods
│   ├── __init__.py
│   ├── arima.py
│   ├── clustering.py
│   ├── dbn.py
│   ├── ffnn.py
│   ├── gbrt.py
│   ├── lstm.py
│   ├── rfr.py
│   ├── seq2seq.py
│   ├── svr.py
│   └── xgboost_.py
└── data
    └── load.csv

--------------------------------------------------------------------------------
/Methods/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/getBolted/LoadPredicting/ddf53ce3f49f9ea88d490e3a39759f2f67741830/Methods/__init__.py

--------------------------------------------------------------------------------
/Methods/arima.py:
--------------------------------------------------------------------------------
from __future__ import print_function
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pywt
#print(pywt.families, pywt.wavelist('coif'))
import statsmodels.api as sm
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.arima_model import ARMA

# Computes the Mean Squared Error (MSE) for predicted values against
# actual values
def meanSquareError(actual, pred):
    if (not len(actual) == len(pred) or len(actual) == 0):
        return -1.0
    total = 0.0
    for x in range(len(actual)):
        total += math.pow(actual[x]-pred[x], 2)
    return total/len(actual)

# Same as meanSquareError, but scaled down by a factor of 1e6
def mse(actual, pred):
    if (not len(actual) == len(pred) or len(actual) == 0):
        return -1.0
    total = 0.0
    for x in range(len(actual)):
        total += math.pow(actual[x]-pred[x], 2)
    return total/(len(actual)*1000000)

# Computes Normalized Root Mean Square Error (NRMSE) for
# predicted values against actual values
def normRmse(actual, pred):
    if (not len(actual) == len(pred) or len(actual) == 0):
        return -1.0
    sumSquares = 0.0
    maxY = actual[0]
    minY = actual[0]
    for x in range(len(actual)):
        sumSquares += math.pow(pred[x]-actual[x], 2.0)
        maxY = max(maxY, actual[x])
        minY = min(minY, actual[x])
    return math.sqrt(sumSquares/len(actual))/(maxY-minY)

# Computes Root Mean Square Error (RMSE) for
# predicted values against actual values
def Rmse(actual, pred):
    if (not len(actual) == len(pred) or len(actual) == 0):
        return -1.0
    sumSquares = 0.0
    for x in range(len(actual)):
        sumSquares += math.pow(pred[x]-actual[x], 2.0)
    return math.sqrt(sumSquares/len(actual))

# Computes Mean Absolute Percent Error (MAPE) for predicted
# values against actual values
def mape(actual, pred):
    if (not len(actual) == len(pred) or len(actual) == 0):
        return -1.0
    total = 0.0
    for x in range(len(actual)):
        total += abs((actual[x]-pred[x])/actual[x])
    return total/len(actual)

# Computes Mean Absolute Error (MAE) for predicted
# values against actual values
def mae(actual, pred):
    if (not len(actual) == len(pred) or len(actual) == 0):
        return -1.0
    total = 0.0
    for x in range(len(actual)):
        total += abs(actual[x]-pred[x])
    return total/len(actual)

# define a function to convert a vector of time series into a 2D matrix
def convertSeriesToMatrix(vectorSeries, sequence_length):
    matrix = []
    for i in range(len(vectorSeries)-sequence_length+1):
        matrix.append(vectorSeries[i:i+sequence_length])
    return matrix

# single-level Haar discrete wavelet transform
def dwt(a):
    [ca, cd] = pywt.dwt(a, 'haar')
    return ca, cd

# inverse of the transform above
def idwt(ca, cd):
    ori = pywt.idwt(ca, cd, 'haar')
    return ori
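# --- illustrative check (not part of the original script): for an even-length
# signal the Haar dwt/idwt pair above is a perfect reconstruction, so the
# round trip returns the input unchanged. _demo_signal is a made-up example.
_demo_signal = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
_demo_ca, _demo_cd = dwt(_demo_signal)   # approximation and detail coefficients
assert np.allclose(idwt(_demo_ca, _demo_cd), _demo_signal)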
def generateData(sample, outputnum):
    a = np.array(sample)
    mu = np.mean(a)
    #sigma_2 = np.var(a) / 2
    sigma_2 = np.var(a) / 24
    result = np.random.normal(loc=mu, scale=np.sqrt(sigma_2), size=outputnum)
    # result = np.random.logistic(loc=mu, scale=np.sqrt(sigma_2), size=outputnum)
    # result = np.random.laplace(loc=mu, scale=np.sqrt(sigma_2), size=outputnum)
    print('mu = %f\tsigma^2 = %f' % (mu, sigma_2))
    return mu, sigma_2, result

def drawResult(mu, sigma_2, result):
    plt.figure(figsize=(10,8), dpi=80)
    # density=True replaces the 'normed' argument removed from matplotlib
    count, bins, ignored = plt.hist(result, 30, density=True)
    plt.plot(bins, 1/(np.sqrt(2 * np.pi * sigma_2)) * np.exp(-(bins - mu)**2 / (2 * sigma_2)), linewidth=2, color='r')

def dataset(matrix_load, train_row):
    matrix_load = np.array(matrix_load)
    print("Data shape: ", matrix_load.shape)
    train_set = matrix_load[:train_row, :]
    # random seed
    np.random.seed(1234)
    # shuffle the training set (but do not shuffle the test set)
    np.random.shuffle(train_set)
    # the training set
    X_train = train_set[:, :-1]
    y_train = train_set[:, -1]
    # the test set
    X_test = matrix_load[train_row:, :-1]
    y_test = matrix_load[train_row:, -1]
    # the input to the LSTM layer needs to have the shape (number of samples, sequence length, 1)
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
    print(np.shape(X_train), np.shape(X_test))
    return X_train, y_train, X_test, y_test

# load raw data
df_raw = pd.read_csv('../data/load.csv', header=0, usecols=[0,1])
# numpy array
df_raw_array = df_raw.values
list_hourly_load = [df_raw_array[i,1]/1000 for i in range(1, len(df_raw))]
print("Data shape of list_hourly_load: ", np.shape(list_hourly_load))
# outlier handling: smooth isolated spikes, then clamp values that deviate
# too far from the average of the same hour on nearby days
k = 0
for j in range(1, len(list_hourly_load) - 1):  # stay inside the series bounds
    if(abs(list_hourly_load[j]-list_hourly_load[j-1])>2 and abs(list_hourly_load[j]-list_hourly_load[j+1])>2):
        k = k + 1
        list_hourly_load[j] = (list_hourly_load[j - 1] + list_hourly_load[j + 1]) / 2 + list_hourly_load[j - 24] - list_hourly_load[j - 24 - 1] / 2
    total = 0
    num = 0
    for t in range(1,8):
        if(j - 24*t >= 0):
            num = num + 1
            total = total + list_hourly_load[j - 24*t]
        if(j + 24*t < len(list_hourly_load)):
            num = num + 1
            total = total + list_hourly_load[j + 24*t]
    total = total / num
    if(abs(list_hourly_load[j] - total)>3):
        k = k + 1
        if(list_hourly_load[j] > total): list_hourly_load[j] = total + 3
        else: list_hourly_load[j] = total - 3
print(k)
list_hourly_load = np.array(list_hourly_load)
shifted_value = list_hourly_load.mean()
list_hourly_load -= shifted_value
# level-2 wavelet decomposition of everything except the last 48 hours
a2, d2, d1 = pywt.wavedec(list_hourly_load[:-48], 'db4', mode='sym', level=2)
# lhl = pywt.waverec([a2, d2, d1], 'db4')
# print(np.shape(a2), np.shape(d2), np.shape(d1), np.shape(lhl))
# order_a2 = sm.tsa.arma_order_select_ic(a2, ic='aic')['aic_min_order']
# order_d2 = sm.tsa.arma_order_select_ic(d2, ic='aic')['aic_min_order']
# order_d1 = sm.tsa.arma_order_select_ic(d1, ic='aic')['aic_min_order']
order_a2 = [3, 2]     # (p, q)
order_d2 = [4, 1, 2]  # (p, d, q)
order_d1 = [4, 1, 2]
print(order_a2, order_d2, order_d1)
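# --- optional sketch (an assumption, not part of the original run): the
# hard-coded orders above can be re-derived with the commented-out AIC search,
# e.g. on a short slice to keep it fast. Uncomment to try:
# _aic_order = sm.tsa.arma_order_select_ic(a2[:500], max_ar=4, max_ma=2, ic='aic')['aic_min_order']
# print('AIC-selected (p, q) for a2:', _aic_order)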
model_a2 = ARMA(a2, order=order_a2)
model_d2 = ARIMA(d2, order=order_d2)
model_d1 = ARIMA(d1, order=order_d1)
result_a2 = model_a2.fit()
result_d2 = model_d2.fit()
result_d1 = model_d1.fit()
plt.figure(figsize=(10,15))
plt.subplot(3,1,1)
plt.plot(a2, 'blue')
plt.plot(result_a2.fittedvalues, 'red')
plt.title('model_a2')
plt.subplot(3,1,2)
plt.plot(d2, 'blue')
plt.plot(result_d2.fittedvalues, 'red')
plt.title('model_d2')
plt.subplot(3,1,3)
plt.plot(d1, 'blue')
plt.plot(result_d1.fittedvalues, 'red')
plt.title('model_d1')
plt.show()
# decompose the full series to find how many extra coefficients the last 48 hours add
a2_all, d2_all, d1_all = pywt.wavedec(list_hourly_load, 'db4', mode='sym', level=2)
delta = [len(a2_all) - len(a2), len(d2_all) - len(d2), len(d1_all) - len(d1)]
print(delta)
pa2 = model_a2.predict(params=result_a2.params, start=1, end=len(a2) + delta[0])
pd2 = model_d2.predict(params=result_d2.params, start=1, end=len(d2) + delta[1])
pd1 = model_d1.predict(params=result_d1.params, start=1, end=len(d1) + delta[2])
# reconstruct the forecast in the time domain
predict_values = pywt.waverec([pa2, pd2, pd1], 'db4')
print(np.shape(predict_values))
plt.plot(list_hourly_load[20710:20758], label="$Observed$", c='green')
plt.plot(predict_values[20710:20758], label="$Predicted$", c='red')
plt.xlabel('Hour')
plt.ylabel('Electricity load, kW')
plt.show()
print(len(list_hourly_load), len(predict_values))
mape_val = mape((list_hourly_load+shifted_value)*1000, (predict_values+shifted_value)*1000)
print('MAPE is ', mape_val)
mae_val = mae((list_hourly_load+shifted_value)*1000, (predict_values+shifted_value)*1000)
print('MAE is ', mae_val)
mse_val = meanSquareError((list_hourly_load+shifted_value)*1000, (predict_values+shifted_value)*1000)
print('MSE is ', mse_val)
rmse_val = math.sqrt(mse_val)
print('RMSE is ', rmse_val)
nrmse_val = normRmse((list_hourly_load+shifted_value)*1000, (predict_values+shifted_value)*1000)
print('NRMSE is ', nrmse_val)

--------------------------------------------------------------------------------
/Methods/clustering.py:
--------------------------------------------------------------------------------
import math
from tools import statistics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial import distance
from scipy.cluster.vq import kmeans
from scipy.spatial.distance import euclidean

# Performs K-Means Clustering on the ordered sequence
# of vectors x with parameter k, and returns a 2-tuple:
# First tuple value is the list of centroids
# Second tuple value is a vector x' of length equal to that
# of x, such that the ith value of x' is the cluster label
# for the ith example of the input x
def kMeansClustering(x, k):
    # Convert the list into numpy format
    conv = np.asarray(x)
    # Compute the centroids
    centroids = kmeans(conv, k, iter=10)[0]
    # Relabel the x's
    labels = []
    for y in range(len(x)):
        minDist = float('inf')
        minLabel = -1
        for z in range(len(centroids)):
            e = euclidean(conv[y], centroids[z])  # Euclidean distance
            if (e < minDist):
                minDist = e
                minLabel = z
        labels.append(minLabel)
    # Return the list of centroids and labels
    return (centroids, labels)
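# --- tiny illustrative run (not part of the original script): two obvious
# blobs should come back as two centroids with consistent labels.
_toy = [[0.0, 0.0], [0.1, 0.0], [5.0, 5.0], [5.1, 4.9]]
_toy_cents, _toy_labs = kMeansClustering(_toy, 2)
print('toy centroids:', _toy_cents, 'toy labels:', _toy_labs)  # e.g. labels [0, 0, 1, 1]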
# Performs a weighted clustering on the examples in xTest
# Returns a 1-d vector of predictions
def predictClustering(clusters, clusterSets, xTest, metric):
    clustLabels = []
    simFunction = getDistLambda(metric)
    for x in range(len(xTest)):
        clustDex = -1
        clustDist = float('inf')
        for y in range(len(clusters)):
            dist = simFunction(clusters[y], xTest[x])
            if (dist < clustDist):
                clustDist = dist
                clustDex = y
        clustLabels.append(clustDex)
    predict = np.zeros(len(xTest))
    for x in range(len(xTest)):
        predict[x] = weightedClusterClass(xTest[x], clusterSets[clustLabels[x]], simFunction)
    return predict

# Performs a weighted cluster classification
def weightedClusterClass(xVector, examples, simFunction):
    pred = 0.0
    normalizer = 0.0
    ctr = 0
    for x in examples:
        similarity = 1.0/simFunction(xVector, x[0])
        pred += similarity*x[1]
        normalizer += similarity
        ctr += 1
    return (pred/normalizer)

def getDistLambda(metric):
    if (metric == "manhattan"):
        return lambda x, y: distance.cityblock(x, y)
    elif (metric == "cosine"):
        return lambda x, y: distance.cosine(x, y)
    else:
        return lambda x, y: distance.euclidean(x, y)

# define a function to convert a vector of time series into a 2D matrix
def convertSeriesToMatrix(vectorSeries, sequence_length):
    matrix = []
    for i in range(len(vectorSeries)-sequence_length+1):
        matrix.append(vectorSeries[i:i+sequence_length])
    return matrix

# load raw data
df_raw = pd.read_csv('../data/load.csv', header=0, usecols=[0,1])
# numpy array
df_raw_array = df_raw.values
# hourly load
list_hourly_load = [df_raw_array[i,1]/1000 for i in range(0, len(df_raw))]
print("Data shape of list_hourly_load: ", np.shape(list_hourly_load))
# outlier handling
k = 0
for j in range(1, len(list_hourly_load) - 1):  # stay inside the series bounds
    if(abs(list_hourly_load[j]-list_hourly_load[j-1])>2 and abs(list_hourly_load[j]-list_hourly_load[j+1])>2):
        k = k + 1
        list_hourly_load[j] = (list_hourly_load[j - 1] + list_hourly_load[j + 1]) / 2 + list_hourly_load[j - 24] - list_hourly_load[j - 24 - 1] / 2
    total = 0
    num = 0
    for t in range(1,8):
        if(j - 24*t >= 0):
            num = num + 1
            total = total + list_hourly_load[j - 24*t]
        if(j + 24*t < len(list_hourly_load)):
            num = num + 1
            total = total + list_hourly_load[j + 24*t]
    total = total / num
    if(abs(list_hourly_load[j] - total)>3):
        k = k + 1
        if(list_hourly_load[j] > total): list_hourly_load[j] = total + 3
        else: list_hourly_load[j] = total - 3
# shift all data by mean
list_hourly_load = np.array(list_hourly_load)
shifted_value = list_hourly_load.mean()
list_hourly_load -= shifted_value
# the length of the sequence for predicting the future value
sequence_length = 25
# convert the vector to a 2D matrix
matrix_load = convertSeriesToMatrix(list_hourly_load, sequence_length)
matrix_load = np.array(matrix_load)
print("Data shape: ", matrix_load.shape)
# train_row = int(round(0.9 * matrix_load.shape[0]))
train_row = matrix_load.shape[0] - 48
print('train:', train_row, 'test:', 48)
train_set = matrix_load[:train_row, :]
# random seed
np.random.seed(1234)
# shuffle the training set (but do not shuffle the test set)
np.random.shuffle(train_set)
# the training set
X_train = train_set[:, :-1]
# the last column is the true value to compute the mean-squared-error loss
y_train = train_set[:, -1]
print(X_train[0], y_train[0])
# the test set
X_test = matrix_load[train_row:, :-1]
y_test = matrix_load[train_row:, -1]
time_test = [df_raw_array[i,0] for i in range(train_row+23, len(df_raw))]
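# --- illustrative note (not part of the original script): each row of
# matrix_load is a sliding window of 25 consecutive hours; the first 24 become
# the lag features and the 25th is the prediction target. On a toy series:
print(convertSeriesToMatrix(list(range(6)), 3))
# -> [[0, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5]]: all but the last column
# would be X, the last column would be y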
# clustering
# Compute centroids and labels of the data
ckmeans_365, lkmeans_365 = kMeansClustering(X_train, 365)
c = [ckmeans_365]
l = [lkmeans_365]
algNames = ["Observed", "Predicted"]
preds = []
preds.append(y_test)
for t in range(len(c)):
    # The centroids computed by the current clustering algorithm
    centroids = c[t]
    # The labels for the examples defined by the current clustering assignment
    labels = l[t]
    # Separate the training samples into cluster sets
    clusterSets = []
    for x in range(len(centroids)):
        clusterSets.append([])
    for x in range(len(labels)):
        # Place the example into its cluster
        clusterSets[labels[x]].append((X_train[x], y_train[x]))
    # Compute predictions for each of the test examples
    predicted_values = predictClustering(centroids, clusterSets, X_test, "euclidean")
    mape = statistics.mape((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000)
    print('MAPE is ', mape)
    mae = statistics.mae((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000)
    print('MAE is ', mae)
    mse = statistics.meanSquareError((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000)
    print('MSE is ', mse)
    rmse = math.sqrt(mse)
    print('RMSE is ', rmse)
    nrmse = statistics.normRmse((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000)
    print('NRMSE is ', nrmse)
    preds.append(predicted_values)
# show
fig = plt.figure()
colors = ["g","r","b","c","m","y","k","w"]
legendVars = []
for j in range(len(preds)):
    print(j)
    x, = plt.plot(preds[j]+shifted_value, color=colors[j])
    legendVars.append(x)
plt.xlabel('Hour')
plt.ylabel('Electricity load, kW')
plt.legend(legendVars, algNames)
plt.ylim(0, 8)
plt.show()

--------------------------------------------------------------------------------
/Methods/dbn.py:
--------------------------------------------------------------------------------
from __future__ import print_function
import math
from tools import statistics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras import Sequential
from keras.layers import Dense
# from keras.optimizers import SGD
from keras import regularizers
from sklearn.neural_network import BernoulliRBM

# define a function to convert a vector of time series into a 2D matrix
def convertSeriesToMatrix(vectorSeries, sequence_length):
    matrix = []
    for i in range(len(vectorSeries)-sequence_length+1):
        matrix.append(vectorSeries[i:i+sequence_length])
    return matrix

# load raw data
df_raw = pd.read_csv('../data/load.csv', header=0, usecols=[0,1])
# numpy array
df_raw_array = df_raw.values
# hourly load
list_hourly_load = [df_raw_array[i,1]/1000 for i in range(0, len(df_raw))]
print("Data shape of list_hourly_load: ", np.shape(list_hourly_load))
# outlier handling
k = 0
for j in range(1, len(list_hourly_load) - 1):  # stay inside the series bounds
    if(abs(list_hourly_load[j]-list_hourly_load[j-1])>2 and abs(list_hourly_load[j]-list_hourly_load[j+1])>2):
        k = k + 1
        list_hourly_load[j] = (list_hourly_load[j - 1] + list_hourly_load[j + 1]) / 2 + list_hourly_load[j - 24] - list_hourly_load[j - 24 - 1] / 2
    total = 0
    num = 0
    for t in range(1,8):
        if(j - 24*t >= 0):
            num = num + 1
            total = total + list_hourly_load[j - 24*t]
        if(j + 24*t < len(list_hourly_load)):
            num = num + 1
            total = total + list_hourly_load[j + 24*t]
    total = total / num
    if(abs(list_hourly_load[j] - total)>3):
        k = k + 1
        if(list_hourly_load[j] > total): list_hourly_load[j] = total + 3
        else: list_hourly_load[j] = total - 3
print(k)
plt.plot(list_hourly_load)
plt.show()
# shift all data by mean
list_hourly_load = np.array(list_hourly_load)
shifted_value = list_hourly_load.mean()
list_hourly_load -= shifted_value
# the length of the sequence for predicting the future value
sequence_length = 25
# convert the vector to a 2D matrix
matrix_load = convertSeriesToMatrix(list_hourly_load, sequence_length)
matrix_load = np.array(matrix_load)
print("Data shape: ", matrix_load.shape)
# train_row = int(round(0.9 * matrix_load.shape[0]))
train_row = matrix_load.shape[0] - 48
print('train:', train_row, 'test:', 48)
train_set = matrix_load[:train_row, :]
# random seed
np.random.seed(1234)
# shuffle the training set (but do not shuffle the test set)
np.random.shuffle(train_set)
# the training set
X_train = train_set[:, :-1]
# the last column is the true value to compute the mean-squared-error loss
y_train = train_set[:, -1]
print(X_train[0], y_train[0])
# the test set
X_test = matrix_load[train_row:, :-1]
y_test = matrix_load[train_row:, -1]
time_test = [df_raw_array[i,0] for i in range(train_row+23, len(df_raw))]
X_train = np.reshape(X_train, (np.shape(X_train)[0], np.shape(X_train)[1]))
X_test = np.reshape(X_test, (np.shape(X_test)[0], np.shape(X_test)[1]))
print(np.shape(X_train), np.shape(X_test))
print(np.shape(y_train), np.shape(y_test))
# dbn: greedy layer-wise pre-training with stacked RBMs
input_layer = X_train
hidden_layer = [250, 500, 200]
weight_rbm = []
bias_rbm = []
for i in range(len(hidden_layer)):
    print("DBN Layer {0} Pre-training".format(i + 1))
    rbm = BernoulliRBM(n_components=hidden_layer[i], learning_rate=0.0005, batch_size=512, n_iter=200, verbose=2, random_state=1)
    rbm.fit(input_layer)
    # size of the weight matrix is [input_layer, hidden_layer]
    weight_rbm.append(rbm.components_.T)
    bias_rbm.append(rbm.intercept_hidden_)
    input_layer = rbm.transform(input_layer)
print('Pre-training finish.', np.shape(weight_rbm[0]), np.shape(bias_rbm[0]))
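# --- shape check (illustrative, not part of the original script): sklearn
# stores RBM weights as components_ with shape (n_components, n_features),
# which is why they are transposed above before being handed to Dense layers.
_rbm_demo = BernoulliRBM(n_components=3, n_iter=1, random_state=0)
_rbm_demo.fit(np.random.rand(10, 4))
print(_rbm_demo.components_.shape, _rbm_demo.intercept_hidden_.shape)  # (3, 4) (3,)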
test_rms = 0
result = []
model = Sequential()
print('Fine-tuning start.')
for i in range(0, len(hidden_layer)):
    print('i:', i)
    if i == 0:
        model.add(Dense(hidden_layer[i], activation='sigmoid', input_dim=np.shape(X_train)[1]))
    else:
        model.add(Dense(hidden_layer[i], activation='sigmoid'))
    # initialise the layer with the weights learned by RBM pre-training
    layer = model.layers[i]
    layer.set_weights([weight_rbm[i], bias_rbm[i]])
model.add(Dense(1, activation='linear', kernel_regularizer=regularizers.l2(0.01)))
# sgd = SGD(lr=0.005, decay=0)
model.compile(loss='mse', optimizer="rmsprop")  # or sgd
model.fit(X_train, y_train, batch_size=150, epochs=100, verbose=2)
# save model
model.save('../model/dbn.h5')
print('Fine-tuning finish.')
# get the predicted values
predicted_values = model.predict(X_test)
num_test_samples = len(predicted_values)
predicted_values = np.reshape(predicted_values, (num_test_samples, 1))
# evaluation
mape = statistics.mape((y_test+shifted_value)*1000, (predicted_values+shifted_value)*1000)
print('MAPE is ', mape)
mae = statistics.mae((y_test+shifted_value)*1000, (predicted_values+shifted_value)*1000)
print('MAE is ', mae)
mse = statistics.meanSquareError((y_test+shifted_value)*1000, (predicted_values+shifted_value)*1000)
print('MSE is ', mse)
rmse = math.sqrt(mse)
print('RMSE is ', rmse)
nrmse = statistics.normRmse((y_test+shifted_value)*1000, (predicted_values+shifted_value)*1000)
print('NRMSE is ', nrmse)
# plot the results
fig = plt.figure()
plt.plot(y_test + shifted_value, label="$Observed$", c='green')
plt.plot(predicted_values + shifted_value, label="$Predicted$", c='red')
plt.xlabel('Hour')
plt.ylabel('Electricity load, kW')
plt.legend()
plt.show()

--------------------------------------------------------------------------------
/Methods/ffnn.py:
--------------------------------------------------------------------------------
import math
from tools import statistics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pybrain.structure import FeedForwardNetwork
from pybrain.structure import FullConnection
from pybrain.structure import LinearLayer, SigmoidLayer
from pybrain.datasets import SupervisedDataSet
from pybrain.supervised.trainers import BackpropTrainer
from sklearn.decomposition import PCA

# Constructs and fits a neural network with the given number of neurons
# to the training data for the specified number of epochs, and returns a
# vector of the predicted values for the given test data - assumes the
# target is univariate (i.e. single-valued output)
def fit_predict(xTrain, yTrain, xTest, epochs, neurons):
    # Check edge cases
    if (not len(xTrain) == len(yTrain) or len(xTrain) == 0 or
            len(xTest) == 0 or epochs <= 0):
        return

    # Randomize the training data (probably not necessary, but pybrain might
    # not shuffle the data itself, so perform as a safety check)
    indices = np.arange(len(xTrain))
    np.random.shuffle(indices)

    trainSwapX = [xTrain[x] for x in indices]
    trainSwapY = [yTrain[x] for x in indices]

    supTrain = SupervisedDataSet(len(xTrain[0]), 1)
    for x in range(len(trainSwapX)):
        supTrain.addSample(trainSwapX[x], trainSwapY[x])

    # Construct the feed-forward neural network
    n = FeedForwardNetwork()

    inLayer = LinearLayer(len(xTrain[0]))
    hiddenLayer1 = SigmoidLayer(neurons)
    outLayer = LinearLayer(1)

    n.addInputModule(inLayer)
    n.addModule(hiddenLayer1)
    n.addOutputModule(outLayer)

    in_to_hidden = FullConnection(inLayer, hiddenLayer1)
    hidden_to_out = FullConnection(hiddenLayer1, outLayer)

    n.addConnection(in_to_hidden)
    n.addConnection(hidden_to_out)

    n.sortModules()

    # Train the neural network on the training partition, validating
    # the training progress on the validation partition
    trainer = BackpropTrainer(n, dataset=supTrain, momentum=0.1, learningrate=0.01,
                              verbose=False, weightdecay=0.01)

    trainer.trainUntilConvergence(dataset=supTrain,
                                  maxEpochs=epochs, validationProportion=0.30)

    outputs = []
    for x in xTest:
        outputs.append(n.activate(x))

    return outputs

# define a function to convert a vector of time series into a 2D matrix
def convertSeriesToMatrix(vectorSeries, sequence_length):
    matrix = []
    for i in range(len(vectorSeries)-sequence_length+1):
        matrix.append(vectorSeries[i:i+sequence_length])
    return matrix

# load raw data
df_raw = pd.read_csv('../data/load.csv', header=0, usecols=[0,1])
# numpy array
df_raw_array = df_raw.values
# hourly load
list_hourly_load = [df_raw_array[i,1]/1000 for i in range(0, len(df_raw))]
print("Data shape of list_hourly_load: ", np.shape(list_hourly_load))
# outlier handling
k = 0
for j in range(1, len(list_hourly_load) - 1):  # stay inside the series bounds
    if(abs(list_hourly_load[j]-list_hourly_load[j-1])>2 and abs(list_hourly_load[j]-list_hourly_load[j+1])>2):
        k = k + 1
        list_hourly_load[j] = (list_hourly_load[j - 1] + list_hourly_load[j + 1]) / 2 + list_hourly_load[j - 24] - list_hourly_load[j - 24 - 1] / 2
    total = 0
    num = 0
    for t in range(1,8):
        if(j - 24*t >= 0):
            num = num + 1
            total = total + list_hourly_load[j - 24*t]
        if(j + 24*t < len(list_hourly_load)):
            num = num + 1
            total = total + list_hourly_load[j + 24*t]
    total = total / num
    if(abs(list_hourly_load[j] - total)>3):
        k = k + 1
        if(list_hourly_load[j] > total): list_hourly_load[j] = total + 3
        else: list_hourly_load[j] = total - 3
# shift all data by mean
list_hourly_load = np.array(list_hourly_load)
shifted_value = list_hourly_load.mean()
list_hourly_load -= shifted_value
# the length of the sequence for predicting the future value
sequence_length = 25
# convert the vector to a 2D matrix
matrix_load = convertSeriesToMatrix(list_hourly_load, sequence_length)
matrix_load = np.array(matrix_load)
print("Data shape: ", matrix_load.shape)
# train_row = int(round(0.9 * matrix_load.shape[0]))
train_row = matrix_load.shape[0] - 48
print('train:', train_row, 'test:', 48)
train_set = matrix_load[:train_row, :]
# random seed
np.random.seed(1234)
# shuffle the training set (but do not shuffle the test set)
np.random.shuffle(train_set)
# the training set
X_train = train_set[:, :-1]
# the last column is the true value to compute the mean-squared-error loss
y_train = train_set[:, -1]
print(X_train[0], y_train[0])
# the test set
X_test = matrix_load[train_row:, :-1]
y_test = matrix_load[train_row:, -1]
time_test = [df_raw_array[i,0] for i in range(train_row+23, len(df_raw))]
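# --- illustrative diagnostic (not part of the original script): a quick way
# to check how much of the variance of the 24 lag features is retained by the
# 18 principal components used below.
_pca_probe = PCA(n_components=24).fit(X_train)
print('cumulative explained variance of 18 components:',
      np.cumsum(_pca_probe.explained_variance_ratio_)[17])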
# nn
dimensions = [18]
neurons = [75]
names = []
names.append('true')
for x in range(len(dimensions)):
    s = "d=" + str(dimensions[x]) + ",h=" + str(neurons[x])
    names.append(s)
preds = []
preds.append(y_test)
for x in range(len(dimensions)):
    # Perform dimensionality reduction on the feature vectors
    pca = PCA(n_components=dimensions[x])
    pca.fit(X_train)
    xTrainRed = pca.transform(X_train)
    xTestRed = pca.transform(X_test)
    # pybrain returns a list of 1-element arrays; flatten it to a plain vector
    predicted_values = np.array(fit_predict(xTrainRed, y_train, xTestRed, 40, neurons[x])).flatten()
    mape = statistics.mape((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000)
    print('MAPE is ', mape)
    mae = statistics.mae((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000)
    print('MAE is ', mae)
    mse = statistics.meanSquareError((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000)
    print('MSE is ', mse)
    rmse = math.sqrt(mse)
    print('RMSE is ', rmse)
    nrmse = statistics.normRmse((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000)
    print('NRMSE is ', nrmse)
    preds.append(predicted_values)
# show
fig = plt.figure()
colors = ["g","r","b","c","m","y","k","w"]
legendVars = []
for j in range(len(preds)):
    print(j)
    x, = plt.plot(preds[j]+shifted_value, color=colors[j])
    legendVars.append(x)
plt.xlabel('Hour')
plt.ylabel('Electricity load, kW')
plt.legend(legendVars, names)
plt.show()

--------------------------------------------------------------------------------
/Methods/gbrt.py:
--------------------------------------------------------------------------------
from __future__ import print_function
import math
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor
from tools import statistics

# define a function to convert a vector of time series into a 2D matrix
def convertSeriesToMatrix(vectorSeries, sequence_length):
    matrix = []
    for i in range(len(vectorSeries)-sequence_length+1):
        matrix.append(vectorSeries[i:i+sequence_length])
    return matrix

# load raw data
df_raw = pd.read_csv('../data/load.csv', header=0, usecols=[0,1])
# numpy array
df_raw_array = df_raw.values
# hourly load
list_hourly_load = [df_raw_array[i,1]/1000 for i in range(0, len(df_raw))]
print("Data shape of list_hourly_load: ", np.shape(list_hourly_load))
("Data shape of list_hourly_load: ", np.shape(list_hourly_load)) 28 | k = 0 29 | for j in range(0, len(list_hourly_load)): 30 | if(abs(list_hourly_load[j]-list_hourly_load[j-1])>2 and abs(list_hourly_load[j]-list_hourly_load[j+1])>2): 31 | k = k + 1 32 | list_hourly_load[j] = (list_hourly_load[j - 1] + list_hourly_load[j + 1]) / 2 + list_hourly_load[j - 24] - list_hourly_load[j - 24 - 1] / 2 33 | sum = 0 34 | num = 0 35 | for t in range(1,8): 36 | if(j - 24*t >= 0): 37 | num = num + 1 38 | sum = sum + list_hourly_load[j - 24*t] 39 | if(j + 24*t < len(list_hourly_load)): 40 | num = num + 1 41 | sum = sum + list_hourly_load[j + 24*t] 42 | sum = sum / num 43 | if(abs(list_hourly_load[j] - sum)>3): 44 | k = k + 1 45 | if(list_hourly_load[j] > sum): list_hourly_load[j] = sum + 3 46 | else: list_hourly_load[j] = sum - 3 47 | # shift all data by mean 48 | list_hourly_load = np.array(list_hourly_load) 49 | shifted_value = list_hourly_load.mean() 50 | list_hourly_load -= shifted_value 51 | # the length of the sequnce for predicting the future value 52 | sequence_length = 25 53 | # convert the vector to a 2D matrix 54 | matrix_load = convertSeriesToMatrix(list_hourly_load, sequence_length) 55 | matrix_load = np.array(matrix_load) 56 | print ("Data shape: ", matrix_load.shape) 57 | # split dataset: 90% for training and 10% for testing 58 | # train_row = int(round(0.9 * matrix_load.shape[0])) 59 | train_row = matrix_load.shape[0] - 48 60 | print('train:',train_row,'test:',48) 61 | train_set = matrix_load[:train_row, :] 62 | # random seed 63 | np.random.seed(1234) 64 | # shuffle the training set (but do not shuffle the test set) 65 | np.random.shuffle(train_set) 66 | # the training set 67 | X_train = train_set[:, :-1] 68 | # the last column is the true value to compute the mean-squared-error loss 69 | y_train = train_set[:, -1] 70 | print(X_train[0],y_train[0]) 71 | # the test set 72 | X_test = matrix_load[train_row:, :-1] 73 | y_test = matrix_load[train_row:, -1] 74 | print(X_train.shape, y_train.shape, X_test.shape, y_test.shape) 75 | # gbdt 76 | # gbdt = GradientBoostingRegressor(subsample=1, 77 | # min_samples_split=2, min_samples_leaf=1, max_depth=3, alpha=0.9, 78 | # verbose=0) 79 | # param_grid = { 80 | # 'loss': ['ls', 'lad', 'huber'], 81 | # 'learning_rate': [0.01, 0.02, 0.05, 0.1, 0.2], 82 | # 'n_estimators': [100, 200, 400, 800, 1000], 83 | # 'max_depth': [3, 4, 5, 6], 84 | # 'alpha': [0.7, 0.8, 0.9]} 85 | # gbm = GridSearchCV(gbdt, param_grid) 86 | # gbm.fit(X_train, y_train[:,i]) 87 | # print('Best parameters found by grid search are:', gbm.best_params_) 88 | gbdt = GradientBoostingRegressor(loss='ls', learning_rate=0.2, n_estimators=400, subsample=1, 89 | min_samples_split=2, min_samples_leaf=1, max_depth=3, alpha=0.7, 90 | verbose=0) 91 | gbdt.fit(X_train, y_train) 92 | feature_importance = gbdt.feature_importances_ 93 | # get the predicted values 94 | start = time.clock() 95 | predicted_values = gbdt.predict(X_test) 96 | print('预测耗时:', time.clock() - start, 's') 97 | plt.figure() 98 | plt.scatter(np.arange(1, len(feature_importance) + 1), feature_importance, c='r', zorder=10) 99 | plt.plot(np.arange(1, len(feature_importance) + 1), feature_importance) 100 | plt.xlabel('Feature index') 101 | plt.ylabel('Feature importance') 102 | plt.show() 103 | # evaluation 104 | mape = statistics.mape((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000) 105 | print('MAPE is ', mape) 106 | mae = statistics.mae((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 
# evaluation
mape = statistics.mape((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000)
print('MAPE is ', mape)
mae = statistics.mae((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000)
print('MAE is ', mae)
mse = statistics.meanSquareError((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000)
print('MSE is ', mse)
rmse = math.sqrt(mse)
print('RMSE is ', rmse)
nrmse = statistics.normRmse((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000)
print('NRMSE is ', nrmse)
# plot the results
fig = plt.figure()
plt.plot(y_test + shifted_value, label="$Observed$", c='green')
plt.plot(predicted_values + shifted_value, label="$Predicted$", c='red')
plt.xlabel('Hour')
plt.ylabel('Electricity load, kW')
plt.legend()
plt.show()

--------------------------------------------------------------------------------
/Methods/lstm.py:
--------------------------------------------------------------------------------
from __future__ import print_function
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.layers import Dense, Dropout, LSTM
from keras.models import Sequential
from keras.models import load_model
from tools import statistics

# define a function to convert a vector of time series into a 2D matrix
def convertSeriesToMatrix(vectorSeries, sequence_length):
    matrix = []
    for i in range(len(vectorSeries)-sequence_length+1):
        matrix.append(vectorSeries[i:i+sequence_length])
    return matrix

# load raw data
df_raw = pd.read_csv('../data/load.csv', header=0, usecols=[0,1])
# numpy array
df_raw_array = df_raw.values
# hourly load
list_hourly_load = [df_raw_array[i,1]/1000 for i in range(0, len(df_raw))]
print("Data shape of list_hourly_load: ", np.shape(list_hourly_load))
# outlier handling
k = 0
for j in range(1, len(list_hourly_load) - 1):  # stay inside the series bounds
    if(abs(list_hourly_load[j]-list_hourly_load[j-1])>2 and abs(list_hourly_load[j]-list_hourly_load[j+1])>2):
        k = k + 1
        list_hourly_load[j] = (list_hourly_load[j - 1] + list_hourly_load[j + 1]) / 2 + list_hourly_load[j - 24] - list_hourly_load[j - 24 - 1] / 2
    total = 0
    num = 0
    for t in range(1,8):
        if(j - 24*t >= 0):
            num = num + 1
            total = total + list_hourly_load[j - 24*t]
        if(j + 24*t < len(list_hourly_load)):
            num = num + 1
            total = total + list_hourly_load[j + 24*t]
    total = total / num
    if(abs(list_hourly_load[j] - total)>3):
        k = k + 1
        if(list_hourly_load[j] > total): list_hourly_load[j] = total + 3
        else: list_hourly_load[j] = total - 3
print(k)
plt.plot(list_hourly_load)
plt.show()
# shift all data by mean
list_hourly_load = np.array(list_hourly_load)
shifted_value = list_hourly_load.mean()
list_hourly_load -= shifted_value
# the length of the sequence for predicting the future value
sequence_length = 25
# convert the vector to a 2D matrix
matrix_load = convertSeriesToMatrix(list_hourly_load, sequence_length)
matrix_load = np.array(matrix_load)
print("Data shape: ", matrix_load.shape)
# train_row = int(round(0.9 * matrix_load.shape[0]))
train_row = matrix_load.shape[0] - 48
print('train:', train_row, 'test:', 48)
train_set = matrix_load[:train_row, :]
# random seed
np.random.seed(1234)
# shuffle the training set (but do not shuffle the test set)
np.random.shuffle(train_set)
# the training set
X_train = train_set[:, :-1]
# the last column is the true value to compute the mean-squared-error loss
y_train = train_set[:, -1]
print(X_train[0], y_train[0])
# the test set
X_test = matrix_load[train_row:, :-1]
y_test = matrix_load[train_row:, -1]
time_test = [df_raw_array[i,0] for i in range(train_row+23, len(df_raw))]
# print(time_test[0])  # 7/10/2016 19:00
# the input to the LSTM layer needs to have the shape (number of samples, sequence length, 1)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
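# --- illustrative note (not part of the original script): Keras recurrent
# layers expect 3-D input (samples, timesteps, features); each 24-lag window
# becomes 24 timesteps of a single feature.
print(np.reshape(np.arange(6), (1, 6, 1)).shape)  # one 6-step univariate sample -> (1, 6, 1)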
# lstm
# build the model (Keras 2 API; input_dim/output_dim/nb_epoch were the old Keras 1 names)
model = Sequential()
# layer 1: LSTM
model.add(LSTM(50, input_shape=(X_train.shape[1], 1), return_sequences=True))
model.add(Dropout(0.2))
# layer 2: LSTM
model.add(LSTM(100, return_sequences=False))
model.add(Dropout(0.2))
# layer 3: dense
# linear activation: a(x) = x
model.add(Dense(1, activation='linear'))
# show model
model.summary()
# compile the model
model.compile(loss="mse", optimizer="rmsprop")
# train the model
model.fit(X_train, y_train, batch_size=1024, epochs=100, validation_split=0.05, verbose=2)
# save model
model.save('../lstm.h5')
# load model
model = load_model('../lstm.h5')
# evaluate the result
test_mse = model.evaluate(X_test, y_test, verbose=2)
print('\nThe MSE on the test data set is %.3f over %d test samples.' % (test_mse, len(y_test)))
# get the predicted values
predicted_values = model.predict(X_test)
num_test_samples = len(predicted_values)
predicted_values = np.reshape(predicted_values, (num_test_samples, 1))
# evaluation
mape = statistics.mape((y_test+shifted_value)*1000, (predicted_values+shifted_value)*1000)
print('MAPE is ', mape)
mae = statistics.mae((y_test+shifted_value)*1000, (predicted_values+shifted_value)*1000)
print('MAE is ', mae)
mse = statistics.meanSquareError((y_test+shifted_value)*1000, (predicted_values+shifted_value)*1000)
print('MSE is ', mse)
rmse = math.sqrt(mse)
print('RMSE is ', rmse)
nrmse = statistics.normRmse((y_test+shifted_value)*1000, (predicted_values+shifted_value)*1000)
print('NRMSE is ', nrmse)
# plot the results
fig = plt.figure()
plt.plot(y_test + shifted_value, label="$Observed$", c='green')
plt.plot(predicted_values + shifted_value, label="$Predicted$", c='red')
plt.xlabel('Hour')
plt.ylabel('Electricity load, kW')
plt.legend()
plt.show()

--------------------------------------------------------------------------------
/Methods/rfr.py:
--------------------------------------------------------------------------------
from __future__ import print_function
import math
import time
import joblib  # sklearn.externals.joblib was removed from scikit-learn; use joblib directly
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from tools import statistics

# define a function to convert a vector of time series into a 2D matrix
def convertSeriesToMatrix(vectorSeries, sequence_length):
    matrix = []
    for i in range(len(vectorSeries)-sequence_length+1):
        matrix.append(vectorSeries[i:i+sequence_length])
    return matrix
# load raw data
df_raw = pd.read_csv('../data/load.csv', header=0, usecols=[0,1])
# numpy array
df_raw_array = df_raw.values
# hourly load
list_hourly_load = [df_raw_array[i,1]/1000 for i in range(0, len(df_raw))]
print("Data shape of list_hourly_load: ", np.shape(list_hourly_load))
# outlier handling
k = 0
for j in range(1, len(list_hourly_load) - 1):  # stay inside the series bounds
    if(abs(list_hourly_load[j]-list_hourly_load[j-1])>2 and abs(list_hourly_load[j]-list_hourly_load[j+1])>2):
        k = k + 1
        list_hourly_load[j] = (list_hourly_load[j - 1] + list_hourly_load[j + 1]) / 2 + list_hourly_load[j - 24] - list_hourly_load[j - 24 - 1] / 2
    total = 0
    num = 0
    for t in range(1,8):
        if(j - 24*t >= 0):
            num = num + 1
            total = total + list_hourly_load[j - 24*t]
        if(j + 24*t < len(list_hourly_load)):
            num = num + 1
            total = total + list_hourly_load[j + 24*t]
    total = total / num
    if(abs(list_hourly_load[j] - total)>3):
        k = k + 1
        if(list_hourly_load[j] > total): list_hourly_load[j] = total + 3
        else: list_hourly_load[j] = total - 3
# shift all data by mean
list_hourly_load = np.array(list_hourly_load)
shifted_value = list_hourly_load.mean()
list_hourly_load -= shifted_value
# the length of the sequence for predicting the future value
sequence_length = 25
# convert the vector to a 2D matrix
matrix_load = convertSeriesToMatrix(list_hourly_load, sequence_length)
matrix_load = np.array(matrix_load)
print("Data shape: ", matrix_load.shape)
# train_row = int(round(0.9 * matrix_load.shape[0]))
train_row = matrix_load.shape[0] - 48
print('train:', train_row, 'test:', 48)
train_set = matrix_load[:train_row, :]
# random seed
np.random.seed(1234)
# shuffle the training set (but do not shuffle the test set)
np.random.shuffle(train_set)
# the training set
X_train = train_set[:, :-1]
# the last column is the true value to compute the mean-squared-error loss
y_train = train_set[:, -1]
print(X_train[0], y_train[0])
# the test set
X_test = matrix_load[train_row:, :-1]
y_test = matrix_load[train_row:, -1]
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

# rfr
model = RandomForestRegressor(n_estimators=100, max_features=5)
model.fit(X_train, y_train)
joblib.dump(model, '../rfr.model')
model = joblib.load('../rfr.model')

feature_importance = model.feature_importances_
X = [' Lag_24 ',' Lag_23 ',' Lag_22 ',' Lag_21 ',' Lag_20 ',' Lag_19 ',' Lag_18 ',' Lag_17 ',' Lag_16 ',
     ' Lag_15 ',' Lag_14 ',' Lag_13 ',' Lag_12 ',' Lag_11 ',' Lag_10 ',' Lag_9 ',' Lag_8 ',' Lag_7 ',
     ' Lag_6 ',' Lag_5 ',' Lag_4 ',' Lag_3 ',' Lag_2 ',' Lag_1 ']
# scikit-learn importances already sum to 1, so s only acts as a safety normaliser
s = 0
for i in range(len(feature_importance)):
    s += feature_importance[i]

plt.figure()
plt.bar(np.arange(1, len(feature_importance) + 1), feature_importance/s, color='lightsteelblue')
plt.plot(np.arange(1, len(feature_importance) + 1), feature_importance/s)
plt.xticks(np.arange(1, len(feature_importance) + 1), X)
plt.xlabel('Feature')
plt.ylabel('Feature importance')
plt.grid(True)
plt.show()

# get the predicted values
start = time.perf_counter()  # time.clock() was removed in Python 3.8
predicted_values = model.predict(X_test)
print('Prediction time:', time.perf_counter() - start, 's')
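# --- optional sketch (an assumption, not part of the original script): with
# bootstrap sampling, the out-of-bag score gives a validation estimate
# without touching the 48-hour test window.
_rf_oob = RandomForestRegressor(n_estimators=100, max_features=5, oob_score=True)
_rf_oob.fit(X_train, y_train)
print('OOB R^2:', _rf_oob.oob_score_)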
# evaluation
mape = statistics.mape((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000)
print('MAPE is ', mape)
mae = statistics.mae((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000)
print('MAE is ', mae)
mse = statistics.meanSquareError((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000)
print('MSE is ', mse)
rmse = math.sqrt(mse)
print('RMSE is ', rmse)
nrmse = statistics.normRmse((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000)
print('NRMSE is ', nrmse)
# plot the results
fig = plt.figure()
plt.plot(y_test + shifted_value, label="$Observed$", c='green')
plt.plot(predicted_values + shifted_value, label="$Predicted$", c='red')
plt.xlabel('Hour')
plt.ylabel('Electricity load, kW')
plt.legend()
plt.show()

--------------------------------------------------------------------------------
/Methods/seq2seq.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas import DataFrame
from pandas import concat
from numpy import argmax
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense, Dropout, LSTM
from keras.layers import TimeDistributed
from keras.layers import RepeatVector
from tools import statistics
# convert a time series into a supervised learning problem
# data: sequence of observations as a list or 2D NumPy array. Required.
# n_in: number of lag observations as input (X). Values may be between [1..len(data)]. Optional.
# n_out: number of observations as output (y). Values may be between [0..len(data)-1]. Optional.
# dropnan: whether or not to drop rows with NaN values. Optional. Defaults to True.
def series_to_supervised(data, n_in, n_out, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = DataFrame(data)
    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j + 1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j + 1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j + 1, i)) for j in range(n_vars)]
    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    agg = agg.applymap(lambda x: np.int32(x))
    return agg

# convert data to fixed-width strings
def to_string(X, y, n_numbers, largest):
    max_length = 3
    Xstr = []
    for pattern in X:
        element_list = []
        for element in pattern:
            strp = str(element)
            strp = ''.join([' ' for _ in range(max_length-len(strp))])+strp
            element_list.append(strp)
        element_ensem = ','.join([aa for aa in element_list])
        Xstr.append(element_ensem)
    ystr = []
    for pattern in y:
        element_list = []
        for element in pattern:
            strp = str(element)
            strp = ''.join([' ' for _ in range(max_length-len(strp))])+strp
            element_list.append(strp)
        element_ensem = ','.join([aa for aa in element_list])
        ystr.append(element_ensem)
    return Xstr, ystr

# one hot encode sequences of values binned into n_unique intervals
def one_hot_encode(X, series_min, series_max, n_unique):
    gap = (series_max-series_min)/n_unique
    Xenc = []
    for sequence in X:
        new_index_ensem = []
        for value in sequence:
            new_index = (value-series_min)/gap
            if value == 18544:  # presumably the series maximum; nudge it into the last bin
                new_index = new_index-0.1
            new_index_ensem.append(int(new_index))
        encoding = []
        if value == 18544:
            print(new_index_ensem, new_index, value, series_max, series_min, gap)
        for index in new_index_ensem:
            vector = [0 for _ in range(n_unique)]
            vector[index] = 1
            encoding.append(vector)
        Xenc.append(encoding)
    return np.array(Xenc)

# decode a one hot encoded sequence
def one_hot_decode(y, series_min, series_max, n_unique):
    gap = (series_max-series_min)/n_unique
    y_dec = []
    for encoded_seq in y:
        decoded_seq = [argmax(vector) for vector in encoded_seq]
        decoded_seq = np.array(decoded_seq)
        decoded_seq_tran = list(decoded_seq*gap+series_min)
        y_dec.append(decoded_seq_tran)
    return y_dec


def convertSeriesToMatrix(vectorSeries, sequence_length):
    matrix = []
    for i in range(len(vectorSeries)-sequence_length+1):
        matrix.append(vectorSeries[i:i+sequence_length])
    return matrix
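# --- tiny illustrative run (not part of the original script): two lags in,
# one step out, on an integer toy series; the NaN warm-up rows are dropped.
_demo_frame = series_to_supervised([10, 20, 30, 40, 50], n_in=2, n_out=1)
print(_demo_frame)  # columns var1(t-2), var1(t-1), var1(t)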
if __name__ == '__main__':
    # load raw data
    df_raw = pd.read_csv('../data/load.csv', header=0, usecols=[0, 1])
    # numpy array
    df_raw_array = df_raw.values
    # hourly load
    list_hourly_load = [df_raw_array[i, 1] / 1000 for i in range(0, len(df_raw))]
    print("Data shape of list_hourly_load: ", np.shape(list_hourly_load))
    # outlier handling
    k = 0
    for j in range(1, len(list_hourly_load) - 1):  # stay inside the series bounds
        if(abs(list_hourly_load[j]-list_hourly_load[j-1])>2 and abs(list_hourly_load[j]-list_hourly_load[j+1])>2):
            k = k + 1
            list_hourly_load[j] = (list_hourly_load[j - 1] + list_hourly_load[j + 1]) / 2 + list_hourly_load[j - 24] - list_hourly_load[j - 24 - 1] / 2
        total = 0
        num = 0
        for t in range(1, 8):
            if(j - 24*t >= 0):
                num = num + 1
                total = total + list_hourly_load[j - 24*t]
            if(j + 24*t < len(list_hourly_load)):
                num = num + 1
                total = total + list_hourly_load[j + 24*t]
        total = total / num
        if(abs(list_hourly_load[j] - total)>3):
            k = k + 1
            if(list_hourly_load[j] > total): list_hourly_load[j] = total + 3
            else: list_hourly_load[j] = total - 3
    print(k)
    # plt.plot(list_hourly_load)
    # plt.show()
    # shift all data by mean
    list_hourly_load = np.array(list_hourly_load)
    shifted_value = list_hourly_load.mean()
    list_hourly_load -= shifted_value
    # the length of the sequence for predicting the future value
    sequence_length = 25
    # convert the vector to a 2D matrix
    matrix_load = convertSeriesToMatrix(list_hourly_load, sequence_length)
    matrix_load = np.array(matrix_load)
    print("Data shape: ", matrix_load.shape)
    # train_row = int(round(0.9 * matrix_load.shape[0]))
    train_row = matrix_load.shape[0] - 24*14
    print('train:', train_row, 'test:', 24*14)
    train_set = matrix_load[:train_row, :]
    # random seed
    np.random.seed(1234)
    # shuffle the training set (but do not shuffle the test set)
    np.random.shuffle(train_set)
    # the training set
    X_train = train_set[:, :-1]
    # the last column is the true value to compute the mean-squared-error loss
    y_train = train_set[:, -1]
    # the test set
    X_test = matrix_load[train_row:, :-1]
    y_test = matrix_load[train_row:, -1]
    time_test = [df_raw_array[i, 0] for i in range(train_row + 23, len(df_raw))]
    # print(time_test[0])
    # the input to the LSTM layer needs to have the shape (number of samples, sequence length, 1)
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    y_train = np.reshape(y_train, (y_train.shape[0], 1, 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
    print(np.shape(X_train), np.shape(y_train))
    # create the encoder-decoder LSTM
    model = Sequential()
    model.add(LSTM(150, batch_input_shape=(None, X_train.shape[1], X_train.shape[2])))  # encoder
    model.add(Dropout(0.2))
    model.add(RepeatVector(1))
    model.add(LSTM(150, return_sequences=True))  # decoder
    model.add(Dropout(0.2))
    model.add(LSTM(150, return_sequences=True))  # decoder
    model.add(Dropout(0.3))
    model.add(TimeDistributed(Dense(1, activation='linear')))
    model.compile(loss='mse', optimizer='rmsprop')
    # show model
    # print(model.summary())
    # train the LSTM
    history = model.fit(X_train, y_train, epochs=50, batch_size=50, validation_split=0.05, shuffle=False, verbose=2)
    # save model
    model.save('../seq2seq.h5')
    # model = load_model('../model/seq2seq.h5')
    # evaluate on the held-out window
    predicted_values = model.predict(X_test)
    num_test_samples = len(predicted_values)
    predicted_values = np.reshape(predicted_values, (num_test_samples, 1))
    # evaluation
    mape = statistics.mape((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000)
    print('MAPE is ', mape)
    mae = statistics.mae((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000)
    print('MAE is ', mae)
    mse = statistics.meanSquareError((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000)
    print('MSE is ', mse)
    rmse = math.sqrt(mse)
    print('RMSE is ', rmse)
    nrmse = statistics.normRmse((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000)
    print('NRMSE is ', nrmse)
    # plot the results
    fig = plt.figure()
    plt.plot(y_test + shifted_value, label="$Observed$", c='green')
    plt.plot(predicted_values + shifted_value, label="$Predicted$", c='red')
    plt.xlabel('Hour')
    plt.ylabel('Electricity load, kW')
    plt.legend()
    plt.show()

--------------------------------------------------------------------------------
/Methods/svr.py:
--------------------------------------------------------------------------------
import math
from tools import statistics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm

# define a function to convert a vector of time series into a 2D matrix
def convertSeriesToMatrix(vectorSeries, sequence_length):
    matrix = []
    for i in range(len(vectorSeries)-sequence_length+1):
        matrix.append(vectorSeries[i:i+sequence_length])
    return matrix

# load raw data
df_raw = pd.read_csv('../data/load.csv', header=0, usecols=[0,1])
# numpy array
df_raw_array = df_raw.values
# hourly load
list_hourly_load = [df_raw_array[i,1]/1000 for i in range(0, len(df_raw))]
print("Data shape of list_hourly_load: ", np.shape(list_hourly_load))
# outlier handling
k = 0
for j in range(1, len(list_hourly_load) - 1):  # stay inside the series bounds
    if(abs(list_hourly_load[j]-list_hourly_load[j-1])>2 and abs(list_hourly_load[j]-list_hourly_load[j+1])>2):
        k = k + 1
        list_hourly_load[j] = (list_hourly_load[j - 1] + list_hourly_load[j + 1]) / 2 + list_hourly_load[j - 24] - list_hourly_load[j - 24 - 1] / 2
    total = 0
    num = 0
    for t in range(1,8):
        if(j - 24*t >= 0):
            num = num + 1
            total = total + list_hourly_load[j - 24*t]
        if(j + 24*t < len(list_hourly_load)):
            num = num + 1
            total = total + list_hourly_load[j + 24*t]
    total = total / num
    if(abs(list_hourly_load[j] - total)>3):
        k = k + 1
        if(list_hourly_load[j] > total): list_hourly_load[j] = total + 3
        else: list_hourly_load[j] = total - 3
# shift all data by mean
list_hourly_load = np.array(list_hourly_load)
shifted_value = list_hourly_load.mean()
list_hourly_load -= shifted_value
# the length of the sequence for predicting the future value
sequence_length = 25
# convert the vector to a 2D matrix
matrix_load = convertSeriesToMatrix(list_hourly_load, sequence_length)
matrix_load = np.array(matrix_load)
print("Data shape: ", matrix_load.shape)
# hold out the last week (24*7 hours) for testing
# train_row = int(round(0.9 * matrix_load.shape[0]))
train_row = matrix_load.shape[0] - 24*7
print('train:', train_row, 'test:', 24*7)
train_set = matrix_load[:train_row, :]
# random seed
np.random.seed(1234)
# shuffle the training set (but do not shuffle the test set)
np.random.shuffle(train_set)
# the training set
X_train = train_set[:, :-1]
# the last column is the true value to compute the mean-squared-error loss
y_train = train_set[:, -1]
print(X_train[0], y_train[0])
# the test set
X_test = matrix_load[train_row:, :-1]
y_test = matrix_load[train_row:, -1]
time_test = [df_raw_array[i,0] for i in range(train_row+23, len(df_raw))]
# svr
kernelList = ["rbf"]
names = ["Observed", "Predicted"]
preds = []
preds.append(y_test)
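# --- optional sketch (an assumption, not part of the original run): C=2.0
# below is hard-coded; a small grid search could tune it, e.g.:
# from sklearn.model_selection import GridSearchCV
# _gs = GridSearchCV(svm.SVR(kernel='rbf'), {'C': [0.5, 1.0, 2.0, 4.0], 'epsilon': [0.05, 0.1, 0.2]})
# _gs.fit(X_train, y_train)
# print('best SVR params:', _gs.best_params_)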
--------------------------------------------------------------------------------
/Methods/xgboost_.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import pandas as pd
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | import xgboost as xgb
6 | from tools import statistics
7 | import math
8 | import time
9 | import operator as op
10 | from matplotlib.font_manager import FontProperties
11 | 
12 | 
13 | def convertSeriesToMatrix(vectorSeries, sequence_length):
14 |     matrix = []
15 |     for i in range(len(vectorSeries) - sequence_length + 1):
16 |         matrix.append(vectorSeries[i:i+sequence_length])
17 |     return matrix
18 | 
19 | 
20 | def create_feature_map(before):
21 |     outfile = open('../xgb.fmap', 'w')
22 |     for i in range(before):
23 |         j = before - i  # feature i is the load lagged by (before - i) hours
24 |         outfile.write('{0}\t{1}\tq\n'.format(i, 'Lag_' + str(j)))
25 |     outfile.close()
26 | 
27 | # load raw data
28 | df_raw = pd.read_csv('../data/load.csv', header=0, usecols=[0,1])
29 | # numpy array
30 | df_raw_array = df_raw.values
31 | # hourly load
32 | list_hourly_load = [df_raw_array[i,1]/1000 for i in range(0, len(df_raw))]
33 | print("Data shape of list_hourly_load: ", np.shape(list_hourly_load))
34 | k = 0
35 | for j in range(1, len(list_hourly_load) - 1):  # skip the endpoints so j-1 and j+1 stay in range
36 |     if abs(list_hourly_load[j] - list_hourly_load[j-1]) > 2 and abs(list_hourly_load[j] - list_hourly_load[j+1]) > 2:
37 |         k = k + 1
38 |         list_hourly_load[j] = (list_hourly_load[j - 1] + list_hourly_load[j + 1]) / 2 + list_hourly_load[j - 24] - list_hourly_load[j - 24 - 1] / 2
39 |     total = 0  # mean load at the same hour on up to seven neighbouring days (renamed from "sum" to avoid shadowing the built-in)
40 |     num = 0
41 |     for t in range(1, 8):
42 |         if j - 24*t >= 0:
43 |             num = num + 1
44 |             total = total + list_hourly_load[j - 24*t]
45 |         if j + 24*t < len(list_hourly_load):
46 |             num = num + 1
47 |             total = total + list_hourly_load[j + 24*t]
48 |     mean_load = total / num
49 |     if abs(list_hourly_load[j] - mean_load) > 3:  # clamp residual spikes to within 3 of the neighbouring-day mean
50 |         k = k + 1
51 |         if list_hourly_load[j] > mean_load: list_hourly_load[j] = mean_load + 3
52 |         else: list_hourly_load[j] = mean_load - 3
53 | # print(k)
54 | # plt.plot(list_hourly_load)
55 | # plt.show()
56 | # shift all data by the mean
57 | list_hourly_load = np.array(list_hourly_load)
58 | shifted_value = list_hourly_load.mean()
59 | list_hourly_load -= shifted_value
60 | # the length of the sequence used to predict the future value
61 | sequence_length = 25
62 | # convert the vector to a 2D matrix
63 | matrix_load = convertSeriesToMatrix(list_hourly_load, sequence_length)
64 | matrix_load = np.array(matrix_load)
65 | print("Data shape: ", matrix_load.shape)
66 | # train_row = int(round(0.9 * matrix_load.shape[0]))
67 | train_row = matrix_load.shape[0] - 24*14  # hold out the last two weeks; the original "- matrix_load.shape[0]" left an empty training set
68 | print('train:', train_row, 'test:', 24*14)
69 | train_set = matrix_load[:train_row, :]
70 | # random seed
71 | np.random.seed(1234)
72 | # shuffle the training set (but do not shuffle the test set)
73 | np.random.shuffle(train_set)
74 | # the training set
75 | X_train = train_set[:, :-1]
76 | # the last column is the true value to compute the mean-squared-error loss
77 | y_train = train_set[:, -1]
78 | print(X_train[0], y_train[0])
79 | # the test set
80 | X_test = matrix_load[train_row:, :-1]
81 | y_test = matrix_load[train_row:, -1]
82 | print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
83 | 
84 | # xgboost
85 | create_feature_map(24)
86 | X_train, X_test = np.array(X_train), np.array(X_test)  # already arrays; kept for safety
87 | data_train = xgb.DMatrix(X_train, label=y_train)
88 | data_test = xgb.DMatrix(X_test, label=y_test)
89 | watch_list = [(data_test, 'eval'), (data_train, 'train')]  # the test set is only monitored, not used to pick rounds
90 | param = {'max_depth': 6, 'eta': 0.1, 'objective': 'reg:squarederror'}  # 'reg:linear' and 'silent' are deprecated aliases from older XGBoost releases
91 | bst = xgb.train(param, data_train, num_boost_round=60, evals=watch_list)
92 | # save model
93 | bst.save_model('../xgboost.model')
94 | # load model
95 | bst = xgb.Booster()
96 | bst.load_model('../xgboost.model')
97 | 
98 | # xgb.plot_importance(bst)
99 | importance = bst.get_fscore(fmap='../xgb.fmap')
100 | print(importance)
101 | importance = sorted(importance.items(), key=op.itemgetter(1))
102 | df = pd.DataFrame(importance, columns=['feature', 'fscore'])
103 | df['fscore'] = df['fscore'] / df['fscore'].sum()
104 | df.plot(kind='barh', x='feature', y='fscore')
105 | font = FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc', size=12)  # raw strings so the backslashes are not escapes
106 | font_title = FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc', size=14)
107 | plt.show()
108 | # get the predicted values
109 | start = time.perf_counter()  # time.clock() was removed in Python 3.8
110 | predicted_values = bst.predict(data_test)
111 | print('Prediction took', time.perf_counter() - start, 'seconds')
112 | # evaluation
113 | mape = statistics.mape((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000)
114 | print('MAPE is ', mape)
115 | mae = statistics.mae((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000)
116 | print('MAE is ', mae)
117 | mse = statistics.meanSquareError((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000)
118 | print('MSE is ', mse)
119 | rmse = math.sqrt(mse)
120 | print('RMSE is ', rmse)
121 | nrmse = statistics.normRmse((y_test + shifted_value) * 1000, (predicted_values + shifted_value) * 1000)
122 | print('NRMSE is ', nrmse)
123 | # plot the results
124 | fig = plt.figure()
125 | plt.plot(y_test + shifted_value, label="$Observed$", c='green')
126 | plt.plot(predicted_values + shifted_value, label="$Predicted$", c='red')
127 | plt.xlabel('Hour')
128 | plt.ylabel('Electricity load, kW')
129 | plt.legend()
130 | plt.show()
131 | 
--------------------------------------------------------------------------------
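The booster above always runs a fixed 60 rounds. Below is a minimal sketch, not part of the repository, of letting early stopping choose the round count instead; X_train, y_train, param, and data_test come from the script above, and holding out the last 24*7 training windows as a validation set is an illustrative choice:

import xgboost as xgb

# carve a validation set off the training windows
data_fit = xgb.DMatrix(X_train[:-24*7], label=y_train[:-24*7])
data_valid = xgb.DMatrix(X_train[-24*7:], label=y_train[-24*7:])
bst = xgb.train(param, data_fit,
                num_boost_round=500,
                evals=[(data_valid, 'valid')],
                early_stopping_rounds=20,  # stop once validation RMSE fails to improve for 20 rounds
                verbose_eval=False)
print('best iteration:', bst.best_iteration)
# xgboost >= 1.4: predict with only the best trees (older releases used ntree_limit)
predicted_values = bst.predict(data_test, iteration_range=(0, bst.best_iteration + 1))

Because the script shuffles its training windows first, this validation slice is random rather than chronological; an unshuffled split would give a more honest estimate of out-of-sample error.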