├── .gitattributes
├── assignment
│   └── 挑战性任务介绍-2022.pdf
├── presentation
│   ├── 能源互联网导论大作业报告_组7.pdf
│   └── 2022.5.30第七组题目A_v2.pptx
├── README.md
└── code
    ├── frequency_distribution.py
    ├── forecast_train.py
    └── forecast_pred.py

/.gitattributes:
--------------------------------------------------------------------------------
* text=auto
*.js linguist-language=python
--------------------------------------------------------------------------------
/assignment/挑战性任务介绍-2022.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AMINmiNn/Energy_Internet_Final/HEAD/assignment/挑战性任务介绍-2022.pdf
--------------------------------------------------------------------------------
/presentation/能源互联网导论大作业报告_组7.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AMINmiNn/Energy_Internet_Final/HEAD/presentation/能源互联网导论大作业报告_组7.pdf
--------------------------------------------------------------------------------
/presentation/2022.5.30第七组题目A_v2.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AMINmiNn/Energy_Internet_Final/HEAD/presentation/2022.5.30第七组题目A_v2.pptx
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Energy_Internet_Final
## The final project of Introduction to Energy Internet (2022 Spring)
**Basic Assignment**: Based on sklearn, find the best-performing model for an accurate prediction of PV and wind power output; an interpretation of the model is also required.
**Further Task**: Search the literature and find a feasible method for probabilistic (interval) prediction.
**This project received a final score of 4.0/4.0.**
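A minimal sketch of the two tasks (the CSV path and column names below are placeholders, not this project's data files): a point forecast plus a quantile-based interval forecast can be produced with scikit-learn roughly as follows.

```python
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split

# hypothetical dataset: weather features plus the measured PV/wind power
df = pd.read_csv("power.csv")
X, y = df.drop(columns=["power"]), df["power"]
X_tr, X_te, y_tr, y_te = train_test_split(X, y, shuffle=False)

# Basic Assignment: point forecast
y_pred = GradientBoostingRegressor().fit(X_tr, y_tr).predict(X_te)

# Further Task: 5% / 95% quantile models give a 90% prediction interval
lower = GradientBoostingRegressor(loss="quantile", alpha=0.05).fit(X_tr, y_tr).predict(X_te)
upper = GradientBoostingRegressor(loss="quantile", alpha=0.95).fit(X_tr, y_tr).predict(X_te)
```

The scripts in `code/` follow the same point-forecast workflow, but build the intervals from the distribution of the forecast errors rather than from quantile regression.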
**Co-authors: Liangyuchen Lu, Bowen Su, Min Yang**


### Index Terms: scikit-learn, probabilistic prediction, PV power, wind power
### 关键词:Sk-learn、概率预测、光伏出力、风电出力
--------------------------------------------------------------------------------
/code/frequency_distribution.py:
--------------------------------------------------------------------------------
import scipy.stats as st
import matplotlib.pyplot as plt
import numpy as np
import collections
from sklearn.preprocessing import MinMaxScaler
import csv
import math
from pylab import *
import matplotlib.mlab as mlab
from sklearn.utils import shuffle

i = 0
j = []
data = []
X = []
indicess = []
xback = 24
# forecast errors for winter rainy days
with open(r'D:\error01冬季雨天.csv') as f:
    reader = csv.reader(f)
    for row in reader:
        data.append(row[:])  # collect every row
data1 = []
data = np.array(data)
m, n = np.shape(data)
for i in range(m):
    for j in range(n):
        # print(data[i][j])
        data[i][j] = data[i][j].astype('float64')  # cast each entry to float64
for i in range(m):
    for j in range(n):
        # print(data[i][j])
        data1.append(data[i][j])
print("the type of data1", type(data1[1]))
data = data.astype('float64')

# print(data)
print("the shape of data", len(data))


# Maximum-likelihood estimates of the normal-distribution parameters
def mle(x):
    u = np.mean(x)
    thea = np.std(x)
    return u, thea


# Fit the error distribution
print(mle(data))
u, thea = mle(data)
print(u)
print(thea)
y = st.norm.pdf(data[:6], u, thea)
print(y)
# matplotlib >= 3.1 removed the `normed` argument; use `density` instead
count, bins, ignored = plt.hist(data, bins=20, density=False)
print("count", len(count))
print("bins", len(bins))
plt.plot(bins[:20], count, "r")
pro = count / np.sum(count)
plt.xlabel("x")
plt.ylabel("probability density")
plt.show()

plt.plot(bins[:20], pro, "r", lw=2)
plt.show()
low = -1.65 * thea + u  # ±1.65σ corresponds to a 90% confidence level
up = 1.65 * thea + u
data0 = []
print("lower bound:", low)
print("upper bound:", up)

# measured and predicted values for winter rainy days
with open(r'D:\真实值冬季雨天.csv') as f:
    reader = csv.reader(f)
    for row in reader:
        data0.append(row[:])  # collect every row
data01 = []
data0 = np.array(data0)
# print(data0)
m, n = np.shape(data0)
print("the shape of data0", np.shape(data0))
for i in range(m):
    for j in range(n):
        # print(data0[i][j])
        data0[i][j] = data0[i][j].astype('float64')  # cast each entry to float64
for i in range(m):
    for j in range(n):
        # print(data[i][j])
        data01.append(data0[i][j])
# print("the type of data1", type(data1[1]))
data0 = data0.astype('float64')
print(np.shape(data0))
print(data0[:4])
print(data0[:2, 0])
datamax = np.max(data0[:, 0])
p_low = list(map(lambda x: (x - abs(low) * datamax), data0[:, 0]))
p_up = list(map(lambda x: (x + up * datamax), data0[:, 1]))
x = [i for i in range(len(p_low))]
print(x)
# Plot the confidence interval
l = 90
k = 0
plt.plot(x[k:l], p_low[k:l], 'g', lw=2, label='lower bound')
plt.plot(x[k:l], p_up[k:l], 'g', lw=2, label='upper bound')
plt.plot(x[k:l], data0[k:l, 0], 'b', lw=2, label='actual')
plt.plot(data0[k:l, 1], 'r', lw=2, label='predicted')
plt.fill_between(x[k:l], p_low[k:l], p_up[k:l], color="c", alpha=0.1)
plt.title('Confidence interval', fontsize=18)  # chart title
plt.legend(loc=0, numpoints=1)
leg = plt.gca().get_legend()
ltext = leg.get_texts()
plt.setp(ltext, fontsize='small')  # shrink the legend font
# savefig('D:/十折交叉验证/LSTM1.jpg')
plt.show()
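# Reference: standard definitions of the interval-quality indices computed below.
# For N test points with interval [L_i, U_i], target y_i, target range R = max(y) - min(y)
# and nominal confidence level mu (0.90 here):
#     PICP  = (1/N) * sum_i 1{L_i <= y_i <= U_i}        (coverage probability)
#     PINAW = (1 / (N * R)) * sum_i (U_i - L_i)          (normalized average width)
#     CWC   = PINAW * (1 + gamma(PICP) * exp(-eta * (PICP - mu))),
#             with gamma(PICP) = 0 if PICP >= mu else 1 and eta a penalty factor (50-100).
# The computation that follows mirrors this structure (with an extra PICP factor in the CWC term).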
# Evaluate the prediction interval: PICP measures the coverage rate of the interval,
# PINAW its normalized average width, and CWC combines the two.
count = 0

for i in range(len(p_low)):
    if data0[i][1] >= p_low[i] and data0[i][1] <= p_up[i]:
        count = count + 1

PICP = count / len(p_low)
print("PICP", PICP)

# For probabilistic interval prediction, at the same confidence level a narrower interval is better.
max0 = np.max(data0[:, 1])
min0 = np.min(data0[:, 1])
sum0 = list(map(lambda x: (x[1] - x[0]), zip(p_low, p_up)))
sum1 = np.sum(sum0) / len(sum0)
PINAW = 1 / (max0 - min0) * sum1
print("PINAW", PINAW)
# Composite index: CWC = PINAW * (1 + R(PICP) * exp(-eta * (PICP - mu)))
mu = 0.90  # nominal confidence level of the interval (not the error mean)
g = 90  # penalty factor, typically between 50 and 100
e0 = math.exp(-g * (PICP - mu))
if PICP >= mu:
    r = 0
else:
    r = 1
CWC = PINAW * (1 + r * PICP * e0)
print("CWC", CWC)
--------------------------------------------------------------------------------
/code/forecast_train.py:
--------------------------------------------------------------------------------
# Imports
import os

import numpy as np
import sklearn.neighbors._regression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor
# from sklearn.neighbors import KNeighborsRegressor
# from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.svm import SVR  # SVM regression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.metrics import explained_variance_score, mean_absolute_error, mean_absolute_percentage_error
import pandas as pd
import matplotlib.pyplot as plt  # plotting
from sklearn.model_selection import GridSearchCV
import distfit
import scipy.stats as st
from sklearn import linear_model
import time


# %% Load data
dataset = pd.read_csv('AT_power2.csv', header=0, index_col=0)

dataset['time'] = pd.to_datetime(dataset['time'])
# train_df['label'] = (train_df['repay_date'] - train_df['auditing_date']).dt.days
dataset.loc[:, 'dayofweek'] = dataset['time'].dt.dayofweek
dataset.loc[:, 'month'] = dataset['time'].dt.month

dataset['VirtualDay'] = (dataset.index / 24).astype('int')
dataset['VirtualDay'] = dataset['VirtualDay'] % 365

dataset.loc[:, 'VDay_sin'] = round(np.sin(2 * np.pi * dataset['VirtualDay'] / 365), 5)
dataset.loc[:, 'VDay_cos'] = round(np.cos(2 * np.pi * dataset['VirtualDay'] / 365), 5)

dataset.loc[:, 'hour_sin'] = round(np.sin(2 * np.pi * dataset['time'].dt.hour / 24), 2)
dataset.loc[:, 'hour_cos'] = round(np.cos(2 * np.pi * dataset['time'].dt.hour / 24), 2)

dataset.loc[:, 'month_sin'] = round(np.sin(2 * np.pi * dataset['time'].dt.month / 12), 2)
dataset.loc[:, 'month_cos'] = round(np.cos(2 * np.pi * dataset['time'].dt.month / 12), 2)

dataset = dataset.drop(['time'], axis=1)
values = dataset.values
# integer encode direction
# encoder = LabelEncoder()
# values[:, 4] = encoder.fit_transform(values[:, 4])
# ensure all data is float
values = values.astype('float32')
# normalize features
# scaler = MinMaxScaler(feature_range=(0, 1)).fit(values)
# scaled = scaler.fit_transform(values)
# frame as supervised learning
# values = scaled
for i in range(values.shape[1]):
    col = values[:, i]  # current column

    # check whether this column contains NaN
    nan_col = np.count_nonzero(col != col)
    if nan_col != 0:  # NaNs are present
        not_nan = col[col == col]  # non-NaN entries
        col[np.isnan(col)] = np.mean(not_nan)  # replace NaN with the column mean

n_train_hours = 365 * 24  # one year of data for training
train = values[:n_train_hours, :]
test = values[n_train_hours:n_train_hours + 24 * 365, :]  # the following year (24*365 h) as the test set
# split into input and outputs
train_x_raw, train_y_raw = train[:, 1:], train[:, 0]
test_x_raw, test_y_raw = test[:, 1:], test[:, 0]
scaler1 = MinMaxScaler()
scaler2 = MinMaxScaler()
scaler3 = MinMaxScaler()
scaler4 = MinMaxScaler()
train_x = scaler1.fit_transform(train_x_raw)
train_y = scaler2.fit_transform(train_y_raw.reshape(-1, 1))
# note: the test set is scaled with its own scalers; predictions are inverse-transformed with scaler4
test_x = scaler3.fit_transform(test_x_raw)
test_y = scaler4.fit_transform(test_y_raw.reshape(-1, 1))

# reshape input to be 3D [samples, timesteps, features]
# %%
# Models
model_SVR = SVR(kernel='rbf', gamma=0.01, C=450)  # support-vector regression model

model_MLP = MLPRegressor(alpha=0.03, hidden_layer_sizes=(180, 90), activation='relu', solver='adam', random_state=11)
# model_RF = RandomForestRegressor(n_estimators=460, random_state=11, max_depth=21, max_features=12)
model_RF = RandomForestRegressor(random_state=11, n_estimators=460, max_features=None, max_depth=19,
                                 min_samples_split=2, min_samples_leaf=1)
model_GBDT = GradientBoostingRegressor(random_state=11, n_estimators=350, learning_rate=0.05, max_depth=7,
                                       subsample=0.55, min_samples_split=100)
model_ABR = AdaBoostRegressor(DecisionTreeRegressor(max_depth=7, min_samples_split=20, min_samples_leaf=5),
                              random_state=11, n_estimators=400, learning_rate=0.05, loss='linear')

model_names = ['SVM', 'MLP', 'RF', 'GBDT']  # model names
# model_dic = [model_SVR, model_MLP, model_RF, model_GBDT]  # collection of regression model objects
model_dic = [model_ABR]

# %% model_SVR
model_SVR = SVR(kernel='rbf')
pre_y_list = []  # predictions of each regression model
# '''
gamma = [0.3, 0.1, 0.03, 0.01]
C = [10, 25, 450]
# C = range(330, 500, 20)
param_grid = dict(gamma=gamma, C=C)
gsearch1 = GridSearchCV(model_SVR, param_grid, cv=4, verbose=2, scoring="neg_mean_squared_error", )
gsearch1.fit(train_x, train_y.ravel())

means = gsearch1.cv_results_['mean_test_score']
params = gsearch1.cv_results_['params']
for mean, param in zip(means, params):
    print("%f with: %r" % (mean, param))

print(gsearch1.best_params_)
print(gsearch1.best_score_)

# '''
'''
gammas = []
Cs = []
score = []
for gamma in np.arange(0.015, 0.035, 0.002):
    for C in np.arange(100, 118, 2):
        print('~' * 30, '\ngamma={}:, C={}:\n'.format(gamma, C))
        gammas.append(gamma)
        Cs.append(C)
        model_SVR = SVR(kernel='rbf', gamma=gamma, C=C)
        sc = np.sqrt(
            -cross_val_score(model_SVR, train_x, train_y.ravel(), cv=n_folds, scoring="neg_mean_squared_error",
                             verbose=1))
        score.append(sc.mean())
plt.plot(gammas, score)
plt.xlabel('gamma')
plt.ylabel('score')
plt.show()
'''
pre_y_list = []  # predictions of each regression model
model_SVR = SVR(kernel='rbf', gamma=0.01, C=10)
pre_y_list.append(scaler4.inverse_transform(model_SVR.fit(train_x, train_y.ravel()).predict(test_x).reshape(-1, 1)))

# %% model_RF
n_folds = 6  # number of cross-validation folds
'''
model_RF = RandomForestRegressor(
    n_estimators=100, criterion='gini', max_depth=None, min_samples_split=2, min_samples_leaf=1,
    min_weight_fraction_leaf=0.0, max_features='auto', max_leaf_nodes=None, min_impurity_decrease=0.0,
    min_impurity_split=None, bootstrap=True, oob_score=False, n_jobs=None, random_state=11, verbose=1,
    warm_start=False
)
'''
model_RF = RandomForestRegressor(random_state=11, n_estimators=460, max_depth=19, max_features=None, oob_score=True)
n_e = [460, 480]
m_f = [13, 14, 15]
m_d = range(19, 24, 1)
m_ss = range(5, 11, 5)
m_sl = range(5, 11, 5)
param_grid = dict(min_samples_leaf=m_sl, min_samples_split=m_ss)
gsearch1 = GridSearchCV(model_RF, param_grid, cv=3, verbose=2, scoring='neg_mean_squared_error', )
gsearch1.fit(train_x, train_y.ravel())

print(gsearch1.best_params_)
print(gsearch1.best_score_)

pre_y_list = []
model_RF = RandomForestRegressor(random_state=11, n_estimators=300, max_depth=5, max_features=None,
                                 min_samples_split=5,
                                 min_samples_leaf=10)
pre_y_list.append(scaler4.inverse_transform(model_RF.fit(train_x, train_y.ravel()).predict(test_x).reshape(-1, 1)))
# %% model_GBDT
model_GBDT = GradientBoostingRegressor(random_state=11)
pre_y_list = []  # predictions of each regression model
lr = [0.01, 0.03, 0.05]
n_e = [300, 350, 400]
subsample = [0.55, 0.65, 0.75]
m_d = [5, 6, 7, 8, 9]
m_ss = [40, 100]
param_grid = dict(learning_rate=lr, n_estimators=n_e, subsample=subsample, max_depth=m_d, min_samples_split=m_ss)
gsearch1 = GridSearchCV(model_GBDT, param_grid, cv=3, verbose=2, scoring='neg_mean_squared_error', )
gsearch1.fit(train_x, train_y.ravel())

means = gsearch1.cv_results_['mean_test_score']
params = gsearch1.cv_results_['params']
for mean, param in zip(means, params):
    print("%f with: %r" % (mean, param))
print(gsearch1.best_params_)
print(gsearch1.best_score_)
'''
for m_d in np.arange(10, 101, 10):
    print('~' * 30, '\nn_e={}:,\n'.format(m_d))
    m_ds.append(m_d)
    model_RF = GradientBoostingRegressor(n_estimators=m_d, random_state=11)
    sc = np.sqrt(
        -cross_val_score(model_RF, train_x, train_y.ravel(), cv=n_folds, scoring="neg_mean_squared_error",
                         verbose=1))
    score.append(sc.mean())
plt.plot(m_ds, score)
plt.xlabel('m_f')
plt.ylabel('score')
plt.show()
'''
pre_y_list = []
model_GBDT = GradientBoostingRegressor(random_state=11, n_estimators=400, learning_rate=0.01, subsample=0.55,
                                       max_depth=5, min_samples_split=100)
pre_y_list.append(scaler4.inverse_transform(model_GBDT.fit(train_x, train_y.ravel()).predict(test_x).reshape(-1, 1)))

# %%
model_ABR = AdaBoostRegressor(DecisionTreeRegressor(max_depth=7, min_samples_split=20, min_samples_leaf=5),
                              random_state=11, n_estimators=400, learning_rate=0.05, loss='linear')
# AdaBoostRegressor(DecisionTreeRegressor(max_depth=7, min_samples_split=20, min_samples_leaf=5),
#                   random_state=11, n_estimators=400, learning_rate=0.05, loss='linear')
pre_y_list = []  # predictions of each regression model
lr = [0.2, 0.5]
n_e = [450, 500]
# subsample = [0.55, 0.65, 0.75]
m_d = [5, 7, 9]
m_ss = [40, 100]
param_grid = dict(learning_rate=lr, n_estimators=n_e)
gsearch1 = GridSearchCV(model_ABR, param_grid, cv=4, verbose=2, scoring='neg_mean_squared_error', )
gsearch1.fit(train_x, train_y.ravel())

means = gsearch1.cv_results_['mean_test_score']
params = gsearch1.cv_results_['params']
for mean, param in zip(means, params):
    print("%f with: %r" % (mean, param))
print(gsearch1.best_params_)
print(gsearch1.best_score_)

pre_y_list = []
model_ABR = AdaBoostRegressor(DecisionTreeRegressor(max_depth=7, min_samples_split=20, min_samples_leaf=5),
                              random_state=11, n_estimators=450, learning_rate=0.2, loss='linear')
pre_y_list.append(scaler4.inverse_transform(model_ABR.fit(train_x, train_y.ravel()).predict(test_x).reshape(-1, 1)))

# %% model_KNR
model_KNR = KNeighborsRegressor(weights="distance", algorithm="auto")

pre_y_list = []  # predictions of each regression model
l_s = [5]
n_n = [14, 15, 16]
param_grid = dict(leaf_size=l_s, n_neighbors=n_n)
gsearch1 = GridSearchCV(model_KNR, param_grid, cv=4, verbose=2, scoring='neg_mean_squared_error', )
gsearch1.fit(train_x, train_y.ravel())

means = gsearch1.cv_results_['mean_test_score']
params = gsearch1.cv_results_['params']
for mean, param in zip(means, params):
    print("%f with: %r" % (mean, param))
print(gsearch1.best_params_)
print(gsearch1.best_score_)

pre_y_list = []
model_KNR = KNeighborsRegressor(weights="distance", algorithm="auto")
pre_y_list.append(scaler4.inverse_transform(model_KNR.fit(train_x, train_y.ravel()).predict(test_x).reshape(-1, 1)))

# %% kernel=RBF, normalize_y=False, random_state=11
model_KR = KernelRidge(kernel='rbf')
# AdaBoostRegressor(DecisionTreeRegressor(max_depth=7, min_samples_split=20, min_samples_leaf=5),
#                   random_state=11, n_estimators=400, learning_rate=0.05, loss='linear')
pre_y_list = []  # predictions of each regression model
al = [0.03, 0.1]
ga = [0.3, 0.1]
param_grid = dict(alpha=al, gamma=ga)
gsearch1 = GridSearchCV(model_KR, param_grid, cv=4, verbose=2, scoring='neg_mean_squared_error', )
gsearch1.fit(train_x, train_y.ravel())

means = gsearch1.cv_results_['mean_test_score']
params = gsearch1.cv_results_['params']
for mean, param in zip(means, params):
    print("%f with: %r" % (mean, param))
print(gsearch1.best_params_)
print(gsearch1.best_score_)

# %% model_MLP
# model_MLP = MLPRegressor(
#     alpha=0.03, hidden_layer_sizes=(140, 35), activation='relu', solver='adam', random_state=11
# )
model_MLP = MLPRegressor(random_state=11, activation='relu', solver='adam')
pre_y_list = []  # predictions of each regression model
al = [0.02, 0.03, 0.05]
h_l = [(i, j) for i in range(160, 210, 10) for j in range(80, 105, 5)]
param_grid = dict(hidden_layer_sizes=h_l, alpha=al)
gsearch1 = GridSearchCV(model_MLP, param_grid, scoring='neg_mean_squared_error', cv=4, verbose=2)
gsearch1.fit(train_x, train_y.ravel())

means = gsearch1.cv_results_['mean_test_score']
params = gsearch1.cv_results_['params']
for mean, param in zip(means, params):
    print("%f with: %r" % (mean, param))
print(gsearch1.best_params_)
print(gsearch1.best_score_)

pre_y_list = []
model_MLP = MLPRegressor(alpha=0.02, hidden_layer_sizes=(170, 90), activation='relu', solver='adam',
                         random_state=11)
pre_y_list.append(scaler4.inverse_transform(model_MLP.fit(train_x, train_y.ravel()).predict(test_x).reshape(-1, 1)))
# %%
a = 5000
b = 24
# plt.plot(train_y[a:a + b], label='train')

plt.plot(test_y[a:a + b], label='real')
plt.plot(pre_y_list[0][a:a + b], label='pred')
# plt.plot(pre_y_list[1][a:a + b], label='ANN')
# plt.plot(pre_y_list[2][a:a + b], label='SVM')
plt.legend()
plt.show()
# %% Post-process the predictions
# Shift so that the predicted power has no negative values
if np.min(pre_y_list[0]) < 0:
    temp_min = np.min(pre_y_list[0])
    for i in range(len(pre_y_list[0])):
        pre_y_list[0][i] = pre_y_list[0][i] - temp_min

real = test_y_raw.reshape(-1, 1)
# Plot
plt.plot(test_y_raw, label='real')
plt.plot(pre_y_list[0], label='pred')
plt.title("MAPE = {:.2f}%".format(100 * mean_absolute_percentage_error(test_y_raw, pre_y_list[0])))
plt.legend()
plt.show()
# %% Feature importances
fi = model_GBDT.feature_importances_.reshape(-1, 1)

# %% Fit the distribution of the relative forecast errors
predict = pre_y_list[0]
percentage_error_up = []
percentage_error_low = []

k = 0
for i in range(len(real)):
    if real[i] != 0:
        p_e = (predict[i] - real[i]) / real[i]
        if p_e >= 0:
            percentage_error_up.append(p_e)
        else:
            percentage_error_low.append(-p_e)

percentage_error_up = np.array(percentage_error_up)
percentage_error_low = np.array(percentage_error_low)
dist1 = distfit.distfit(todf=True, alpha=0.05)
dist1.fit_transform(percentage_error_up)
dist1.plot()
plt.show()
dist2 = distfit.distfit(todf=True, alpha=0.05)
dist2.fit_transform(percentage_error_low)
dist2.plot()
plt.show()
# %% Plot the prediction intervals
x = range(168)
# keep only the first week (168 h) of the test set so the series match the x-axis
predict = predict[:168]
real = real[:168]
plt.title("AT_wind_interval_prediction")
plt.xlabel("t/hour")
plt.ylabel("AT_wind_power/MW")
# relative-error quantiles, presumably taken from the fitted error distributions above
predict_up_90 = predict + predict * 0.491348592248658
predict_up_80 = predict + predict * 0.36800481592970944
predict_up_60 = predict + predict * 0.2891681902876261
predict_up_40 = predict + predict * 0.21777260713011395
predict_up_20 = predict + predict * 0.17254764502471595
predict_up_10 = predict + predict * 0.15360228719589675

predict_low_90 = predict - predict * 0.44443957248573357
predict_low_80 = predict - predict * 0.3441282064736304
predict_low_60 = predict - predict * 0.24327358312921868
predict_low_40 = predict - predict * 0.18386844327028132
predict_low_20 = predict - predict * 0.14142363237771383
predict_low_10 = predict - predict * 0.12394431130732217

plt.plot(x, predict, label='predict', color='darkgreen')
plt.plot(x, real, label='real', color='darkblue')
plt.fill_between(x, predict_up_90.ravel(), predict_up_80.ravel(),  # upper, lower bound
                 label='Confidence Interval:90%',
                 facecolor='blue',  # fill colour
                 alpha=0.4)  # transparency
plt.fill_between(x, predict_up_80.ravel(), predict_up_60.ravel(),  # upper, lower bound
                 label='Confidence Interval:80%',
                 facecolor='blue',  # fill colour
                 alpha=0.5)  # transparency
plt.fill_between(x, predict_up_60.ravel(), predict_up_40.ravel(),  # upper, lower bound
                 label='Confidence Interval:60%',
                 facecolor='blue',  # fill colour
                 alpha=0.7)  # transparency
plt.fill_between(x, predict_up_40.ravel(), predict_up_20.ravel(),  # upper, lower bound
                 label='Confidence Interval:40%',
                 facecolor='blue',  # fill colour
                 alpha=0.8)  # transparency
plt.fill_between(x, predict_up_20.ravel(), predict_up_10.ravel(),  # upper, lower bound
                 label='Confidence Interval:20%',
                 facecolor='blue',  # fill colour
                 alpha=0.9)  # transparency
plt.fill_between(x, predict_up_10.ravel(), predict.ravel(),  # upper, lower bound
                 label='Confidence Interval:10%',
                 facecolor='blue',  # fill colour
                 alpha=1)  # transparency


plt.fill_between(x, predict_low_90.ravel(), predict_low_80.ravel(),  # upper, lower bound
                 facecolor='blue',  # fill colour
                 alpha=0.4)  # transparency
plt.fill_between(x, predict_low_80.ravel(), predict_low_60.ravel(),  # upper, lower bound
                 facecolor='blue',  # fill colour
                 alpha=0.5)  # transparency
plt.fill_between(x, predict_low_60.ravel(), predict_low_40.ravel(),  # upper, lower bound
                 facecolor='blue',  # fill colour
                 alpha=0.7)  # transparency
plt.fill_between(x, predict_low_40.ravel(), predict_low_20.ravel(),  # upper, lower bound
                 facecolor='blue',  # fill colour
                 alpha=0.8)  # transparency
plt.fill_between(x, predict_low_20.ravel(), predict_low_10.ravel(),  # upper, lower bound
                 facecolor='blue',  # fill colour
                 alpha=0.9)  # transparency
plt.fill_between(x, predict_low_10.ravel(), predict.ravel(),  # upper, lower bound
                 facecolor='blue',  # fill colour
                 alpha=1.0)  # transparency
plt.legend()
plt.show()
--------------------------------------------------------------------------------
/code/forecast_pred.py:
--------------------------------------------------------------------------------
# Imports
import os
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR  # SVM regression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor  # RF and GBDT ensembles
from sklearn.metrics import explained_variance_score, mean_absolute_error, mean_absolute_percentage_error, \
    mean_squared_error, r2_score
import pandas as pd
import matplotlib.pyplot as plt  # plotting
from sklearn.model_selection import GridSearchCV
from ShapleyValue_new import shapley_value, federate
from sklearn import linear_model


def record_metrics(test_y, pred_y):
    model_metrics_name = [explained_variance_score, mean_absolute_percentage_error, mean_squared_error,
                          r2_score]  # regression metric functions
    tmp_list = []  # results for this call
    for m in model_metrics_name:  # loop over the metric functions
        tmp_score = m(test_y, pred_y)  # compute each metric
        tmp_list.append(tmp_score)  # collect the result
    return tmp_list


def ensemble(A, coef):
    pred_en = np.ones(A.shape[0]) * coef[-1]
    for i in range(A.shape[1]):
        pred_en += coef[i] * A[:, i]
    return pred_en


def leaveoneout_m(pre_y_list2, test_y2, pred_mean2):
    allo_loo_mm = []
    pred_3mean = [
        federate([pre_y_list2[2], pre_y_list2[3]]),
        federate([pre_y_list2[1], pre_y_list2[3]]),
        federate([pre_y_list2[1], pre_y_list2[2]]),
    ]

    Lomega_m = mean_absolute_percentage_error(test_y2, pred_mean2)
    for i in range(3):
        Lxk_3mean = mean_absolute_percentage_error(test_y2, pred_3mean[i])
        allo_loo_mm.append(Lxk_3mean - Lomega_m)
    allo_loo_mm = allo_loo_mm / np.sum(np.array(allo_loo_mm), axis=0)
    return allo_loo_mm
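# Illustration with hypothetical numbers (not project data): ensemble() applies an affine
# combination whose weights are learned further below with linear regression.  For two
# models' predictions stacked column-wise,
#     A = [[10., 12.],
#          [20., 18.]]           # shape (n_samples, n_models)
#     coef = [0.5, 0.5, 1.0]     # one weight per model, followed by the intercept
# ensemble(A, coef) returns [0.5*10 + 0.5*12 + 1, 0.5*20 + 0.5*18 + 1] = [12., 20.].
# leaveoneout_m() scores each of the three combined models by how much the MAPE of the
# averaged forecast worsens when that model is left out, normalized so the shares sum to 1.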
# %% Read dataset
dataset = pd.DataFrame()
for i in range(1, 16):
    pathcsv = os.path.abspath("GEFCom2014 Data/GEFCom2014-L_V2/Load/Task " + str(i) + "/L" + str(i) + "-train.csv")
    file = pd.read_csv(pathcsv)
    if i == 1:
        dataset = file.dropna(axis=0)
    else:
        dataset = pd.concat([dataset, file], axis=0, ignore_index=True)

dataset['TIMESTAMP'] = pd.date_range(start='2005-1-1 1:00:00', end='2011-12-1 00:00:00', freq='H')
temp = dataset[['w' + str(i) for i in range(1, 26)]].apply(pd.to_numeric, downcast='float')
dataset = dataset.drop(['ZONEID'] + ['w' + str(i) for i in range(1, 26)], axis=1)
dataset = dataset.copy()
dataset['w'] = temp.mean(axis=1)
time_before = [24, 48, 72, 96, 120, 144, 168]
for i in time_before:
    dataset['L-' + str(i)] = np.nan
    dataset.loc[i:, 'L-' + str(i)] = dataset['LOAD'].values[:-i]
    dataset['w-' + str(i)] = np.nan
    dataset.loc[i:, 'w-' + str(i)] = dataset['w'].values[:-i]
for i in [1, 2, 3]:
    dataset['w-' + str(i)] = np.nan
    dataset.loc[i:, 'w-' + str(i)] = dataset['w'].values[:-i]
dataset = dataset[23:].reset_index(drop=True)

dataset = dataset[(dataset['TIMESTAMP'].dt.year >= 2008) & (dataset['TIMESTAMP'].dt.year <= 2011)].reset_index(
    drop=True)
# dataset.loc[:, 'hour'] = dataset['TIMESTAMP'].dt.hour
# dataset.loc[:, 'month'] = dataset['TIMESTAMP'].dt.month
# dataset.loc[:, 'dayofweek'] = dataset['TIMESTAMP'].dt.dayofweek
dataset.loc[:, 'day'] = dataset['TIMESTAMP'].dt.day
dataset.loc[:, 'hour_sin'] = np.sin(2 * np.pi * dataset['TIMESTAMP'].dt.hour / 24)
dataset.loc[:, 'hour_cos'] = np.cos(2 * np.pi * dataset['TIMESTAMP'].dt.hour / 24)
dataset.loc[:, 'dayofweek_sin'] = np.sin(2 * np.pi * dataset['TIMESTAMP'].dt.dayofweek / 7)
dataset.loc[:, 'dayofweek_cos'] = np.cos(2 * np.pi * dataset['TIMESTAMP'].dt.dayofweek / 7)
dataset.loc[:, 'month_sin'] = np.sin(2 * np.pi * dataset['TIMESTAMP'].dt.month / 12)
dataset.loc[:, 'month_cos'] = np.cos(2 * np.pi * dataset['TIMESTAMP'].dt.month / 12)
# %%
# Data preparation
scaler1 = MinMaxScaler()
scaler2 = MinMaxScaler()
# training set: 2008-2009
train = dataset[(dataset['TIMESTAMP'].dt.year >= 2008) & (dataset['TIMESTAMP'].dt.year <= 2009)].reset_index(
    drop=True)  # training rows
train_x = train.drop(['TIMESTAMP', 'LOAD'], axis=1).reset_index(drop=True)
train_y = train['LOAD'].values.reshape(-1, 1)
train_x = scaler1.fit_transform(train_x)
train_y = scaler2.fit_transform(train_y)
# test set 1: 2010
test1 = dataset[(dataset['TIMESTAMP'].dt.year == 2010)].reset_index(drop=True)  # test rows (2010)
test_x1 = test1.drop(['TIMESTAMP', 'LOAD'], axis=1).reset_index(drop=True)
test_y1 = test1['LOAD'].values.reshape(-1, 1)
test_x1 = scaler1.transform(test_x1)
# test set 2: 2011
test2 = dataset[(dataset['TIMESTAMP'].dt.year == 2011)].reset_index(drop=True)  # test rows (2011)
test_x2 = test2.drop(['TIMESTAMP', 'LOAD'], axis=1).reset_index(drop=True)
test_y2 = test2['LOAD'].values.reshape(-1, 1)
test_x2 = scaler1.transform(test_x2)
# %%
# Models
model_SVR = SVR(kernel='rbf', gamma=0.01, C=450)  # support-vector regression model
model_MLP = MLPRegressor(
    alpha=0.03, hidden_layer_sizes=(180, 90), activation='relu', solver='adam', random_state=11
)
# model_RF = RandomForestRegressor(n_estimators=460, random_state=11, max_depth=21, max_features=12)
model_RF = RandomForestRegressor(random_state=11, n_estimators=460, max_features=None, max_depth=19,
                                 min_samples_split=2, min_samples_leaf=1)
model_GBDT = GradientBoostingRegressor(random_state=11, n_estimators=350, learning_rate=0.05, max_depth=7,
                                       subsample=0.55, min_samples_split=100)

model_names = ['SVM', 'MLP', 'RF', 'GBDT']  # model names
model_dic = [model_SVR, model_MLP, model_RF, model_GBDT]  # collection of regression model objects
# %% Fit and predict
pre_y_list1 = []  # predictions of each regression model
for model in model_dic:  # loop over the regression models
    '''
    model.fit(train_x, train_y.ravel())
    pred_y = []
    for i in range(len(test_x)):
        pred_point = model.predict(test_x[i].reshape(1, -1))
        if i + 3 < len(test_x):
            test_x[i + 1, 1] = pred_point
            test_x[i + 2, 2] = pred_point
            test_x[i + 3, 3] = pred_point
        elif i + 2 < len(test_x):
            test_x[i + 1, 1] = pred_point
            test_x[i + 2, 2] = pred_point
        elif i + 1 < len(test_x):
            test_x[i + 1, 1] = pred_point
        pred_y.append(pred_point)
    pre_y_list.append(pred_y)
    '''
    pre_y_list1.append(scaler2.inverse_transform(
        model.fit(train_x, train_y.ravel()).predict(test_x1).reshape(-1, 1)))  # fit and store the 2010 predictions



# %%
# Evaluate the models
n_samples, n_features = train_x.shape  # number of samples and features
model_metrics_list1 = []  # metric results
for i in range(len(model_dic)):  # loop over the model indices
    model_metrics_list1.append(record_metrics(test_y1, pre_y_list1[i]))  # store the metrics of each model
df1 = pd.DataFrame(model_metrics_list1, index=model_names, columns=['ev', 'mape', 'mse', 'r2'])  # metrics DataFrame

print('samples: %d \t features: %d' % (n_samples, n_features))  # sample and feature counts
print(70 * '-')  # separator
print('regression metrics:')  # header
print(df1)  # metrics table
print(70 * '-')  # separator
print('short name \t full name')  # abbreviation legend
print('ev \t explained_variance_score')
print('mape \t mean_absolute_percentage_error')
print('mse \t mean_squared_error')
print('r2 \t r2')
print(70 * '-')  # separator
# %%
'''
# Visualize the model results
color_list = ['c', 'r', 'b', 'g', 'y']  # colours
linestyle_list = ['-', '.', 'o', 'v', '*']  # line styles
plt.figure()  # new figure
plt.plot(np.arange(test_x1.shape[0]), test_y1, color='orange', label='actual data')  # actual values
for i in range(len(pre_y_list1) - 1):  # each model's predictions
    i += 1
    plt.plot(np.arange(test_x1.shape[0]), pre_y_list1[i], color_list[i], label=model_names[i])  # prediction curve
plt.title('regression result comparison')  # title
plt.legend(loc='upper right')  # legend position
plt.ylabel('real and predicted value')  # y-axis label
plt.show()  # show the figure
a = 5000
b = 168
plt.figure()  # new figure
plt.plot(np.arange(test_x1.shape[0])[a:a + b], test_y1[a:a + b], color='orange', label='actual data')  # actual values
# color_list = ['c', 'r', 'b', 'g', 'y']  # colours
# linestyle_list = ['-', '.', 'o', 'v', '*']  # line styles
for i in range(len(pre_y_list1) - 1):  # each model's predictions
    i += 1
    plt.plot(np.arange(test_x1.shape[0])[a:a + b], pre_y_list1[i][a:a + b], color_list[i],
             label=model_names[i])  # prediction curve
plt.title('regression result comparison')  # title
plt.legend(loc='upper center')  # legend position
plt.ylabel('real and predicted value')  # y-axis label
plt.show()  # show the figure
'''
# %%
pred_mean1 = np.sum(pre_y_list1[1:], axis=0) / 3
df_pm1 = pd.DataFrame(np.array(record_metrics(test_y1, pred_mean1)).reshape((1, 4)), index=['pred_mean'],
                      columns=['ev', 'mape', 'mse', 'r2'])
df1 = pd.concat([df1, df_pm1], axis=0)
print('regression metrics:')  # header
print(df_pm1)  # metrics of the averaged forecast
print(70 * '-')  # separator

plt.plot(test_y1[5000:5000 + 168], label='real')
plt.plot(pred_mean1[5000:5000 + 168], label='pred_mean')
plt.title("MAPE = {:.2f}%".format(100 * mean_absolute_percentage_error(test_y1, pred_mean1)))
plt.legend()
plt.show()
# %%
allo_sv_m1 = shapley_value(test_y1, pre_y_list1[1:], pre_ori=pre_y_list1[0], methods='mean')
allo_loo_mm1 = leaveoneout_m(pre_y_list1, test_y1, pred_mean1)
allo_mape_mm1 = [df1['mape'][0] - df1['mape'][i + 1] for i in range(3)]
allo_mape_mm1 = allo_mape_mm1 / np.sum(np.array(allo_mape_mm1), axis=0)
print(allo_sv_m1, allo_loo_mm1, allo_mape_mm1)
# %%
A1 = np.asarray(pre_y_list1[1:]).reshape(3, 8760).transpose()
model_Li1 = linear_model.LinearRegression()
model_Li1.fit(A1, test_y1)
coef1 = np.concatenate((model_Li1.coef_.reshape(A1.shape[1]), model_Li1.intercept_))
print('sum of coef1:', np.sum(model_Li1.coef_))
pred_en1 = ensemble(A1, coef1)
df_en1 = pd.DataFrame(np.array(record_metrics(test_y1, pred_en1)).reshape((1, 4)), index=['pred_en1'],
                      columns=['ev', 'mape', 'mse', 'r2'])

df1 = pd.concat([df1, df_en1], axis=0)
print(df_en1)
print(70 * '-')  # separator
print(df1)
# %%
pre_y_list2 = []  # predictions of each regression model on 2011
for model in model_dic:  # loop over the (already fitted) regression models
    pre_y_list2.append(scaler2.inverse_transform(model.predict(test_x2).reshape(-1, 1)))  # store the 2011 predictions

# Evaluate the models
n_samples, n_features = train_x.shape  # number of samples and features
model_metrics_list2 = []  # metric results
for i in range(len(model_dic)):  # loop over the model indices
    model_metrics_list2.append(record_metrics(test_y2, pre_y_list2[i]))  # store the metrics of each model
df2 = pd.DataFrame(model_metrics_list2, index=model_names, columns=['ev', 'mape', 'mse', 'r2'])  # metrics DataFrame

pred_mean2 = np.sum(pre_y_list2[1:], axis=0) / 3
df_pm2 = pd.DataFrame(np.array(record_metrics(test_y2, pred_mean2)).reshape((1, 4)), index=['pred_mean2'],
                      columns=['ev', 'mape', 'mse', 'r2'])
df2 = pd.concat([df2, df_pm2], axis=0)
# print('regression metrics:')  # header
# print(df_pm2)  # metrics of the averaged forecast
# print(70 * '-')  # separator
# %%
A2 = np.asarray(pre_y_list2[1:]).reshape(3, len(test_y2)).transpose()
pred_en2 = ensemble(A2, coef1)
df_en2 = pd.DataFrame(np.array(record_metrics(test_y2, pred_en2)).reshape((1, 4)), index=['pred_en2'],
                      columns=['ev', 'mape', 'mse', 'r2'])

df2 = pd.concat([df2, df_en2], axis=0)

print('samples: %d \t features: %d' % (n_samples, n_features))  # sample and feature counts
print(70 * '-')  # separator
print('regression metrics:')  # header
print(df2)  # metrics table
print(70 * '-')  # separator
print('short name \t full name')  # abbreviation legend
print('ev \t explained_variance_score')
print('mape \t mean_absolute_percentage_error')
print('mse \t mean_squared_error')
print('r2 \t r2')
print(70 * '-')  # separator
# %%
'''
Shapley Value
'''
allo_sv_m = shapley_value(test_y2, pre_y_list2[1:], pre_ori=pre_y_list2[0], methods='mean', metrics=mean_squared_error)
allo_sv_w = shapley_value(test_y2, pre_y_list2[1:], pre_ori=pre_y_list2[0], metrics=mean_squared_error,
                          methods='weight', test_y_w=test_y1, pre_all_w=pre_y_list1[1:], pre_ori_w=pre_y_list1[0])
# %%
'''
# leave one out: mean

allo_loo_mm = []
allo_loo_mp = []

pred_3mean = [
    federate([pre_y_list2[0], pre_y_list2[2], pre_y_list2[3]]),
    federate([pre_y_list2[0], pre_y_list2[1], pre_y_list2[3]]),
    federate([pre_y_list2[0], pre_y_list2[1], pre_y_list2[2]]),
]
pred_2mean = [
    federate([pre_y_list2[0], pre_y_list2[1]]),
    federate([pre_y_list2[0], pre_y_list2[2]]),
    federate([pre_y_list2[0], pre_y_list2[3]]),
]

Lwi = mean_absolute_percentage_error(test_y2, pre_y_list2[0])
Lomega_m = mean_absolute_percentage_error(test_y2, pred_mean2)
for i in range(3):
    Lxk_3mean = mean_absolute_percentage_error(test_y2, pred_3mean[i])
    Lxk_2mean = mean_absolute_percentage_error(test_y2, pred_2mean[i])
    allo_loo_mm.append(leaveoneout(Lwi, Lomega_m, Lxk_3mean, method='minus'))
    allo_loo_mp.append(leaveoneout(Lwi, Lomega_m, Lxk_2mean, method='plus'))
allo_loo_mm = allo_loo_mm / np.sum(np.array(allo_loo_mm), axis=0)
allo_loo_mp = allo_loo_mp / np.sum(np.array(allo_loo_mp), axis=0)

# %%
# leave one out: mse ensemble

allo_loo_enm = []
allo_loo_enp = []

pred_3en = [
    federate([pre_y_list2[0], pre_y_list2[2], pre_y_list2[3]], methods='weight', test_y_w=test_y1,
             pre_all_w=[pre_y_list1[0], pre_y_list1[2], pre_y_list1[3]]),
    federate([pre_y_list2[0], pre_y_list2[1], pre_y_list2[3]], methods='weight', test_y_w=test_y1,
             pre_all_w=[pre_y_list1[0], pre_y_list1[1], pre_y_list1[3]]),
    federate([pre_y_list2[0], pre_y_list2[1], pre_y_list2[2]], methods='weight', test_y_w=test_y1,
             pre_all_w=[pre_y_list1[0], pre_y_list1[1], pre_y_list1[2]]),
]
pred_2en = [
    federate([pre_y_list2[0], pre_y_list2[1]], methods='weight', test_y_w=test_y1,
             pre_all_w=[pre_y_list1[0], pre_y_list1[1]]),
    federate([pre_y_list2[0], pre_y_list2[2]], methods='weight', test_y_w=test_y1,
             pre_all_w=[pre_y_list1[0], pre_y_list1[2]]),
    federate([pre_y_list2[0], pre_y_list2[3]], methods='weight', test_y_w=test_y1,
             pre_all_w=[pre_y_list1[0], pre_y_list1[3]]),
]

Lwi = mean_absolute_percentage_error(test_y2, pre_y_list2[0])
Lomega_en = mean_absolute_percentage_error(test_y2, pred_en2)
for i in range(3):
    Lxk_3en = mean_absolute_percentage_error(test_y2, pred_3en[i])
    Lxk_2en = mean_absolute_percentage_error(test_y2, pred_2en[i])
    allo_loo_enm.append(leaveoneout(Lwi, Lomega_en, Lxk_3en, method='minus'))
    allo_loo_enp.append(leaveoneout(Lwi, Lomega_en, Lxk_2en, method='plus'))
allo_loo_enm = allo_loo_enm / np.sum(np.array(allo_loo_enm), axis=0)
allo_loo_enp = allo_loo_enp / np.sum(np.array(allo_loo_enp), axis=0)
'''
# %%
'''
leave one out: mean
'''
allo_loo_mm = []
pred_3mean = [
    federate([pre_y_list2[2], pre_y_list2[3]]),
    federate([pre_y_list2[1], pre_y_list2[3]]),
    federate([pre_y_list2[1], pre_y_list2[2]]),
]
measure = mean_squared_error
Lomega_m = measure(test_y2, pred_mean2)
for i in range(3):
    Lxk_3mean = measure(test_y2, pred_3mean[i])
    allo_loo_mm.append(Lxk_3mean - Lomega_m)
allo_loo_mm = allo_loo_mm / np.sum(np.array(allo_loo_mm), axis=0)
# %%
'''
leave one out: mse ensemble
'''
allo_loo_enm = []

pred_3en = [
    federate([pre_y_list2[2], pre_y_list2[3]], methods='weight', test_y_w=test_y1,
             pre_all_w=[pre_y_list1[2], pre_y_list1[3]]),
    federate([pre_y_list2[1], pre_y_list2[3]], methods='weight', test_y_w=test_y1,
             pre_all_w=[pre_y_list1[1], pre_y_list1[3]]),
    federate([pre_y_list2[1], pre_y_list2[2]], methods='weight', test_y_w=test_y1,
             pre_all_w=[pre_y_list1[1], pre_y_list1[2]]),
]
measure = mean_squared_error
Lomega_en = measure(test_y2, pred_en2)
for i in range(3):
    Lxk_3en = measure(test_y2, pred_3en[i])
    allo_loo_enm.append(Lxk_3en - Lomega_en)
allo_loo_enm = allo_loo_enm / np.sum(np.array(allo_loo_enm), axis=0)
# %%
allo_mape_mm = [df2['mape'][0] - df2['mape'][i + 1] for i in range(3)]
allo_mape_mm = allo_mape_mm / np.sum(np.array(allo_mape_mm), axis=0)

allo_mse_mm = [df2['mse'][0] - df2['mse'][i + 1] for i in range(3)]
allo_mse_mm = allo_mse_mm / np.sum(np.array(allo_mse_mm), axis=0)
# %%
# Visualize the model results
color_list = ['c', 'r', 'b', 'g', 'y']  # colours
linestyle_list = ['-', '.', 'o', 'v', '*']  # line styles
plt.figure()  # new figure
plt.plot(np.arange(test_x2.shape[0]), test_y2, color='orange', label='actual data')  # actual values
for i in range(len(pre_y_list2) - 1):  # each model's predictions
    i += 1
    plt.plot(np.arange(test_x2.shape[0]), pre_y_list2[i], color_list[i], label=model_names[i])  # prediction curve
plt.title('regression result comparison')  # title
plt.legend(loc='upper right')  # legend position
plt.ylabel('real and predicted value')  # y-axis label
plt.show()  # show the figure


a = 3000
b = 168
plt.figure()  # new figure
plt.plot(np.arange(test_x2.shape[0])[a:a + b], test_y2[a:a + b], color='orange', label='actual data')  # actual values
# color_list = ['c', 'r', 'b', 'g', 'y']  # colours
# linestyle_list = ['-', '.', 'o', 'v', '*']  # line styles
for i in range(len(pre_y_list2) - 1):  # each model's predictions
    i += 1
    plt.plot(np.arange(test_x2.shape[0])[a:a + b], pre_y_list2[i][a:a + b], color_list[i],
             label=model_names[i])  # prediction curve
plt.title('regression result comparison')  # title
plt.legend(loc='upper center')  # legend position
plt.ylabel('real and predicted value')  # y-axis label
plt.show()  # show the figure


# %%

--------------------------------------------------------------------------------