├── report.pdf
├── test_data_of_each_station.xls
├── rain_shuffle
│   ├── MODEL
│   │   ├── train_GBRT_371.m
│   │   ├── mlp_371_64_2_0.005.pth
│   │   └── lstm_371_128_2_0.001.pth
│   ├── MODEL_NO
│   │   ├── train_GBRT_371.m
│   │   ├── mlp_371_64_4_0.001.pth
│   │   ├── lstm_371_128_2_0.001.pth
│   │   └── seq2seq_371_128_0.1_0.001.pth
│   ├── models
│   │   ├── seq2seq_313_best.pth
│   │   ├── seq2seq_314_best.pth
│   │   ├── seq2seq_371_best.pth
│   │   ├── seq2seq_372_best.pth
│   │   └── seq2seq_393_best.pth
│   ├── image_example
│   │   ├── seq2seq_313.png
│   │   ├── seq2seq_314.png
│   │   ├── seq2seq_371.png
│   │   ├── seq2seq_372.png
│   │   └── seq2seq_393.png
│   ├── __pycache__
│   │   ├── eval.cpython-36.pyc
│   │   ├── eval.cpython-37.pyc
│   │   ├── seq2seq_module.cpython-36.pyc
│   │   └── seq2seq_module.cpython-37.pyc
│   ├── make_test.py
│   ├── hour_cat.py
│   ├── hour2day.py
│   ├── csv2txt.py
│   ├── hour2txt.py
│   ├── hour2txt_ifshuffle.py
│   ├── test.py
│   ├── ARIMA.py
│   ├── mlp.py
│   ├── run_hyp.sh
│   ├── seq2seq.py
│   ├── lstm_.py
│   ├── att_seq2seq.py
│   ├── SVR_sigmoid.py
│   ├── SVR_rbf.py
│   ├── SVR_poly.py
│   ├── GBRT.py
│   ├── XGB.py
│   ├── MLP_module.py
│   ├── run_no.sh
│   ├── run_no_time2.sh
│   ├── run_no_time1.sh
│   ├── LSTM_module.py
│   ├── run_time1.sh
│   ├── run_time2.sh
│   ├── run.sh
│   ├── ensemble_learn_np.py
│   ├── ensemble_learn.py
│   ├── ensemble_learn_np 1.py
│   ├── seq2seq_module.py
│   ├── att_seq2seq_module.py
│   └── eval.py
├── test_data_of_each_station_1_hour.xls
├── test_data_of_each_station_2_hours.xls
├── test_data_of_each_station_shuffle.xls
└── README.md

/report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/report.pdf
--------------------------------------------------------------------------------
/test_data_of_each_station.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/test_data_of_each_station.xls
--------------------------------------------------------------------------------
/rain_shuffle/MODEL/train_GBRT_371.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/MODEL/train_GBRT_371.m
--------------------------------------------------------------------------------
/test_data_of_each_station_1_hour.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/test_data_of_each_station_1_hour.xls
--------------------------------------------------------------------------------
/test_data_of_each_station_2_hours.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/test_data_of_each_station_2_hours.xls
--------------------------------------------------------------------------------
/test_data_of_each_station_shuffle.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/test_data_of_each_station_shuffle.xls
--------------------------------------------------------------------------------
/rain_shuffle/MODEL_NO/train_GBRT_371.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/MODEL_NO/train_GBRT_371.m
--------------------------------------------------------------------------------
/rain_shuffle/models/seq2seq_313_best.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/models/seq2seq_313_best.pth
--------------------------------------------------------------------------------
/rain_shuffle/models/seq2seq_314_best.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/models/seq2seq_314_best.pth
--------------------------------------------------------------------------------
/rain_shuffle/models/seq2seq_371_best.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/models/seq2seq_371_best.pth
--------------------------------------------------------------------------------
/rain_shuffle/models/seq2seq_372_best.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/models/seq2seq_372_best.pth
--------------------------------------------------------------------------------
/rain_shuffle/models/seq2seq_393_best.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/models/seq2seq_393_best.pth
--------------------------------------------------------------------------------
/rain_shuffle/MODEL/mlp_371_64_2_0.005.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/MODEL/mlp_371_64_2_0.005.pth
--------------------------------------------------------------------------------
/rain_shuffle/image_example/seq2seq_313.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/image_example/seq2seq_313.png
--------------------------------------------------------------------------------
/rain_shuffle/image_example/seq2seq_314.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/image_example/seq2seq_314.png
--------------------------------------------------------------------------------
/rain_shuffle/image_example/seq2seq_371.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/image_example/seq2seq_371.png
--------------------------------------------------------------------------------
/rain_shuffle/image_example/seq2seq_372.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/image_example/seq2seq_372.png
--------------------------------------------------------------------------------
/rain_shuffle/image_example/seq2seq_393.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/image_example/seq2seq_393.png
--------------------------------------------------------------------------------
/rain_shuffle/MODEL/lstm_371_128_2_0.001.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/MODEL/lstm_371_128_2_0.001.pth
--------------------------------------------------------------------------------
/rain_shuffle/MODEL_NO/mlp_371_64_4_0.001.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/MODEL_NO/mlp_371_64_4_0.001.pth
--------------------------------------------------------------------------------
/rain_shuffle/__pycache__/eval.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/__pycache__/eval.cpython-36.pyc
--------------------------------------------------------------------------------
/rain_shuffle/__pycache__/eval.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/__pycache__/eval.cpython-37.pyc
--------------------------------------------------------------------------------
/rain_shuffle/MODEL_NO/lstm_371_128_2_0.001.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/MODEL_NO/lstm_371_128_2_0.001.pth
--------------------------------------------------------------------------------
/rain_shuffle/MODEL_NO/seq2seq_371_128_0.1_0.001.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/MODEL_NO/seq2seq_371_128_0.1_0.001.pth
--------------------------------------------------------------------------------
/rain_shuffle/__pycache__/seq2seq_module.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/__pycache__/seq2seq_module.cpython-36.pyc
--------------------------------------------------------------------------------
/rain_shuffle/__pycache__/seq2seq_module.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/__pycache__/seq2seq_module.cpython-37.pyc
--------------------------------------------------------------------------------
/rain_shuffle/make_test.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import os
4 | 
5 | path = os.getcwd()
6 | 
7 | # stations to process
8 | num_array = np.array([313,314,371,372,393])
9 | 
10 | # fraction of the data held out for testing
11 | ratio = 0.1
12 | 
13 | for num in num_array:
14 |     df = pd.read_csv('{}/sample/hour{}.csv'.format(path,num))
15 | 
16 |     data_num = len(df)
17 | 
18 |     test_num = int(ratio*data_num)
19 | 
20 |     test_df = df.iloc[(data_num-test_num):,1:]
21 | 
22 |     test_df.to_csv('{}/sample/station{}.csv'.format(path, num))
23 | 
24 |     print('station{}ok'.format(num))
--------------------------------------------------------------------------------
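A quick sanity check on the split make_test.py produces (a sketch; it assumes the sample/ directory and station 371 exist):

    import pandas as pd

    full = pd.read_csv('sample/hour371.csv')
    test = pd.read_csv('sample/station371.csv')

    # make_test.py keeps the last 10% of rows as the held-out test set
    assert len(test) == int(0.1 * len(full))
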
/rain_shuffle/hour_cat.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import os
4 | 
5 | record = pd.read_csv('sudeste.csv')
6 | 
7 | path = './hour_data'
8 | if os.path.exists(path):
9 |     pass
10 | else:
11 |     os.mkdir(path)
12 | 
13 | print(record.columns)
14 | print(record.index)
15 | 
16 | print(record.head())
17 | 
18 | print(record[record.wsid==178].head())
19 | 
20 | print(record.wsid.value_counts())
21 | 
22 | # collect the weather-station (wsid) ids present in the data
23 | idx = record.wsid.values
24 | idx = np.unique(idx)
25 | idx_num = record.wsid.value_counts()
26 | 
27 | # keep only stations with enough records and save their hourly data
28 | for i in idx:
29 |     if idx_num[i] >= 120000:
30 |         df = record[record.wsid==i]
31 |     else:
32 |         continue
33 |     df = df.dropna(axis=0, how='all')
34 | 
35 |     df = df.fillna(value=0)
36 | 
37 |     df.to_csv('{}/hour{}.csv'.format(path,i))
38 | 
39 | print('yy')
40 | 
--------------------------------------------------------------------------------
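The per-station loop above can be expressed more directly with pandas; a sketch of an equivalent filter (same 120000-record threshold):

    import pandas as pd

    record = pd.read_csv('sudeste.csv')
    counts = record.wsid.value_counts()
    keep = counts[counts >= 120000].index   # stations with enough hourly records

    for i, df in record[record.wsid.isin(keep)].groupby('wsid'):
        df.dropna(axis=0, how='all').fillna(0).to_csv('./hour_data/hour{}.csv'.format(i))
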
/rain_shuffle/hour2day.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import os
4 | 
5 | record = pd.read_csv('sudeste.csv')
6 | 
7 | path = './day_data'
8 | if os.path.exists(path):
9 |     pass
10 | else:
11 |     os.mkdir(path)
12 | 
13 | print(record.columns)
14 | print(record.index)
15 | 
16 | print(record.head())
17 | 
18 | print(record[record.wsid==178].head())
19 | 
20 | print(record.wsid.value_counts())
21 | 
22 | # collect the weather-station (wsid) ids present in the data
23 | idx = record.wsid.values
24 | idx = np.unique(idx)
25 | idx_num = record.wsid.value_counts()
26 | 
27 | # keep only stations with enough records and aggregate their data by day
28 | for i in idx:
29 |     if idx_num[i] >= 100000:
30 |         df = record[record.wsid==i]
31 |     else:
32 |         continue
33 |     df = df.dropna(axis=0, how='all')
34 | 
35 |     df = df.fillna(value=0)
36 | 
37 |     tim = df.date.values
38 | 
39 |     tim = np.unique(tim)
40 | 
41 |     k = 0
42 |     # build one record per day of precipitation data
43 |     for j in tim:
44 |         k = k + 1
45 |         df1 = df[df.date == j]
46 |         djsk = df1.values
47 |         data = np.mean(djsk[:, 14:30], axis=0)
48 |         rain = np.sum(djsk[:, 14])
49 |         djsk = djsk[-1, :][:, np.newaxis].T
50 |         djsk[:, 14:30] = data
51 |         djsk[:, 14] = rain
52 | 
53 |         if k == 1:
54 |             df_all = djsk
55 |         else:
56 |             df_all = np.vstack([df_all, djsk])
57 | 
58 |     df = pd.DataFrame(df_all)
59 | 
60 |     df.to_csv('{}/tian{}.csv'.format(path,i))
61 | 
62 | 
63 | print('yy')
64 | 
--------------------------------------------------------------------------------
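The inner loop builds one row per day: rain (column index 14) is summed, columns 14:30 are averaged, and the remaining fields come from the day's last row. A groupby sketch of the same aggregation, where 'prcp' and 'temp' stand in for the actual sudeste.csv column names:

    # one row per date: rain summed, the other numeric columns averaged
    daily = df.groupby('date').agg(
        rain=('prcp', 'sum'),
        temp=('temp', 'mean'),   # repeat for the other averaged columns
    )
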
/rain_shuffle/csv2txt.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import os
4 | 
5 | path = os.getcwd()
6 | 
7 | # select the station
8 | num = 373
9 | 
10 | df = pd.read_csv('{}/day_data/tian{}.csv'.format(path,num))
11 | 
12 | data_num = len(df)
13 | # feature column indices
14 | # feature_idx=[15,16,19,20,26,28,29]
15 | feature_idx = [15,19,20,21,26,29,30]
16 | rain_idx = [15]
17 | # window length (days)
18 | time_ser = 3
19 | 
20 | # train/test split ratio
21 | ratio = 0.9
22 | 
23 | train_list = []
24 | label_list = []
25 | 
26 | # the loop below filters out all-zero rows
27 | # k counts consecutive valid rows
28 | k = 0
29 | for i in range(data_num):
30 |     j = i+1
31 |     t = df.iloc[i, feature_idx].values
32 |     if not np.any(t):
33 |         k = 0
34 |         continue
35 |     else:
36 |         k = k+1
37 |     if j - time_ser >= 0 and k == time_ser:
38 |         train = df.iloc[j-time_ser:j, feature_idx].values
39 |         label = df.iloc[j,rain_idx].values
40 |         train = np.reshape(train,(1,-1))
41 |         train_list.append(train)
42 |         label_list.append(label)
43 |         k = 0
44 |     if i >= data_num-2:
45 |         break
46 | 
47 | train_arr = np.array(train_list).reshape((len(train_list),-1))
48 | label_arr = np.array(label_list).reshape((len(label_list),-1))
49 | 
50 | path = './train_test'
51 | if os.path.exists(path):
52 |     pass
53 | else:
54 |     os.mkdir(path)
55 | 
56 | np.savetxt('{}/train{}.txt'.format(path,num),train_arr)
57 | np.savetxt('{}/label{}.txt'.format(path,num),label_arr)
58 | 
59 | print('ss')
60 | 
61 | 
62 | 
63 | 
64 | 
65 | 
66 | 
67 | 
68 | 
--------------------------------------------------------------------------------
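Each saved training row is a flattened time_ser x len(feature_idx) window, and the label is the next step's rainfall. A toy illustration of that windowing (3 steps, 7 features per step, as in the script):

    import numpy as np

    time_ser, n_feat = 3, 7
    data = np.arange(5 * n_feat).reshape(5, n_feat)   # 5 consecutive valid rows

    window = data[0:time_ser].reshape(1, -1)          # shape (1, 21): rows 0-2 flattened
    label = data[time_ser, 0]                         # next row's rain value (column 0 here)
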
/rain_shuffle/hour2txt.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import os
4 | 
5 | path = os.getcwd()
6 | 
7 | # select the station
8 | num = 396
9 | 
10 | df = pd.read_csv('{}/hour_data/hour{}.csv'.format(path,num))
11 | 
12 | data_num = len(df)
13 | # feature column indices
14 | # feature_idx=[15,16,19,20,26,28,29]
15 | feature_idx = [15,19,20,21,26,29,30]
16 | rain_idx = [15]
17 | # window length (hours)
18 | time_ser = 3
19 | 
20 | # train/test split ratio
21 | ratio = 0.9
22 | 
23 | train_list = []
24 | label_list = []
25 | 
26 | # the loop below filters out all-zero rows
27 | # k counts consecutive valid rows
28 | k = 0
29 | for i in range(data_num):
30 |     j = i+1
31 |     t = df.iloc[i, feature_idx].values
32 |     if not np.any(t):
33 |         k = 0
34 |         continue
35 |     else:
36 |         k = k+1
37 |     if j - time_ser >= 0 and k == time_ser:
38 |         train = df.iloc[j-time_ser:j, feature_idx].values
39 |         label = df.iloc[j,rain_idx].values
40 |         train = np.reshape(train,(1,-1))
41 |         train_list.append(train)
42 |         label_list.append(label)
43 |         k = 0
44 |     if i >= data_num-2:
45 |         break
46 | 
47 | train_arr = np.array(train_list).reshape((len(train_list),-1))
48 | label_arr = np.array(label_list).reshape((len(label_list),-1))
49 | 
50 | path = './train_test_hour'
51 | if os.path.exists(path):
52 |     pass
53 | else:
54 |     os.mkdir(path)
55 | 
56 | np.savetxt('{}/train{}.txt'.format(path,num),train_arr)
57 | np.savetxt('{}/label{}.txt'.format(path,num),label_arr)
58 | 
59 | print('ss')
60 | 
61 | 
62 | 
63 | 
64 | 
65 | 
66 | 
67 | 
68 | 
--------------------------------------------------------------------------------
/rain_shuffle/hour2txt_ifshuffle.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import os
4 | from random import shuffle
5 | import sys
6 | 
7 | # shuffle features and labels together
8 | def myShuffle(X, y):
9 |     y = y.reshape(-1,1)
10 |     X_temp = np.concatenate((X, y),axis=1)
11 |     shuffle(X_temp)
12 |     return X_temp[:,:len(X_temp[0])-1], X_temp[:,-1].reshape(-1,1)
13 | 
14 | path = os.getcwd()
15 | # feature column indices
16 | # feature_idx=[15,16,19,20,26,28,29]
17 | feature_idx = [15,19,20,21,26,29,30]
18 | rain_idx = [15]
19 | 
20 | time_ser = 3  # number of hours
21 | ratio = 0.9
22 | ifshuffle = False
23 | 
24 | # the loop below filters out all-zero rows
25 | # k counts consecutive valid rows
26 | for num in [312,313,314,315,316,371,372,373,374,393,394,396]:
27 |     k = 0
28 |     df = pd.read_csv('{}/hour_data/hour{}.csv'.format(path,num),engine='python')
29 |     data_num = len(df)
30 |     train_list = []
31 |     label_list = []
32 |     for i in range(data_num):
33 |         j = i+1
34 |         t = df.iloc[i, feature_idx].values
35 |         if not np.any(t):
36 |             k = 0
37 |             continue
38 |         else:
39 |             k = k+1
40 |         if j - time_ser >= 0 and k == time_ser:
41 |             train = df.iloc[j-time_ser:j, feature_idx].values
42 |             label = df.iloc[j,rain_idx].values
43 |             train = np.reshape(train,(1,-1))
44 |             train_list.append(train)
45 |             label_list.append(label)
46 |             k = 0
47 |         if i >= data_num-2:
48 |             break
49 | 
50 |     train_arr_old = np.array(train_list).reshape((len(train_list),-1))
51 |     label_arr_old = np.array(label_list).reshape((len(label_list),-1))
52 |     if ifshuffle:
53 |         train_arr, label_arr = myShuffle(train_arr_old, label_arr_old)
54 |         path = './train_test_hour_shuffle'
55 |     else:
56 |         train_arr, label_arr = train_arr_old, label_arr_old
57 |         path = './train_test_hour'
58 | 
59 |     len_feature = len(train_arr)
60 |     train_feature = train_arr[:int(ratio*len_feature),:]
61 |     train_rain = label_arr[:int(ratio*len_feature),:]
62 |     test_feature = train_arr[int(ratio*len_feature):,:]
63 |     test_rain = label_arr[int(ratio*len_feature):,:]
64 | 
65 |     if os.path.exists(path):
66 |         pass
67 |     else:
68 |         os.mkdir(path)
69 | 
70 |     np.savetxt('{}/train{}.txt'.format(path,num),train_feature)
71 |     np.savetxt('{}/train_label{}.txt'.format(path,num),train_rain)
72 |     np.savetxt('{}/test{}.txt'.format(path,num),test_feature)
73 |     np.savetxt('{}/test_label{}.txt'.format(path,num),test_rain)
74 | 
75 |     path = os.getcwd()
76 |     print('num%d done'%num)
77 | 
78 | 
79 | 
80 | 
81 | 
82 | 
83 | 
84 | 
85 | 
--------------------------------------------------------------------------------
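myShuffle concatenates X and y so one permutation is applied to both. scikit-learn ships the same operation, which also avoids the dtype round-trip through np.concatenate (a sketch):

    from sklearn.utils import shuffle

    X_shuf, y_shuf = shuffle(X, y, random_state=0)  # same permutation applied to both
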
/rain_shuffle/test.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from sklearn.preprocessing import *
4 | import os
5 | from eval import evaluation
6 | from sklearn.metrics import *
7 | import matplotlib.pyplot as plt
8 | import argparse
9 | from torch import Tensor
10 | from torch.autograd import Variable
11 | 
12 | # os.environ["CUDA_VISIBLE_DEVICES"]='0'
13 | # torch.cuda.set_device(0)
14 | 
15 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
16 | 
17 | # command-line arguments
18 | parser = argparse.ArgumentParser()
19 | parser.add_argument('--station', type=int, default=393, help='id of station')
20 | parser.add_argument('--model_type',type=str,default='seq2seq')
21 | parser.add_argument('--model',type=str,default='models/seq2seq_393_best.pth',help='model name')
22 | parser.add_argument('--input-dim', type=int, default=3, help='number of hours')
23 | parser.add_argument('--seq-len', type=int, default=7, help='number of parameters per hour')
24 | parser.add_argument('--ifshuffle',action='store_true',help='shuffle data or not')
25 | opt = parser.parse_args()
26 | 
27 | path = os.getcwd()
28 | # station id
29 | num = opt.station
30 | 
31 | path_2 = 'sample'
32 | 
33 | if opt.ifshuffle:
34 |     path_2 = 'sample_shuffle'
35 | 
36 | ratio = 0.9
37 | # determined by how the dataset was built
38 | input_dim = opt.input_dim
39 | seq_len = opt.seq_len
40 | 
41 | # test set
42 | test_feature = np.loadtxt("{}/{}/station{}/test{}.txt".format(path,path_2,num, num)).astype(np.float32)
43 | test_rain = np.loadtxt("{}/{}/station{}/test_label{}.txt".format(path,path_2, num,num)).astype(np.float32)
44 | test_feature = scale(test_feature, axis=0)
45 | test_rain = np.reshape(test_rain, (-1, 1))
46 | 
47 | 
48 | model = torch.load('{}/{}'.format(path,opt.model), map_location=torch.device('cpu'))
49 | x_tensor = Tensor(test_feature)
50 | if opt.model_type == 'mlp':
51 |     pass
52 | else:
53 |     x_tensor = Variable(x_tensor).view(-1, 7, 3)
54 | y_pre_temp = model(x_tensor).detach().numpy()
55 | 
56 | error = mean_squared_error(test_rain, y_pre_temp)
57 | print('Model Test MSE: %.3f' % error)
58 | 
59 | rmse, mae, mdae,r2,var = evaluation(test_rain, y_pre_temp)
60 | 
61 | print('SEQ2SEQ_rmse: %r' % rmse,
62 |       'SEQ2SEQ_mae: %r' % mae,
63 |       'SEQ2SEQ_mdae: %r' % mdae,
64 |       'SEQ2SEQ_r2: %r' % r2,
65 |       'SEQ2SEQ_var: %r' % var)
66 | 
67 | path = './models'
68 | fig = plt.figure()
69 | plt.plot(test_rain, 'b', label='real')
70 | plt.plot(y_pre_temp, 'r', label='prediction',alpha=0.3)
71 | plt.legend(loc='best')
72 | plt.show()
73 | fig.savefig('{}/{}_{}.png'.format(path,opt.model_type,num), dpi=300)
74 | 
75 | f = open('{}/test_SEQ2SEQ_{}.txt'.format(path,num), 'w+')
76 | f.write('SEQ2SEQ_rmse: %r ' % rmse +
77 |         'SEQ2SEQ_mae: %r ' % mae +
78 |         'SEQ2SEQ_mdae: %r ' % mdae +
79 |         'SEQ2SEQ_r2: %r ' % r2 +
80 |         'SEQ2SEQ_var: %r ' % var)
81 | f.close()
82 | 
83 | 
84 | 
85 | 
--------------------------------------------------------------------------------
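test.py is driven from the command line, for example: python test.py --station 393 --model_type seq2seq --model models/seq2seq_393_best.pth. For the recurrent models it reshapes each flattened 21-value window before inference; a minimal sketch of that step:

    import torch

    x = torch.randn(5, 21)      # 5 test windows, 7*3 values each
    x_seq = x.view(-1, 7, 3)    # (batch, seq_len=7, input_dim=3), as on line 53 above
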
/rain_shuffle/ARIMA.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from sklearn.preprocessing import *
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | from sklearn.svm import SVR
6 | from pandas.plotting import *
7 | from statsmodels.graphics.tsaplots import *
8 | from statsmodels.stats.diagnostic import acorr_ljungbox
9 | from statsmodels.tsa.arima_model import ARIMA
10 | from sklearn.metrics import mean_squared_error
11 | import os
12 | from eval import evaluation
13 | import argparse
14 | 
15 | # command-line arguments
16 | parser = argparse.ArgumentParser()
17 | parser.add_argument('--station', type=int, default=371, help='id of station')
18 | opt = parser.parse_args()
19 | 
20 | path = os.getcwd()
21 | # station id
22 | num = opt.station
23 | path_2 = 'train_test_hour'
24 | ratio = 0.9
25 | 
26 | 
27 | rain = np.loadtxt("{}/{}/label{}.txt".format(path,path_2,num))
28 | # min_max_scaler = MinMaxScaler()
29 | # rain = min_max_scaler.fit_transform(rain)
30 | 
31 | # df = pd.read_csv('tian.csv')
32 | #
33 | # rain_series=df.iloc[:4000,16]
34 | 
35 | autocorrelation_plot(rain)
36 | plt.show()
37 | #
38 | # lag_plot(rain_series)
39 | # plt.show()
40 | 
41 | # plot_acf(rain_series)
42 | # plt.show()
43 | #
44 | # diff1 = rain_series.diff(1).dropna()
45 | # diff1.plot()
46 | # plt.show()
47 | 
48 | 
49 | # plot_acf(diff1)
50 | # plt.show()
51 | #
52 | # plot_pacf(rain_series)
53 | # plt.show()
54 | 
55 | # print('Ljung-Box white-noise test on the differenced series:', acorr_ljungbox(diff1, lags=1))
56 | #
57 | # model = ARIMA(diff1[:1000], (0,1,2)).fit()
58 | # model.summary2()
59 | # re=model.fittedvalues
60 | # results=model.predict()
61 | #
62 | #
63 | # output = model.forecast()
64 | 
65 | 
66 | X = rain
67 | size = int(len(X) * ratio)
68 | train, test = X[0:size], X[size:len(X)]
69 | history = [x for x in train]
70 | predictions = list()
71 | for t in range(len(test)):
72 |     model = ARIMA(history[:5000], order=(5,1,0))
73 |     model_fit = model.fit(disp=0)
74 |     output = model_fit.forecast()
75 |     yhat = output[0]
76 |     predictions.append(yhat)
77 |     obs = test[t]
78 |     history.append(obs)
79 |     print('predicted=%f, expected=%f' % (yhat, obs))
80 | error = mean_squared_error(test, predictions)
81 | print('Test MSE: %.3f' % error)
82 | 
83 | 
84 | 
85 | rmse, mae, mdae,r2,var = evaluation(test, predictions)
86 | 
87 | print('ARIMA_rmse: %r' % rmse,
88 |       'ARIMA_mae: %r' % mae,
89 |       'ARIMA_mdae: %r' % mdae,
90 |       'ARIMA_r2: %r' % r2,
91 |       'ARIMA_var: %r' % var)
92 | 
93 | 
94 | plt.plot(test, 'b', label='real')
95 | plt.plot(predictions, 'r', label='prediction',alpha=0.3)
96 | plt.legend(loc='best')
97 | plt.show()
98 | 
99 | """
100 | save the results
101 | """
102 | 
103 | path = './ARIMA'
104 | """
105 | create the output folder
106 | """
107 | if os.path.exists(path):
108 |     pass
109 | else:
110 |     os.mkdir(path)
111 | 
112 | f = open('{}/test_ARIMA_{}.txt'.format(path,num), 'w+')
113 | f.write('ARIMA_rmse: %r ' % rmse +
114 |         'ARIMA_mae: %r ' % mae +
115 |         'ARIMA_mdae: %r ' % mdae +
116 |         'ARIMA_r2: %r ' % r2 +
117 |         'ARIMA_var: %r ' % var)
118 | f.close()
119 | 
--------------------------------------------------------------------------------
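Note that statsmodels.tsa.arima_model.ARIMA (imported on line 9) no longer works on recent statsmodels releases (0.13+). On those versions the rolling one-step forecast above needs the replacement class; a sketch with the same order=(5,1,0):

    from statsmodels.tsa.arima.model import ARIMA

    model = ARIMA(history[:5000], order=(5, 1, 0))
    res = model.fit()           # the new API has no disp argument
    yhat = res.forecast()[0]    # one-step-ahead prediction
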
/rain_shuffle/mlp.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from sklearn.preprocessing import *
4 | import os
5 | from sklearn.model_selection import train_test_split
6 | from eval import evaluation
7 | from MLP_module import MLP
8 | from sklearn.metrics import *
9 | import matplotlib.pyplot as plt
10 | import argparse
11 | 
12 | # os.environ["CUDA_VISIBLE_DEVICES"]='0'
13 | # torch.cuda.set_device(0)
14 | 
15 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
16 | 
17 | # command-line arguments
18 | parser = argparse.ArgumentParser()
19 | parser.add_argument('--station', type=int, default=371, help='id of station')
20 | parser.add_argument('--epochs', type=int, default=30, help='number of epochs')
21 | parser.add_argument('--batch-size', type=int, default=100, help='batch size')
22 | parser.add_argument('--lr', type=float, default=1e-3, help='learning rate')
23 | parser.add_argument('--input-dim', type=int, default=3, help='number of hours')
24 | parser.add_argument('--seq-len', type=int, default=7, help='number of parameters per hour')
25 | parser.add_argument('--ifshuffle',action='store_true',help='shuffle data or not')
26 | parser.add_argument('--hidden_dim', type=int, default=64, help='hidden_dim')
27 | parser.add_argument('--n_layer', type=int, default=2, help='n_layer')
28 | opt = parser.parse_args()
29 | 
30 | path = os.getcwd()
31 | # station id
32 | num = opt.station
33 | 
34 | path_2 = 'train_test_hour'
35 | 
36 | if opt.ifshuffle:
37 |     path_2 = 'train_test_hour_shuffle'
38 | 
39 | ratio = 0.9
40 | # determined by how the dataset was built
41 | input_dim = opt.input_dim
42 | seq_len = opt.seq_len
43 | # hyper-parameters
44 | batch_size = opt.batch_size
45 | learning_rate = opt.lr
46 | num_epoches = opt.epochs
47 | hidden_dim = opt.hidden_dim
48 | n_layer = opt.n_layer
49 | 
50 | 
51 | # rainfall data
52 | train_feature = np.loadtxt("{}/{}/train{}.txt".format(path,path_2, num)).astype(np.float32)
53 | train_rain = np.loadtxt("{}/{}/train_label{}.txt".format(path,path_2, num)).astype(np.float32)
54 | train_feature = scale(train_feature, axis=0)
55 | train_rain = np.reshape(train_rain, (-1, 1))
56 | 
57 | test_feature = np.loadtxt("{}/{}/test{}.txt".format(path,path_2, num)).astype(np.float32)
58 | test_rain = np.loadtxt("{}/{}/test_label{}.txt".format(path,path_2, num)).astype(np.float32)
59 | test_feature = scale(test_feature, axis=0)
60 | test_rain = np.reshape(test_rain, (-1, 1))
61 | 
62 | model = MLP(num,seq_len*input_dim,hidden_dim=hidden_dim,n_layer=n_layer,batch_size=batch_size,learning_rate=learning_rate,shuffle=False,device_pu=device)
63 | 
64 | eval_best = model.fit(train_feature,train_rain,num_epoches=num_epoches)
65 | print("best val:"+str(eval_best)+'\n')
66 | 
67 | y_mlp = model.predict(test_feature,test_rain)
68 | 
69 | error = mean_squared_error(test_rain, y_mlp)
70 | print('Model Test MSE: %.3f' % error)
71 | 
72 | rmse, mae, mdae,r2,var = evaluation(test_rain, y_mlp)
73 | 
74 | print('MLP_rmse: %r' % rmse,
75 |       'MLP_mae: %r' % mae,
76 |       'MLP_mdae: %r' % mdae,
77 |       'MLP_r2: %r' % r2,
78 |       'MLP_var: %r' % var)
79 | 
80 | 
81 | plt.plot(test_rain, 'b', label='real')
82 | plt.plot(y_mlp, 'r', label='prediction',alpha=0.3)
83 | plt.legend(loc='best')
84 | plt.show()
85 | 
86 | # save the results
87 | path = './MLP'
88 | f = open('{}/test_MLP_{}.txt'.format(path,num), 'w+')
89 | f.write('MLP_rmse: %r ' % rmse +
90 |         'MLP_mae: %r ' % mae +
91 |         'MLP_mdae: %r ' % mdae +
92 |         'MLP_r2: %r ' % r2 +
93 |         'MLP_var: %r ' % var)
94 | f.close()
95 | 
96 | 
97 | 
98 | 
--------------------------------------------------------------------------------
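mlp.py (like seq2seq.py, lstm_.py and att_seq2seq.py below) opens its results file under ./MLP without creating the directory first, so it fails with FileNotFoundError on a fresh checkout unless the directory already exists; a two-line guard fixes that:

    import os

    os.makedirs('./MLP', exist_ok=True)   # create the results directory if missing
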
/rain_shuffle/run_hyp.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # hyper-parameter sweep script
3 | clear;
4 | echo 'Hello';
5 | # source activate py3
6 | 
7 | # # run lstm_
8 | # # learning rate
9 | # python lstm_.py --station 371 --epochs 100 --batch-size 100 --lr 1e-4
10 | # python lstm_.py --station 371 --epochs 100 --batch-size 100 --lr 5e-4
11 | # python lstm_.py --station 371 --epochs 100 --batch-size 100 --lr 1e-3
12 | # python lstm_.py --station 371 --epochs 100 --batch-size 100 --lr 5e-3
13 | # python lstm_.py --station 371 --epochs 100 --batch-size 100 --lr 1e-2
14 | # # number of hidden units
15 | # python lstm_.py --station 371 --epochs 100 --batch-size 100 --hidden_dim 16
16 | # python lstm_.py --station 371 --epochs 100 --batch-size 100 --hidden_dim 32
17 | # python lstm_.py --station 371 --epochs 100 --batch-size 100 --hidden_dim 64
18 | # python lstm_.py --station 371 --epochs 100 --batch-size 100 --hidden_dim 128
19 | # python lstm_.py --station 371 --epochs 100 --batch-size 100 --hidden_dim 256
20 | # # number of layers
21 | # python lstm_.py --station 371 --epochs 100 --batch-size 100 --n_layer 1
22 | # python lstm_.py --station 371 --epochs 100 --batch-size 100 --n_layer 2
23 | # python lstm_.py --station 371 --epochs 100 --batch-size 100 --n_layer 3
24 | # python lstm_.py --station 371 --epochs 100 --batch-size 100 --n_layer 4
25 | # python lstm_.py --station 371 --epochs 100 --batch-size 100 --n_layer 5
26 | 
27 | 
28 | # # run mlp
29 | # # learning rate
30 | # python mlp.py --station 371 --epochs 100 --batch-size 100 --lr 1e-4
31 | # python mlp.py --station 371 --epochs 100 --batch-size 100 --lr 5e-4
32 | # python mlp.py --station 371 --epochs 100 --batch-size 100 --lr 1e-3
33 | # python mlp.py --station 371 --epochs 100 --batch-size 100 --lr 5e-3
34 | # python mlp.py --station 371 --epochs 100 --batch-size 100 --lr 1e-2
35 | # # number of hidden units
36 | # python mlp.py --station 371 --epochs 100 --batch-size 100 --hidden_dim 16
37 | # python mlp.py --station 371 --epochs 100 --batch-size 100 --hidden_dim 32
38 | # python mlp.py --station 371 --epochs 100 --batch-size 100 --hidden_dim 64
39 | # python mlp.py --station 371 --epochs 100 --batch-size 100 --hidden_dim 128
40 | # python mlp.py --station 371 --epochs 100 --batch-size 100 --hidden_dim 256
41 | # number of layers
42 | # python mlp.py --station 371 --epochs 100 --batch-size 100 --n_layer 1
43 | # python mlp.py --station 371 --epochs 100 --batch-size 100 --n_layer 2
44 | # python mlp.py --station 371 --epochs 100 --batch-size 100 --n_layer 3
45 | # python mlp.py --station 371 --epochs 100 --batch-size 100 --n_layer 4
46 | # python mlp.py --station 371 --epochs 100 --batch-size 100 --n_layer 5
47 | 
48 | # # # run seq2seq
49 | # # learning rate
50 | # python seq2seq.py --station 371 --epochs 100 --batch-size 30 --lr 1e-4
51 | # python seq2seq.py --station 371 --epochs 100 --batch-size 30 --lr 5e-4
52 | # python seq2seq.py --station 371 --epochs 100 --batch-size 30 --lr 1e-3
53 | # python seq2seq.py --station 371 --epochs 100 --batch-size 30 --lr 5e-3
54 | # python seq2seq.py --station 371 --epochs 100 --batch-size 30 --lr 1e-2
55 | # # number of hidden units
56 | # python seq2seq.py --station 371 --epochs 100 --batch-size 30 --hidden_dim 16
57 | # python seq2seq.py --station 371 --epochs 100 --batch-size 30 --hidden_dim 32
58 | # python seq2seq.py --station 371 --epochs 100 --batch-size 30 --hidden_dim 64
59 | # python seq2seq.py --station 371 --epochs 100 --batch-size 30 --hidden_dim 128
60 | # python seq2seq.py --station 371 --epochs 100 --batch-size 30 --hidden_dim 256
61 | 
62 | 
63 | 
--------------------------------------------------------------------------------
/rain_shuffle/seq2seq.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from sklearn.preprocessing import *
4 | import os
5 | from sklearn.model_selection import train_test_split
6 | from eval import evaluation
7 | from seq2seq_module import Seq2Seq
8 | from sklearn.metrics import *
9 | import matplotlib.pyplot as plt
10 | import argparse
11 | 
12 | # os.environ["CUDA_VISIBLE_DEVICES"]='0'
13 | # torch.cuda.set_device(0)
14 | 
15 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
16 | 
17 | # command-line arguments
18 | parser = argparse.ArgumentParser()
19 | parser.add_argument('--station', type=int, default=371, help='id of station')
20 | parser.add_argument('--epochs', type=int, default=100, help='number of epochs')
21 | parser.add_argument('--batch-size', type=int, default=30, help='batch size')
22 | parser.add_argument('--lr', type=float, default=1e-3, help='learning rate')
23 | parser.add_argument('--input-dim', type=int, default=3, help='number of hours')
24 | parser.add_argument('--seq-len', type=int, default=7, help='number of parameters per hour')
25 | parser.add_argument('--ifshuffle',action='store_true',help='shuffle data or not')
26 | # hyper-parameters
27 | parser.add_argument('--hidden_dim', type=int, default=128, help='hidden unit number')
28 | 
29 | opt = parser.parse_args()
30 | 
31 | path = os.getcwd()
32 | # station id
33 | num = opt.station
34 | 
35 | path_2 = 'train_test_hour'
36 | 
37 | if opt.ifshuffle:
38 |     path_2 = 'train_test_hour_shuffle'
39 | 
40 | ratio = 0.9
41 | # determined by how the dataset was built
42 | input_dim = opt.input_dim
43 | seq_len = opt.seq_len
44 | # hyper-parameters
45 | batch_size = opt.batch_size
46 | learning_rate = opt.lr
47 | num_epoches = opt.epochs
48 | hidden_dim = opt.hidden_dim
49 | 
50 | 
51 | # rainfall data
52 | train_feature = np.loadtxt("{}/{}/train{}.txt".format(path,path_2, num)).astype(np.float32)
53 | train_rain = np.loadtxt("{}/{}/train_label{}.txt".format(path,path_2, num)).astype(np.float32)
54 | train_feature = scale(train_feature, axis=0)
55 | train_rain = np.reshape(train_rain, (-1, 1))
56 | 
57 | test_feature = np.loadtxt("{}/{}/test{}.txt".format(path,path_2, num)).astype(np.float32)
58 | test_rain = np.loadtxt("{}/{}/test_label{}.txt".format(path,path_2, num)).astype(np.float32)
59 | test_feature = scale(test_feature, axis=0)
60 | test_rain = np.reshape(test_rain, (-1, 1))
61 | 
62 | 
63 | 
64 | model = Seq2Seq(num,input_dim,seq_len,output_dim=1,hidden_size=hidden_dim,dropout=0.1,learning_rate=learning_rate,batch_size=batch_size,device_pu=device)
65 | 
66 | eval_best = model.fit(train_feature,train_rain,num_epoches=num_epoches,shuffle=False)
67 | print("best val:"+str(eval_best)+'\n')
68 | 
69 | y_seq2seq = model.predict(test_feature,test_rain)
70 | 
71 | error = mean_squared_error(test_rain, y_seq2seq)
72 | print('Model Test MSE: %.3f' % error)
73 | 
74 | rmse, mae, mdae,r2,var = evaluation(test_rain, y_seq2seq)
75 | 
76 | print('SEQ2SEQ_rmse: %r' % rmse,
77 |       'SEQ2SEQ_mae: %r' % mae,
78 |       'SEQ2SEQ_mdae: %r' % mdae,
79 |       'SEQ2SEQ_r2: %r' % r2,
80 |       'SEQ2SEQ_var: %r' % var)
81 | 
82 | 
83 | plt.plot(test_rain, 'b', label='real')
84 | plt.plot(y_seq2seq, 'r', label='prediction',alpha=0.3)
85 | plt.legend(loc='best')
86 | plt.show()
87 | 
88 | 
89 | path = './SEQ2SEQ'
90 | f = open('{}/test_SEQ2SEQ_{}.txt'.format(path,num), 'w+')
91 | f.write('SEQ2SEQ_rmse: %r ' % rmse +
92 |         'SEQ2SEQ_mae: %r ' % mae +
93 |         'SEQ2SEQ_mdae: %r ' % mdae +
94 |         'SEQ2SEQ_r2: %r ' % r2 +
95 |         'SEQ2SEQ_var: %r ' % var)
96 | f.close()
97 | 
98 | 
99 | 
100 | 
--------------------------------------------------------------------------------
/rain_shuffle/lstm_.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from sklearn.preprocessing import *
4 | import os
5 | from sklearn.model_selection import train_test_split
6 | from eval import evaluation
7 | from LSTM_module import lstm
8 | from sklearn.metrics import *
9 | import matplotlib.pyplot as plt
10 | import argparse
11 | 
12 | # os.environ["CUDA_VISIBLE_DEVICES"]='2'
13 | # torch.cuda.set_device(2)
14 | 
15 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
16 | 
17 | # command-line arguments
18 | parser = argparse.ArgumentParser()
19 | parser.add_argument('--station', type=int, default=313, help='id of station')
20 | parser.add_argument('--epochs', type=int, default=30, help='number of epochs')
21 | parser.add_argument('--batch-size', type=int, default=100, help='batch size')
22 | parser.add_argument('--lr', type=float, default=1e-3, help='learning rate')
23 | parser.add_argument('--input-dim', type=int, default=3, help='number of hours')
24 | parser.add_argument('--seq-len', type=int, default=7, help='number of parameters per hour')
25 | parser.add_argument('--ifshuffle',action='store_true',help='shuffle data or not')
26 | # hyper-parameters
27 | parser.add_argument('--hidden_dim', type=int, default=128, help='hidden unit number')
28 | parser.add_argument('--n_layer', type=int, default=2, help='model layer number')
29 | 
30 | opt = parser.parse_args()
31 | 
32 | path = os.getcwd()
33 | # station id
34 | num = opt.station
35 | 
36 | path_2 = 'train_test_hour'
37 | 
38 | if opt.ifshuffle:
39 |     path_2 = 'train_test_hour_shuffle'
40 | 
41 | ratio = 0.9
42 | # determined by how the dataset was built
43 | input_dim = opt.input_dim
44 | seq_len = opt.seq_len
45 | # hyper-parameters
46 | batch_size = opt.batch_size
47 | learning_rate = opt.lr
48 | num_epoches = opt.epochs
49 | hidden_dim = opt.hidden_dim
50 | n_layer = opt.n_layer
51 | 
52 | 
53 | # rainfall data
54 | train_feature = np.loadtxt("{}/{}/train{}.txt".format(path,path_2, num)).astype(np.float32)
55 | train_rain = np.loadtxt("{}/{}/train_label{}.txt".format(path,path_2, num)).astype(np.float32)
56 | train_feature = scale(train_feature, axis=0)
57 | train_rain = np.reshape(train_rain, (-1, 1))
58 | 
59 | test_feature = np.loadtxt("{}/{}/test{}.txt".format(path,path_2, num)).astype(np.float32)
60 | test_rain = np.loadtxt("{}/{}/test_label{}.txt".format(path,path_2, num)).astype(np.float32)
61 | test_feature = scale(test_feature, axis=0)
62 | test_rain = np.reshape(test_rain, (-1, 1))
63 | 
64 | 
65 | 
66 | model = lstm(num,input_dim,seq_len,hidden_dim=hidden_dim,n_layer=n_layer,batch_size=batch_size,learning_rate=learning_rate,shuffle=False,device_pu=device)
67 | 
68 | eval_best = model.fit(train_feature,train_rain,num_epoches=num_epoches)
69 | print("best val:"+str(eval_best)+'\n')
70 | 
71 | y_lstm = model.predict(test_feature,test_rain)
72 | 
73 | error = mean_squared_error(test_rain, y_lstm)
74 | print('Model Test MSE: %.3f' % error)
75 | 
76 | rmse, mae, mdae,r2,var = evaluation(test_rain, y_lstm)
77 | 
78 | print('LSTM_rmse: %r' % rmse,
79 |       'LSTM_mae: %r' % mae,
80 |       'LSTM_mdae: %r' % mdae,
81 |       'LSTM_r2: %r' % r2,
82 |       'LSTM_var: %r' % var)
83 | 
84 | 
85 | plt.plot(test_rain, 'b', label='real')
86 | plt.plot(y_lstm, 'r', label='prediction',alpha=0.3)
87 | plt.legend(loc='best')
88 | plt.show()
89 | 
90 | # save the results
91 | path = './LSTM'
92 | f = open('{}/test_LSTM_{}.txt'.format(path,num), 'w+')
93 | f.write('LSTM_rmse: %r ' % rmse +
94 |         'LSTM_mae: %r ' % mae +
95 |         'LSTM_mdae: %r ' % mdae +
96 |         'LSTM_r2: %r ' % r2 +
97 |         'LSTM_var: %r ' % var)
98 | f.close()
99 | 
100 | 
101 | 
102 | 
--------------------------------------------------------------------------------
/rain_shuffle/att_seq2seq.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from sklearn.preprocessing import *
4 | import os
5 | from sklearn.model_selection import train_test_split
6 | from eval import evaluation
7 | from att_seq2seq_module import Seq2Seq
8 | from sklearn.metrics import *
9 | import matplotlib.pyplot as plt
10 | import argparse
11 | 
12 | # os.environ["CUDA_VISIBLE_DEVICES"]='0'
13 | # torch.cuda.set_device(0)
14 | 
15 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
16 | 
17 | # command-line arguments
18 | parser = argparse.ArgumentParser()
19 | parser.add_argument('--station', type=int, default=371, help='id of station')
20 | parser.add_argument('--epochs', type=int, default=100, help='number of epochs')
21 | parser.add_argument('--batch-size', type=int, default=30, help='batch size')
22 | parser.add_argument('--lr', type=float, default=1e-3, help='learning rate')
23 | parser.add_argument('--input-dim', type=int, default=7, help='number of hours')
24 | parser.add_argument('--seq-len', type=int, default=3, help='number of parameters per hour')
25 | parser.add_argument('--ifshuffle',action='store_true',help='shuffle data or not')
26 | opt = parser.parse_args()
27 | 
28 | path = os.getcwd()
29 | # station id
30 | num = opt.station
31 | 
32 | path_2 = 'train_test_hour'
33 | 
34 | if opt.ifshuffle:
35 |     path_2 = 'train_test_hour_shuffle'
36 | 
37 | ratio = 0.9
38 | # determined by how the dataset was built
39 | input_dim = opt.input_dim
40 | seq_len = opt.seq_len
41 | # hyper-parameters
42 | batch_size = opt.batch_size
43 | learning_rate = opt.lr
44 | num_epoches = opt.epochs
45 | 
46 | 
47 | # rainfall data
48 | train_feature = np.loadtxt("{}/{}/train{}.txt".format(path,path_2, num)).astype(np.float32)
49 | train_rain = np.loadtxt("{}/{}/train_label{}.txt".format(path,path_2, num)).astype(np.float32)
50 | train_feature = scale(train_feature, axis=0)
51 | train_rain = np.reshape(train_rain, (-1, 1))
52 | 
53 | test_feature = np.loadtxt("{}/{}/test{}.txt".format(path,path_2, num)).astype(np.float32)
54 | test_rain = np.loadtxt("{}/{}/test_label{}.txt".format(path,path_2, num)).astype(np.float32)
55 | test_feature = scale(test_feature, axis=0)
56 | test_rain = np.reshape(test_rain, (-1, 1))
57 | 
58 | 
59 | model = Seq2Seq(num,input_dim,seq_len,output_dim=1,hidden_size=128,dropout=0.1,learning_rate=learning_rate,batch_size=batch_size,device_pu=device)
60 | 
61 | eval_best = model.fit(train_feature,train_rain,num_epoches=num_epoches,shuffle=False)
62 | print("best val:"+str(eval_best)+'\n')
63 | 
64 | y_seq2seq,y_attention = model.predict(test_feature,test_rain)
65 | 
66 | error = mean_squared_error(test_rain, y_seq2seq)
67 | print('Model Test MSE: %.3f' % error)
68 | 
69 | rmse, mae, mdae,r2,var = evaluation(test_rain, y_seq2seq)
70 | 
71 | print('ATTSEQ2SEQ_rmse: %r' % rmse,
72 |       'ATTSEQ2SEQ_mae: %r' % mae,
73 |       'ATTSEQ2SEQ_mdae: %r' % mdae,
74 |       'ATTSEQ2SEQ_r2: %r' % r2,
75 |       'ATTSEQ2SEQ_var: %r' % var)
76 | 
77 | 
78 | plt.plot(test_rain, 'b', label='real')
79 | plt.plot(y_seq2seq, 'r', label='prediction',alpha=0.3)
80 | plt.legend(loc='best')
81 | plt.show()
82 | 
83 | 
84 | fig = plt.figure()
85 | ax = fig.add_subplot(111)
86 | cax = ax.matshow(y_attention[:5,:], cmap='bone')
87 | fig.colorbar(cax)
88 | plt.show()
89 | 
90 | path = './ATT_SEQ2SEQ'
91 | f = open('{}/test_ATT_SEQ2SEQ_{}.txt'.format(path,num), 'w+')
92 | f.write('ATT_SEQ2SEQ_rmse: %r ' % rmse +
93 |         'ATT_SEQ2SEQ_mae: %r ' % mae +
94 |         'ATT_SEQ2SEQ_mdae: %r ' % mdae +
95 |         'ATT_SEQ2SEQ_r2: %r ' % r2 +
96 |         'ATT_SEQ2SEQ_var: %r ' % var)
97 | f.close()
98 | 
99 | 
100 | 
101 | 
--------------------------------------------------------------------------------
/rain_shuffle/SVR_sigmoid.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from sklearn.preprocessing import *
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | from sklearn.svm import SVR
6 | from sklearn.metrics import mean_squared_error
7 | import os
8 | from sklearn.model_selection import train_test_split
9 | import joblib
10 | from eval import evaluation
11 | import argparse
12 | 
13 | # command-line arguments
14 | parser = argparse.ArgumentParser()
15 | parser.add_argument('--station', type=int, default=371, help='id of station')
16 | opt = parser.parse_args()
17 | 
18 | path = os.getcwd()
19 | # station id
20 | num = opt.station
21 | path_2 = 'train_test_hour'
22 | ratio = 0.9
23 | 
24 | feature = np.loadtxt("{}/{}/train{}.txt".format(path,path_2, num))
25 | rain = np.loadtxt("{}/{}/label{}.txt".format(path,path_2, num))
26 | 
27 | feature = scale(feature, axis=0)
28 | rain = np.reshape(rain, (-1, 1))
29 | # min_max_scaler = MinMaxScaler()
30 | # rain = min_max_scaler.fit_transform(rain)
31 | 
32 | # rainfall data
33 | train_feature = feature[:int(ratio * len(feature)), :]
34 | train_rain = rain[:int(ratio * len(feature)), :]
35 | 
36 | test_feature = feature[int(ratio * len(feature)):, :]
37 | test_rain = rain[int(ratio * len(feature)):, :]
38 | 
39 | x_train, x_test, y_train, y_test = train_test_split(train_feature, train_rain, test_size=0.22, random_state=42)
40 | 
41 | 
42 | """
43 | hyper-parameter search ranges
44 | """
45 | n_gamma = np.arange(0.005,0.05,0.005)
46 | n_C = np.arange(0.1,1,0.1)
47 | n_degree = np.arange(1,10,1)
48 | paramters = ['C','gamma','degree']
49 | kernel = 'sigmoid'
50 | 
51 | path = './SVR_{}'.format(kernel)
52 | 
53 | 
54 | gamma_g = 0.01
55 | c_g = 50
56 | degree_g = 3
57 | 
58 | if os.path.exists(path):
59 |     pass
60 | else:
61 |     os.mkdir(path)
62 | 
63 | 
64 | 
65 | error = 100
66 | f = open('{}/train_{}_{}.txt'.format(path,num,paramters[0]), 'w+')
67 | for c in n_C:
68 |     # fit the SVR model
69 |     regressor = SVR(kernel = kernel,gamma=gamma_g,C=c,degree=degree_g)
70 |     regressor.fit(x_train, y_train.ravel())
71 |     y_pre_train = regressor.predict(x_train)
72 |     y_pre_test = regressor.predict(x_test)
73 |     error_1 = mean_squared_error(y_train, y_pre_train)
74 |     print('Train MSE: %.3f' % error_1)
75 |     error_2 = mean_squared_error(y_test, y_pre_test)
76 |     print('Val MSE: %.3f' % error_2)
77 | 
78 |     f.write('{}: '.format(paramters[0]) + str(c) +
79 |             " Train_MSE: " + str(error_1) +' Val_MSE: '+ str(error_2)+'\n')
80 | 
81 |     if error_2 < error:
82 |         c_g = c
83 |         error = error_2
84 | f.close()
85 | 
86 | 
87 | f = open('{}/train_{}_{}.txt'.format(path,num,paramters[1]), 'w+')
88 | for gamma in n_gamma:
89 |     # fit the SVR model
90 |     regressor = SVR(kernel = kernel,gamma=gamma,C=c_g,degree=degree_g)
91 |     regressor.fit(x_train, y_train.ravel())
92 |     y_pre_train = regressor.predict(x_train)
93 |     y_pre_test = regressor.predict(x_test)
94 |     error_1 = mean_squared_error(y_train, y_pre_train)
95 |     print('Train MSE: %.3f' % error_1)
96 |     error_2 = mean_squared_error(y_test, y_pre_test)
97 |     print('Val MSE: %.3f' % error_2)
98 | 
99 |     f.write('{}: '.format(paramters[1]) + str(gamma) +
100 |             " Train_MSE: " + str(error_1) +' Val_MSE: '+ str(error_2)+'\n')
101 | 
102 |     if error_2 < error:
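The script sweeps C, gamma and degree one at a time, fixing the best value of each before tuning the next. scikit-learn's GridSearchCV performs the joint search directly; a sketch over the same ranges:

    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVR

    grid = GridSearchCV(SVR(kernel='sigmoid'),
                        {'C': n_C, 'gamma': n_gamma},
                        scoring='neg_mean_squared_error', cv=3)
    grid.fit(x_train, y_train.ravel())
    print(grid.best_params_)
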
/rain_shuffle/MLP_module.py:
--------------------------------------------------------------------------------
52 |         if self.n_layer >= 2:
53 |             out = self.classifier1(x)
54 |             out = torch.relu(out)
55 |         if self.n_layer == 3:
56 |             out = self.classifier1_2(out)
57 |         if self.n_layer == 4:
58 |             out = self.classifier2_3(out)
59 |         if self.n_layer == 5:
60 |             out = self.classifier3_4(out)
61 |         out = self.classifier2(out)
62 |         out = torch.relu(out)
63 |         return out
64 | 
65 | 
66 | class MLP():
67 |     def __init__(self,num,input_dim,hidden_dim,n_layer,batch_size = 100,learning_rate = 2e-3,shuffle=True,device_pu='cpu'):
68 |         self.station = num
69 |         self.input_dim = input_dim
70 |         self.hidden_dim = hidden_dim
71 |         self.n_layer = n_layer
72 |         self.bs = batch_size
73 |         self.lr = learning_rate
74 |         self.shuffle = shuffle
75 |         self.device = device_pu
76 |         self.model = MLP_module(input_dim, hidden_dim, n_layer, 1).to(self.device)
77 |         self.criterion = nn.MSELoss()
78 |         self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
79 | 
80 |     def fit(self,data,label,num_epoches = 100):
81 | 
82 |         x_train, x_test, y_train, y_test = train_test_split(data, label, test_size=0.22, random_state=42)
83 | 
84 |         train = GetLoader(x_train, y_train)
85 |         data_train = torch.utils.data.DataLoader(train, batch_size=self.bs, shuffle=self.shuffle)
86 |         test = GetLoader(x_test, y_test)
87 |         data_test = torch.utils.data.DataLoader(test, batch_size=self.bs, shuffle=self.shuffle)
88 | 
89 |         if os.path.exists(path):
90 |             pass
91 |         else:
92 |             os.mkdir(path)
93 | 
94 |         eval_loss_best = np.inf
95 | 
96 |         f = open('{}/train_{}_{}_{}_{}.txt'.format(path,self.station,self.hidden_dim, self.n_layer,self.lr), 'w+')
97 |         # start training
98 |         for epoch in range(num_epoches):
99 |             self.model.train()
100 |             print('epoch {}'.format(epoch + 1))
101 |             print('**************************************')
102 |             running_loss = 0.0
103 | 
104 |             for i, data in enumerate(data_train, 1):
105 |                 """
106 |                 This per-batch shuffling is not ideal: the data should be shuffled once up front and then drawn in a fixed order, otherwise samples are used unevenly.
107 |                 """
108 |                 img, label = data
109 |                 img = Variable(img).to(self.device)
110 |                 label = Variable(label).to(self.device)
111 | 
112 |                 # forward pass
113 |                 out = self.model(img)
114 |                 loss = self.criterion(out, label)
115 |                 running_loss += loss.data.item() * label.size(0)
116 |                 # backward pass
117 |                 self.optimizer.zero_grad()
118 |                 loss.backward()
119 |                 self.optimizer.step()
120 | 
121 |             train_loss = running_loss / (len(y_train))
122 |             print('Finish {} epoch, Loss: {:.6f}'.format(
123 |                 epoch + 1, train_loss))
124 | 
125 |             self.model.eval()
126 |             eval_loss = 0.
127 |             for data in data_test:
128 |                 img, label = data
129 | 
130 |                 img = Variable(img).to(self.device)
131 |                 label = Variable(label).to(self.device)
132 |                 out = self.model(img)
133 |                 loss = self.criterion(out, label)
134 |                 eval_loss += loss.data.item() * label.size(0)
135 |             val_loss = eval_loss / (len(y_test))
136 |             print('Val Loss: {:.6f}'.format(val_loss))
137 | 
138 | 
139 |             f.write(" Train_MSE: " + str(train_loss) + ' Val_MSE: ' + str(val_loss) + '\n')
140 | 
141 | 
142 |             if val_loss < eval_loss_best:
143 |                 eval_loss_best = val_loss
144 |                 self.eval = eval_loss_best
145 |                 torch.save(self.model, '{}/mlp_{}_{}_{}_{}.pth'.format(path,self.station,self.hidden_dim, self.n_layer,self.lr))
146 |         f.close()
147 | 
148 |         return self.eval
149 | 
150 |     def predict(self,test_data,test_label,):
151 | 
152 |         test_model = torch.load('{}/mlp_{}_{}_{}_{}.pth'.format(path,self.station,self.hidden_dim, self.n_layer,self.lr)).to(self.device)
153 | 
154 |         test_loss = 0
155 |         criterion = nn.MSELoss()
156 |         test = GetLoader(test_data, test_label)
157 |         data_test = torch.utils.data.DataLoader(test, batch_size=1, shuffle=False)
158 | 
159 |         y_mlp = []
160 |         for data in data_test:
161 | 
162 |             img, label = data
163 |             img = Variable(img).to(self.device)
164 |             label = Variable(label).to(self.device)
165 |             out = test_model(img)
166 |             loss = criterion(out, label)
167 |             test_loss += loss.data
168 |             y_mlp.append(out.data)
169 | 
170 | 
171 |         print('Test Loss: {:.6f}'.format(test_loss / (len(
172 |             test_label))))
173 | 
174 |         y_mlp = np.array(y_mlp).squeeze()[:,np.newaxis]
175 | 
176 | 
177 |         return y_mlp
178 | 
179 | 
180 | 
181 | 
182 | 
183 | 
184 | 
185 | 
186 | 
--------------------------------------------------------------------------------
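GetLoader is defined in the part of MLP_module.py elided above; from its use with DataLoader it is a paired-array Dataset, roughly:

    import torch

    class GetLoader(torch.utils.data.Dataset):
        """Minimal (data, label) Dataset, as used by MLP.fit above."""
        def __init__(self, data, labels):
            self.data, self.labels = data, labels
        def __getitem__(self, index):
            return self.data[index], self.labels[index]
        def __len__(self):
            return len(self.data)
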
/rain_shuffle/run_no.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # batch run script
3 | clear;
4 | echo 'Hello';
5 | # source activate py3
6 | 
7 | # run ARIMA
8 | # python ARIMA.py --station 312
9 | # python ARIMA.py --station 313
10 | # python ARIMA.py --station 314
11 | # python ARIMA.py --station 315
12 | # python ARIMA.py --station 316
13 | # python ARIMA.py --station 371
14 | # python ARIMA.py --station 372
15 | # python ARIMA.py --station 373
16 | # python ARIMA.py --station 374
17 | # python ARIMA.py --station 393
18 | # python ARIMA.py --station 394
19 | # python ARIMA.py --station 396
20 | 
21 | # # run SVR_rbf
22 | python SVR_rbf.py --station 312
23 | python SVR_rbf.py --station 313
24 | python SVR_rbf.py --station 314
25 | python SVR_rbf.py --station 315
26 | python SVR_rbf.py --station 316
27 | python SVR_rbf.py --station 371
28 | python SVR_rbf.py --station 372
29 | python SVR_rbf.py --station 373
30 | python SVR_rbf.py --station 374
31 | python SVR_rbf.py --station 393
32 | python SVR_rbf.py --station 394
33 | python SVR_rbf.py --station 396
34 | 
35 | 
36 | # run GBRT
37 | python GBRT.py --station 312
38 | python GBRT.py --station 313
39 | python GBRT.py --station 314
40 | python GBRT.py --station 315
41 | python GBRT.py --station 316
42 | python GBRT.py --station 371
43 | python GBRT.py --station 372
44 | python GBRT.py --station 373
45 | python GBRT.py --station 374
46 | python GBRT.py --station 393
47 | python GBRT.py --station 394
48 | python GBRT.py --station 396
49 | 
50 | 
51 | # # run lstm_
52 | python lstm_.py --station 312 --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 3 --seq-len 7
53 | python lstm_.py --station 313 --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 3 --seq-len 7
54 | python lstm_.py --station 314 --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 3 --seq-len 7
55 | python lstm_.py --station 315 --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 3 --seq-len 7
56 | python lstm_.py --station 316 --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 3 --seq-len 7
57 | python lstm_.py --station 371 --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 3 --seq-len 7
58 | python lstm_.py --station 372 --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 3 --seq-len 7
59 | python lstm_.py --station 373 --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 3 --seq-len 7
60 | python lstm_.py --station 374 --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 3 --seq-len 7
61 | python lstm_.py --station 393 --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 3 --seq-len 7
62 | python lstm_.py --station 394 --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 3 --seq-len 7
63 | python lstm_.py --station 396 --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 3 --seq-len 7
64 | 
65 | 
66 | # # run mlp
67 | python mlp.py --station 312 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 3 --seq-len 7
68 | python mlp.py --station 313 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 3 --seq-len 7
69 | python mlp.py --station 314 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 3 --seq-len 7
70 | python mlp.py --station 315 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 3 --seq-len 7
71 | python mlp.py --station 316 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 3 --seq-len 7
72 | python mlp.py --station 371 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 3 --seq-len 7
73 | python mlp.py --station 372 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 3 --seq-len 7
74 | python mlp.py --station 373 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 3 --seq-len 7
75 | python mlp.py --station 374 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 3 --seq-len 7
76 | python mlp.py --station 393 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 3 --seq-len 7
77 | python mlp.py --station 394 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 3 --seq-len 7
78 | python mlp.py --station 396 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 3 --seq-len 7
79 | 
80 | # # # run seq2seq
81 | python seq2seq.py --station 312 --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 3 --seq-len 7
82 | python seq2seq.py --station 313 --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 3 --seq-len 7
83 | python seq2seq.py --station 314 --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 3 --seq-len 7
84 | python seq2seq.py --station 315 --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 3 --seq-len 7
85 | python seq2seq.py --station 316 --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 3 --seq-len 7
86 | python seq2seq.py --station 371 --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 3 --seq-len 7
87 | python seq2seq.py --station 372 --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 3 --seq-len 7
88 | python seq2seq.py --station 373 --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 3 --seq-len 7
89 | python seq2seq.py --station 374 --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 3 --seq-len 7
90 | python seq2seq.py --station 393 --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 3 --seq-len 7
91 | python seq2seq.py --station 394 --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 3 --seq-len 7
92 | python seq2seq.py --station 396 --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 3 --seq-len 7
93 | 
94 | 
95 | # # # run att_seq2seq
96 | python att_seq2seq.py --station 312 --epochs 120 --batch-size 32 --lr 5e-3 --input-dim 3 --seq-len 7
97 | python att_seq2seq.py --station 313 --epochs 120 --batch-size 32 --lr 5e-3 --input-dim 3 --seq-len 7
98 | python att_seq2seq.py --station 314 --epochs 120 --batch-size 32 --lr 5e-3 --input-dim 3 --seq-len 7
99 | python att_seq2seq.py --station 315 --epochs 120 --batch-size 32 --lr 5e-3 --input-dim 3 --seq-len 7
100 | python att_seq2seq.py --station 316 --epochs 120 --batch-size 32 --lr 5e-3 --input-dim 3 --seq-len 7
101 | python att_seq2seq.py --station 371 --epochs 120 --batch-size 32 --lr 5e-3 --input-dim 3 --seq-len 7
102 | python att_seq2seq.py --station 372 --epochs 120 --batch-size 32 --lr 5e-3 --input-dim 3 --seq-len 7
103 | python att_seq2seq.py --station 373 --epochs 120 --batch-size 32 --lr 5e-3 --input-dim 3 --seq-len 7
104 | python att_seq2seq.py --station 374 --epochs 120 --batch-size 32 --lr 5e-3 --input-dim 3 --seq-len 7
105 | python att_seq2seq.py --station 393 --epochs 120 --batch-size 32 --lr 5e-3 --input-dim 3 --seq-len 7
106 | python att_seq2seq.py --station 394 --epochs 120 --batch-size 32 --lr 5e-3 --input-dim 3 --seq-len 7
107 | python att_seq2seq.py --station 396 --epochs 120 --batch-size 32 --lr 5e-3 --input-dim 3 --seq-len 7
108 | 
109 | 
110 | # # # run XGB
111 | python XGB.py --station 312
112 | python XGB.py --station 313
113 | python XGB.py --station 314
114 | python XGB.py --station 315
115 | python XGB.py --station 316
116 | python XGB.py --station 371
117 | python XGB.py --station 372
118 | python XGB.py --station 373
119 | python XGB.py --station 374
120 | python XGB.py --station 393
121 | python XGB.py --station 394
122 | python XGB.py --station 396
--------------------------------------------------------------------------------
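The per-station command lists in these run scripts are fully repetitive; one loop covers each block. A Python sketch mirroring the lstm_ block above:

    import subprocess

    stations = [312, 313, 314, 315, 316, 371, 372, 373, 374, 393, 394, 396]
    for s in stations:
        subprocess.run(['python', 'lstm_.py', '--station', str(s),
                        '--epochs', '100', '--batch-size', '100', '--lr', '1e-3',
                        '--input-dim', '3', '--seq-len', '7'], check=True)
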
--input-dim 2 --seq-len 7 55 | python lstm_.py --station 315 --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 2 --seq-len 7 56 | python lstm_.py --station 316 --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 2 --seq-len 7 57 | python lstm_.py --station 371 --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 2 --seq-len 7 58 | python lstm_.py --station 372 --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 2 --seq-len 7 59 | python lstm_.py --station 373 --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 2 --seq-len 7 60 | python lstm_.py --station 374 --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 2 --seq-len 7 61 | python lstm_.py --station 393 --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 2 --seq-len 7 62 | python lstm_.py --station 394 --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 2 --seq-len 7 63 | python lstm_.py --station 396 --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 2 --seq-len 7 64 | 65 | 66 | # #运行mlp 67 | python mlp.py --station 312 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 2 --seq-len 7 68 | python mlp.py --station 313 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 2 --seq-len 7 69 | python mlp.py --station 314 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 2 --seq-len 7 70 | python mlp.py --station 315 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 2 --seq-len 7 71 | python mlp.py --station 316 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 2 --seq-len 7 72 | python mlp.py --station 371 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 2 --seq-len 7 73 | python mlp.py --station 372 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 2 --seq-len 7 74 | python mlp.py --station 373 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 2 --seq-len 7 75 | python mlp.py --station 374 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 2 --seq-len 7 76 | python mlp.py --station 393 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 2 --seq-len 7 77 | python mlp.py --station 394 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 2 --seq-len 7 78 | python mlp.py --station 396 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 2 --seq-len 7 79 | 80 | # # #运行seq2seq 81 | python seq2seq.py --station 312 --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 2 --seq-len 7 82 | python seq2seq.py --station 313 --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 2 --seq-len 7 83 | python seq2seq.py --station 314 --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 2 --seq-len 7 84 | python seq2seq.py --station 315 --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 2 --seq-len 7 85 | python seq2seq.py --station 316 --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 2 --seq-len 7 86 | python seq2seq.py --station 371 --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 2 --seq-len 7 87 | python seq2seq.py --station 372 --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 2 --seq-len 7 88 | python seq2seq.py --station 373 --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 2 --seq-len 7 89 | python seq2seq.py --station 374 --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 2 --seq-len 7 90 | python seq2seq.py --station 393 --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 2 --seq-len 7 91 | python seq2seq.py --station 394 --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 2 --seq-len 7 92 | python seq2seq.py --station 396 --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 2 --seq-len 7 93 | 94 | 95 | # # #运行att_seq2seq 96 | python att_seq2seq.py --station 312 --epochs 120 --batch-size 32 --lr 5e-3 --input-dim 2 --seq-len 7 97 | python att_seq2seq.py 
--------------------------------------------------------------------------------
/rain_shuffle/run_no_time1.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# first bash script
clear;
echo 'Hello';
# source activate py3

STATIONS="312 313 314 315 316 371 372 373 374 393 394 396"

# run ARIMA
# for s in $STATIONS; do python ARIMA.py --station $s; done

# run SVR_rbf
# for s in $STATIONS; do python SVR_rbf.py --station $s; done

# run GBRT
# for s in $STATIONS; do python GBRT.py --station $s; done

# run mlp
for s in $STATIONS; do
    python mlp.py --station $s --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 1 --seq-len 7
done

# run lstm_
for s in $STATIONS; do
    python lstm_.py --station $s --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 1 --seq-len 7
done

# run seq2seq
for s in $STATIONS; do
    python seq2seq.py --station $s --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 1 --seq-len 7
done

# run att_seq2seq
# for s in $STATIONS; do
#     python att_seq2seq.py --station $s --epochs 120 --batch-size 32 --lr 5e-3 --input-dim 1 --seq-len 7
# done

# run XGB
# for s in $STATIONS; do python XGB.py --station $s; done
--------------------------------------------------------------------------------
/rain_shuffle/LSTM_module.py:
--------------------------------------------------------------------------------
import os

import numpy as np
import torch
from torch import nn, optim
from torch.autograd import Variable
from sklearn.model_selection import train_test_split

path = './LSTM'


class GetLoader(torch.utils.data.Dataset):
    """Wrap feature/label arrays as a torch Dataset, overriding __getitem__() and __len__()."""

    def __init__(self, data_root, data_label):
        # store the data and labels
        self.data = data_root
        self.label = data_label

    # index comes from the DataLoader's batching; return one sample and its label
    def __getitem__(self, index):
        data = self.data[index]
        labels = self.label[index]
        return data, labels

    # the DataLoader needs the dataset size in order to split it into batches
    def __len__(self):
        return len(self.data)


# Recurrent-network model: an LSTM followed by a linear head on the last time step
class LSTM_module(nn.Module):
    def __init__(self, in_dim, hidden_dim, n_layer, n_class):
        super(LSTM_module, self).__init__()
        self.n_layer = n_layer
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(in_dim, hidden_dim, n_layer, batch_first=True)
        self.classifier = nn.Linear(hidden_dim, n_class)

    def forward(self, x):
        out, _ = self.lstm(x, None)
        # use only the hidden state of the last time step
        out = self.classifier(out[:, -1, :])
        out = torch.relu(out)  # rainfall is non-negative, so clamp the output at zero
        return out
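# A quick shape sanity check (sketch, not part of the original file): LSTM_module
# maps a batch of shape (batch, seq_len, in_dim) to predictions of shape (batch, n_class):
#     net = LSTM_module(in_dim=3, hidden_dim=128, n_layer=2, n_class=1)
#     assert net(torch.zeros(8, 7, 3)).shape == (8, 1)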
class lstm():
    """Training/prediction wrapper around LSTM_module."""

    def __init__(self, num, input_dim, seq_len, hidden_dim, n_layer, batch_size=100, learning_rate=1e-3, shuffle=True,
                 device_pu='cpu'):
        self.station = num
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.n_layer = n_layer
        self.seq_len = seq_len
        self.bs = batch_size
        self.lr = learning_rate
        self.shuffle = shuffle
        self.device = device_pu
        self.model = LSTM_module(input_dim, hidden_dim, n_layer, 1).to(self.device)
        self.criterion = nn.MSELoss()
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)

    def fit(self, data, label, num_epoches=100):

        x_train, x_test, y_train, y_test = train_test_split(data, label, test_size=0.22, random_state=42)

        train = GetLoader(x_train, y_train)
        data_train = torch.utils.data.DataLoader(train, batch_size=self.bs, shuffle=self.shuffle)
        test = GetLoader(x_test, y_test)
        data_test = torch.utils.data.DataLoader(test, batch_size=self.bs, shuffle=self.shuffle)

        os.makedirs(path, exist_ok=True)

        eval_loss_best = np.inf
        self.eval = eval_loss_best

        # Retrain from scratch if the loss freezes within the first few epochs
        # (e.g. a bad initialisation); accept the run once the loss is still
        # improving after epoch 3.
        need_restart = True
        while need_restart:
            self.model = LSTM_module(self.input_dim, self.hidden_dim, self.n_layer, 1).to(self.device)
            self.criterion = nn.MSELoss()
            self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
            f = open('{}/train_{}_{}_{}_{}.txt'.format(path, self.station, self.hidden_dim, self.n_layer, self.lr), 'w+')
            train_loss_last = np.inf
            # start training
            for epoch in range(num_epoches):
                self.model.train()
                print('epoch {}'.format(epoch + 1))
                print('**************************************')
                running_loss = 0.0

                for i, data in enumerate(data_train, 1):
                    # NOTE (original author): reshuffling every epoch like this is not
                    # ideal; shuffling once and then drawing fixed batches would use
                    # the samples more evenly.
                    inputs, label = data
                    inputs = Variable(inputs).to(self.device)
                    label = Variable(label).to(self.device)
                    # forward pass
                    out = self.model(inputs.view(-1, self.seq_len, self.input_dim))
                    loss = self.criterion(out, label)
                    running_loss += loss.data.item() * label.size(0)
                    # backward pass
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()

                train_loss = running_loss / (len(y_train))
                print('Finish {} epoch, Loss: {:.6f}'.format(epoch + 1, train_loss))

                if train_loss_last == train_loss and epoch < 3:
                    break  # loss frozen early: restart with a fresh initialisation
                if train_loss_last > train_loss and epoch >= 3:
                    need_restart = False  # still improving after warm-up: accept this run

                train_loss_last = train_loss

                self.model.eval()
                eval_loss = 0.
                for data in data_test:
                    inputs, label = data

                    inputs = Variable(inputs).to(self.device)
                    label = Variable(label).to(self.device)
                    out = self.model(inputs.view(-1, self.seq_len, self.input_dim))
                    loss = self.criterion(out, label)
                    eval_loss += loss.data.item() * label.size(0)
                val_loss = eval_loss / (len(y_test))
                print('Val Loss: {:.6f}'.format(val_loss))

                f.write(" Train_MSE: " + str(train_loss) + ' Val_MSE: ' + str(val_loss) + '\n')

                # keep the checkpoint with the best validation MSE
                if val_loss < eval_loss_best:
                    eval_loss_best = val_loss
                    self.eval = eval_loss_best
                    torch.save(self.model,
                               '{}/lstm_{}_{}_{}_{}.pth'.format(path, self.station, self.hidden_dim, self.n_layer, self.lr))
            f.close()

        return self.eval

    def predict(self, test_data, test_label):

        test_model = torch.load(
            '{}/lstm_{}_{}_{}_{}.pth'.format(path, self.station, self.hidden_dim, self.n_layer, self.lr)).to(
            self.device)

        test_loss = 0
        criterion = nn.MSELoss()
        test = GetLoader(test_data, test_label)
        data_test = torch.utils.data.DataLoader(test, batch_size=1, shuffle=False)

        y_mlp = []
        for data in data_test:
            inputs, label = data
            inputs = Variable(inputs).to(self.device)
            label = Variable(label).to(self.device)
            out = test_model(inputs.view(-1, self.seq_len, self.input_dim))
            loss = criterion(out, label)
            test_loss += loss.data.item()
            y_mlp.append(out.data.cpu().numpy())

        print('Test Loss: {:.6f}'.format(test_loss / (len(test_label))))

        y_mlp = np.array(y_mlp).squeeze()[:, np.newaxis]

        return y_mlp
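# Minimal usage sketch (illustrative values only: they mirror the hyper-parameters
# used in the run scripts and are not prescribed by this module):
#     net = lstm(num=371, input_dim=3, seq_len=7, hidden_dim=128, n_layer=2,
#                batch_size=100, learning_rate=1e-3, device_pu='cpu')
#     best_val_mse = net.fit(torch.Tensor(train_x), torch.Tensor(train_y), num_epoches=100)
#     y_pred = net.predict(torch.Tensor(test_x), torch.Tensor(test_y))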
--------------------------------------------------------------------------------
/rain_shuffle/run_time1.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# first bash script
clear;
echo 'Hello';
# source activate py3

STATIONS="312 313 314 315 316 371 372 373 374 393 394 396"

# run ARIMA
# for s in $STATIONS; do python ARIMA.py --station $s; done

# run SVR_rbf
for s in $STATIONS; do python SVR_rbf.py --station $s --ifshuffle; done

# run GBRT
for s in $STATIONS; do python GBRT.py --station $s --ifshuffle; done

# run lstm_
for s in $STATIONS; do
    python lstm_.py --station $s --ifshuffle --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 1 --seq-len 7
done

# run mlp
for s in $STATIONS; do
    python mlp.py --station $s --ifshuffle --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 1 --seq-len 7
done

# run seq2seq
for s in $STATIONS; do
    python seq2seq.py --station $s --ifshuffle --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 1 --seq-len 7
done

# run att_seq2seq
for s in $STATIONS; do
    python att_seq2seq.py --station $s --ifshuffle --epochs 120 --batch-size 32 --lr 5e-3 --input-dim 1 --seq-len 7
done

# run XGB
# for s in $STATIONS; do python XGB.py --station $s --ifshuffle; done
--------------------------------------------------------------------------------
/rain_shuffle/run_time2.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# first bash script
clear;
echo 'Hello';
# source activate py3

STATIONS="312 313 314 315 316 371 372 373 374 393 394 396"

# run ARIMA
# for s in $STATIONS; do python ARIMA.py --station $s; done

# run SVR_rbf
for s in $STATIONS; do python SVR_rbf.py --station $s --ifshuffle; done

# run GBRT
for s in $STATIONS; do python GBRT.py --station $s --ifshuffle; done

# run lstm_
for s in $STATIONS; do
    python lstm_.py --station $s --ifshuffle --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 2 --seq-len 7
done

# run mlp
for s in $STATIONS; do
    python mlp.py --station $s --ifshuffle --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 2 --seq-len 7
done

# run seq2seq
for s in $STATIONS; do
    python seq2seq.py --station $s --ifshuffle --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 2 --seq-len 7
done

# run att_seq2seq
for s in $STATIONS; do
    python att_seq2seq.py --station $s --ifshuffle --epochs 120 --batch-size 32 --lr 5e-3 --input-dim 2 --seq-len 7
done

# run XGB
# for s in $STATIONS; do python XGB.py --station $s --ifshuffle; done
--------------------------------------------------------------------------------
/rain_shuffle/run.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# first bash script
clear;
echo 'Hello';
# source activate py3

STATIONS="312 313 314 315 316 371 372 373 374 393 394 396"

# run ARIMA
# for s in $STATIONS; do python ARIMA.py --station $s; done

# run SVR_rbf
# for s in $STATIONS; do python SVR_rbf.py --station $s --ifshuffle; done

# run GBRT
# for s in $STATIONS; do python GBRT.py --station $s --ifshuffle; done

# run lstm_
# for s in $STATIONS; do
#     python lstm_.py --station $s --ifshuffle --epochs 100 --batch-size 100 --lr 1e-3 --input-dim 3 --seq-len 7
# done

# run mlp (only station 312 is currently enabled)
python mlp.py --station 312 --ifshuffle --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 3 --seq-len 7
# for s in 313 314 315 316 371 372 373 374 393 394 396; do
#     python mlp.py --station $s --ifshuffle --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 3 --seq-len 7
# done

# run seq2seq
# for s in $STATIONS; do
#     python seq2seq.py --station $s --ifshuffle --epochs 100 --batch-size 30 --lr 5e-3 --input-dim 3 --seq-len 7
# done

# run att_seq2seq
# for s in $STATIONS; do
#     python att_seq2seq.py --station $s --ifshuffle --epochs 120 --batch-size 32 --lr 5e-3 --input-dim 3 --seq-len 7
# done

# run XGB
# for s in $STATIONS; do python XGB.py --station $s --ifshuffle; done
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Environment setup

Steps to get everything running on a clean Windows machine:

### Install the latest Anaconda3 (release 2020.02)

See the official website.

### Install the PyTorch 1.5 CPU build from the Tsinghua mirror:

`conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/`

`conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/`

`conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/`

`conda config --set show_channel_urls yes`

`conda install pytorch torchvision cpuonly`

### Install xgboost from the Tsinghua mirror

`pip install xgboost -i https://pypi.tuna.tsinghua.edu.cn/simple`



# The rain_shuffle code

This part implements the rainfall prediction itself.

It consists of the following pieces.

## hour_data folder

Holds the preprocessed data for each weather-station id, in csv format.

## train_test_hour folder

Holds the feature and label files built from each station's preprocessed data, split 9:1 into a training and a test set. Here the sets are not shuffled: the split follows chronological order.

## train_test_hour_shuffle folder

Holds the feature and label files built from each station's preprocessed data, split 9:1 into a training and a test set, with the split shuffled.

## MODEL folder

Stores the trained single models used for model ensembling; these belong to the shuffled dataset.

## MODEL_NO folder

Stores the trained single models used for model ensembling; these belong to the unshuffled dataset.

## hour_cat.py

Splits the raw data file by weather station, keeps the stations with larger amounts of data, fills in the missing values, and saves the result in the `hour_data` folder.

## corr_data.py

Tests the correlation between the different weather variables using the Pearson coefficient^[1]^ and plots the correlation curves.

> [[1] reference](https://baike.baidu.com/item/皮尔逊相关系数/12712835?fromtitle=皮尔逊系数&fromid=16955304&fr=aladdin)

## hour2txt_ifshuffle.py

Its parameters select how the data are processed: whether the split is shuffled, how many hours each feature window spans, and the training-set ratio.

```python
time_ser = 3       # hours per feature window
ratio = 0.9        # training-set ratio
ifshuffle = False  # shuffle the split or not
```

These are the adjustable parameters.

## eval.py

Contains:

1. The evaluation metrics for the models:

```python
def evaluation(a, b):
    rmse = np.sqrt(mean_squared_error(a, b))
    mae = mean_absolute_error(a, b)
    var = explained_variance_score(a, b)
    mdae = median_absolute_error(a, b)
    r2 = r2_score(a, b)
    return rmse, mae, mdae, r2, var
```

See the sklearn.metrics library functions for the details.

2. Post-processing of the model outputs.
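For reference, a minimal call of `evaluation` looks like the sketch below (the numbers are made up):

```python
import numpy as np
from eval import evaluation

y_true = np.array([0.0, 1.2, 0.4])
y_pred = np.array([0.1, 0.9, 0.5])
rmse, mae, mdae, r2, var = evaluation(y_true, y_pred)
print(rmse, mae, mdae, r2, var)
```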
## ARIMA.py

Trains an ARIMA model with one-step prediction: all available data are used to predict the next time step.

Uses 10% of the data as the test set and plots the predictions against it.

## SVR_rbf.py

SVR model with an rbf kernel.

Train : validation : test = 7 : 2 : 1.

Features are normalised with the sklearn.preprocessing.scale function.

Hyperparameters: gamma and C.

Trains over the different hyperparameters, logs the training and validation MSE, keeps the model with the best validation MSE, then loads the saved model, tests it on the test set, plots, and scores it with eval.py.

## SVR_sigmoid.py

Sigmoid kernel.

Same as above, but with a different hyperparameter range.

## SVR_poly.py

Polynomial kernel.

Same as above, with one extra hyperparameter: degree (see the SVR parameters).

## MLP_module.py

Class module for the MLP network.

It implements:

1. An init function: input dimension, number of hidden units, number of layers, learning rate, batch size.
2. A fit function: splits the training and validation sets, trains the model, returns the best validation MSE, and saves the model and intermediate results.
3. A predict function: loads the model, prints the test-set MSE, and returns the predictions.

## mlp.py

Main script for the MLP.

Reads the files, trains the model, evaluates the model.

Hyperparameters can also be changed by hand; the saved model files carry the parameters in their names.

## LSTM_module.py

Class module for the LSTM network.

It implements:

1. An init function: input dimension, sequence length, number of hidden units, number of layers, learning rate, batch size.
2. A fit function: splits the training and validation sets, trains the model, returns the best validation MSE, and saves the model and intermediate results.
3. A predict function: loads the model, prints the test-set MSE, and returns the predictions.

## lstm_.py

Main script for the LSTM.

Reads the files, trains the model, evaluates the model.

Hyperparameters can also be changed by hand; the saved model files carry the parameters in their names.

## GBRT.py

GBRT model.

Train : validation : test = 7 : 2 : 1.

Features are normalised with the sklearn.preprocessing.scale function.

Hyperparameters: learning rate (learning_rate), number of trees (n_estimators), tree depth (max_depth).

Trains over the different hyperparameters, logs the training and validation MSE, keeps the model with the best validation MSE, then loads the saved model, tests it on the test set, plots, and scores it with eval.py.

## XGB.py

XGBoost model.

Train : validation : test = 7 : 2 : 1.

Features are normalised with the sklearn.preprocessing.scale function.

Hyperparameters: learning rate (learning_rate), number of trees (n_estimators), tree depth (max_depth).

Trains over the different hyperparameters, logs the training and validation MSE, keeps the model with the best validation MSE, then loads the saved model, tests it on the test set, plots, and scores it with eval.py.

## seq2seq_module.py

Class module for the seq2seq network.

It implements:

1. An init function: input dimension, sequence length, number of hidden units, learning rate, batch size.
2. A fit function: splits the training and validation sets, trains the model, returns the best validation MSE, and saves the model and intermediate results.
3. A predict function: loads the model, prints the test-set MSE, and returns the predictions.

## seq2seq.py

Main script for seq2seq.

Reads the files, trains the model, evaluates the model.

Hyperparameters can also be changed by hand; the saved model files carry the parameters in their names.

## att_seq2seq_module.py

Class module for the att_seq2seq network (seq2seq with an attention mechanism).

It implements:

1. An init function: input dimension, sequence length, number of hidden units, learning rate, batch size.
2. A fit function: splits the training and validation sets, trains the model, returns the best validation MSE, and saves the model and intermediate results.
3. A predict function: loads the model, prints the test-set MSE, and returns the predictions.

## att_seq2seq.py

Main script for att_seq2seq.

Reads the files, trains the model, evaluates the model.

Hyperparameters can also be changed by hand; the saved model files carry the parameters in their names.

## ensemble_learn.py

Ensembling over the trained base models, shuffled data.

Includes the bagging method and the stacking method.

## ensemble_learn_np.py

Ensembling over the trained base models, unshuffled data.

Includes the bagging method and the stacking method.
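The bagging variant simply averages the base models' predictions, while stacking feeds them into a GBRT meta-learner (see `FakeBagging` and `stackingUseGBRT` in the sources below). A minimal sketch of the bagging average, with made-up prediction vectors:

```python
import numpy as np

# three hypothetical base-model prediction vectors for the same two test samples
preds = [np.array([[0.1], [0.8]]),
         np.array([[0.0], [1.0]]),
         np.array([[0.2], [0.9]])]
y_bagging = np.mean(preds, axis=0)  # element-wise average over the models, shape (2, 1)
```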
## test.py

Loads a model and tests it on the test set in the `sample` folder.

# Training process

### Preparing the dataset

First download the dataset from this link: [hourly weather data](https://cloud.tsinghua.edu.cn/d/a96c9fb8f56d4fb5be62/)

Unzip its `hourly-weather-surface.zip` to obtain `sudeste.csv`, and put that file into the `rain_shuffle` folder.

### Building the training and test sets

Open a terminal, enter the `rain_shuffle` folder, and run

```
python hour_cat.py
```

followed by

```
python hour2txt_ifshuffle.py
```

### Training and testing

Still inside this folder, train a model with a command such as

```
python seq2seq.py --station 313 --epochs 100 --batch-size 30 --lr 1e-3 --input-dim 3 --seq-len 7
```

Here `seq2seq.py` is the training script and can be replaced by the name of any model introduced above; `--station` is the id of the chosen weather station and `--epochs` the number of training epochs.

Running it trains the corresponding model and prints the test-set results: RMSE, MAE, MDAE, r2-score, and the explained-variance score (var).

### Testing on the sample test cases

First move the `station313`~`station393` folders from the `testset` folder one level up into the `sample` folder of `rain_shuffle`, then run the following to get the validation results:

```
python test.py --station 313 --model_type seq2seq --model SEQ2SEQ/seq2seq_313_128_0.1_0.001.pth
```

Change the `--station` argument to pick the test case to solve; `--model_type` is the model type and must match the model chosen after it.

==Note==: the chosen model must match the station. In the example above the station is 313, so the model must be `seq2seq_313_128_0.1_0.001.pth`: the first number between the `_` separators is the matching station.

Running it prints the test-set results: RMSE, MAE, MDAE, var, and r2-score.

### Downloading the trained models and test samples

First download the trained models here: [trained models](https://cloud.tsinghua.edu.cn/d/2fd811c7400748eabd72/)

Locate the files

```
seq2seq_313_best.pth
seq2seq_314_best.pth
seq2seq_371_best.pth
seq2seq_372_best.pth
seq2seq_393_best.pth
```

download them into the `models` folder, and then, still in the `rain_shuffle` folder, run:

```
python test.py --station 313 --model_type seq2seq --model models/seq2seq_313_best.pth
```

Change the `--station` argument to pick the test case; `--model_type` is the model type and must match the chosen model.

==Note==: again, the chosen model must match the station: for station 313 you need `seq2seq_313_best.pth`, where the first number between the `_` separators is the matching station.

**Expected output:**

## station313:

SEQ2SEQ_rmse: 0.7847012
SEQ2SEQ_mae: 0.17579529
SEQ2SEQ_mdae: 0.0
SEQ2SEQ_r2: 0.25680770788051666
SEQ2SEQ_var: 0.2615431547164917


![seq2seq_313](./rain_shuffle/image_example/seq2seq_313.png)

## station314:

SEQ2SEQ_rmse: 0.7056168
SEQ2SEQ_mae: 0.1004589
SEQ2SEQ_mdae: 0.0
SEQ2SEQ_r2: 0.23950347308864373
SEQ2SEQ_var: 0.24046140909194946

![seq2seq_314](./rain_shuffle/image_example/seq2seq_314.png)

## station371:

SEQ2SEQ_rmse: 0.92071176
SEQ2SEQ_mae: 0.13802044
SEQ2SEQ_mdae: 0.0
SEQ2SEQ_r2: 0.18028592369689478
SEQ2SEQ_var: 0.180952787399292

![seq2seq_371](./rain_shuffle/image_example/seq2seq_371.png)

## station372:

SEQ2SEQ_rmse: 0.56710863
SEQ2SEQ_mae: 0.13634275
SEQ2SEQ_mdae: 0.0
SEQ2SEQ_r2: 0.253192955761976
SEQ2SEQ_var: 0.2590576410293579

![seq2seq_372](./rain_shuffle/image_example/seq2seq_372.png)

## station393:

SEQ2SEQ_rmse: 0.92203474
SEQ2SEQ_mae: 0.16005377
SEQ2SEQ_mdae: 0.0
SEQ2SEQ_r2: 0.18053682665056303
SEQ2SEQ_var: 0.18120914697647095

![seq2seq_393](./rain_shuffle/image_example/seq2seq_393.png)
--------------------------------------------------------------------------------
/rain_shuffle/ensemble_learn_np.py:
--------------------------------------------------------------------------------
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import torch
from sklearn.ensemble import GradientBoostingRegressor
from torch import Tensor
from torch.autograd import Variable

from eval import evaluation, plot

'''
1. Every model you want to ensemble must be saved in the MODEL folder; for the
   .pth models, the class of each neural-network model has to be imported.
2. hour2txt.py was modified so that the training and test sets are saved
   separately under the ./train_test_hour_shuffle and ./train_test_hour folders.
'''
from FNN import Net

# import the model classes
# ...
# import the model classes


def FakeBagging(path, dirlist, test_x, test_y):
    """Average the predictions of all saved base models (a bagging-style ensemble)."""
    num = len(dirlist)
    y_pre = np.zeros((test_y.shape[0], 1))
    rmse_list, mae_list, mdae_list, r2_list, var_list = [], [], [], [], []
    for ele in dirlist:
        suffix = ele.split('.')[-1]
        if suffix == 'm':
            # sklearn-style model saved with joblib
            clf = joblib.load('{}/{}'.format(path, ele))
            y_pre_temp = clf.predict(test_x)
            y_pre_temp = y_pre_temp.reshape(-1, 1)
            y_pre += y_pre_temp
        else:
            if ele[:3] == 'mlp':
                clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
                x_tensor = Variable(Tensor(test_x))
                y_pre_temp = clf(x_tensor).detach().numpy()
            else:
                # sequence models expect input of shape (batch, seq_len=7, input_dim=3)
                clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
                x_tensor = Variable(Tensor(test_x)).view(-1, 7, 3)
                y_pre_temp = clf(x_tensor).detach().numpy()
            y_pre += y_pre_temp
        rmse, mae, mdae, r2, var = evaluation(test_y, y_pre_temp)
        # print('model:', ele)
        # print('test_rmse: %r\n' % rmse,
        #       'test_mae: %r\n' % mae,
        #       'test_mdae: %r\n' % mdae,
        #       'test_r2: %r\n' % r2,
        #       'test_var: %r\n' % var)
        rmse_list.append(rmse); mae_list.append(mae); mdae_list.append(mdae); r2_list.append(r2); var_list.append(var)
    y_pre /= num
    rmse, mae, mdae, r2, var = evaluation(test_y, y_pre)
    rmse_list.append(rmse); mae_list.append(mae); mdae_list.append(mdae); r2_list.append(r2); var_list.append(var)

    # plot every metric for each base model plus the bagging ensemble
    xmark = [ele.split('.')[0] for ele in dirlist]
    xmark.append('bagging_model')
    plt.figure(); plt.plot(rmse_list, 'o-', c='salmon', label='rmse'); plt.legend(); plt.xticks(range(num + 1), xmark, rotation=45)
    plt.figure(); plt.plot(mae_list, '*-', c='limegreen', label='mae'); plt.legend(); plt.xticks(range(num + 1), xmark, rotation=45)
    plt.plot(mdae_list, 'v-', c='blue', label='mdae'); plt.legend(); plt.xticks(range(num + 1), xmark, rotation=45)
    plt.plot(r2_list, '^-', c='cyan', label='r2_score'); plt.legend(); plt.xticks(range(num + 1), xmark, rotation=45)
    plt.plot(var_list, 'D-', c='darkorchid', label='var'); plt.legend(); plt.xticks(range(num + 1), xmark, rotation=45)
    plt.show()
    # return rmse_list, mae_list, mdae_list, r2_list, var_list
def getStackingData(path, dirlist, train_x, train_y):
    """Build the second-level learner's data: one column of base-model predictions per model."""
    num = len(dirlist)  # number of base learners
    n, m = train_x.shape

    data_x = np.zeros((n, num))  # input features of the second-level learner
    data_y = train_y             # targets of the second-level learner
    for i, ele in enumerate(dirlist):
        suffix = ele.split('.')[-1]
        if suffix == 'm':
            clf = joblib.load('{}/{}'.format(path, ele))
            y_pre_temp = clf.predict(train_x)
        else:
            if ele[:3] == 'mlp':
                clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
                x_tensor = Variable(Tensor(train_x))
                y_pre_temp = clf(x_tensor).detach().numpy()
            else:
                clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
                x_tensor = Variable(Tensor(train_x)).view(-1, 7, 3)
                y_pre_temp = clf(x_tensor).detach().numpy()
        y_pre_temp = y_pre_temp.squeeze()
        data_x[:, i] = y_pre_temp

    return data_x, data_y


def stackingUseGBRT(path, dirlist, est_g, dep_g, lr_g, train_x, train_y, test_x, test_y):
    """Stacking: fit a GBRT meta-learner on the base models' predictions."""
    num = len(dirlist)  # number of base learners
    rmse_list, mae_list, mdae_list, r2_list, var_list = [], [], [], [], []
    # first score every base model on the raw test set
    for ele in dirlist:
        suffix = ele.split('.')[-1]
        if suffix == 'm':
            clf = joblib.load('{}/{}'.format(path, ele))
            y_pre_temp = clf.predict(test_x)
        else:
            if ele[:3] == 'mlp':
                clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
                x_tensor = Variable(Tensor(test_x))
                y_pre_temp = clf(x_tensor).detach().numpy()
            else:
                clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
                x_tensor = Variable(Tensor(test_x)).view(-1, 7, 3)
                y_pre_temp = clf(x_tensor).detach().numpy()
        y_pre_temp = y_pre_temp.reshape(-1, 1)
        rmse, mae, mdae, r2, var = evaluation(test_y, y_pre_temp)
        rmse_list.append(rmse); mae_list.append(mae); mdae_list.append(mdae); r2_list.append(r2); var_list.append(var)

    # fit the meta-learner on the base predictions over the training set
    data_x, data_y = getStackingData(path, dirlist, train_x, train_y)
    gbr = GradientBoostingRegressor(n_estimators=est_g, max_depth=dep_g, min_samples_split=3, learning_rate=lr_g)
    gbr.fit(data_x, data_y.ravel())
    test_x, test_y = getStackingData(path, dirlist, test_x, test_y)
    y_stacking = gbr.predict(test_x)
    y_stacking = y_stacking.reshape(-1, 1)

    rmse, mae, mdae, r2, var = evaluation(test_y, y_stacking)
    rmse_list.append(rmse); mae_list.append(mae); mdae_list.append(mdae); r2_list.append(r2); var_list.append(var)
    # plot every metric for each base model plus the stacking ensemble
    xmark = [ele.split('.')[0] for ele in dirlist]
    xmark.append('stacking_model')
    plt.figure(); plt.plot(rmse_list, 'o-', c='salmon', label='rmse'); plt.legend(); plt.xticks(range(num + 1), xmark, rotation=45)
    plt.figure(); plt.plot(mae_list, '*-', c='limegreen', label='mae'); plt.legend(); plt.xticks(range(num + 1), xmark, rotation=45)
    plt.plot(mdae_list, 'v-', c='blue', label='mdae'); plt.legend(); plt.xticks(range(num + 1), xmark, rotation=45)
    plt.plot(r2_list, '^-', c='cyan', label='r2_score'); plt.legend(); plt.xticks(range(num + 1), xmark, rotation=45)
    plt.plot(var_list, 'D-', c='darkorchid', label='var'); plt.legend(); plt.xticks(range(num + 1), xmark, rotation=45)
    plt.show()
    # return rmse_list, mae_list, mdae_list, r2_list, var_list
194 |                 dirlist,
195 |                 test_x,
196 |                 test_y)
197 |
198 |     # stacking use GBRT
199 |     est_g = 50
200 |     dep_g = 5
201 |     lr_g = 0.1
202 |     stackingUseGBRT(path,
203 |                     dirlist,
204 |                     est_g,
205 |                     dep_g,
206 |                     lr_g,
207 |                     train_x,
208 |                     train_y,
209 |                     test_x,
210 |                     test_y)
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
--------------------------------------------------------------------------------
/rain_shuffle/ensemble_learn.py:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | import os
4 | from eval import evaluation, plot
5 | import joblib
6 | import torch
7 | import matplotlib.pyplot as plt
8 | from torch import Tensor
9 | from torch.autograd import Variable
10 | import seaborn as sns
11 |
12 | sns.set(style="white")  # seaborn's default style
13 | sns.set_palette("muted")  # commonly used palette
14 |
15 | '''
16 | 1. The models to be ensembled must be saved in the MODEL folder; for every .pth model the class of the corresponding neural network has to be imported.
17 | 2. I modified hour2txt.py so that the training and test splits are saved separately under ./train_test_hour_shuffle and ./train_test_hour.
18 | '''
19 | from FNN import Net
20 | # import the model classes
21 | # ....
22 | # import the model classes
23 |
24 |
25 | def FakeBagging(path, dirlist, test_x, test_y):
26 |     num = len(dirlist)
27 |     y_pre = np.zeros((test_y.shape[0],1))
28 |     rmse_list, mae_list, mdae_list, r2_list, var_list = [], [], [], [], []
29 |     for ele in dirlist:
30 |         a = ele.split('.')
31 |         b = a[-1]
32 |         if b=='m':
33 |             clf = joblib.load('{}/{}'.format(path,ele))
34 |             y_pre_temp = clf.predict(test_x)
35 |             y_pre_temp = y_pre_temp.reshape(-1,1)
36 |             y_pre += y_pre_temp
37 |         else:
38 |             if ele[:3]=='mlp':
39 |                 clf = torch.load('{}/{}'.format(path,ele),map_location=torch.device('cpu'))
40 |                 x_tensor = Tensor(test_x)
41 |                 x_tensor = Variable(x_tensor)
42 |                 # y_tensor = Tensor(test_y)
43 |                 y_pre_temp = clf(x_tensor).detach().numpy()
44 |             else:
45 |                 clf = torch.load('{}/{}'.format(path,ele),map_location=torch.device('cpu'))
46 |                 x_tensor = Tensor(test_x)
47 |                 x_tensor = Variable(x_tensor).view(-1,7,3)
48 |                 # y_tensor = Tensor(test_y)
49 |                 y_pre_temp = clf(x_tensor).detach().numpy()
50 |             y_pre += y_pre_temp
51 |         rmse, mae, mdae, r2, var = evaluation(test_y, y_pre_temp)
52 |         # print('model:',ele)
53 |         # print('test_rmse: %r\n' % rmse,
54 |         #       'test_mae: %r\n' % mae,
55 |         #       'test_mdae: %r\n' % mdae,
56 |         #       'test_r2: %r\n' % r2,
57 |         #       'test_var: %r\n' % var)
58 |         rmse_list.append(rmse); mae_list.append(mae); mdae_list.append(mdae); r2_list.append(r2); var_list.append(var)
59 |     y_pre /= num
60 |     rmse, mae, mdae, r2, var = evaluation(test_y, y_pre)
61 |     rmse_list.append(rmse); mae_list.append(mae); mdae_list.append(mdae); r2_list.append(r2); var_list.append(var)
62 |
63 |     # plotting
64 |     xmark = [ele.split('_')[0] for ele in dirlist]
65 |     xmark.append('bagging_model')
66 |     plt.figure();plt.plot(rmse_list,'o-',c='salmon', label = 'rmse');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
67 |     plt.figure();plt.plot(mae_list,'*-',c='limegreen', label = 'mae');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
68 |     plt.plot(mdae_list, 'v-',c='blue',label = 'mdae');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
69 |     plt.plot(r2_list, '^-',c='cyan',label = 'r2_score');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
70 |     plt.plot(var_list,'D-',c='darkorchid', label = 'var');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
71 |     plt.show()
72 |     # return rmse_list, mae_list, mdae_list, r2_list, var_list
73 |
74 |
75 | def getStackingData(path, dirlist, train_x, train_y):
76 |     num = len(dirlist)  # number of base learners
77 |     n, m = train_x.shape
78 |
79 |     data_x = np.zeros((n, num))  # input features for the meta-learner (second-level learner)
80 |     data_y = train_y  # targets for the meta-learner
81 |     for i,ele in enumerate(dirlist):
82 |         a = ele.split('.')
83 |         b = a[-1]
84 |         if b=='m':
85 |             clf = joblib.load('{}/{}'.format(path,ele))
86 |             y_pre_temp = clf.predict(train_x)
87 |         else:
88 |             if ele[:3] == 'mlp':
89 |                 clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
90 |                 x_tensor = Tensor(train_x)
91 |                 x_tensor = Variable(x_tensor)
92 |                 # y_tensor = Tensor(test_y)
93 |                 y_pre_temp = clf(x_tensor).detach().numpy()
94 |             else:
95 |                 clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
96 |                 x_tensor = Tensor(train_x)
97 |                 x_tensor = Variable(x_tensor).view(-1, 7, 3)
98 |                 # y_tensor = Tensor(test_y)
99 |                 y_pre_temp = clf(x_tensor).detach().numpy()
100 |         y_pre_temp = y_pre_temp.squeeze()
101 |         data_x[:,i] = y_pre_temp
102 |
103 |     return data_x, data_y
104 |
105 |
106 | from sklearn.ensemble import GradientBoostingRegressor
107 | def stackingUseGBRT(path, dirlist, est_g, dep_g, lr_g, train_x, train_y, test_x, test_y):
108 |     num = len(dirlist)  # number of base learners
109 |     rmse_list, mae_list, mdae_list, r2_list, var_list = [], [], [], [], []
110 |     for ele in dirlist:
111 |         a = ele.split('.')
112 |         b = a[-1]
113 |         if b=='m':
114 |             clf = joblib.load('{}/{}'.format(path,ele))
115 |             y_pre_temp = clf.predict(test_x)
116 |         else:
117 |             if ele[:3] == 'mlp':
118 |                 clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
119 |                 x_tensor = Tensor(test_x)
120 |                 x_tensor = Variable(x_tensor)
121 |                 # y_tensor = Tensor(test_y)
122 |                 y_pre_temp = clf(x_tensor).detach().numpy()
123 |             else:
124 |                 clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
125 |                 x_tensor = Tensor(test_x)
126 |                 x_tensor = Variable(x_tensor).view(-1, 7, 3)
127 |                 # y_tensor = Tensor(test_y)
128 |                 y_pre_temp = clf(x_tensor).detach().numpy()
129 |         y_pre_temp = y_pre_temp.reshape(-1,1)
130 |         rmse, mae, mdae, r2, var = evaluation(test_y, y_pre_temp)
131 |         rmse_list.append(rmse); mae_list.append(mae); mdae_list.append(mdae); r2_list.append(r2); var_list.append(var)
132 |
133 |     data_x, data_y = getStackingData(path, dirlist, train_x, train_y)
134 |     gbr = GradientBoostingRegressor(n_estimators=est_g, max_depth=dep_g, min_samples_split=3, learning_rate=lr_g)
135 |     gbr.fit(data_x, data_y.ravel())
136 |     test_x, test_y = getStackingData(path, dirlist, test_x, test_y)
137 |     y_stacking = gbr.predict(test_x)
138 |     y_stacking = y_stacking.reshape(-1,1)
139 |
140 |     rmse, mae, mdae, r2, var = evaluation(test_y, y_stacking)
141 |     rmse_list.append(rmse); mae_list.append(mae); mdae_list.append(mdae); r2_list.append(r2); var_list.append(var)
142 |     # plotting
143 |     xmark = [ele.split('_')[0] for ele in dirlist]
144 |     xmark.append('stacking_model')
145 |     plt.figure();plt.plot(rmse_list, 'o-',c='salmon', label = 'rmse');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
146 |     plt.figure();plt.plot(mae_list, '*-',c='limegreen', label = 'mae');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
147 |     plt.plot(mdae_list,'v-',c='blue',label = 'mdae');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
148 |     plt.plot(r2_list, '^-',c='cyan',label = 'r2_score');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
149 |     plt.plot(var_list, 'D-',c='darkorchid', label = 'var');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
150 |     plt.show()
151 |     # return rmse_list, mae_list, mdae_list, r2_list, var_list
152 |
153 |
154 |
155 | if __name__ == '__main__':
156 |     pathnow = os.getcwd()
157 |     path = '{}/MODEL'.format(pathnow)
158 |     dirlist = os.listdir(path)[1:]  # load all .pth and .m models saved in the MODEL folder (the [1:] presumably skips a hidden entry such as .DS_Store)
159 |
160 |     # load the test data set
161 |     id_num = 371
162 |     train_x=np.loadtxt("{}/train_test_hour_shuffle/train{}.txt".format(pathnow,id_num))
163 |     train_y=np.loadtxt("{}/train_test_hour_shuffle/train_label{}.txt".format(pathnow,id_num))
164 |     test_x=np.loadtxt("{}/train_test_hour_shuffle/test{}.txt".format(pathnow,id_num))
165 |     test_y=np.loadtxt("{}/train_test_hour_shuffle/test_label{}.txt".format(pathnow,id_num))
166 |     from sklearn.preprocessing import *
167 |     train_x = scale(train_x,axis=0)
168 |     train_y = np.reshape(train_y,(-1,1))
169 |     test_x = scale(test_x,axis=0)
170 |     test_y = np.reshape(test_y,(-1,1))
171 |
172 |
173 |     # bagging
174 |     FakeBagging(path,
175 |                 dirlist,
176 |                 test_x,
177 |                 test_y)
178 |
179 |     # stacking use GBRT
180 |     est_g = 90
181 |     dep_g = 8
182 |     lr_g = 0.02
183 |     stackingUseGBRT(path,
184 |                     dirlist,
185 |                     est_g,
186 |                     dep_g,
187 |                     lr_g,
188 |                     train_x,
189 |                     train_y,
190 |                     test_x,
191 |                     test_y)
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
--------------------------------------------------------------------------------
/rain_shuffle/ensemble_learn_np 1.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | from eval import evaluation, plot
4 | import joblib
5 | import torch
6 | import matplotlib.pyplot as plt
7 | from torch import Tensor
8 | from torch.autograd import Variable
9 |
10 | '''
11 | 1. The models to be ensembled must be saved in the MODEL folder; for every .pth model the class of the corresponding neural network has to be imported.
12 | 2. I modified hour2txt.py so that the training and test splits are saved separately under ./train_test_hour_shuffle and ./train_test_hour.
13 | '''
14 | # from FNN import Net
15 |
16 |
17 | # import the model classes
18 | # ....
19 | # import the model classes
20 |
21 |
22 | def FakeBagging(path, dirlist, test_x, test_y, id_num):
23 |     num = len(dirlist)
24 |     y_pre = np.zeros((test_y.shape[0], 1))
25 |     rmse_list, mae_list, mdae_list, r2_list, var_list = [], [], [], [], []
26 |     for ele in dirlist:
27 |         print(ele)
28 |         a = ele.split('.')
29 |         b = a[-1]
30 |         if b == 'm':
31 |             clf = joblib.load('{}/{}'.format(path, ele))
32 |             y_pre_temp = clf.predict(test_x)
33 |             y_pre_temp = y_pre_temp.reshape(-1, 1)
34 |             y_pre += y_pre_temp
35 |         else:
36 |             if ele[:3] == 'mlp':
37 |                 clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
38 |                 x_tensor = Tensor(test_x)
39 |                 x_tensor = Variable(x_tensor)
40 |                 # y_tensor = Tensor(test_y)
41 |                 y_pre_temp = clf(x_tensor).detach().numpy()
42 |             else:
43 |                 clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
44 |                 x_tensor = Tensor(test_x)
45 |                 x_tensor = Variable(x_tensor).view(-1, 7, 3)
46 |                 # y_tensor = Tensor(test_y)
47 |                 y_pre_temp = clf(x_tensor).detach().numpy()
48 |             y_pre += y_pre_temp
49 |         rmse, mae, mdae, r2, var = evaluation(test_y, y_pre_temp)
50 |         # print('model:',ele)
51 |         # print('test_rmse: %r\n' % rmse,
52 |         #       'test_mae: %r\n' % mae,
53 |         #       'test_mdae: %r\n' % mdae,
54 |         #       'test_r2: %r\n' % r2,
55 |         #       'test_var: %r\n' % var)
56 |         rmse_list.append(rmse);
57 |         mae_list.append(mae);
58 |         mdae_list.append(mdae);
59 |         r2_list.append(r2);
60 |         var_list.append(var)
61 |     y_pre /= num
62 |     rmse, mae, mdae, r2, var = evaluation(test_y, y_pre)
63 |     rmse_list.append(rmse);
64 |     mae_list.append(mae);
65 |     mdae_list.append(mdae);
66 |     r2_list.append(r2);
67 |     var_list.append(var)
68 |
69 |     # plotting
70 |     # xmark = [ele.split('.')[0] for ele in dirlist]   # superseded by the '_' split on the next line
71 |     xmark = [ele.split('_')[0] for ele in dirlist]
72 |     for i,ele in enumerate(xmark):
73
| if ele == 'train': 74 | xmark[i] = dirlist[i].split('_')[1] 75 | xmark.append('bagging_model') 76 | plt.figure();plt.plot(rmse_list,'o-',c='salmon', label = 'rmse');plt.legend();plt.xticks(range(num+1),xmark,rotation=45) 77 | plt.grid();plt.savefig('output/bagging_rmse_meric_%d'%id_num,bbox_inches='tight',dpi=500) 78 | plt.figure();plt.plot(mae_list,'*-',c='limegreen', label = 'mae');plt.legend();plt.xticks(range(num+1),xmark,rotation=45) 79 | plt.plot(mdae_list, 'v-',c='blue',label = 'mdae');plt.legend();plt.xticks(range(num+1),xmark,rotation=45) 80 | plt.plot(r2_list, '^-',c='r',label = 'r2_score');plt.legend();plt.xticks(range(num+1),xmark,rotation=45) 81 | plt.plot(var_list,'D-',c='darkorchid', label = 'var');plt.legend();plt.xticks(range(num+1),xmark,rotation=45) 82 | plt.grid();plt.savefig('output/bagging_4_merics_%d'%id_num,bbox_inches='tight',dpi=500) 83 | plt.show() 84 | 85 | 86 | # return rmse_list, mae_list, mdae_list, r2_list, var_list 87 | 88 | 89 | def getStackingData(path, dirlist, train_x, train_y): 90 | num = len(dirlist) # 基学习期个数 91 | n, m = train_x.shape 92 | 93 | data_x = np.zeros((n, num)) # 次级学习器的输入特征 94 | data_y = train_y # 次级学习器的输出特征 95 | for i, ele in enumerate(dirlist): 96 | a = ele.split('.') 97 | b = a[-1] 98 | if b == 'm': 99 | clf = joblib.load('{}/{}'.format(path, ele)) 100 | y_pre_temp = clf.predict(train_x) 101 | else: 102 | if ele[:3] == 'mlp': 103 | clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu')) 104 | x_tensor = Tensor(train_x) 105 | x_tensor = Variable(x_tensor) 106 | # y_tensor = Tensor(test_y) 107 | y_pre_temp = clf(x_tensor).detach().numpy() 108 | else: 109 | clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu')) 110 | x_tensor = Tensor(train_x) 111 | x_tensor = Variable(x_tensor).view(-1, 7, 3) 112 | # y_tensor = Tensor(test_y) 113 | y_pre_temp = clf(x_tensor).detach().numpy() 114 | y_pre_temp = y_pre_temp.squeeze() 115 | data_x[:, i] = y_pre_temp 116 | 117 | return data_x, data_y 118 | 119 | 120 | from sklearn.ensemble import GradientBoostingRegressor 121 | 122 | 123 | def stackingUseGBRT(path, dirlist, est_g, dep_g, lr_g, train_x, train_y, test_x, test_y, id_num): 124 | num = len(dirlist) # 基学习期个数 125 | rmse_list, mae_list, mdae_list, r2_list, var_list = [], [], [], [], [] 126 | for ele in dirlist: 127 | a = ele.split('.') 128 | b = a[-1] 129 | if b == 'm': 130 | clf = joblib.load('{}/{}'.format(path, ele)) 131 | y_pre_temp = clf.predict(test_x) 132 | else: 133 | if ele[:3] == 'mlp': 134 | clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu')) 135 | x_tensor = Tensor(test_x) 136 | x_tensor = Variable(x_tensor) 137 | # y_tensor = Tensor(test_y) 138 | y_pre_temp = clf(x_tensor).detach().numpy() 139 | else: 140 | clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu')) 141 | x_tensor = Tensor(test_x) 142 | x_tensor = Variable(x_tensor).view(-1, 7, 3) 143 | # y_tensor = Tensor(test_y) 144 | y_pre_temp = clf(x_tensor).detach().numpy() 145 | y_pre_temp = y_pre_temp.reshape(-1, 1) 146 | rmse, mae, mdae, r2, var = evaluation(test_y, y_pre_temp) 147 | rmse_list.append(rmse); 148 | mae_list.append(mae); 149 | mdae_list.append(mdae); 150 | r2_list.append(r2); 151 | var_list.append(var) 152 | 153 | data_x, data_y = getStackingData(path, dirlist, train_x, train_y) 154 | gbr = GradientBoostingRegressor(n_estimators=est_g, max_depth=dep_g, min_samples_split=3, learning_rate=lr_g) 155 | gbr.fit(data_x, data_y.ravel()) 156 | test_x, test_y = getStackingData(path, 
dirlist, test_x, test_y) 157 | y_stacking = gbr.predict(test_x) 158 | y_stacking = y_stacking.reshape(-1, 1) 159 | 160 | rmse, mae, mdae, r2, var = evaluation(test_y, y_stacking) 161 | rmse_list.append(rmse); 162 | mae_list.append(mae); 163 | mdae_list.append(mdae); 164 | r2_list.append(r2); 165 | var_list.append(var) 166 | # 绘图 167 | xmark = [ele.split('_')[0] for ele in dirlist] 168 | for i,ele in enumerate(xmark): 169 | if ele == 'train': 170 | xmark[i] = dirlist[i].split('_')[1] 171 | 172 | xmark.append('stacking_model') 173 | plt.figure();plt.plot(rmse_list, 'o-',c='salmon', label = 'rmse');plt.legend();plt.xticks(range(num+1),xmark,rotation=45) 174 | plt.grid();plt.savefig('output/stacking_rmse_meric_%d'%id_num,bbox_inches='tight',dpi=500) 175 | plt.figure();plt.plot(mae_list, '*-',c='limegreen', label = 'mae');plt.legend();plt.xticks(range(num+1),xmark,rotation=45) 176 | plt.plot(mdae_list,'v-',c='blue',label = 'mdae');plt.legend();plt.xticks(range(num+1),xmark,rotation=45) 177 | plt.plot(r2_list, '^-',c='r',label = 'r2_score');plt.legend();plt.xticks(range(num+1),xmark,rotation=45) 178 | plt.plot(var_list, 'D-',c='darkorchid', label = 'var');plt.legend();plt.xticks(range(num+1),xmark,rotation=45) 179 | plt.grid();plt.savefig('output/stacking_4_merics_%d'%id_num,bbox_inches='tight',dpi=500) 180 | plt.show() 181 | 182 | 183 | # return rmse_list, mae_list, mdae_list, r2_list, var_list 184 | 185 | 186 | if __name__ == '__main__': 187 | pathnow = os.getcwd() 188 | path = '{}/MODEL_NO'.format(pathnow) 189 | dirlist = os.listdir(path) # 加载保存在MODEL文件夹里的所有.pth和.m模型 190 | # dirlist.remove('.DS_Store') 191 | 192 | # 加载测试数据集 193 | id_num = 312 194 | train_x = np.loadtxt("{}/train_test_hour/train{}.txt".format(pathnow, id_num)) 195 | train_y = np.loadtxt("{}/train_test_hour/train_label{}.txt".format(pathnow, id_num)) 196 | test_x = np.loadtxt("{}/train_test_hour/test{}.txt".format(pathnow, id_num)) 197 | test_y = np.loadtxt("{}/train_test_hour/test_label{}.txt".format(pathnow, id_num)) 198 | from sklearn.preprocessing import * 199 | 200 | train_x = scale(train_x, axis=0) 201 | train_y = np.reshape(train_y, (-1, 1)) 202 | test_x = scale(test_x, axis=0) 203 | test_y = np.reshape(test_y, (-1, 1)) 204 | 205 | # bagging 206 | FakeBagging(path, 207 | dirlist, 208 | test_x, 209 | test_y, 210 | id_num) 211 | 212 | # stacking use GBRT 213 | est_g = 50 214 | dep_g = 3 215 | lr_g = 0.1 216 | stackingUseGBRT(path, 217 | dirlist, 218 | est_g, 219 | dep_g, 220 | lr_g, 221 | train_x, 222 | train_y, 223 | test_x, 224 | test_y, 225 | id_num) 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | -------------------------------------------------------------------------------- /rain_shuffle/seq2seq_module.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals, print_function, division 2 | import os 3 | import torch 4 | import torch.nn as nn 5 | from torch import optim 6 | import torch.nn.functional as F 7 | from sklearn.model_selection import train_test_split 8 | from torch.autograd import Variable 9 | import numpy as np 10 | 11 | 12 | MAX_LENGTH=10 13 | path ='./SEQ2SEQ' 14 | 15 | 16 | # 定义GetLoader类,继承Dataset方法,并重写__getitem__()和__len__()方法 17 | class GetLoader(torch.utils.data.Dataset): 18 | # 初始化函数,得到数据 19 | def __init__(self, data_root, data_label): 20 | self.data = data_root 21 | self.label = data_label 22 | # index是根据batchsize划分数据后得到的索引,最后将data和对应的labels进行一起返回 23 | def __getitem__(self, index): 24 | data = self.data[index] 25 | 
labels = self.label[index] 26 | return data, labels 27 | # 该函数返回数据大小长度,目的是DataLoader方便划分,如果不知道大小,DataLoader会一脸懵逼 28 | def __len__(self): 29 | return len(self.data) 30 | 31 | 32 | class EncoderRNN(nn.Module): 33 | def __init__(self, input_size, hidden_size): 34 | super(EncoderRNN, self).__init__() 35 | self.input_size = input_size 36 | self.hidden_size = hidden_size 37 | 38 | self.embedding = nn.Linear(input_size, hidden_size) 39 | self.gru = nn.GRU(input_size, hidden_size, batch_first=True) 40 | 41 | def forward(self, input, hidden): 42 | # embedded = self.embedding(input).view(-1, 1 ,self.hidden_size) 43 | # output = embedded 44 | output = input.view(-1, input.shape[1], self.input_size) 45 | output, hidden = self.gru(output, hidden) 46 | output = torch.relu(output) 47 | return output, hidden 48 | 49 | def initHidden(self, batch_size): 50 | return torch.zeros(1, batch_size, self.hidden_size) 51 | 52 | 53 | class DecoderRNN(nn.Module): 54 | def __init__(self, hidden_size, output_size, dropout_p): 55 | super(DecoderRNN, self).__init__() 56 | self.hidden_size = hidden_size 57 | 58 | self.embedding = nn.Linear(hidden_size, hidden_size) 59 | self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True) 60 | self.out = nn.Linear(hidden_size, output_size) 61 | 62 | def forward(self, input, hidden): 63 | # output = self.embedding(input).view(-1, 1 ,self.hidden_size) 64 | 65 | output = input.view(-1, input.shape[1], self.hidden_size) 66 | output, hidden = self.gru(output, hidden) 67 | output = self.out(output[:, 0, :]) 68 | 69 | output = torch.relu(output) 70 | 71 | return output, hidden 72 | 73 | def initHidden(self, batch_size): 74 | return torch.zeros(1, batch_size, self.hidden_size) 75 | 76 | 77 | 78 | class seq2seq_cell(nn.Module): 79 | def __init__(self,input_dim,seq_len,output_dim,hidden_size,dropout,learning_rate,batch_size,device): 80 | super().__init__() 81 | self.input_dim = input_dim 82 | self.seq_len = seq_len 83 | 84 | self.output_dim = output_dim 85 | 86 | self.hidden_size = hidden_size 87 | self.dropout = dropout 88 | self.lr = learning_rate 89 | self.bs = batch_size 90 | self.device =device 91 | 92 | self.encoder = EncoderRNN(self.input_dim, self.hidden_size) 93 | self.decoder = DecoderRNN(self.hidden_size, self.output_dim, dropout_p=self.dropout) 94 | def forward(self,input): 95 | 96 | encoder_hidden = self.encoder.initHidden(input.shape[0]) 97 | encoder_outputs = torch.zeros(self.seq_len,input.shape[0], self.hidden_size) 98 | 99 | # 向前传播 100 | encoder_output, encoder_hidden = self.encoder(input, encoder_hidden) 101 | encoder_outputs= encoder_output 102 | """ 103 | 此处可能是对序列不敏感的原因 104 | """ 105 | 106 | # encoder_outputs = encoder_output.view(input.shape[1], self.hidden_size) 107 | 108 | decoder_hidden = encoder_hidden 109 | 110 | decoder_outputs = torch.zeros(input.shape[0], self.seq_len, self.output_dim) 111 | 112 | 113 | decoder_output, decoder_hidden = self.decoder( 114 | encoder_outputs, decoder_hidden) 115 | decoder_outputs = decoder_output 116 | 117 | 118 | return decoder_outputs 119 | 120 | 121 | 122 | class Seq2Seq(): 123 | def __init__(self,num,input_dim,seq_len,output_dim,hidden_size,dropout,learning_rate,batch_size,device_pu): 124 | self.station = num 125 | self.input_dim =input_dim 126 | self.output_dim = output_dim 127 | self.seq_len =seq_len 128 | self.hidden_size = hidden_size 129 | self.dropout = dropout 130 | self.lr =learning_rate 131 | self.bs =batch_size 132 | self.device =device_pu 133 | self.model =seq2seq_cell(self.input_dim, 134 | self.seq_len , 135 | 
self.output_dim,
136 |                                   self.hidden_size,
137 |                                   self.dropout,
138 |                                   self.lr,
139 |                                   self.bs, self.device).to(self.device)
140 |
141 |     def fit(self, data, label, shuffle, num_epoches=100):
142 |
143 |         self.shuffle = shuffle
144 |
145 |         self.optimizer = optim.SGD(self.model.parameters(), lr=self.lr)
146 |         self.criterion = nn.MSELoss()
147 |
148 |         x_train, x_test, y_train, y_test = train_test_split(data, label, test_size=0.22, random_state=42)
149 |
150 |         train = GetLoader(x_train, y_train)
151 |         data_train = torch.utils.data.DataLoader(train, batch_size=self.bs, shuffle=self.shuffle)
152 |         test = GetLoader(x_test, y_test)
153 |         data_test = torch.utils.data.DataLoader(test, batch_size=self.bs, shuffle=self.shuffle)
154 |
155 |         if os.path.exists(path):
156 |             pass
157 |         else:
158 |             os.mkdir(path)
159 |
160 |         eval_loss_best = np.inf
161 |         uncorrect = True
162 |         while uncorrect:
163 |             f = open('{}/train_{}_{}_{}_{}.txt'.format(path, self.station, self.hidden_size, self.dropout, self.lr), 'w+')
164 |             self.model = seq2seq_cell(self.input_dim,
165 |                                       self.seq_len,
166 |                                       self.output_dim,
167 |                                       self.hidden_size,
168 |                                       self.dropout,
169 |                                       self.lr,
170 |                                       self.bs, self.device).to(self.device)
171 |             self.optimizer = optim.SGD(self.model.parameters(), lr=self.lr)
172 |             self.criterion = nn.MSELoss()
173 |             train_loss_last = np.inf
174 |             for iter in range(1, num_epoches + 1):
175 |                 self.model.train()
176 |                 print('epoch {}'.format(iter))
177 |                 print('**************************************')
178 |                 running_loss = 0.0
179 |
180 |                 for i, data in enumerate(data_train, 1):
181 |                     """
182 |                     This way of random shuffling is not ideal: the data should be shuffled once globally and then drawn in a fixed order, otherwise some samples end up being used unevenly.
183 |                     """
184 |                     img, label = data
185 |                     img = Variable(img)
186 |                     label = Variable(label).to(self.device)
187 |
188 |                     img = img.view(-1, self.seq_len, self.input_dim).to(self.device)
189 |                     decoder_output = self.model(img)
190 |
191 |                     loss = self.criterion(decoder_output, label)
192 |                     running_loss += loss.data.item() * label.size(0)
193 |                     # backward pass
194 |                     self.optimizer.zero_grad()
195 |                     loss.backward()
196 |                     self.optimizer.step()
197 |
198 |                 train_loss = running_loss / (len(y_train))
199 |                 print('Finish {} epoch, Loss: {:.6f}'.format(
200 |                     iter, train_loss))
201 |
202 |                 if train_loss_last == train_loss and iter < 3:
203 |                     break
204 |                 if train_loss_last > train_loss and iter >= 3:
205 |                     uncorrect = False
206 |
207 |                 train_loss_last = train_loss
208 |
209 |                 self.model.eval()
210 |                 eval_loss = 0.
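                # Per-epoch validation pass (descriptive note): criterion is an
                # nn.MSELoss with mean reduction, so each batch loss is scaled
                # back by label.size(0) and the running sum is divided by
                # len(y_test) below, giving a per-sample MSE on the held-out split.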
211 | for data in data_test: 212 | img, label = data 213 | 214 | img = Variable(img) 215 | label = Variable(label).to(self.device) 216 | 217 | img = img.view( -1,self.seq_len, self.input_dim).to(self.device) 218 | 219 | decoder_output =self.model(img) 220 | 221 | loss = self.criterion(decoder_output, label) 222 | eval_loss += loss.data.item() * label.size(0) 223 | val_loss = eval_loss / (len(y_test)) 224 | print('Val Loss: {:.6f}'.format(val_loss)) 225 | 226 | 227 | f.write(" Train_MSE: " + str(train_loss) + ' Val_MSE: ' + str(val_loss) + '\n') 228 | 229 | if val_loss < eval_loss_best: 230 | eval_loss_best = val_loss 231 | self.eval = eval_loss_best 232 | torch.save(self.model, '{}/seq2seq_{}_{}_{}_{}.pth'.format(path, self.station,self.hidden_size, self.dropout, self.lr)) 233 | f.close() 234 | 235 | return self.eval 236 | 237 | 238 | def predict(self,test_data,test_label): 239 | 240 | test_model = torch.load('{}/seq2seq_{}_{}_{}_{}.pth'.format(path,self.station,self.hidden_size, self.dropout,self.lr)).to(self.device) 241 | 242 | test_loss = 0 243 | test = GetLoader(test_data, test_label) 244 | criterion = nn.MSELoss() 245 | data_test = torch.utils.data.DataLoader(test, batch_size=1, shuffle=False) 246 | 247 | y_mlp = [] 248 | for data in data_test: 249 | 250 | img, label = data 251 | img = Variable(img) 252 | label = Variable(label).to(self.device) 253 | 254 | img = img.view( -1, self.seq_len,self.input_dim).to(self.device) 255 | 256 | decoder_output = test_model(img) 257 | 258 | loss = criterion(decoder_output, label) 259 | test_loss += loss.data 260 | y_mlp.append(decoder_output.data) 261 | 262 | 263 | print('Test Loss: {:.6f}'.format(test_loss / (len( 264 | test_label)))) 265 | 266 | y_mlp = np.array(y_mlp).squeeze()[:,np.newaxis] 267 | 268 | 269 | return y_mlp 270 | 271 | 272 | 273 | 274 | 275 | -------------------------------------------------------------------------------- /rain_shuffle/att_seq2seq_module.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals, print_function, division 2 | import os 3 | import torch 4 | import torch.nn as nn 5 | from torch import optim 6 | import torch.nn.functional as F 7 | from sklearn.model_selection import train_test_split 8 | from torch.autograd import Variable 9 | import numpy as np 10 | 11 | 12 | 13 | MAX_LENGTH=10 14 | path ='./ATT_SEQ2SEQ' 15 | 16 | 17 | # 定义GetLoader类,继承Dataset方法,并重写__getitem__()和__len__()方法 18 | class GetLoader(torch.utils.data.Dataset): 19 | # 初始化函数,得到数据 20 | def __init__(self, data_root, data_label): 21 | self.data = data_root 22 | self.label = data_label 23 | # index是根据batchsize划分数据后得到的索引,最后将data和对应的labels进行一起返回 24 | def __getitem__(self, index): 25 | data = self.data[index] 26 | labels = self.label[index] 27 | return data, labels 28 | # 该函数返回数据大小长度,目的是DataLoader方便划分,如果不知道大小,DataLoader会一脸懵逼 29 | def __len__(self): 30 | return len(self.data) 31 | 32 | 33 | class EncoderRNN(nn.Module): 34 | def __init__(self, input_size, hidden_size): 35 | super(EncoderRNN, self).__init__() 36 | self.input_size = input_size 37 | self.hidden_size = hidden_size 38 | 39 | self.embedding = nn.Linear(input_size, hidden_size) 40 | self.gru = nn.GRU(input_size, hidden_size,batch_first=True) 41 | 42 | def forward(self, input, hidden): 43 | # embedded = self.embedding(input).view(-1, 1 ,self.hidden_size) 44 | # output = embedded 45 | output = input.view(-1, 1, self.input_size) 46 | output, hidden = self.gru(output, hidden) 47 | output = torch.relu(output) 48 | return output, 
hidden 49 | 50 | def initHidden(self,batch_size): 51 | return torch.zeros(1, batch_size , self.hidden_size) 52 | 53 | 54 | class DecoderRNN(nn.Module): 55 | def __init__(self, hidden_size, output_size,dropout_p,seq_len): 56 | super(DecoderRNN, self).__init__() 57 | self.hidden_size = hidden_size 58 | self.seq_len = seq_len 59 | self.embedding = nn.Linear(hidden_size, hidden_size) 60 | 61 | self.attn = nn.Linear(self.hidden_size * 2, self.seq_len) 62 | self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size) 63 | 64 | self.gru = nn.GRU(hidden_size, hidden_size,batch_first=True) 65 | self.out = nn.Linear(hidden_size, output_size) 66 | 67 | 68 | def forward(self, input, hidden,encoder_outputs): 69 | # output = self.embedding(input).view(-1, 1 ,self.hidden_size) 70 | 71 | output = input.view(1, -1, self.hidden_size) 72 | 73 | attn_weights = F.softmax( 74 | self.attn(torch.cat((output[0], hidden[0]), 1)), dim=1) 75 | 76 | attn_weights=attn_weights.unsqueeze(0) 77 | attn_weights = attn_weights.transpose(1,0) 78 | 79 | encoder_outputs = encoder_outputs.transpose(1, 0) 80 | attn_applied = torch.bmm(attn_weights, 81 | encoder_outputs).transpose(1,0) 82 | 83 | output = torch.cat((output[0], attn_applied[0]), 1) 84 | output = self.attn_combine(output).unsqueeze(0) 85 | 86 | output = F.relu(output).transpose(1,0) 87 | output, hidden = self.gru(output, hidden) 88 | 89 | output = torch.relu(self.out(output[:,0,:])) 90 | 91 | return output, hidden,attn_weights 92 | 93 | def initHidden(self,batch_size): 94 | return torch.zeros(1,batch_size, self.hidden_size) 95 | 96 | 97 | 98 | class seq2seq_cell(nn.Module): 99 | def __init__(self,input_dim,seq_len,output_dim,hidden_size,dropout,learning_rate,batch_size,device): 100 | super().__init__() 101 | self.input_dim = input_dim 102 | self.output_dim = output_dim 103 | self.seq_len = seq_len 104 | self.hidden_size = hidden_size 105 | self.dropout = dropout 106 | self.lr = learning_rate 107 | self.bs = batch_size 108 | self.device = device 109 | self.encoder = EncoderRNN(self.input_dim, self.hidden_size) 110 | self.decoder = DecoderRNN(self.hidden_size, self.output_dim, dropout_p=self.dropout,seq_len=self.seq_len) 111 | def forward(self,input): 112 | 113 | encoder_hidden = self.encoder.initHidden(input.shape[0]) 114 | encoder_outputs = torch.zeros( self.seq_len ,input.shape[0],self.hidden_size) 115 | 116 | # 向前传播 117 | for i in range(self.seq_len): 118 | encoder_output, encoder_hidden = self.encoder(input[:,i,:], encoder_hidden) 119 | encoder_outputs[i] = encoder_output[0] 120 | """ 121 | 此处可能是对序列不敏感的原因 122 | """ 123 | 124 | # encoder_outputs = encoder_output.view(input.shape[1], self.hidden_size) 125 | 126 | decoder_hidden = encoder_hidden 127 | 128 | decoder_outputs = torch.zeros(input.shape[0],self.seq_len, self.output_dim) 129 | 130 | for i in range(self.seq_len): 131 | decoder_output, decoder_hidden,decoder_attention = self.decoder( 132 | encoder_outputs[i], decoder_hidden,encoder_outputs) 133 | decoder_outputs[:,i,:] =decoder_output 134 | 135 | 136 | return decoder_outputs[:,0,:], decoder_attention[:,0,:] 137 | 138 | 139 | 140 | class Seq2Seq(): 141 | def __init__(self,num,input_dim,seq_len,output_dim,hidden_size,dropout,learning_rate,batch_size,device_pu): 142 | self.station = num 143 | self.input_dim =input_dim 144 | self.output_dim = output_dim 145 | self.seq_len =seq_len 146 | self.hidden_size = hidden_size 147 | self.dropout = dropout 148 | self.lr =learning_rate 149 | self.bs =batch_size 150 | self.device =device_pu 151 | self.model 
=seq2seq_cell(self.input_dim, 152 | self.seq_len, 153 | self.output_dim, 154 | self.hidden_size, 155 | self.dropout, 156 | self.lr, 157 | self.bs,self.device).to(self.device ) 158 | 159 | def fit(self,data,label,shuffle,num_epoches = 100): 160 | 161 | self.shuffle =shuffle 162 | 163 | self.optimizer = optim.SGD(self.model.parameters(), lr=self.lr) 164 | self.criterion = nn.MSELoss() 165 | 166 | x_train, x_test, y_train, y_test = train_test_split(data, label, test_size=0.22, random_state=42) 167 | 168 | train = GetLoader(x_train, y_train) 169 | data_train = torch.utils.data.DataLoader(train, batch_size=self.bs, shuffle=self.shuffle) 170 | test = GetLoader(x_test, y_test) 171 | data_test = torch.utils.data.DataLoader(test, batch_size=self.bs, shuffle=self.shuffle) 172 | 173 | if os.path.exists(path): 174 | pass 175 | else: 176 | os.mkdir(path) 177 | 178 | eval_loss_best = np.inf 179 | uncorrect = True 180 | while uncorrect: 181 | f = open('{}/train_{}_{}_{}_{}.txt'.format(path,self.station, self.hidden_size, self.dropout, self.lr), 'w+') 182 | self.model =seq2seq_cell(self.input_dim, 183 | self.seq_len , 184 | self.output_dim, 185 | self.hidden_size, 186 | self.dropout, 187 | self.lr, 188 | self.bs,self.device).to(self.device) 189 | self.optimizer = optim.SGD(self.model.parameters(), lr=self.lr) 190 | self.criterion = nn.MSELoss() 191 | train_loss_last = np.inf 192 | for iter in range(1, num_epoches + 1): 193 | self.model.train() 194 | print('epoch {}'.format(iter)) 195 | print('**************************************') 196 | running_loss = 0.0 197 | 198 | for i, data in enumerate(data_train, 1): 199 | """ 200 | 随机打乱的方式不好 应该是全部打乱之后 固定抽取 否则会出现样本利用不均衡的问题 201 | """ 202 | img, label = data 203 | img = Variable(img) 204 | label = Variable(label).to(self.device) 205 | 206 | img = img.view(-1,self.seq_len,self.input_dim).to(self.device) 207 | decoder_output,decoder_attention =self.model(img) 208 | 209 | loss = self.criterion(decoder_output, label) 210 | running_loss += loss.data.item() * label.size(0) 211 | # 向后传播 212 | self.optimizer.zero_grad() 213 | loss.backward() 214 | self.optimizer.step() 215 | 216 | train_loss =running_loss / (len(y_train)) 217 | print('Finish {} epoch, Loss: {:.6f}'.format( 218 | iter, train_loss)) 219 | 220 | if train_loss_last == train_loss and iter < 3: 221 | break 222 | if train_loss_last > train_loss and iter >= 3: 223 | uncorrect = False 224 | 225 | train_loss_last =train_loss 226 | 227 | self.model.eval() 228 | eval_loss = 0. 
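                # Validation pass for the attention model (descriptive note): the
                # forward call below returns the pair (decoder_output, decoder_attention);
                # only decoder_output enters the MSE here, while the attention
                # weights are collected for visualization in predict().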
229 | for data in data_test: 230 | img, label = data 231 | 232 | img = Variable(img) 233 | label = Variable(label).to(self.device) 234 | 235 | img = img.view( -1, self.seq_len,self.input_dim).to(self.device) 236 | 237 | decoder_output,decoder_attention =self.model(img) 238 | 239 | loss = self.criterion(decoder_output, label) 240 | eval_loss += loss.data.item() * label.size(0) 241 | val_loss = eval_loss / (len(y_test)) 242 | print('Val Loss: {:.6f}'.format(val_loss)) 243 | 244 | 245 | f.write(" Train_MSE: " + str(train_loss) + ' Val_MSE: ' + str(val_loss) + '\n') 246 | 247 | if val_loss < eval_loss_best: 248 | eval_loss_best = val_loss 249 | self.eval = eval_loss_best 250 | torch.save(self.model, '{}/seq2seq_{}_{}_{}_{}.pth'.format(path, self.station,self.hidden_size, self.dropout, self.lr)) 251 | f.close() 252 | 253 | return self.eval 254 | 255 | 256 | def predict(self,test_data,test_label): 257 | 258 | test_model = torch.load('{}/seq2seq_{}_{}_{}_{}.pth'.format(path,self.station,self.hidden_size, self.dropout,self.lr)).to(self.device ) 259 | 260 | test_loss = 0 261 | test = GetLoader(test_data, test_label) 262 | criterion = nn.MSELoss() 263 | data_test = torch.utils.data.DataLoader(test, batch_size=1, shuffle=False) 264 | 265 | y_mlp = [] 266 | y_attention=[] 267 | for data in data_test: 268 | 269 | img, label = data 270 | img = Variable(img).to(self.device ) 271 | label = Variable(label).to(self.device ) 272 | 273 | img = img.view( -1, self.seq_len,self.input_dim) 274 | 275 | decoder_output,decoder_attention = test_model(img) 276 | 277 | loss = criterion(decoder_output, label) 278 | test_loss += loss.data 279 | y_mlp.append(decoder_output.data) 280 | y_attention.append(decoder_attention.data.cpu().squeeze().numpy()) 281 | 282 | 283 | print('Test Loss: {:.6f}'.format(test_loss / (len( 284 | test_label)))) 285 | 286 | y_mlp = np.array(y_mlp).squeeze()[:,np.newaxis] 287 | y_attention =np.array(y_attention) 288 | 289 | 290 | return y_mlp ,y_attention 291 | 292 | 293 | 294 | 295 | 296 | -------------------------------------------------------------------------------- /rain_shuffle/eval.py: -------------------------------------------------------------------------------- 1 | from sklearn.metrics import * 2 | import numpy.linalg as la 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import os 6 | import warnings 7 | warnings.filterwarnings("ignore")#忽略警告 8 | #import seaborn as sns 9 | 10 | def evaluation(a,b): # a为真实标签 11 | rmse = np.sqrt(mean_squared_error(a,b)) 12 | mae = mean_absolute_error(a, b) 13 | var = explained_variance_score(a,b) 14 | mdae = median_absolute_error(a,b) 15 | r2 =r2_score(a,b) 16 | return rmse, mae, mdae, r2, var 17 | 18 | 19 | #绘图 20 | def plot(a, b, label_a, label_b, title=None): 21 | plt.figure() 22 | if title != None: 23 | plt.title(str(title) ) 24 | plt.plot(a, color = 'b', label = str(label_a)) 25 | plt.plot(b, color = 'r', label = str(label_b)) 26 | plt.legend(loc='best') 27 | plt.show() 28 | 29 | 30 | def readandplot(filepath, save_name, title = None): # 函数用途:模型寻优过程不同超参数下的损失 31 | f = open(filepath,"r") 32 | line_all = f.readlines() 33 | row_num = len(line_all) 34 | label_list = [] 35 | line_1 = line_all[0].strip('\n') 36 | a = line_1.split(' ') 37 | m = int(len(a) / 2) 38 | for i in [2*ele for ele in range(m)]: 39 | label_list.append(a[i].strip(':')) 40 | print(label_list) 41 | b = np.zeros((row_num,m)) 42 | for j,line in enumerate(line_all): 43 | line = line.strip('\n') 44 | c = line.split(' ') 45 | for k in range(m): 46 | l = 2 * k + 1 47 | b[j,k] = 
c[l] 48 | data = b 49 | ax = plt.figure() 50 | if title != None: 51 | plt.title(str(title) ) 52 | colors = ['r','b'] 53 | mark = ['v-','o-'] 54 | if save_name == 'SVR_poly_train_371_degree': 55 | data = data[:-5,:] 56 | for i in range(m): 57 | plt.plot(data[:,0],data[:,i+1], mark[i],c = colors[i],alpha = 0.4,label = label_list[i+1]) 58 | plt.xlabel(label_list[0]) 59 | plt.legend(loc='best') 60 | y_min = np.min(data[:,i+1]) 61 | index = np.argmin(data[:,i+1]) 62 | x_min = data[:,0][index] 63 | plt.text(x_min,y_min,"min",fontdict={'size':'8','color':colors[i]}) 64 | if i==m-2: 65 | break 66 | plt.grid() 67 | plt.show() 68 | ax.savefig('output/%s.png'%save_name,bbox_inches='tight',dpi=500) 69 | 70 | 71 | 72 | # def readTestResults(filepath): # 函数用途:每个模型最优参数下的5个指标值对比 73 | # f = open(filepath,"r") 74 | # line_all = f.readlines() 75 | # row_num = len(line_all) 76 | # data = np.zeros((row_num,5)) 77 | # label_list = ['ARIMA','ATT_SEQ2SEQ','GBRT','LSTM','MLP','SEQ2SEQ','SVR_poly','SVR_rbf','SVR_sigmoid','XGB'] 78 | # for j,line in enumerate(line_all): 79 | # line = line.strip('\n').split(' ') 80 | # for k in range(5): 81 | # data[j,k] = float(line[2 * k + 1]) 82 | 83 | # colors = ['r','b','darkblue','cyan','violet'] 84 | 85 | 86 | # ax = plt.figure() 87 | # plt.plot(data[:,0], label = 'rmse' ,c=colors[0], linestyle='--',marker='o') 88 | # plt.xticks(range(row_num),label_list,rotation=45) #可以是字符 89 | # plt.legend() 90 | # plt.grid() 91 | # plt.show() 92 | # ax.savefig('output/rmse.png',bbox_inches='tight',dpi=500) 93 | 94 | 95 | # ax = plt.figure() 96 | # for i,y_mark in enumerate(['mae','mdae','r2','var']): 97 | # i = i + 1 98 | # plt.plot(data[:,i], label = y_mark ,c=colors[i], linestyle='--',marker='o') 99 | # plt.xticks(range(row_num),label_list,rotation=45) #可以是字符 100 | # plt.legend() 101 | # plt.grid() 102 | # plt.show() 103 | # ax.savefig('output/the_other_4_metics.png',bbox_inches='tight',dpi=500) 104 | # return data 105 | 106 | 107 | 108 | # import glob 109 | # import xlwt 110 | # import xlrd 111 | # from xlutils.copy import copy as xl_copy 112 | # def processTestResultsOfEachModel(filefolder,name): # 读取当前模型在所有气象站上的测试结果并保存到表格 113 | # dirlist = [] 114 | # for f in glob.glob('{}/test*.txt'.format(filefolder)): # find all test*.txt files and store their paths into a list 115 | # dirlist.append(f) 116 | # station_num = len(dirlist) 117 | # data = np.zeros((5,station_num)) 118 | # numid = [ele[-7:-4] for ele in dirlist] 119 | # for i,path in enumerate(dirlist): 120 | # with open(path, 'r') as f: 121 | # line = f.readlines() 122 | # line = line[0].strip('\n').split(' ') 123 | # data[:,i] = [float(line[2 * j + 1]) for j in range(5)] 124 | # if i==0: 125 | # label_list = [line[2 * j][:-1] for j in range(5)] 126 | # 127 | # try: 128 | # rb = xlrd.open_workbook('test_data_of_each_station.xls', formatting_info=True) 129 | # workbook = xl_copy(rb) # make a copy of it 130 | # except: 131 | # 132 | # workbook = xlwt.Workbook(encoding='utf-8') 133 | # try: 134 | # booksheet = workbook.add_sheet('%s'%name,cell_overwrite_ok=True) 135 | # except: 136 | # print('%s already exists'%name) 137 | # return 138 | # booksheet.write(0,0,'station id') 139 | # for j in range(station_num): 140 | # booksheet.write(0,j+1,int(numid[j])) 141 | # 142 | # for i in range(5): 143 | # booksheet.write(i+1,0,label_list[i]) 144 | # 145 | # for i in range(5): 146 | # for j in range(station_num): 147 | # booksheet.write(i+1,j+1,data[i,j]) 148 | # 149 | # booksheet.write(0,station_num+1,'mean_value') 150 | # for i in range(5): 151 | 
# booksheet.write(i+1,station_num+1,data[i,:].mean()) 152 | # 153 | # booksheet.write(0,station_num+2,'mean_value_after_eliminating_2_worst') 154 | # for i in range(5): 155 | # if i == 3: 156 | # booksheet.write(i+1,station_num+2,-np.sort(-data[i,:])[:-2].mean()) 157 | # else: 158 | # booksheet.write(i+1,station_num+2,np.sort(data[i,:])[:-2].mean()) 159 | # 160 | # workbook.save('test_data_of_each_station.xls') 161 | 162 | 163 | 164 | def plotBestResults(): # 读取表格,把每个模型的最好表现对比绘图,并保存到output文件夹 165 | rb = xlrd.open_workbook('test_data_of_each_station.xls', formatting_info=True) 166 | sheetNames = rb.sheet_names() #获取所有sheet的名字,sheetNames为list类型 167 | a,b,c,d,e = [], [], [], [], [] 168 | 169 | model_name_list = [] 170 | for sheet in sheetNames: 171 | table = rb.sheet_by_name(sheet) 172 | a.append(table.row_values(1)[-1]) 173 | b.append(table.row_values(2)[-1]) 174 | c.append(table.row_values(3)[-1]) 175 | d.append(table.row_values(4)[-1]) 176 | e.append(table.row_values(5)[-1]) 177 | model_name_list.append(table.row_values(1)[0].split('_')[0]) 178 | xmark = model_name_list 179 | num = len(xmark) 180 | plt.figure();plt.plot(a,'o-',c='salmon', label = 'rmse');plt.legend();plt.xticks(range(num),xmark,rotation=45);plt.grid(); 181 | plt.savefig('output/rmse.png',bbox_inches='tight',dpi = 500);plt.show() 182 | plt.figure();plt.plot(b,'*-',c='limegreen', label = 'mae');plt.legend();plt.xticks(range(num),xmark,rotation=45); 183 | plt.plot(c, 'v-',c='blue',label = 'mdae');plt.legend();plt.xticks(range(num),xmark,rotation=45); 184 | plt.plot(d, '^-',c='r',label = 'r2_score');plt.legend();plt.xticks(range(num),xmark,rotation=45); 185 | plt.plot(e,'D-',c='darkorchid', label = 'var');plt.legend();plt.xticks(range(num),xmark,rotation=45);plt.grid(); 186 | plt.savefig('output/the_other_4_merics.png',bbox_inches='tight',dpi = 500) 187 | plt.show() 188 | 189 | 190 | 191 | import joblib 192 | import torch 193 | from torch import Tensor 194 | from torch.autograd import Variable 195 | def plotPredictionAndTruth(path, model_name, model_folder, num,test_x, test_y): # 模型预测与真实可视化 196 | if model_name.split('.')[-1] =='m': 197 | clf = joblib.load(path) 198 | y_pre_temp = clf.predict(test_x) 199 | else: 200 | if model_name[:3]=='mlp': 201 | clf = torch.load(path,map_location=torch.device('cpu')) 202 | x_tensor = Tensor(test_x) 203 | x_tensor = Variable(x_tensor) 204 | y_pre_temp = clf(x_tensor).detach().numpy() 205 | else: 206 | clf = torch.load(path,map_location=torch.device('cpu')) 207 | x_tensor = Tensor(test_x) 208 | x_tensor = Variable(x_tensor).view(-1,7,3) 209 | y_pre_temp = clf(x_tensor).detach().numpy() 210 | 211 | rmse, mae, mdae, r2, var = evaluation(test_y, y_pre_temp) 212 | y_pre_temp = y_pre_temp.reshape(1,-1) 213 | test_y = test_y.reshape(1,-1) 214 | # 绘图 215 | plt.figure() 216 | plt.plot(test_y.squeeze() ,c='r', label = 'true value') 217 | plt.plot(y_pre_temp.squeeze() ,c='b', alpha = 0.5, label = 'prediction value') 218 | y_max = max(np.append(test_y,y_pre_temp)) 219 | dy = 2 220 | plt.text(0, y_max, "model:%s"%model_folder, size = 10,color = "black", style = "italic", weight = "light") 221 | plt.text(0, y_max- dy, "station:%d"%num, size = 10,color = "black", style = "italic", weight = "light") 222 | plt.text(0, y_max-2*dy, "rmse:%.3f"%rmse, size = 10,color = "black", style = "italic", weight = "light") 223 | plt.text(0, y_max-3 * dy, "mae:%.3f"%mae, size = 10,color = "black", style = "italic", weight = "light") 224 | plt.text(0, y_max-4 * dy, "mdae:%.3f"%mdae, size = 10,color = "black", style = 
"italic", weight = "light") 225 | plt.text(0, y_max-5 * dy, "r2:%.3f"%r2, size = 10,color = "black", style = "italic", weight = "light") 226 | plt.text(0, y_max-6 * dy, "var:%.3f"%var, size = 10,color = "black", style = "italic", weight = "light") 227 | 228 | plt.legend() 229 | plt.savefig('output/visualization_%s.png'%model_name.split('.')[0],bbox_inches='tight',dpi = 500) 230 | plt.show() 231 | 232 | 233 | def netModelFindBest(filepath): 234 | name = filepath.split('/')[-1] 235 | txt_list = glob.glob('{}/train_371*.txt'.format(filepath)) 236 | num = len(txt_list) 237 | min_val_loss_list = [] 238 | hyp_list = [] 239 | for i in range(num): 240 | # hyp_list = txt_list[i].split('\\')[-1].split('_')[2:] 241 | # hyp_list[-1] = hyp_list[-1].split('.txt')[0] 242 | # hyp_list = [float(ele) for ele in hyp_list] 243 | hyp = txt_list[i].split('\\')[-1].strip('.txt')[9:] 244 | f = open(txt_list[i],'r') 245 | line_all = f.readlines() 246 | min_val_loss = 100 247 | for line in line_all: 248 | 249 | temp_val_loss = float(line_all[0].strip('\n').split()[-1]) 250 | if temp_val_loss < min_val_loss: 251 | min_val_loss = temp_val_loss 252 | min_val_loss_list.append(min_val_loss) 253 | hyp_list.append(hyp) 254 | 255 | plt.figure() 256 | plt.plot(min_val_loss_list, 'v-',c='blue',alpha = 0.5,label = 'val_loss') 257 | plt.legend() 258 | plt.xticks(range(num),hyp_list,rotation=45) 259 | plt.grid() 260 | y_min = np.min(min_val_loss_list) 261 | x_min = np.argmin(min_val_loss_list) 262 | plt.text(x_min,y_min,"min",fontdict={'size':'8','color':'b'}) 263 | plt.savefig('output/find_best_hyp_%s.png'%name,bbox_inches='tight',dpi = 500) 264 | plt.show() 265 | 266 | 267 | 268 | 269 | 270 | if __name__ == '__main__': 271 | pathnow = os.getcwd() 272 | 273 | # .m模型模型的寻优过程,图保存在output文件夹里 274 | # filepath = '{}/SVR_poly/train_371_C.txt'.format(pathnow) 275 | # filepath2 = '{}/SVR_poly/train_371_degree.txt'.format(pathnow) 276 | # filepath3 = '{}/SVR_poly/train_371_gamma.txt'.format(pathnow) 277 | # filepath4 = '{}/GBRT/train_371_depth.txt'.format(pathnow) 278 | # filepath5 = '{}/GBRT/train_371_est.txt'.format(pathnow) 279 | # filepath6 = '{}/GBRT/train_371_lr.txt'.format(pathnow) 280 | # filepath7 = '{}/XGB/train_371_depth.txt'.format(pathnow) 281 | # filepath8 = '{}/XGB/train_371_est.txt'.format(pathnow) 282 | # filepath9 = '{}/XGB/train_371_lr.txt'.format(pathnow) 283 | # filepath10 = '{}/SVR_sigmoid/train_371_C.txt'.format(pathnow) 284 | # filepath11 = '{}/SVR_sigmoid/train_371_gamma.txt'.format(pathnow) 285 | # filepath12 = '{}/SVR_rbf/train_371_C.txt'.format(pathnow) 286 | # filepath13 = '{}/SVR_rbf/train_371_gamma.txt'.format(pathnow) 287 | # readandplot(filepath,save_name = 'SVR_poly_train_371_C',title = None) 288 | # readandplot(filepath2,save_name = 'SVR_poly_train_371_degree') 289 | # readandplot(filepath3,save_name = 'SVR_poly_train_371_gamma') 290 | # readandplot(filepath4,save_name = 'GBRT_train_371_depth') 291 | # readandplot(filepath5,save_name = 'GBRT_train_371_est') 292 | # readandplot(filepath6,save_name = 'GBRT_train_371_lr') 293 | # readandplot(filepath7,save_name = 'XGB_train_371_depth') 294 | # readandplot(filepath8,save_name = 'XGB_train_371_est') 295 | # readandplot(filepath9,save_name = 'XGB_train_371_lr') 296 | # readandplot(filepath10,save_name = 'SVR_sigmoid_train_371_C') 297 | # readandplot(filepath11,save_name = 'SVR_sigmoid_train_371_gamma') 298 | # readandplot(filepath12,save_name = 'SVR_rbf_train_371_C') 299 | # readandplot(filepath13,save_name = 'SVR_rbf_train_371_gamma') 300 | 301 | 302 | 303 
| 304 | # 读取每个模型在所有气象站上的测试结果并保存到表格, 305 | # 再读取表格,把每个模型的最好表现对比绘图,并保存到output文件夹 306 | # name_list = ['ARIMA','ATT_SEQ2SEQ','GBRT','LSTM','MLP','SEQ2SEQ','SVR_rbf','XGB'] 307 | # for name in name_list: 308 | # filefolder = '{}/{}'.format(pathnow,name) 309 | # processTestResultsOfEachModel(filefolder,name) 310 | # plotBestResults() 311 | 312 | 313 | 314 | 315 | # 目标值与预测值可视化 316 | # model_folder = 'MLP' 317 | 318 | # path_2 = 'train_test_hour' 319 | 320 | # from sklearn.preprocessing import * 321 | # path = pathnow 322 | # num_list = [312,313,314,315,316,371,372,373,374,393,394,396] 323 | # for num in num_list: 324 | # try: 325 | # path_to_model = glob.glob('{}/{}/*{}*.m'.format(pathnow,model_folder,num)) [0] 326 | # except: 327 | # path_to_model = glob.glob('{}/{}/*{}*.pth'.format(pathnow,model_folder,num))[0] 328 | # model_name = path_to_model.split('\\')[-1] 329 | # test_feature = np.loadtxt("{}/{}/test{}.txt".format(path,path_2, num)).astype(np.float32) 330 | # test_rain = np.loadtxt("{}/{}/test_label{}.txt".format(path,path_2, num)).astype(np.float32) 331 | # test_feature = scale(test_feature, axis=0) 332 | # test_rain = np.reshape(test_rain, (-1, 1)) 333 | # plotPredictionAndTruth(path_to_model, model_name, model_folder, num, test_feature, test_rain) 334 | 335 | 336 | 337 | 338 | # .pth文件的寻优过程 339 | # filepath = '{}/MLP'.format(pathnow) 340 | # netModelFindBest(filepath) 341 | 342 | 343 | 344 | 345 | # ATT_SEQ2SEQ的可视化 346 | # num = 312 347 | # path_2 = 'train_test_hour' 348 | # test_feature = np.loadtxt("{}/{}/test{}.txt".format(pathnow,path_2, num)).astype(np.float32) 349 | # test_rain = np.loadtxt("{}/{}/test_label{}.txt".format(pathnow,path_2, num)).astype(np.float32) 350 | # from sklearn.preprocessing import * 351 | # test_feature = scale(test_feature, axis=0) 352 | # test_rain = np.reshape(test_rain, (-1, 1)) 353 | 354 | # model = torch.load('{}\ATT_SEQ2SEQ\seq2seq_312_128_0.1_0.01.pth'.format(pathnow),map_location=torch.device('cpu')) 355 | # x_tensor = Tensor(test_feature) 356 | # # x_tensor = Variable(x_tensor).view(-1,7,3) 357 | # x_tensor = Variable(x_tensor).view(-1,3,7) 358 | # y_seq2seq,y_attention = model(x_tensor) 359 | 360 | 361 | 362 | 363 | 364 | # plt.figure() 365 | # plt.plot(test_rain.squeeze() ,c='r', label = 'true value') 366 | # plt.plot(y_seq2seq.squeeze() ,c='b', alpha = 0.5, label = 'prediction value') 367 | # y_max = max(np.append(test_rain,y_seq2seq.detach().numpy())) 368 | # dy = 2 369 | # plt.text(0, y_max, "model:%s"%model_folder, size = 10,color = "black", style = "italic", weight = "light") 370 | # plt.text(0, y_max- dy, "station:%d"%num, size = 10,color = "black", style = "italic", weight = "light") 371 | # plt.text(0, y_max-2*dy, "rmse:%.3f"%rmse, size = 10,color = "black", style = "italic", weight = "light") 372 | # plt.text(0, y_max-3 * dy, "mae:%.3f"%mae, size = 10,color = "black", style = "italic", weight = "light") 373 | # plt.text(0, y_max-4 * dy, "mdae:%.3f"%mdae, size = 10,color = "black", style = "italic", weight = "light") 374 | # plt.text(0, y_max-5 * dy, "r2:%.3f"%r2, size = 10,color = "black", style = "italic", weight = "light") 375 | # plt.text(0, y_max-6 * dy, "var:%.3f"%var, size = 10,color = "black", style = "italic", weight = "light") 376 | 377 | # plt.legend() 378 | # plt.savefig('output/visualization_%s.png'%model_name.split('.')[0],bbox_inches='tight',dpi = 500) 379 | # plt.show() 380 | 381 | 382 | 383 | # fig =plt.figure() 384 | # ax = fig.add_subplot(111) 385 | # cax = ax.matshow(y_attention[:5,:], cmap='bone') 386 | # 
fig.colorbar(cax) 387 | # plt.show() 388 | 389 | # path ='./ATT_SEQ2SEQ' 390 | # f = open('{}/test_ATT_SEQ2SEQ_{}.txt'.format(path,num), 'w+') 391 | # f.write('ATT_SEQ2SEQ_rmse: %r ' % rmse + 392 | # 'ATT_SEQ2SEQ_mae: %r ' % mae + 393 | # 'ATT_SEQ2SEQ_mdae: %r ' % mdae + 394 | # 'ATT_SEQ2SEQ_r2: %r ' % r2 + 395 | # 'ATT_SEQ2SEQ_var: %r ' % var) 396 | # f.close() 397 | 398 | 399 | --------------------------------------------------------------------------------